├── .Rbuildignore
├── .github
    ├── .gitignore
    └── workflows
    │   ├── R-CMD-check.yaml
    │   └── test-coverage.yaml
├── DESCRIPTION
├── LICENSE
├── NAMESPACE
├── R
    └── call.R
├── README.md
├── cleanup
├── configure
├── inst
    └── NEWS.Rd
├── man
    ├── charToFact.Rd
    ├── count.Rd
    ├── fpos.Rd
    ├── funique.Rd
    ├── iif.Rd
    ├── nif.Rd
    ├── psort.Rd
    ├── psum.Rd
    ├── setlevels.Rd
    ├── shareData.Rd
    ├── topn.Rd
    └── vswitch.Rd
├── src
    ├── Makevars.in
    ├── Makevars.win
    ├── dup.c
    ├── dupLen.c
    ├── fpos.c
    ├── iif.c
    ├── init.c
    ├── kit.h
    ├── nswitch.c
    ├── psort.c
    ├── psum.c
    ├── share.c
    ├── topn.c
    ├── utils.c
    └── vswitch.c
└── tests
    ├── test_kit.R
    └── test_kit.Rout.save


/.Rbuildignore:
--------------------------------------------------------------------------------
1 | ^\.github$
2 | ^\.appveyor\.yml$
3 | ^README\.md$
4 | LICENSE
5 | 


--------------------------------------------------------------------------------
/.github/.gitignore:
--------------------------------------------------------------------------------
1 | *.html


--------------------------------------------------------------------------------
/.github/workflows/R-CMD-check.yaml:
--------------------------------------------------------------------------------
 1 | # For help debugging build failures open an issue on the RStudio community with the 'github-actions' tag.
 2 | # https://community.rstudio.com/new-topic?category=Package%20development&tags=github-actions
 3 | on:
 4 |   push:
 5 |     branches: [main, master]
 6 |   pull_request:
 7 |     branches: [main, master]
 8 | 
 9 | name: R-CMD-check.yaml
10 | 
11 | permissions: read-all
12 | 
13 | jobs:
14 |   R-CMD-check:
15 |     runs-on: ${{ matrix.config.os }}
16 | 
17 |     name: ${{ matrix.config.os }} (${{ matrix.config.r }})
18 | 
19 |     strategy:
20 |       fail-fast: false
21 |       matrix:
22 |         config:
23 |           - {os: macos-latest,   r: 'release'}
24 |           - {os: windows-latest, r: 'release'}
25 |           - {os: windows-latest, r: 'oldrel-4'}
26 |           - {os: ubuntu-latest,  r: 'devel', http-user-agent: 'release'}
27 |           - {os: ubuntu-latest,  r: 'release'}
28 |           - {os: ubuntu-latest,  r: 'oldrel-1'}
29 |           - {os: ubuntu-latest,  r: 'oldrel-2'}
30 |           - {os: ubuntu-latest,  r: 'oldrel-3'}
31 |           - {os: ubuntu-latest,  r: 'oldrel-4'}
32 | 
33 |     env:
34 |       GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
35 |       R_KEEP_PKG_SOURCE: yes
36 | 
37 |     steps:
38 |       - uses: actions/checkout@v4
39 | 
40 |       - uses: r-lib/actions/setup-pandoc@v2
41 | 
42 |       - uses: r-lib/actions/setup-r@v2
43 |         with:
44 |           r-version: ${{ matrix.config.r }}
45 |           http-user-agent: ${{ matrix.config.http-user-agent }}
46 |           use-public-rspm: true
47 | 
48 |       - uses: r-lib/actions/setup-r-dependencies@v2
49 |         with:
50 |           extra-packages: any::rcmdcheck
51 |           needs: check
52 | 
53 |       - uses: r-lib/actions/check-r-package@v2
54 |         with:
55 |           upload-snapshots: true
56 |           build_args: 'c("--no-manual")'
57 | 


--------------------------------------------------------------------------------
/.github/workflows/test-coverage.yaml:
--------------------------------------------------------------------------------
 1 | on:
 2 |   push:
 3 |     branches:
 4 |       - main
 5 |       - master
 6 |   pull_request:
 7 |     branches:
 8 |       - main
 9 |       - master
10 | 
11 | name: test-coverage
12 | 
13 | jobs:
14 |   test-coverage:
15 |     runs-on: windows-latest
16 |     env:
17 |       GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
18 |     steps:
19 |       - uses: actions/checkout@v4
20 | 
21 |       - uses: r-lib/actions/setup-r@v2
22 | 
23 |       - uses: r-lib/actions/setup-pandoc@v1
24 | 
25 |       - name: Query dependencies
26 |         run: |
27 |           install.packages('remotes')
28 |           saveRDS(remotes::dev_package_deps(dependencies = TRUE), ".github/depends.Rds", version = 2)
29 |           writeLines(sprintf("R-%i.%i", getRversion()$major, getRversion()$minor), ".github/R-version")
30 |         shell: Rscript {0}
31 | 
32 |       - name: Restore R package cache
33 |         uses: actions/cache@v4
34 |         with:
35 |           path: ${{ env.R_LIBS_USER }}
36 |           key: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1-${{ hashFiles('.github/depends.Rds') }}
37 |           restore-keys: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1-
38 | 
39 |       - name: Install dependencies
40 |         run: |
41 |           install.packages(c("remotes"))
42 |           remotes::install_deps(dependencies = TRUE)
43 |           remotes::install_cran("covr")
44 |         shell: Rscript {0}
45 | 
46 |       - name: Test coverage
47 |         run: covr::codecov()
48 |         shell: Rscript {0}
49 | 


--------------------------------------------------------------------------------
/DESCRIPTION:
--------------------------------------------------------------------------------
 1 | Package: kit
 2 | Type: Package
 3 | Title: Data Manipulation Functions Implemented in C
 4 | Version: 0.0.20
 5 | Date: 2025-04-17
 6 | Authors@R: c(person("Morgan", "Jacob", role = c("aut", "cre", "cph"), email = "morgan.emailbox@gmail.com"), 
 7 |              person("Sebastian", "Krantz", role = "ctb"))
 8 | Author: Morgan Jacob [aut, cre, cph], Sebastian Krantz [ctb]
 9 | Maintainer: Morgan Jacob <morgan.emailbox@gmail.com>
10 | Description: Basic functions, implemented in C, for large data manipulation. Fast vectorised ifelse()/nested if()/switch() functions, psum()/pprod() functions equivalent to pmin()/pmax() plus others which are missing from base R. Most of these functions are callable at C level.
11 | License: GPL-3
12 | Depends: R (>= 3.1.0)
13 | Encoding: UTF-8
14 | BugReports: https://github.com/2005m/kit/issues
15 | NeedsCompilation: yes
16 | ByteCompile: TRUE
17 | Repository: CRAN
18 | 


--------------------------------------------------------------------------------
/NAMESPACE:
--------------------------------------------------------------------------------
1 | useDynLib(kit, .registration = TRUE)
2 | 
3 | export(
4 |   charToFact, count, countNA, countOccur, fduplicated, fpos, funique, iif, nif, nswitch,
5 |   pall, pallNA, pallv, pany, panyNA, panyv, pcount, pcountNA, pfirst, plast, pmean, pprod, psum, setlevels, topn, uniqLen, vswitch, psort,
6 |   getData, shareData, clearData
7 | )
8 | 


--------------------------------------------------------------------------------
/R/call.R:
--------------------------------------------------------------------------------
 1 | # Function calls
 2 | charToFact  = function(x, decreasing=FALSE, addNA=TRUE, nThread=getOption("kit.nThread")) .Call(CcharToFactR, x, decreasing, nThread, NA, parent.frame(), addNA)
 3 | clearData   = function(x, verbose=FALSE) .Call("CclearMappingObjectR", x, verbose)
 4 | count       = function(x, value) .Call(CcountR, x, value)
 5 | countNA     = function(x) .Call(CcountNAR, x)
 6 | countOccur  = function(x) .Call(CcountOccurR, x)
 7 | fduplicated = function(x, fromLast = FALSE) .Call(CdupR, x, FALSE, fromLast)
 8 | fpos        = function(needle, haystack, all=TRUE, overlap=TRUE) .Call(CfposR, needle, haystack, all, overlap)
 9 | funique     = function(x, fromLast = FALSE) .Call(CdupR, x, TRUE, fromLast)
10 | iif         = function(test, yes, no, na=NULL, tprom=FALSE, nThread=getOption("kit.nThread")) .Call(CiifR, test, yes, no, na, tprom, nThread)
11 | nif         = function(..., default=NULL) .Call(CnifR, default, parent.frame(), as.list(substitute(...())))
12 | nswitch     = function(x, ..., default=NULL, nThread=getOption("kit.nThread"), checkEnc = TRUE) .Call(CnswitchR, x, default, nThread, checkEnc, list(...))
13 | pall        = function(..., na.rm=FALSE) .Call(CpallR, na.rm, if (...length() == 1L && is.list(..1)) ..1 else list(...))
14 | pallNA      = function(...) { x = if (...length() == 1L && is.list(..1)) unclass(..1) else list(...); .Call(CpcountNAR, x) == length(x) }
15 | pallv       = function(..., value) { x = if (...length() == 1L && is.list(..1)) unclass(..1) else list(...); pcount(..., value = value) == length(x) }                                                                     
16 | pany        = function(..., na.rm=FALSE) .Call(CpanyR, na.rm, if (...length() == 1L && is.list(..1)) ..1 else list(...))
17 | panyNA      = function(...) as.logical(pcountNA(...))
18 | panyv       = function(..., value) as.logical(pcount(..., value = value))
19 | pcountNA    = function(...) .Call(CpcountNAR, if (...length() == 1L && is.list(..1)) ..1 else list(...))
20 | pfirst      = function(...) .Call(CpfirstR, FALSE, if (...length() == 1L && is.list(..1)) ..1 else list(...))
21 | plast       = function(...) .Call(CpfirstR, TRUE, if (...length() == 1L && is.list(..1)) ..1 else list(...))
22 | pmean       = function(..., na.rm=FALSE) .Call(CpmeanR, na.rm, if (...length() == 1L && is.list(..1)) ..1 else list(...))
23 | pprod       = function(..., na.rm=FALSE) .Call(CpprodR, na.rm, if (...length() == 1L && is.list(..1)) ..1 else list(...))
24 | psum        = function(..., na.rm=FALSE) .Call(CpsumR,  na.rm, if (...length() == 1L && is.list(..1)) ..1 else list(...))
25 | setlevels   = function(x, old = levels(x), new, skip_absent=FALSE) invisible(.Call(CsetlevelsR, x, old, new, skip_absent))
26 | topn        = function(vec, n=6L, decreasing=TRUE, hasna=TRUE,index=TRUE) if(index) .Call(CtopnR, vec, n, decreasing, hasna, parent.frame()) else vec[.Call(CtopnR, vec, n, decreasing, hasna, parent.frame())]
27 | uniqLen     = function(x) .Call(CdupLenR, x)
28 | vswitch     = function(x, values, outputs, default=NULL, nThread=getOption("kit.nThread"), checkEnc = TRUE) .Call(CvswitchR, x, values, outputs, default, nThread, checkEnc)
29 | 
30 | .onUnload   = function(libpath) library.dynam.unload("kit", libpath) #nocov
31 | 
32 | .onAttach   = function(libname, pkgname) { #nocov
33 |   omp = if(.Call(CompEnabledR)) "enabled" else "disabled" #nocov
34 |   nth = getOption("kit.nThread") #nocov
35 |   thd = if (nth > 1L) " threads)" else " thread)" #nocov
36 |   packageStartupMessage(paste0("Attaching kit 0.0.20 (OPENMP ",omp," using ",nth,thd)) #nocov
37 | } #nocov
38 | 
39 | .onLoad     = function(libname, pkgname) { #nocov
40 |   if (!("kit.nThread" %in% names(options()))) { #nocov
41 |     options("kit.nThread"=1L) #nocov
42 |   } #nocov
43 | } #nocov
44 | 
45 | pcount      = function(..., value) {
46 |   if(is.na(value[1])) {
47 |     .Call(CpcountNAR, if (...length() == 1L && is.list(..1)) ..1 else list(...))
48 |   } else {
49 |     .Call(CpcountR, value, if (...length() == 1L && is.list(..1)) ..1 else list(...))
50 |   }
51 | }
52 | 
53 | psort = function(x, decreasing = FALSE, na.last = NA, nThread=getOption("kit.nThread"), c.locale = TRUE) {
54 |   if (typeof(x) == "character") {
55 |     return(.Call(CcpsortR, x, decreasing, nThread, na.last,parent.frame(), FALSE, c.locale))
56 |   }
57 |   warning("Function 'psort' was only implemented for character vectors. Defaulting to base::sort.")
58 |   sort(x, decreasing = decreasing, na.last = na.last,method = if(c.locale) "radix" else "quick")
59 | }
60 | 
61 | shareData = function(data, map_name, verbose=FALSE) {
62 |   conn = rawConnection(raw(0L), "w")
63 |   serialize(data, conn)
64 |   seek(conn, 0L)
65 |   if (grepl('SunOS',Sys.info()['sysname'])) map_name = paste0("/",map_name)
66 |   x = .Call(
67 |     "CcreateMappingObjectR", map_name, paste0(map_name,"_key"),
68 |     rawConnectionValue(conn), verbose
69 |   )
70 |   close(conn)
71 |   x
72 | }
73 | 
74 | getData = function(map_name, verbose=FALSE) {
75 |   if (grepl('SunOS',Sys.info()['sysname'])) map_name = paste0("/",map_name)
76 |   output = .Call("CgetMappingObjectR", map_name, paste0(map_name,"_key"), verbose)
77 |   conn = rawConnection(output,"r")
78 |   obj = unserialize(conn)
79 |   close(conn)
80 |   obj
81 | }
82 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # kit
 2 | R Package: Basic functions implemented in C (and for some missing from base R)
 3 | 
 4 | [![CRAN](https://www.r-pkg.org/badges/version-last-release/kit?color=blue)](https://cran.r-project.org/package=kit)
 5 | [![CRAN](https://badges.cranchecks.info/flavor/release/kit.svg)](https://cran.r-project.org/web/checks/check_results_kit.html)
 6 | [![License: GPL v3](https://img.shields.io/github/license/2005m/kit)](https://www.gnu.org/licenses/gpl-3.0)
 7 | [![R-CMD-check](https://github.com/2005m/kit/workflows/R-CMD-check/badge.svg)](https://github.com/2005m/kit/actions)
 8 | [![Coverage Status](https://codecov.io/gh/2005m/kit/graph/badge.svg)](https://codecov.io/github/2005m/kit?branch=master)
 9 | [![downloads](https://cranlogs.r-pkg.org/badges/kit)](https://www.r-pkg.org/pkg/kit)
10 | [![kit status badge](https://fastverse.r-universe.dev/badges/kit)](https://fastverse.r-universe.dev)
11 | 


--------------------------------------------------------------------------------
/cleanup:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | rm -f src/Makevars
3 | 


--------------------------------------------------------------------------------
/configure:
--------------------------------------------------------------------------------
 1 | #!/bin/sh
 2 | CC=`${R_HOME}/bin/R CMD config CC`
 3 | CFLAGS=`${R_HOME}/bin/R CMD config CFLAGS`
 4 | 
 5 | printf "#include <omp.h>\nint main () { return omp_get_num_threads(); }" | ${CC} ${CFLAGS} -fopenmp -xc ->/dev/null 2>&1 || R_NO_OPENMP=1;
 6 | rm a.out >/dev/null 2>&1
 7 | 
 8 | if [ "$R_NO_OPENMP" = "1" ]; then
 9 |   printf "#include <omp.h>\nint main () { return omp_get_num_threads(); }" | ${CC} ${CFLAGS} -Xpreprocessor -fopenmp -lomp -xc ->/dev/null 2>&1 || R_NO_OPENMP=2;
10 |   rm a.out >/dev/null 2>&1
11 |   if [ "$R_NO_OPENMP" = "2" ]; then
12 |     if [ "`uname`" = "Linux" ] ;then
13 |       echo "*** On Linux and OpenMP not supported!"
14 |       sed -e "s|@openmp_cflags@||g; s|@openmp_libslags@||g; s|@realtime_libslags@|-lrt|g;" src/Makevars.in > src/Makevars
15 |     else
16 |       echo "*** Not on Linux and OpenMP not supported!"
17 |       sed -e "s|@openmp_cflags@||g; s|@openmp_libslags@||g; s|@realtime_libslags@||g;" src/Makevars.in > src/Makevars
18 |     fi
19 |   else
20 |     echo "*** OpenMP supported (-Xpreprocessor)!"
21 |     sed -e "s|@openmp_cflags@|-Xpreprocessor -fopenmp|g; s|@openmp_libslags@|-lomp|g; s|@realtime_libslags@||g;" src/Makevars.in > src/Makevars
22 |   fi
23 | else
24 |   if [ "`uname`" = "Darwin" ] ;then
25 |       if gcc --version | grep -i "gcc" > /dev/null; then
26 |         echo "*** On Darwin and GCC OpenMP supported!"
27 |         sed -e "s|@openmp_cflags@|-Xpreprocessor -fopenmp|g; s|@openmp_libslags@|-lgomp|g; s|@realtime_libslags@||g;" src/Makevars.in > src/Makevars
28 |       elif clang --version | grep -i "clang" > /dev/null; then
29 |         echo "*** On Darwin and CLANG OpenMP supported!"
30 |         sed -e "s|@openmp_cflags@|-Xpreprocessor -fopenmp|g; s|@openmp_libslags@|-lomp|g; s|@realtime_libslags@||g;" src/Makevars.in > src/Makevars
31 |       fi
32 |   else
33 |     echo "*** On Linux and OpenMP supported!"
34 |     sed -e "s|@openmp_cflags@|\$(SHLIB_OPENMP_CFLAGS)|g; s|@openmp_libslags@|\$(SHLIB_OPENMP_CFLAGS)|g; s|@realtime_libslags@|-lrt|g;" src/Makevars.in > src/Makevars
35 |   fi
36 | fi
37 | 
38 | exit 0
39 | 


--------------------------------------------------------------------------------
/inst/NEWS.Rd:
--------------------------------------------------------------------------------
  1 | \name{NEWS}
  2 | \title{News for \R Package \pkg{kit}}
  3 | \encoding{UTF-8}
  4 | 
  5 | \newcommand{\CRANpkg}{\href{https://CRAN.R-project.org/package=#1}{\pkg{#1}}}
  6 | 
  7 | \section{version 0.0.20 (2025-04-17)}{
  8 |   \subsection{Notes}{
  9 |     \itemize{
 10 |       \item Update copyright date in c files
 11 | 
 12 |       \item Fix note on CRAN regarding Rf_isFrame
 13 |     }
 14 |   }
 15 | }
 16 | 
 17 | \section{version 0.0.19 (2024-09-07)}{
 18 |   \subsection{Bug Fixes}{
 19 |     \itemize{
 20 |       \item Fix multiple warnings in C code.
 21 |     }
 22 |   }
 23 | }
 24 | 
 25 | \section{version 0.0.18 (2024-06-06)}{
 26 |   \subsection{Bug Fixes}{
 27 |     \itemize{
 28 |       \item Fix \code{iif} tests for new version of R.
 29 |     }
 30 |   }
 31 | }
 32 | 
 33 | \section{version 0.0.17 (2024-05-03)}{
 34 |   \subsection{Bug Fixes}{
 35 |     \itemize{
 36 |       \item Fix \code{nswitch}. Thanks to Sebastian Krantz for raising an issue.
 37 |     }
 38 |   }
 39 |   \subsection{Notes}{
 40 |     \itemize{
 41 |       \item Update copyright date in c files
 42 | 
 43 |       \item Fix note on CRAN regarding SETLENGTH
 44 |     }
 45 |   }
 46 | }
 47 | 
 48 | \section{version 0.0.16 (2024-03-01)}{
 49 |   \subsection{Notes}{
 50 |     \itemize{
 51 |      \item Check if \code{"kit.nThread"} is defined before setting it to \code{1L}
 52 |     }
 53 |   }
 54 | }
 55 | 
 56 | \section{version 0.0.15 (2023-10-01)}{
 57 |   \subsection{Notes}{
 58 |     \itemize{
 59 |      \item Correct typo in configure file
 60 |     }
 61 |   }
 62 | }
 63 | 
 64 | \section{version 0.0.14 (2023-08-12)}{
 65 |   \subsection{Notes}{
 66 |     \itemize{
 67 |      \item Update configure file to extend support for GCC
 68 | 
 69 |      \item Correct warnings in NEWS.Rd (strong)
 70 | 
 71 |      \item Correct typo in funique.Rd thanks to @davidbudzynski
 72 |     }
 73 |   }
 74 | }
 75 | 
 76 | \section{version 0.0.13 (2023-02-24)}{
 77 |   \subsection{Notes}{
 78 |     \itemize{
 79 |      \item Function \code{pprod} now returns double output even if inputs are integer - in line with \code{base::prod} - to avoid integer overflows.
 80 | 
 81 |      \item Update configure file
 82 |     }
 83 |   }
 84 | }
 85 | 
 86 | \section{version 0.0.12 (2022-10-26)}{
 87 |   \subsection{New Features}{
 88 |     \itemize{
 89 |       \item Function \code{pcountNA} is equivalent to \code{pcount(..., value = NA)}.
 90 |       
 91 |       \item Function \code{pcountNA} and \code{pcount(..., value = NA)} allow \code{NA} counting with mixed data type (including \code{data.frame}). \code{pcountNA} also supports list-vectors as inputs and counts empty or \code{NULL} elements as \code{NA}.
 92 |       
 93 |       \item Functions \code{panyv}, \code{panyNA}, \code{pallv} and \code{pallNA} are added as efficient wrappers around \code{pcount} and \code{pcountNA}. They are parallel equivalents of scalar functions \code{base::anyNA} and \code{anyv}, \code{allv} and \code{allNA} in the 'collapse' R package.
 94 |       
 95 |       \item Functions \code{pfirst} and \code{plast} are added to efficiently obtain the row-wise first and last non-missing value or non-empty element of lists. They are parallel equivalents to the (column-wise) \code{ffirst} and \code{flast} functions in the 'collapse' R package. Implemented by @SebKrantz.
 96 |       
 97 |       \item Functions \code{psum/pprod/pmean} also support logical vectors as input. Implemented by @SebKrantz.
 98 |     }
 99 |   }
100 |   \subsection{Bug Fixes}{
101 |     \itemize{
102 |       \item Function \code{charToFact} was not returning proper results. Thanks to @alex-raw for raising an issue.
103 |     }
104 |   }
105 |   \subsection{Notes}{
106 |     \itemize{
107 |      \item Function \code{pprod} now returns double output even if inputs are integer - in line with \code{base::prod} - to avoid integer overflows.
108 |     
109 |       \item C compiler warnings on CRAN R-devel caused by compilation with -Wstrict-prototypes are now fixed. Declaration of functions without prototypes is depreciated in all versions of C. Thanks to Sebastian Krantz for the PR.
110 |     }
111 |   }
112 | }
113 | 
114 | \section{version 0.0.11 (2022-03-19)}{
115 |   \subsection{New Features}{
116 |     \itemize{
117 |       \item Function \code{pcount} now supports data.frame.
118 |     }
119 |   }
120 |   \subsection{Bug Fixes}{
121 |     \itemize{
122 |       \item Function \code{pcount} now works with specific NA values, i.e. NA_real_, NA_character_ etc...
123 |     }
124 |   }
125 | }
126 | 
127 | \section{version 0.0.10 (2021-11-28)}{
128 |   \subsection{New Features}{
129 |     \itemize{
130 |       \item Function \code{psum}, \code{pmean}, \code{pprod}, \code{pany} and \code{pall} now support lists. Thanks to Sebastian Krantz for the request and code suggestion.
131 |     }
132 |   }
133 |   \subsection{Bug Fixes}{
134 |     \itemize{
135 |       \item Function \code{topn} should now work for ALTREP object. Thanks to @ben-schwen for raising an issue.
136 |     }
137 |   }
138 | }
139 | 
140 | \section{version 0.0.9 (2021-09-12)}{
141 |   \subsection{Notes}{
142 |     \itemize{
143 |       \item Re-organise header to prevent compilation errors with new version of Clang due to conflicts between R C headers and OpenMP.
144 |     }
145 |   }
146 | }
147 | 
148 | \section{version 0.0.8 (2021-08-21)}{
149 |   \subsection{New Features}{
150 |     \itemize{
151 |       \item Function \code{funique} now preserves the attributes if the input is a 
152 |       \code{data.table}, \code{tibble} or similar objects. Thanks to Sebastian Krantz for the request.
153 |       
154 |       \item Function \code{topn} now defaults to base R \code{order} for large value of \code{n}.
155 |       Please see updated documentation for more information \code{?kit::topn}.
156 |       
157 |       \item Function \code{charToFact} gains a new argument \code{addNA=TRUE} to be used
158 |       to include (or not) \code{NA} in levels of the output.
159 |       
160 |       \item Function \code{shareData}, \code{getData} and \code{clearData} implemented
161 |       to share data objects between \R sessions. These functions are experimental and might change in the future.
162 |       Feedback is welcome. Please see \code{?kit::shareData} for more information.
163 |     }
164 |   }
165 |   \subsection{Notes}{
166 |     \itemize{
167 |       \item Few \code{calloc} functions at C level have been replaced by R C API function 
168 |       \code{Calloc} to avoid valgrind errors/warnings in Travis CI.
169 |       
170 |       \item Errors reported by \code{rchk} on CRAN have been fixed.
171 |     }
172 |   }
173 | }
174 | 
175 | \section{version 0.0.7 (2021-03-07)}{
176 |   \subsection{New Features}{
177 |     \itemize{
178 |       \item Function \code{charToFact} gains a new argument \code{decreasing=FALSE} to be used
179 |       to order levels of the output in decreasing or increasing order.
180 |       
181 |       \item Function \code{topn} gains a new argument \code{index=TRUE} to be used return
182 |       index (\code{TRUE}) or values (\code{FALSE}) of input vector.
183 |     }
184 |   }
185 |   \subsection{Bug Fixes}{
186 |     \itemize{
187 |       \item Some tests of memory access errors using valgrind and AddressSanitizer were reported by CRAN.
188 |       An attempt to fix these errors has been submitted as part of this package version. It also seems that
189 |       these same errors were causing some tests to fail for \code{funique} and \code{psort} on some platforms.
190 |     }
191 |   }
192 |   \subsection{Notes}{
193 |     \itemize{
194 |       \item Functions \code{pmean}, \code{pprod} and \code{psum} will result
195 |       in error if used with factors. Documentation has been updated.
196 |     }
197 |   }
198 | }
199 | 
200 | \section{version 0.0.6 (2021-02-21)}{
201 |   \subsection{New Features}{
202 |     \itemize{
203 |       \item Function \code{funique} and \code{fduplicated} gain an additional argument
204 |       \code{fromLast=FALSE} to indicate whether the search should start from the end or beginning 
205 |       \href{https://github.com/2005m/kit/pull/11}{PR#11}.
206 |       
207 |       \item Functions \code{pall}, \code{pany}, \code{pmean},
208 |       \code{pprod} and \code{psum} accept \code{data.frame} as input
209 |       \href{https://github.com/2005m/kit/pull/15}{PR#15}. Please see documentation for more
210 |       information.
211 |       
212 |       \item Function \code{charToFact} is equivalent to to base R \code{as.factor} but is much
213 |       quicker and only converts character vector to factor. Note that it is parallelised. For more
214 |       details and benchmark please see \code{?kit::charToFact}.
215 |       
216 |       \item Function \code{psort} is experimental and equivalent to to base R \code{sort} 
217 |       but is only for character vector. It can sort by "C locale" or by "R session locale".
218 |       For more details and benchmark please see \code{?kit::psort}.
219 |     }
220 |   }
221 |   \subsection{Notes}{
222 |     \itemize{
223 |       \item A few OpenMP directives were missing for functions \code{vswitch} and
224 |       \code{nswitch} for character vectors. These have been added in
225 |       \href{https://github.com/2005m/kit/pull/12}{PR#12}.
226 |       
227 |       \item Function \code{funique} was not preserving attributes for character, logical and
228 |       complex vectors/data.frames. Thanks to Sebastian Krantz (@SebKrantz) for bringing that to my
229 |       attention. This has been fixed in \href{https://github.com/2005m/kit/pull/13}{PR#13}.
230 |       
231 |       \item Functions \code{funique} and \code{uniqLen} should now be faster for 
232 |       \code{factor} and \code{logical} vectors \href{https://github.com/2005m/kit/pull/14}{PR#14}.
233 |     }
234 |   }
235 | }
236 | 
237 | \section{version 0.0.5 (2020-11-21)}{
238 |   \subsection{New Features}{
239 |     \itemize{
240 |       \item Function \code{uniqLen(x)} is equivalent to base R \code{length(unique(x))} and
241 |       \code{uniqueN} in package \CRANpkg{data.table}. Function \code{uniqLen}, implemented in C, supports
242 |       vectors, \code{data.frame} and \code{matrix}. It should be faster than these functions. For more
243 |       details and benchmark please see \code{?kit::uniqLen}.
244 |       
245 |       \item Function \code{vswitch} now supports mixed encoding and gains an additional argument 
246 |       \code{checkEnc=TRUE}. Thanks to Xianying Tan (@shrektan) for the request and review 
247 |       \href{https://github.com/2005m/kit/pull/7}{PR#7}.
248 |       
249 |       \item Function \code{nswitch} is a nested version of function \code{vswitch} 
250 |       and also supports mixed encoding. Please see please see \code{?kit::nswitch} for further details. 
251 |       Thanks to Xianying Tan (@shrektan) for the request and review \href{https://github.com/2005m/kit/pull/10}{PR#10}.
252 |     }
253 |   }
254 |   \subsection{Notes}{
255 |     \itemize{
256 |       \item Small algorithmic improvement for functions \code{fduplicated}, \code{funique}
257 |       and \code{countOccur} for \code{vectors}, \code{data.frame} and \code{matrix}.
258 |       
259 |       \item A tests folder has been added to the source package to track coverage and bugs.
260 |     }
261 |   }
262 |   \subsection{C-Level Facilities}{
263 |     \itemize{
264 |       \item Function \code{nif} has been split into two distinctive functions at C level,
265 |       one has its arguments evaluated in a lazy way and is for R users and the other one (nifInternalR)
266 |       is not lazy and is intended for usage at C level.
267 |     }
268 |   }
269 | }
270 | 
271 | \section{version 0.0.4 (2020-07-21)}{
272 |   \subsection{New Features}{
273 |     \itemize{
274 |       \item Function \code{countOccur(x)}, implemented in C, is comparable to \code{base}
275 |       \R function \code{table}. It returns a \code{data.frame} and is between 3 to 50 times faster.
276 |       For more details, please see \code{?kit::countOccur}.
277 | 
278 |       \item Functions \code{funique} and \code{fduplicated} now support matrices.
279 |       Additionally, these two functions should also have better performance compare to previous release.
280 | 
281 |       \item Functions \code{topn} has an additional argument \code{hasna=TRUE} to indicates whether
282 |       data contains \code{NA} value or not. If the data does not contain \code{NA} values, the function
283 |       should be faster.
284 |     }
285 |   }
286 |   \subsection{C-Level Facilities}{
287 |     \itemize{
288 |       \item A few C functions have been added to subset \code{data.frame} and \code{matrix} as well as
289 | 	  do other operations. These functions are not exported or visible to the user but might become
290 | 	  available and callable at C level in the future.
291 |     }
292 |   }
293 |   \subsection{Bug Fixes}{
294 |     \itemize{
295 |       \item Function \code{fpos} was not properly handling \code{NaN} and \code{NA} for complex 
296 |       and double. This should now be fixed. The function has also been changed in case the 'needle' and
297 |       'haysatck' are vectors so that a vector is returned.
298 | 
299 |       \item Functions \code{funique} and \code{fduplicated} were not properly handling
300 |       data containing \code{POSIX} data. This has now been fixed.
301 |     }
302 |   }
303 | }
304 | 
305 | \section{version 0.0.3 (2020-06-21)}{
306 |   \subsection{New Features}{
307 |     \itemize{
308 |       \item Functions \code{fduplicated(x)} and \code{funique(x)}, implemented in C, 
309 |       are comparable to \code{base} \R functions \code{duplicated} and \code{unique}. For more details, 
310 |       please see \code{?kit::funique}.
311 | 
312 |       \item Functions \code{psum} and \code{pprod} have now better performance for 
313 |       type double and complex.
314 |     }
315 |   }
316 |   \subsection{Bug Fixes}{
317 |     \itemize{
318 |       \item Function \code{count(x, y)} now checks that \code{x} and \code{y} have the same class and
319 |       levels. So does \code{pcount}.
320 | 
321 |       \item Function \code{pmean} was not callable at C level because of a typo. This is now fixed.
322 |     }
323 |   }
324 | }
325 | 
326 | \section{version 0.0.2 (2020-05-22)}{
327 |   \subsection{New Features}{
328 |     \itemize{
329 |       \item Function \code{count(x, value)}, implemented in C, to simply count the number of times 
330 |       an element \code{value} occurs in a vector or in a list \code{x}. For more details, please see
331 |       \code{?kit::count}.
332 | 
333 |       \item Function \code{pmean(..., na.rm=FALSE)}, \code{pall(..., na.rm=FALSE)}, 
334 |       \code{pany(..., na.rm=FALSE)} and \code{pcount(..., value)}, implemented in C, 
335 |       are similar to already available function \code{psum} and \code{pprod}. These 
336 |       functions respectively apply base \R functions \code{mean}, \code{all} and \code{any} element-wise. 
337 |       For more details, benchmarks and help, please see \code{?kit::pmean}.
338 |     }
339 |   }
340 |   \subsection{Bug Fixes}{
341 |     \itemize{
342 |       \item Fix Solaris Unicode warnings for NEWS file. Benchmarks have been moved from the NEWS file to 
343 |       each function Rd file.
344 |       
345 |       \item Fix some \code{NA} edge cases for \code{pprod} and \code{psum} so these 
346 |       functions behave more like base \R function \code{prod} and \code{sum}.
347 | 
348 |       \item Fix installation errors for version of R (<3.5.0).
349 |     }
350 |   }
351 | }
352 | 
353 | \section{version 0.0.1 (2020-05-03)}{
354 |   \subsection{Initial Release}{
355 |     \itemize{
356 |       \item Function \code{fpos(needle, haystack, all=TRUE, overlap=TRUE)}, implemented in C, is
357 |       inspired by base function \code{which} when used in the following form 
358 |       \code{which(x == y, arr.ind =TRUE}). Function \code{fpos} returns the index(es) or position(s)
359 |       of a matrix/vector within a larger matrix/vector. Please see \code{?kit::fpos} for more 
360 |       details.
361 |       
362 |       \item Function \code{iif(test, yes, no, na=NULL, tprom=FALSE, nThread=getOption("kit.nThread"))}, 
363 |       originally contributed as \code{fifelse} in package \CRANpkg{data.table}, was moved to package kit
364 |       to be developed independently. Unlike the current version of \code{fifelse}, \code{iif} allows
365 |       type promotion like base function \code{ifelse}. For further details about the differences 
366 |       with \code{fifelse}, as well as \code{hutils::if_else} and \code{dplyr::if_else}, please see
367 |       \code{?kit::iif}.
368 |       
369 |       \item Function \code{nif(..., default=NULL)}, implemented in C, is inspired by 
370 |       \emph{SQL CASE WHEN}. It is comparable to \CRANpkg{dplyr} function \code{case_when} however it 
371 |       evaluates it arguments in a lazy way (i.e only when needed). Function \code{nif} was 
372 |       originally contributed as function \code{fcase} in the \CRANpkg{data.table} package but then moved
373 |       to package kit so its development may resume independently. Please see \code{?kit::nif} for 
374 |       more details.
375 |       
376 |       \item Function \code{pprod(..., na.rm=FALSE)} and \code{psum(..., na.rm=FALSE)},
377 |       implemented in C, are inspired by base function \code{pmin} and \code{pmax}. These new 
378 |       functions work only for integer, double and complex types and do not recycle vectors. Please 
379 |       see \code{?kit::psum} for more details.
380 |       
381 |       \item Function \code{setlevels(x, old, new, skip_absent=FALSE)}, implemented in C, 
382 |       may be used to set levels of a factor object. Please see \code{?kit::setlevels} for more details. 
383 |       
384 |       \item Function \code{topn(vec, n=6L, decreasing=TRUE)}, implemented in C, returns the top 
385 |       largest or smallest \code{n} values for a given numeric vector \code{vec}. It is inspired by 
386 |       \code{dplyr::top_n} and equivalent to base functions order and sort in specific cases as shown
387 |       in the documentation. Please see \code{?kit::topn} for more details.
388 |       
389 |       \item Function \code{vswitch(x, values, outputs, default=NULL, nThread=getOption("kit.nThread"))}
390 |       , implemented in C, is a vectorised version of \code{base} \R function \code{switch}. This 
391 |       function can also be seen as a particular case of function \code{nif}. Please see 
392 |       \code{?kit::switch} for more details.
393 |     }
394 |   }
395 | }
396 | 


--------------------------------------------------------------------------------
/man/charToFact.Rd:
--------------------------------------------------------------------------------
 1 | \name{charToFact}
 2 | \alias{charToFact}
 3 | \title{Convert Character Vector to Factor}
 4 | \description{
 5 |   Similar to \code{base::as.factor} but much faster and only for converting character vector to factor.
 6 | }
 7 | \usage{
 8 | charToFact(x, decreasing=FALSE, addNA=TRUE,
 9 |            nThread=getOption("kit.nThread"))
10 | }
11 | \arguments{
12 |   \item{x}{ A vector of type character}
13 |   \item{decreasing}{ A boolean. Whether to order levels in decreasing order or not. Default is \code{FALSE}.}
14 |   \item{addNA}{ A boolean. Whether to include \code{NA} in levels of the output or not. Default is \code{TRUE}.}
15 |   \item{nThread}{ Number of thread to use.}
16 | }
17 | \value{
18 |   The character vector input as a factor. Please note that, unlike \code{as.factor}, \code{NA} levels are preserved by default, however this can be changed by setting argument \code{addNA} to \code{FALSE}.
19 | }
20 | \examples{
21 | x = c("b","A","B","a","\xe4","a")
22 | Encoding(x) = "latin1"
23 | identical(charToFact(x), as.factor(x))
24 | identical(charToFact(c("a","b",NA,"a")), addNA(as.factor(c("a","b",NA,"a"))))
25 | identical(charToFact(c("a","b",NA,"a"), addNA=FALSE), as.factor(c("a","b",NA,"a")))
26 | 
27 | # Benchmarks
28 | # ----------
29 | # x = sample(letters,3e7,TRUE)
30 | # microbenchmark::microbenchmark(
31 | #   kit=kit::charToFact(x,nThread = 1L),
32 | #   base=as.factor(x),
33 | #   times = 5L
34 | # )
35 | # Unit: milliseconds
36 | # expr  min   lq   mean  median   uq  max neval
37 | # kit   188  190    196     194  200  208     5
38 | # base 1402 1403   1455    1414 1420 1637     5
39 | }
40 | 


--------------------------------------------------------------------------------
/man/count.Rd:
--------------------------------------------------------------------------------
 1 | \name{count}
 2 | \alias{count}
 3 | \alias{countNA}
 4 | \alias{countOccur}
 5 | \title{ count, countNA and countOccur}
 6 | \description{
 7 | Simple functions to count the number of times an element occurs.
 8 | }
 9 | \usage{
10 |   count(x, value)
11 |   countNA(x)
12 |   countOccur(x)
13 | }
14 | \arguments{
15 |   \item{x}{ A vector or list for \code{countNA}. A vector for \code{count} and a vector or \code{data.frame} for \code{countOccur}.}
16 |   \item{value}{ An element to look for. Must be non \code{NULL}, of length 1 and same type as \code{x}.}
17 | }
18 | \value{
19 | For a vector \code{countNA} will return the total number of \code{NA} value. For a list, \code{countNA} will return a list with the number of \code{NA} in each item of the list.
20 | This is a major difference with \code{sum(is.na(x))} which will return the aggregated number of \code{NA}.
21 | Also, please note that every item of a list can be of different type and \code{countNA} will take them into account whether they are of type logical (\code{NA}), integer (\code{NA_integer_}), double (\code{NA_real_}), complex (\code{NA_complex_}) or character (\code{NA_character_}).
22 | As opposed to \code{countNA}, \code{count} does not support list type and requires \code{x} and \code{value} to be of the same type.
23 | Function \code{countOccur} takes vectors or data.frame as inputs and returns a \code{data.frame} with the number of times each value in the vector occurs or number of times each row in a \code{data.frame} occurs.
24 | }
25 | \seealso{
26 |   \code{\link{pcount}}
27 | }
28 | \author{Morgan Jacob}
29 | \examples{
30 | x = c(1, 3, NA, 5)
31 | count(x, 3)
32 | 
33 | countNA(x)
34 | countNA(as.list(x))
35 | 
36 | countOccur(x)
37 | 
38 | # Benchmarks countNA
39 | # ------------------
40 | # x = sample(c(TRUE,NA,FALSE),1e8,TRUE) # 382 Mb
41 | # microbenchmark::microbenchmark(
42 | #   countNA(x),
43 | #   sum(is.na(x)),
44 | #   times=5L
45 | # )
46 | # Unit: milliseconds
47 | #          expr   min    lq   mean  median    uq   max neval
48 | # countNA(x)     98.7  99.2  101.2   100.1 101.4 106.4     5
49 | # sum(is.na(x)) 405.4 441.3  478.9   461.1 523.9 562.6     5
50 | #
51 | # Benchmarks countOccur
52 | # ---------------------
53 | # x = rnorm(1e6)
54 | # y = data.table::data.table(x)
55 | # microbenchmark::microbenchmark(
56 | #   kit= countOccur(x),
57 | #   data.table = y[, .N, keyby = x],
58 | #   table(x),
59 | #   times = 10L
60 | # )
61 | # Unit: milliseconds
62 | # expr        min         lq     mean    median      uq     max neval
63 | # kit          62.26   63.88    89.29     75.49   95.17  162.40    10
64 | # data.table  189.17  194.08   235.30    227.43  263.74  337.74    10 # setDTthreads(1L)
65 | # data.table  140.15  143.91   190.04    182.85  234.48  261.43    10 # setDTthreads(2L)
66 | # table(x)   3560.77 3705.06  3843.47   3807.12 4048.40 4104.11    10
67 | }
68 | 


--------------------------------------------------------------------------------
/man/fpos.Rd:
--------------------------------------------------------------------------------
 1 | \name{fpos}
 2 | \alias{fpos}
 3 | \title{ Find a matrix position inside a larger matrix }
 4 | \description{
 5 | The function \code{fpos} returns the locations (row and column index) where a small matrix may be found in a larger matrix. The function also works with vectors.  
 6 | }
 7 | \usage{
 8 |   fpos(needle, haystack, all=TRUE, overlap=TRUE)
 9 | }
10 | \arguments{
11 |   \item{needle}{ A matrix or vector to search for in the larger matrix or vector \code{haystack}. Note that the \code{needle} dimensions (row and column size) must be smaller than the \code{haystack} dimensions. }
12 |   \item{haystack}{ A matrix or vector to look into.}
13 |   \item{all}{ A logical value to indicate whether to return all occurrences (\code{TRUE}) or only the first one (\code{FALSE}). Default value is \code{TRUE}.}
14 |   \item{overlap}{ A logical value to indicate whether to allow the small matrix occurrences to overlap or not. Default value is \code{TRUE}.}
15 | }
16 | \value{
17 | A two columns matrix that contains the position or index where the small matrix (needle) can be found in the larger matrix. The first column refers to rows and the second to columns. In case both the needle and haystack are vectors, a vector is returned.
18 | }
19 | \author{Morgan Jacob}
20 | \examples{
21 | # Example 1: find a matrix inside a larger one
22 | big_matrix = matrix(c(1:30), nrow = 10)
23 | small_matrix = matrix(c(14, 15, 24, 25), nrow = 2)
24 | 
25 | fpos(small_matrix, big_matrix)
26 | 
27 | # Example 2: find a vector inside a larger one  
28 | fpos(14:15, 1:30)
29 | 
30 | # Example 3: 
31 | big_matrix = matrix(c(1:5), nrow = 10, ncol = 5)
32 | small_matrix = matrix(c(2:3), nrow = 2, ncol = 2)
33 | 
34 | # return all occurences
35 | fpos(small_matrix, big_matrix)
36 | 
37 | # return only the first
38 | fpos(small_matrix, big_matrix, all = FALSE)
39 | 
40 | # return non overlapping occurences
41 | fpos(small_matrix, big_matrix, overlap = FALSE)
42 | 
43 | # Benchmarks
44 | # ----------
45 | # x = matrix(1:5, nrow=1e4, ncol=5e3) # 191Mb
46 | # microbenchmark::microbenchmark(
47 | #  fpos=kit::fpos(1L, x),
48 | #  which=which(x==1L, arr.ind=TRUE),
49 | #  times=10L
50 | # )
51 | # Unit: milliseconds
52 | #  expr  min  lq  mean median   uq  max neval
53 | # fpos   202  206  220    221  231  241    10
54 | # which  612  637  667    653  705  724    10
55 | }
56 | 


--------------------------------------------------------------------------------
/man/funique.Rd:
--------------------------------------------------------------------------------
 1 | \name{fduplicated/funique}
 2 | \alias{fduplicated}
 3 | \alias{funique}
 4 | \alias{uniqLen}
 5 | \title{ Fast duplicated and unique}
 6 | \description{
 7 | Similar to base R functions \code{duplicated} and \code{unique}, \code{fduplicated} and \code{funique} are slightly faster for vectors and much faster for \code{data.frame}. Function \code{uniqLen} is equivalent to base R \code{length(unique)} or \code{data.table::uniqueN}.
 8 | }
 9 | \usage{
10 |   fduplicated(x, fromLast = FALSE)
11 |   funique(x, fromLast = FALSE)
12 |   uniqLen(x)
13 | }
14 | \arguments{
15 |   \item{x}{ A vector, data.frame or matrix.}
16 |   \item{fromLast}{ A logical value to indicate whether the search should start from the end or beginning. Default is \code{FALSE}.}
17 | }
18 | \value{
19 | Function \code{fduplicated} returns a logical vector and \code{funique} returns a vector of the same type as \code{x} without the duplicated value. Function \code{uniqLen} returns an integer.
20 | }
21 | \author{Morgan Jacob}
22 | \examples{
23 | # Example 1: fduplicated
24 | fduplicated(iris$Species)
25 | 
26 | # Example 2: funique
27 | funique(iris$Species)
28 | 
29 | # Example 3: uniqLen
30 | uniqLen(iris$Species)
31 | 
32 | # Benchmarks
33 | # ----------
34 | # x = sample(c(1:10,NA_integer_),1e8,TRUE) # 382 Mb
35 | # microbenchmark::microbenchmark(
36 | #   duplicated(x),
37 | #   fduplicated(x),
38 | #   times = 5L
39 | # )
40 | # Unit: seconds
41 | #           expr  min   lq  mean  median   uq   max neval
42 | # duplicated(x)  2.21 2.21  2.48    2.21 2.22  3.55     5
43 | # fduplicated(x) 0.38 0.39  0.45    0.48 0.49  0.50     5
44 | #
45 | # vs data.table
46 | # -------------
47 | # df = iris[,5:1]
48 | # for (i in 1:16) df = rbind(df, df)  # 338 Mb
49 | # dt = data.table::as.data.table(df)
50 | # microbenchmark::microbenchmark(
51 | #   kit = funique(df),
52 | #   data.table = unique(dt),
53 | #   times = 5L
54 | # )
55 | # Unit: seconds
56 | #       expr  min   lq  mean  median    uq  max neval
57 | # kit        1.22 1.27  1.33    1.27  1.36 1.55     5
58 | # data.table 6.20 6.24  6.43    6.33  6.46 6.93     5 # (setDTthreads(1L))
59 | # data.table 4.20 4.25  4.47    4.26  4.32 5.33     5 # (setDTthreads(2L))
60 | #
61 | # microbenchmark::microbenchmark(
62 | #   kit=uniqLen(x),
63 | #   data.table=uniqueN(x),
64 | #   times = 5L, unit = "s"
65 | # )
66 | # Unit: seconds
67 | #       expr  min    lq  mean  median   uq  max neval
68 | # kit        0.17  0.17  0.17   0.17  0.17 0.17     5
69 | # data.table 1.66  1.68  1.70   1.71  1.71 1.72     5 # (setDTthreads(1L))
70 | # data.table 1.13  1.15  1.16   1.16  1.18 1.18     5 # (setDTthreads(2L))
71 | }
72 | 


--------------------------------------------------------------------------------
/man/iif.Rd:
--------------------------------------------------------------------------------
 1 | \name{iif}
 2 | \alias{iif}
 3 | \title{ Fast if else }
 4 | \description{
 5 | \code{iif} is a faster and more robust replacement of \code{\link[base]{ifelse}}. It is comparable to \code{dplyr::if_else}, \code{hutils::if_else} and \code{data.table::fifelse}. It returns a value with the same length as \code{test} filled with corresponding values from \code{yes}, \code{no} or eventually \code{na}, depending on \code{test}. It does not support S4 classes.
 6 | }
 7 | \usage{
 8 |   iif(test, yes, no, na=NULL, tprom=FALSE, nThread=getOption("kit.nThread"))
 9 | }
10 | \arguments{
11 |   \item{test}{ A logical vector. }
12 |   \item{yes, no}{ Values to return depending on \code{TRUE}/\code{FALSE} element of \code{test}. They must be the same type and be either length \code{1} or the same length of \code{test}. }
13 |   \item{na}{ Value to return if an element of \code{test} is missing. It must be the same type as \code{yes}/\code{no} and be either length \code{1} or the same length of \code{test}. Please note that \code{NA} is treated as logical value of length 1 as per the R documentation. \code{NA_integer_}, \code{NA_real_}, \code{NA_complex_} and \code{NA_character_} are equivalent to \code{NA} but for integer, double, complex and character. Default value for argument \code{na} is \code{NULL} and will automatically default to the equivalent NA type of argument \code{yes}.}
14 |   \item{tprom}{ Argument to indicate whether type promotion of \code{yes} and \code{no} is allowed or not. Either \code{FALSE} or \code{TRUE}, default is \code{FALSE} to not allow type promotion. }
15 |   \item{nThread}{ A integer for the number of threads to use with \emph{openmp}. Default value is \code{getOption("kit.nThread")}.}
16 | }
17 | \details{
18 | In contrast to \code{\link[base]{ifelse}} attributes are copied from \code{yes} to the output. This is useful when returning \code{Date}, \code{factor} or other classes.
19 | Like \code{dplyr::if_else} and \code{hutils::if_else}, the \code{na} argument is by default set to \code{NULL}. This argument is set to \code{NA} in data.table::fifelse.
20 | Similarly to \code{dplyr::if_else} and when \code{tprom=FALSE}, \code{iif} requires same type for arguments \code{yes} and \code{no}. This is not strictly the case for \code{data.table::fifelse} which will coerce integer to double. 
21 | When \code{tprom=TRUE}, \code{iif} behavior is similar to \code{base::ifelse} in the sense that it will promote or coerce \code{yes} and \code{no}to the "highest" used type. Note, however, that unlike \code{base::ifelse} attributes are still conserved.
22 | }
23 | \value{
24 | A vector of the same length as \code{test} and attributes as \code{yes}. Data values are taken from the values of \code{yes} and \code{no}, eventually \code{na}.
25 | }
26 | \seealso{
27 |   \code{\link{nif}}
28 |   \code{\link{vswitch}}
29 | }
30 | \author{Morgan Jacob}
31 | \examples{
32 | x = c(1:4, 3:2, 1:4)
33 | iif(x > 2L, x, x - 1L)
34 | 
35 | # unlike ifelse, iif preserves attributes, taken from the 'yes' argument
36 | dates = as.Date(c("2011-01-01","2011-01-02","2011-01-03","2011-01-04","2011-01-05"))
37 | ifelse(dates == "2011-01-01", dates - 1, dates)
38 | iif(dates == "2011-01-01", dates - 1, dates)
39 | yes = factor(c("a","b","c"))
40 | no = yes[1L]
41 | ifelse(c(TRUE,FALSE,TRUE), yes, no)
42 | iif(c(TRUE,FALSE,TRUE), yes, no)
43 | 
44 | # Example of using the 'na' argument
45 | iif(test = c(-5L:5L < 0L, NA), yes = 1L, no = 0L, na = 2L)
46 | 
47 | # Example of using the 'tprom' argument
48 | iif(test = c(-5L:5L < 0L, NA), yes = 1L, no = "0", na = 2L, tprom = TRUE)
49 | }
50 | 


--------------------------------------------------------------------------------
/man/nif.Rd:
--------------------------------------------------------------------------------
 1 | \name{nif}
 2 | \alias{nif}
 3 | \title{Nested if else}
 4 | \description{
 5 | \code{nif} is a fast implementation of SQL \code{CASE WHEN} statement for R. Conceptually, \code{nif} is a nested version of \code{\link{iif}} (with smarter implementation than manual nesting). It is not the same but it is comparable to \code{dplyr::case_when} and \code{data.table::fcase}.
 6 | }
 7 | \usage{
 8 |   nif(..., default=NULL)
 9 | }
10 | \arguments{
11 | \item{...}{ A sequence consisting of logical condition (\code{when})-resulting value (\code{value}) \emph{pairs} in the following order \code{when1, value1, when2, value2, ..., whenN, valueN}. Logical conditions \code{when1, when2, ..., whenN} must all have the same length, type and attributes. Each \code{value} may either share length with \code{when} or be length 1. Please see Examples section for further details.}
12 | \item{default}{ Default return value, \code{NULL} by default, for when all of the logical conditions \code{when1, when2, ..., whenN} are \code{FALSE} or missing for some entries. Argument \code{default} can be a vector either of length 1 or length of logical conditions \code{when1, when2, ..., whenN}. Note that argument 'default' must be named explicitly.}
13 | }
14 | \value{
15 |   Vector with the same length as the logical conditions (\code{when}) in \code{...}, filled with the corresponding values (\code{value}) from \code{...}, or eventually \code{default}. Attributes of output values \code{value1, value2, ...valueN} in \code{...} are preserved.
16 | }
17 | \details{
18 |   Unlike \code{data.table::fcase}, the \code{default} argument is set to \code{NULL}. In addition, \code{nif} can be called by other packages at C level. Note that at C level, the function has an additional argument \code{SEXP md} which is either \code{TRUE} for lazy evaluation or \code{FALSE} for non lazy evaluation. This argument is not exposed to R users and is more for C users.
19 | }
20 | \seealso{
21 |   \code{\link{iif}}
22 |   \code{\link{vswitch}}
23 | }
24 | \author{Morgan Jacob}
25 | \examples{
26 | x = 1:10
27 | nif(
28 | 	x < 5L, 1L,
29 | 	x > 5L, 3L
30 | )
31 | 
32 | nif(
33 | 	x < 5L, 1L:10L,
34 | 	x > 5L, 3L:12L
35 | )
36 | 
37 | # Lazy evaluation example
38 | nif(
39 | 	x < 5L, 1L,
40 | 	x >= 5L, 3L,
41 | 	x == 5L, stop("provided value is an unexpected one!")
42 | )
43 | 
44 | # nif preserves attributes, example with dates
45 | nif(
46 | 	x < 5L, as.Date("2019-10-11"),
47 | 	x > 5L, as.Date("2019-10-14")
48 | )
49 | 
50 | # nif example with factor; note the matching levels
51 | nif(
52 | 	x < 5L, factor("a", levels=letters[1:3]),
53 | 	x > 5L, factor("b", levels=letters[1:3])
54 | )
55 | 
56 | # Example of using the 'default' argument
57 | nif(
58 | 	x < 5L, 1L,
59 | 	x > 5L, 3L,
60 | 	default = 5L
61 | )
62 | 
63 | nif(
64 | 	x < 5L, 1L,
65 | 	x > 5L, 3L,
66 | 	default = rep(5L, 10L)
67 | )
68 | }
69 | 


--------------------------------------------------------------------------------
/man/psort.Rd:
--------------------------------------------------------------------------------
 1 | \name{psort}
 2 | \alias{psort}
 3 | \title{Parallel Sort}
 4 | \description{
 5 |   Similar to \code{base::sort} but just for character vector and partially using parallelism.
 6 |   It is currently experimental and might change in the future. Use with caution.
 7 | }
 8 | \usage{
 9 |   psort(x, decreasing=FALSE, na.last=NA,
10 |         nThread=getOption("kit.nThread"),c.locale=TRUE)
11 | }
12 | \arguments{
13 |   \item{x}{ A vector of type character. If other, it will default to \code{base::sort}}
14 |   \item{na.last}{ For controlling the treatment of \code{NA}s. If \code{TRUE}, missing values in the data are put last; if \code{FALSE}, they are put first; if \code{NA}, they are removed. }
15 |   \item{decreasing}{ A boolean indicating where to sort the data in decreasing way. Default is \code{FALSE}. }
16 |   \item{nThread}{ Number of thread to use. Default value is \code{1L}.}
17 |   \item{c.locale}{ A boolean, whether to use C Locale or R session locale. Default TRUE.}
18 | }
19 | \value{
20 |   Returns the input \code{x} in sorted order similar to \code{base::sort} but usually faster. If \code{c.locale=FALSE}, \code{psort} will return the same output as \code{base::sort} with \code{method="quick"}, i.e. using R session locale. If \code{c.locale=TRUE}, \code{psort} will return the same output as \code{base::sort} with \code{method="radix"}, i.e. using C locale. See example below.
21 | }
22 | \author{Morgan Jacob}
23 | \examples{
24 | x = c("b","A","B","a","\xe4")
25 | Encoding(x) = "latin1"
26 | identical(psort(x, c.locale=FALSE), sort(x))
27 | identical(psort(x, c.locale=TRUE), sort(x, method="radix"))
28 | 
29 | # Benchmarks
30 | # ----------
31 | # strings = as.character(as.hexmode(1:1000))
32 | # x = sample(strings, 1e8, replace=TRUE)
33 | # system.time({kit::psort(x, na.last = TRUE, nThread = 1L)})
34 | #   user  system elapsed 
35 | #  2.833   0.434   3.277
36 | # system.time({sort(x,method="radix",na.last = TRUE)})
37 | #   user  system elapsed 
38 | #  5.597   0.559   6.176
39 | # system.time({x[order(x,method="radix",na.last = TRUE)]})
40 | #   user  system elapsed 
41 | #  5.561   0.563   6.143
42 | }
43 | 


--------------------------------------------------------------------------------
/man/psum.Rd:
--------------------------------------------------------------------------------
  1 | \name{parallel-funs}
  2 | \alias{psum}
  3 | \alias{pprod}
  4 | \alias{pall}
  5 | \alias{pallNA}
  6 | \alias{pallv}
  7 | \alias{pany}
  8 | \alias{panyNA}
  9 | \alias{panyv}
 10 | \alias{pmean}
 11 | \alias{pcount}
 12 | \alias{pcountNA}
 13 | \alias{pfirst}
 14 | \alias{plast}
 15 | \title{Parallel (Statistical) Functions}
 16 | \description{
 17 | Vector-valued (statistical) functions operating in parallel over vectors passed as arguments, or a single list of vectors (such as a data frame). Similar to \code{\link{pmin}} and \code{\link{pmax}}, except that these functions do not recycle vectors. 
 18 | }
 19 | \usage{
 20 |   psum(..., na.rm = FALSE)
 21 |   pprod(..., na.rm = FALSE)
 22 |   pmean(..., na.rm = FALSE)
 23 |   pfirst(...)  # (na.rm = TRUE)
 24 |   plast(...)   # (na.rm = TRUE)
 25 |   pall(..., na.rm = FALSE)
 26 |   pallNA(...)
 27 |   pallv(..., value)
 28 |   pany(..., na.rm = FALSE)
 29 |   panyNA(...)
 30 |   panyv(..., value)
 31 |   pcount(..., value)
 32 |   pcountNA(...)
 33 | }
 34 | \arguments{
 35 |   \item{...}{ suitable (atomic) vectors of the same length, or a single list of vectors (such as a \code{data.frame}). See Details on the allowed data types for each function, and Examples.}
 36 |   \item{na.rm}{ A logical indicating whether missing values should be removed. Default value is \code{FALSE}, except for \code{pfirst} and \code{plast}.}
 37 |   \item{value}{ A non \code{NULL} value of length 1. } % \code{pcount} will count how many times it occurs.
 38 | }
 39 | \details{
 40 | Functions \code{psum}, \code{pprod} work for integer, logical, double and complex types. \code{pmean} only supports integer, logical and double types. All 3 functions will error if used with factors.
 41 | 
 42 | \code{pfirst}/\code{plast} select the first/last non-missing value (or non-empty or \code{NULL} value for list-vectors). They accept all vector types with defined missing values + lists, but can only jointly handle integer and double types (not numeric and complex or character and factor). If factors are passed, they all need to have identical levels.
 43 | 
 44 | \code{pany} and \code{pall} are derived from base functions \code{all} and \code{any} and only allow logical inputs.
 45 | 
 46 | \code{pcount} counts the occurrence of \code{value}, and expects arguments of the same data type (except for \code{value = NA}). \code{pcountNA} is equivalent to \code{pcount} with \code{value = NA}, and they both allow \code{NA} counting in mixed-type data. \code{pcountNA} additionally supports list vectors and counts empty or \code{NULL} elements as \code{NA}. 
 47 | 
 48 | Functions \code{panyv/pallv} are wrappers around \code{pcount}, and \code{panyNA/pallNA} are wrappers around \code{pcountNA}. They return a logical vector instead of the integer count. 
 49 | 
 50 | None of these functions recycle vectors i.e. all input vectors need to have the same length. All functions support long vectors with up to \code{2^64-1} elements.
 51 | }
 52 | \value{
 53 | \code{psum/pprod/pmean} return the sum, product or mean of all arguments. The value returned will be of the highest argument type (integer < double < complex). \code{pprod} only returns double or complex. \code{pall[v/NA]} and \code{pany[v/NA]} return a logical vector. \code{pcount[NA]} returns an integer vector. \code{pfirst/plast} return a vector of the same type as the inputs. 
 54 | }
 55 | \seealso{
 56 | Package 'collapse' provides column-wise and scalar-valued analogues to many of these functions.
 57 | }
 58 | \author{Morgan Jacob and Sebastian Krantz}
 59 | \examples{
 60 | x = c(1, 3, NA, 5)
 61 | y = c(2, NA, 4, 1)
 62 | z = c(3, 4, 4, 1)
 63 | 
 64 | # Example 1: psum 
 65 | psum(x, y, z, na.rm = FALSE)
 66 | psum(x, y, z, na.rm = TRUE)
 67 | 
 68 | # Example 2: pprod
 69 | pprod(x, y, z, na.rm = FALSE)
 70 | pprod(x, y, z, na.rm = TRUE)
 71 | 
 72 | # Example 3: pmean
 73 | pmean(x, y, z, na.rm = FALSE)
 74 | pmean(x, y, z, na.rm = TRUE)
 75 | 
 76 | # Example 4: pfirst and plast
 77 | pfirst(x, y, z)
 78 | plast(x, y, z)
 79 | 
 80 | # Adjust x, y, and z to use in pall and pany
 81 | x = c(TRUE, FALSE, NA, FALSE)
 82 | y = c(TRUE, NA, TRUE, TRUE)
 83 | z = c(TRUE, TRUE, FALSE, NA)
 84 | 
 85 | # Example 5: pall
 86 | pall(x, y, z, na.rm = FALSE)
 87 | pall(x, y, z, na.rm = TRUE)
 88 | 
 89 | # Example 6: pany
 90 | pany(x, y, z, na.rm = FALSE)
 91 | pany(x, y, z, na.rm = TRUE)
 92 | 
 93 | # Example 7: pcount
 94 | pcount(x, y, z, value = TRUE)
 95 | pcountNA(x, y, z)
 96 | 
 97 | # Example 8: list/data.frame as an input
 98 | pprod(iris[,1:2])
 99 | psum(iris[,1:2])
100 | pmean(iris[,1:2])
101 | 
102 | # Benchmarks
103 | # ----------
104 | # n = 1e8L
105 | # x = rnorm(n) # 763 Mb
106 | # y = rnorm(n)
107 | # z = rnorm(n)
108 | # 
109 | # microbenchmark::microbenchmark(
110 | #   kit=psum(x, y, z, na.rm = TRUE),
111 | #   base=rowSums(do.call(cbind,list(x, y, z)), na.rm=TRUE),
112 | #   times = 5L, unit = "s"
113 | # )
114 | # Unit: Second
115 | # expr  min   lq mean median   uq  max neval
116 | # kit  0.52 0.52 0.65   0.55 0.83 0.84     5
117 | # base 2.16 2.27 2.34   2.35 2.43 2.49     5
118 | #
119 | # x = sample(c(TRUE, FALSE, NA), n, TRUE) # 382 Mb
120 | # y = sample(c(TRUE, FALSE, NA), n, TRUE)
121 | # z = sample(c(TRUE, FALSE, NA), n, TRUE)
122 | # 
123 | # microbenchmark::microbenchmark(
124 | #   kit=pany(x, y, z, na.rm = TRUE),
125 | #   base=sapply(1:n, function(i) any(x[i],y[i],z[i],na.rm=TRUE)),
126 | #   times = 5L
127 | # )
128 | # Unit: Second
129 | # expr    min     lq   mean   median     uq    max neval
130 | # kit    1.07   1.09   1.15     1.10   1.23   1.23     5
131 | # base 111.31 112.02 112.78   112.97 113.55 114.03     5
132 | }
133 | 


--------------------------------------------------------------------------------
/man/setlevels.Rd:
--------------------------------------------------------------------------------
 1 | \name{setlevels}
 2 | \alias{setlevels}
 3 | \title{ Set levels of a factor object }
 4 | \description{
 5 |   A function to set levels of a factor object.
 6 | }
 7 | \usage{
 8 | setlevels(x, old=levels(x), new, skip_absent=FALSE)
 9 | }
10 | \arguments{
11 |   \item{x}{ A factor object. }
12 |   \item{old}{ A character vector containing the factor levels to be changed. Default is levels of \code{x}. }
13 |   \item{new}{ The new character vector containing the factor levels to be added. }
14 |   \item{skip_absent}{ Skip items in \code{old} that are missing (i.e. absent) in `names(x)`. Default \code{FALSE} halts with error if any are missing. }
15 | }
16 | \value{
17 |   Returns an invisible and modified factor object.
18 | }
19 | \author{Morgan Jacob}
20 | \examples{
21 | x = factor(c("A", "A", "B", "B", "B", "C")) # factor vector with levels A B C
22 | setlevels(x, new = c("X", "Y", "Z"))        # set factor levels to: X Y Z
23 | setlevels(x, old = "X", new = "A")          # set factor levels X to A
24 | }
25 | 


--------------------------------------------------------------------------------
/man/shareData.Rd:
--------------------------------------------------------------------------------
 1 | \name{shareData/getData/clearData}
 2 | \alias{shareData}
 3 | \alias{getData}
 4 | \alias{clearData}
 5 | \title{ Share Data between R Sessions}
 6 | \description{
 7 | Experimental functions that enable the user to share a R object between 2 \R sessions.
 8 | }
 9 | \usage{
10 |   shareData(data, map_name, verbose=FALSE)
11 |   getData(map_name, verbose=FALSE)
12 |   clearData(x, verbose=FALSE)
13 | }
14 | \arguments{
15 |   \item{data}{ A \R object like a vector or a \code{data.frame}.}
16 |   \item{map_name}{ A character. A name for the memory map location where to store the data.}
17 |   \item{x}{ An external pointer like the one returned by function \code{shareData}.}
18 |   \item{verbose}{ A logical value \code{TRUE} or \code{FALSE} to provide or not information to the user.}
19 | }
20 | \value{
21 | \code{shareData} returns a external pointer.
22 | \code{getData} returns an \R object stored in the memory location \code{map_name}.
23 | \code{clearData} returns \code{TRUE} or \code{FALSE} depending on whether the data have been cleared in memory.
24 | }
25 | \author{Morgan Jacob}
26 | \examples{
27 | # In R session 1: share data in memory
28 | # > x = shareData(mtcars,"share1")
29 | #
30 | # In R session 2: get data from session 1
31 | # > getData("share1")
32 | #
33 | # In R session 1: clear data in memory
34 | # > clearData(x)
35 | }
36 | 


--------------------------------------------------------------------------------
/man/topn.Rd:
--------------------------------------------------------------------------------
 1 | \name{topn}
 2 | \alias{topn}
 3 | \title{ Top N values index}
 4 | \description{
 5 |   \code{topn} is used to get the indices of the few values of an input. This is an extension of \code{\link{which.max}}/\code{\link{which.min}} which provide \emph{only} the first such index. 
 6 |   
 7 |   The output is the same as \code{order(vec)[1:n]}, but internally optimized not to sort the irrelevant elements of the input (and therefore much faster, for small \code{n} relative to input size).
 8 | }
 9 | \usage{
10 |   topn(vec, n=6L, decreasing=TRUE, hasna=TRUE, index=TRUE)
11 | }
12 | \arguments{
13 |   \item{vec}{ A numeric vector of type numeric or integer. Other types are not supported yet. }
14 |   \item{n}{ A positive integer value greater or equal to 1. }
15 |   \item{decreasing}{ A logical value (default \code{TRUE}) to indicate whether to sort \code{vec} in decreasing or increasing order. Equivalent to argument \code{decreasing} in function \code{base::order}. Please note that unlike \code{topn} default value in \code{base::order} is \code{FALSE}.}
16 |   \item{hasna}{ A logical value (default \code{TRUE}) to indicate whether \code{vec} contains \code{NA} values. }
17 |   \item{index}{ A logical value (default \code{TRUE}) to indicate whether indexes or values of \code{vec}. }
18 | }
19 | \value{
20 |   \code{integer} vector of indices of the most extreme (according to \code{decreasing}) \code{n} values in vector \code{vec}. Please note that for large value of \code{n}, i.e. 1500 or 2000 (depending on the value of \code{hasna}), \code{topn} will default to base R function \code{order}.
21 | }
22 | \author{Morgan Jacob}
23 | \examples{
24 | x = rnorm(1e4)
25 | 
26 | # Example 1: index of top 6 negative values 
27 | topn(x, 6L, decreasing=FALSE)
28 | order(x)[1:6]
29 | 
30 | # Example 2: index of top 6 positive values
31 | topn(x, 6L, decreasing = TRUE)
32 | order(x, decreasing=TRUE)[1:6]
33 | 
34 | # Example 3: top 6 negative values
35 | topn(x, 6L, decreasing=FALSE, index=FALSE)
36 | sort(x)[1:6]
37 | 
38 | # Benchmarks
39 | # ----------
40 | # x = rnorm(1e7) # 76Mb
41 | # microbenchmark::microbenchmark(
42 | #   topn=kit::topn(x, 6L),
43 | #   order=order(x, decreasing=TRUE)[1:6],
44 | #   times=10L
45 | # )
46 | # Unit: milliseconds
47 | #  expr min   lq  mean median   uq  max neval
48 | # topn   11   11    13     11   12   18    10
49 | # order 563  565   587    566  602  661    10
50 | #
51 | # microbenchmark::microbenchmark(
52 | #  topn=kit::topn(x, 6L, decreasing=FALSE, index=FALSE),
53 | #  sort=sort(x, partial=1:6)[1:6],
54 | #  times=10L
55 | # )
56 | # Unit: milliseconds
57 | # expr min  lq  mean median   uq  max neval
58 | # topn  11  11    11     11   12   12    10
59 | # sort 167 175   197    178  205  303    10
60 | }
61 | 


--------------------------------------------------------------------------------
/man/vswitch.Rd:
--------------------------------------------------------------------------------
  1 | \name{vswitch/nswitch}
  2 | \alias{vswitch}
  3 | \alias{nswitch}
  4 | \title{ Vectorised switch }
  5 | \description{
  6 |   \code{vswitch}/ \code{nswitch} is a vectorised version of \code{base} function \code{switch}. This function can also be seen as a particular case of function \code{nif}, as shown in examples below, and should also be faster.
  7 | }
  8 | \usage{
  9 |     vswitch(x, values, outputs, default=NULL,
 10 |             nThread=getOption("kit.nThread"),
 11 |             checkEnc=TRUE)
 12 |     nswitch(x, ..., default=NULL,
 13 |             nThread=getOption("kit.nThread"),
 14 |             checkEnc=TRUE)
 15 | }
 16 | \arguments{
 17 |   \item{x}{A vector or list.}
 18 |   \item{values}{A vector or list with values from \code{x} to match. Note that \code{x} and \code{values} must have the same class and attributes.}
 19 |   \item{outputs}{A list or vector with the outputs to return for every matching values. Each item of the list must be of length 1 or length of \code{x}. Note that if all list items are of length 1 then it might be simpler to use a vector.}
 20 |   \item{...}{A sequence of values and outputs in the following order \code{value1, output1, value2, output2, ..., valueN, outputN}. Values \code{value1, value2, ..., valueN} must all have length1, same type and attributes. Each \code{output} may either share length with \code{x} or be length 1. Please see Examples section for further details.}
 21 |   \item{default}{Values to return is no match. Must be a vector or list of length 1 or same length as \code{x}. Also, \code{default} must have the same type, class and attributes as items from \code{outputs}.}
 22 |   \item{nThread}{ A integer for the number of threads to use with \emph{openmp}. Default value is \code{getOption("kit.nThread")}.}
 23 |   \item{checkEnc}{ A logical value whether or not to check if \code{x} and \code{values} have comparable and consistent encoding. Default is \code{TRUE}.}
 24 | }
 25 | \value{
 26 |   A vector or list of the same length as \code{x} with values from \code{outputs} items and from \code{default} if missing.
 27 | }
 28 | \seealso{
 29 |   \code{\link{iif}}
 30 |   \code{\link{nif}}
 31 | }
 32 | \author{Morgan Jacob}
 33 | \examples{
 34 | x = sample(c(10L, 20L, 30L, 40L, 50L, 60L), 3e2, replace=TRUE)
 35 | 
 36 | # The below example of 'vswitch' is
 37 | a1 = vswitch(
 38 |   x = x,
 39 |   values = c(10L,20L,30L,40L,50L),
 40 |   outputs = c(11L,21L,31L,41L,51L),
 41 |   default = NA_integer_
 42 | )
 43 | 
 44 | # equivalent to the following 'nif' example.
 45 | # However for large vectors 'vswitch' should be faster.
 46 | b1 = nif(
 47 |   x==10L, 11L,
 48 |   x==20L, 21L,
 49 |   x==30L, 31L,
 50 |   x==40L, 41L,
 51 |   x==50L, 51L,
 52 |   default = NA_integer_
 53 | )
 54 | identical(a1, b1)
 55 | 
 56 | # nswitch can also be used as follows:
 57 | c1 = nswitch(x,
 58 |   10L, 11L,
 59 |   20L, 21L,
 60 |   30L, 31L,
 61 |   40L, 41L,
 62 |   50L, 51L,
 63 |   default = NA_integer_
 64 | )
 65 | identical(a1, c1)
 66 | 
 67 | # Example with list in 'outputs' argument
 68 | y = c(1, 0, NA_real_)
 69 | a2 = vswitch(
 70 |   x = y,
 71 |   values = c(1, 0),
 72 |   outputs = list(c(2, 3, 4), c(5, 6, 7)),
 73 |   default = 8
 74 | )
 75 | 
 76 | b2 = nif(
 77 |   y==1, c(2, 3, 4),
 78 |   y==0, c(5, 6, 7),
 79 |   default = 8
 80 | )
 81 | 
 82 | identical(a2, b2)
 83 | 
 84 | c2 = nswitch(y,
 85 |   1, c(2, 3, 4),
 86 |   0, c(5, 6, 7),
 87 |   default = 8
 88 | )
 89 | 
 90 | identical(a2, c2)
 91 | 
 92 | # Benchmarks
 93 | # ----------
 94 | # x = sample(1:100, 3e8, TRUE) # 1.1Gb
 95 | # microbenchmark::microbenchmark(
 96 | # nif=kit::nif(
 97 | #   x==10L,  0L,
 98 | #   x==20L, 10L,
 99 | #   x==30L, 20L,
100 | #   default= 30L
101 | #  ),
102 | # vswitch=kit::vswitch(
103 | #   x, c( 10L, 20L, 30L), list(0L, 10L, 20L), 30L
104 | # ),
105 | # times=10L
106 | # )
107 | # Unit: seconds
108 | #    expr   min    lq  mean median   uq  max neval
109 | # nif      4.27  4.37  4.43   4.42 4.52 4.53    10
110 | # vswitch  1.08  1.09  1.20   1.10 1.43 1.44    10 # 1 thread
111 | # vswitch  0.46  0.57  0.57   0.58 0.58 0.60    10 # 2 threads
112 | }
113 | 


--------------------------------------------------------------------------------
/src/Makevars.in:
--------------------------------------------------------------------------------
1 | PKG_CFLAGS = @openmp_cflags@
2 | PKG_LIBS = @openmp_libslags@ @realtime_libslags@
3 | 


--------------------------------------------------------------------------------
/src/Makevars.win:
--------------------------------------------------------------------------------
1 | PKG_CFLAGS = $(SHLIB_OPENMP_CFLAGS)
2 | PKG_LIBS = $(SHLIB_OPENMP_CFLAGS)
3 | 


--------------------------------------------------------------------------------
/src/dupLen.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * kit : Useful R Functions Implemented in C
  3 |  * Copyright (C) 2020-2025  Morgan Jacob
  4 |  * 
  5 |  * This program is free software: you can redistribute it and/or modify
  6 |  * it under the terms of the GNU General Public License as published by
  7 |  * the Free Software Foundation, either version 3 of the License, or
  8 |  * (at your option) any later version.
  9 |  * 
 10 |  * This program is distributed in the hope that it will be useful,
 11 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
 12 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 13 |  * GNU General Public License for more details.
 14 |  * 
 15 |  * You should have received a copy of the GNU General Public License
 16 |  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 17 |  */
 18 | 
 19 | #include "kit.h"
 20 | 
 21 | /*
 22 |  *  Main Function
 23 |  */
 24 | 
 25 | SEXP dupLenR(SEXP x) {
 26 |   if (isDataFrame(x)) {
 27 |     SEXP ans = PROTECT(dupLenDataFrameR(x));
 28 |     UNPROTECT(1);
 29 |     return ans;
 30 |   }
 31 |   if (isMatrix(x)) {
 32 |     SEXP ans = PROTECT(dupLenMatrixR(x));
 33 |     UNPROTECT(1);
 34 |     return ans;
 35 |   }
 36 |   if (isArray(x)) {
 37 |     error("Arrays are not yet supported. (please raise a feature request if needed)");
 38 |   }
 39 |   SEXP ans = PROTECT(dupLenVecR(x));
 40 |   UNPROTECT(1);
 41 |   return ans;
 42 | }
 43 | 
 44 | /*
 45 |  *  Data.Frame
 46 |  */
 47 | 
 48 | SEXP dupLenDataFrameR(SEXP x) {
 49 |   const SEXP *restrict px = SEXPPTR_RO(x);
 50 |   const R_xlen_t len_x = xlength(x);
 51 |   bool allT = true;
 52 |   const SEXPTYPE t0 = UTYPEOF(px[0]); 
 53 |   for (int i = 1; i < len_x; ++i) {
 54 |     if (UTYPEOF(px[i]) != t0) {
 55 |       allT = false;
 56 |       break;
 57 |     }
 58 |   }
 59 |   if (allT) {
 60 |     SEXP output = PROTECT(dupLenMatrixR(PROTECT(dfToMatrix(x))));
 61 |     UNPROTECT(2);
 62 |     return output;
 63 |   }
 64 |   const R_xlen_t len_i = xlength(px[0]);
 65 |   SEXP mlv = PROTECT(allocMatrix(INTSXP, (int)len_i, (int)len_x));
 66 |   for (R_xlen_t i = 0; i < len_x; ++i) {
 67 |     memcpy(INTEGER(mlv)+i*len_i, INTEGER(PROTECT(dupVecIndexOnlyR(px[i]))), (unsigned)len_i*sizeof(int));
 68 |   }
 69 |   UNPROTECT((int)len_x);
 70 |   const size_t n2 = 2U * (size_t) len_i;
 71 |   size_t M = 256;
 72 |   int K = 8;
 73 |   while (M < n2) {
 74 |     M *= 2;
 75 |     K++;
 76 |   }
 77 |   R_xlen_t count = 0;
 78 |   int *restrict h = (int*) R_Calloc(M, int);
 79 |   const int *restrict v = INTEGER(mlv);
 80 |   size_t id = 0;
 81 |   for (R_xlen_t i = 0; i < len_i; ++i) {
 82 |     R_xlen_t key = 0;
 83 |     for (R_xlen_t j = 0; j < len_x; ++j) {
 84 |       key ^= HASH(v[i+j*len_i],K)*97*(j+1);
 85 |     }
 86 |     id = HASH(key, K);
 87 |     while (h[id]) {
 88 |       for (R_xlen_t j = 0; j < len_x; ++j) {
 89 |         if (v[h[id]-1+j*len_i] != v[i+j*len_i]) {
 90 |           goto label1;
 91 |         }
 92 |       }
 93 |       goto label2;
 94 |       label1:;
 95 |       id++; id %= M;
 96 |     }
 97 |     h[id] = (int) i + 1;
 98 |     count++;
 99 |     label2:;
100 |   }
101 |   R_Free(h);
102 |   UNPROTECT(1);
103 |   return ScalarInteger(count);
104 | }
105 | 
106 | /*
107 |  *  Matrix
108 |  */
109 | 
110 | SEXP dupLenMatrixR(SEXP x) {
111 |   const R_xlen_t len_x = ncols(x);
112 |   const R_xlen_t len_i = nrows(x);
113 |   const size_t n2 = 2U * (size_t) len_i;
114 |   size_t M = 256;
115 |   int K = 8;
116 |   while (M < n2) {
117 |     M *= 2;
118 |     K++;
119 |   }
120 |   R_xlen_t count = 0;
121 |   int *restrict h = (int*) R_Calloc(M, int);
122 |   size_t id = 0;
123 |   switch(UTYPEOF(x)) {
124 |   case LGLSXP : {
125 |     const int *restrict px = LOGICAL(x);
126 |     for (R_xlen_t i = 0; i < len_i; ++i) {
127 |       id = 0;
128 |       for (R_xlen_t j = 0; j < len_x; ++j) {
129 |         id ^= ((unsigned)(j+1) * ((px[i+j*len_i] == NA_LOGICAL) ? 2U : (size_t) px[i+j*len_i]))*97*(j+1);
130 |       }
131 |       id = HASH(id, K);
132 |       while (h[id]) {
133 |         for (R_xlen_t j = 0; j < len_x; ++j) {
134 |           if (px[h[id]-1+j*len_i] != px[i+j*len_i]) {
135 |             goto labelml1; // # nocov
136 |           }
137 |         }
138 |         goto labelml2;
139 |         labelml1:;// # nocov
140 |         id++; id %= M; // # nocov
141 |       }
142 |       h[id] = (int) i + 1;
143 |       count++;
144 |       labelml2:;
145 |     }
146 |   } break;
147 |   case INTSXP : {
148 |     const int *restrict px = INTEGER(x);
149 |     for (R_xlen_t i = 0; i < len_i; ++i) {
150 |       R_xlen_t key = 0;
151 |       for (R_xlen_t j = 0; j < len_x; ++j) {
152 |         key ^= HASH(((px[i+j*len_i] == NA_INTEGER) ? 0 : px[i+j*len_i]),K)*97*(j+1);
153 |       }
154 |       id = HASH(key, K);
155 |       while (h[id]) {
156 |         for (R_xlen_t j = 0; j < len_x; ++j) {
157 |           if (px[h[id]-1+j*len_i] != px[i+j*len_i]) {
158 |             goto labelmi1; // # nocov
159 |           }
160 |         }
161 |         goto labelmi2;
162 |         labelmi1:;
163 |         id++; id %= M; // # nocov
164 |       }
165 |       h[id] = (int) i + 1;
166 |       count++;
167 |       labelmi2:;
168 |     }
169 |   } break;
170 |   case REALSXP : {
171 |     const double *restrict px = REAL(x);
172 |     union uno tpv;
173 |     for (R_xlen_t i = 0; i < len_i; ++i) {
174 |       R_xlen_t key = 0;
175 |       for (R_xlen_t j = 0; j < len_x; ++j) {
176 |         tpv.d = px[i+j*len_i];
177 |         key ^= HASH(tpv.u[0] + tpv.u[1],K)*97*(j+1);
178 |       }
179 |       tpv.d = key;
180 |       id = HASH(tpv.u[0] + tpv.u[1], K);
181 |       while (h[id]) {
182 |         for (R_xlen_t j = 0; j < len_x; ++j) {
183 |           if (!REQUAL(px[h[id]-1+j*len_i], px[i+j*len_i])) {
184 |             goto labelmr1;
185 |           }
186 |         }
187 |         goto labelmr2;
188 |         labelmr1:;
189 |         id++; id %= M;
190 |       }
191 |       h[id] = (int) i + 1;
192 |       count++;
193 |       labelmr2:;
194 |     }
195 |   } break;
196 |   case CPLXSXP : {
197 |     const Rcomplex *restrict px = COMPLEX(x);
198 |     unsigned int u;
199 |     union uno tpv;
200 |     Rcomplex tmp;
201 |     for (R_xlen_t i = 0; i < len_i; ++i) {
202 |       R_xlen_t key = 0;
203 |       for (R_xlen_t j = 0; j < len_x; ++j) {
204 |         tmp.r = (px[i+j*len_i].r == 0.0) ? 0.0 : px[i+j*len_i].r;
205 |         tmp.i = (px[i+j*len_i].i == 0.0) ? 0.0 : px[i+j*len_i].i;
206 |         if (C_IsNA(tmp)) {
207 |           tmp.r = tmp.i = NA_REAL;
208 |         } else if (C_IsNaN(tmp)) {
209 |           tmp.r = tmp.i = R_NaN;
210 |         }
211 |         tpv.d = tmp.r;
212 |         u = tpv.u[0] ^ tpv.u[1];
213 |         tpv.d = tmp.i;
214 |         u ^= tpv.u[0] ^ tpv.u[1];
215 |         key ^= HASH(u, K)*97*(j+1);
216 |       }
217 |       id = HASH(key, K);
218 |       while (h[id]) {
219 |         for (R_xlen_t j = 0; j < len_x; ++j) {
220 |           if (!CEQUAL(px[h[id]-1+j*len_i], px[i+j*len_i])) {
221 |             goto labelmc1;
222 |           }
223 |         }
224 |         goto labelmc2;
225 |         labelmc1:;
226 |         id++; id %= M;
227 |       }
228 |       h[id] = (int) i + 1;
229 |       count++;
230 |       labelmc2:;
231 |     }
232 |   } break;
233 |   case STRSXP : {
234 |     const SEXP *restrict px = STRING_PTR_RO(x);
235 |     for (R_xlen_t i = 0; i < len_i; ++i) {
236 |       R_xlen_t key = 0;
237 |       for (R_xlen_t j = 0; j < len_x; ++j) {
238 |         key ^= HASH(((intptr_t) px[i+j*len_i] & 0xffffffff),K)*97*(j+1);
239 |       }
240 |       id = HASH(key, K);
241 |       while (h[id]) {
242 |         for (R_xlen_t j = 0; j < len_x; ++j) {
243 |           if (px[h[id]-1+j*len_i] != px[i+j*len_i]) {
244 |             goto labelms1; // # nocov
245 |           }
246 |         }
247 |         goto labelms2;
248 |         labelms1:; // # nocov
249 |         id++; id %= M; // # nocov
250 |       }
251 |       h[id] = (int) i + 1;
252 |       count++;
253 |       labelms2:;
254 |     }
255 |   } break;
256 |   default: {
257 |     R_Free(h);
258 |     error("Matrix of type %s are not supported.", type2char(UTYPEOF(x)));
259 |   }
260 |   }
261 |   R_Free(h);
262 |   return ScalarInteger(count);
263 | }
264 | 
265 | /*
266 |  *  Vector
267 |  */
268 | 
269 | SEXP dupLenVecR(SEXP x) {
270 |   if (isFactor(x)) {
271 |     const int len = LENGTH(PROTECT(getAttrib(x, R_LevelsSymbol)));
272 |     UNPROTECT(1);
273 |     bool *restrict count = (bool*)R_Calloc(len+1,bool);
274 |     const int *restrict px = INTEGER(x);
275 |     const int xlen = LENGTH(x);
276 |     int j = 0;
277 |     for (int i = 0; i < xlen; ++i) {
278 |       if (!count[px[i]]) {
279 |         j++;
280 |         if (j == len)
281 |           break;
282 |         count[px[i]] = true;
283 |       }
284 |     }
285 |     R_Free(count);
286 |     return ScalarInteger(j);
287 |   }
288 |   if (isLogical(x)) {
289 |     bool *restrict count = (bool*)R_Calloc(3,bool);
290 |     const int *restrict px = LOGICAL(x);
291 |     const int xlen = LENGTH(x);
292 |     int j = 0;
293 |     for (int i = 0; i < xlen; ++i) {
294 |       const int cs = px[i] == NA_LOGICAL ? 2 : px[i];
295 |       if (!count[cs]) {
296 |         j++;
297 |         if (j == 3)
298 |           break;
299 |         count[cs] = true;
300 |       }
301 |     }
302 |     R_Free(count);
303 |     return ScalarInteger(j);
304 |   }
305 |   const R_xlen_t n = xlength(x);
306 |   const SEXPTYPE tx = UTYPEOF(x);
307 |   int K;
308 |   size_t M;
309 |   if (tx == INTSXP || tx == STRSXP || tx == REALSXP || tx == CPLXSXP ) {
310 |     if(n >= 1073741824) {
311 |       error("Length of 'x' is too large. (Long vector not supported yet)"); // # nocov
312 |     }
313 |     const size_t n2 = 2U * (size_t) n;
314 |     M = 256;
315 |     K = 8;
316 |     while (M < n2) {
317 |       M *= 2;
318 |       K++;
319 |     }
320 |   } else {
321 |     error("Type %s is not supported.", type2char(tx)); // # nocov
322 |   }
323 |   R_xlen_t count = 0;
324 |   int *restrict h = (int*)R_Calloc(M, int);
325 |   switch (tx) {
326 |   case INTSXP: {
327 |     const int *restrict px = INTEGER(x);
328 |     size_t id = 0;
329 |     for (int i = 0; i < n; ++i) {
330 |       id = (px[i] == NA_INTEGER) ? 0 : HASH(px[i], K);
331 |       while (h[id]) {
332 |         if (px[h[id]-1]==px[i]) {
333 |           goto ibl;
334 |         }
335 |         id++; id %= M; // # nocov
336 |       }
337 |       h[id] = (int) i + 1;
338 |       count++;
339 |       ibl:;
340 |     }
341 |   } break;
342 |   case REALSXP: {
343 |     const double *restrict px = REAL(x);
344 |     size_t id = 0;
345 |     union uno tpv;
346 |     for (int i = 0; i < n; ++i) {
347 |       tpv.d = R_IsNA(px[i]) ? NA_REAL : (R_IsNaN(px[i]) ? R_NaN :px[i]);
348 |       id = HASH(tpv.u[0] + tpv.u[1], K);
349 |       while (h[id]) {
350 |         if (REQUAL(px[h[id]-1], px[i])) {
351 |           goto rbl;
352 |         }
353 |         id++; id %= M;
354 |       }
355 |       h[id] = (int) i + 1;
356 |       count++;
357 |       rbl:;
358 |     }
359 |   } break;
360 |   case CPLXSXP: {
361 |     const Rcomplex *restrict px = COMPLEX(x);
362 |     size_t id = 0;
363 |     unsigned int u;
364 |     union uno tpv;
365 |     Rcomplex tmp;
366 |     for (int i = 0; i < n; ++i) {
367 |       tmp.r = (px[i].r == 0.0) ? 0.0 : px[i].r;
368 |       tmp.i = (px[i].i == 0.0) ? 0.0 : px[i].i;
369 |       if (C_IsNA(tmp)) {
370 |         tmp.r = tmp.i = NA_REAL;
371 |       } else if (C_IsNaN(tmp)) {
372 |         tmp.r = tmp.i = R_NaN;
373 |       }
374 |       tpv.d = tmp.r;
375 |       u = tpv.u[0] ^ tpv.u[1];
376 |       tpv.d = tmp.i;
377 |       u ^= tpv.u[0] ^ tpv.u[1];
378 |       id = HASH(u, K);
379 |       while (h[id]) {
380 |         if (CEQUAL(px[h[id] - 1],px[i])) {
381 |           goto cbl;
382 |         }
383 |         id++; id %= M;
384 |       }
385 |       h[id] = (int) i + 1;
386 |       count++;
387 |       cbl:;
388 |     }
389 |   } break;
390 |   case STRSXP: {
391 |     const SEXP *restrict px = STRING_PTR_RO(x);
392 |     size_t id = 0;
393 |     for (int i = 0; i < n; ++i) {
394 |       id = HASH(((intptr_t) px[i] & 0xffffffff), K);
395 |       while (h[id]) {
396 |         if (px[h[id] - 1]==px[i]) {
397 |           goto sbl;
398 |         }
399 |         id++; id %= M;
400 |       }
401 |       h[id] = (int) i + 1;
402 |       count++;
403 |       sbl:;
404 |     }
405 |   } break;
406 |   }
407 |   R_Free(h);
408 |   return ScalarInteger(count);
409 | }
410 | 


--------------------------------------------------------------------------------
/src/fpos.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * kit : Useful R Functions Implemented in C
  3 |  * Copyright (C) 2020-2025  Morgan Jacob
  4 |  * 
  5 |  * This program is free software: you can redistribute it and/or modify
  6 |  * it under the terms of the GNU General Public License as published by
  7 |  * the Free Software Foundation, either version 3 of the License, or
  8 |  * (at your option) any later version.
  9 |  * 
 10 |  * This program is distributed in the hope that it will be useful,
 11 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
 12 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 13 |  * GNU General Public License for more details.
 14 |  * 
 15 |  * You should have received a copy of the GNU General Public License
 16 |  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 17 |  */
 18 | 
 19 | #include "kit.h"
 20 | 
 21 | SEXP fposR(SEXP ndle, SEXP hsk, SEXP all, SEXP overlap) {
 22 |   SEXP ans = R_NilValue;
 23 |   int nprotect = 0;
 24 |   if (isS4(hsk) || isS4(ndle)) {
 25 |     error("S4 class objects are not supported.");
 26 |   }
 27 |   if (isDataFrame(ndle) || isDataFrame(hsk)) {
 28 |     error("Please note that data.frame(s) are not supported.");
 29 |   }
 30 |   if (!R_compute_identical(PROTECT(GetArrayDimnames(ndle)), R_NilValue, 0)) {
 31 |     error("Arrays are not supported for argument 'needle'.");
 32 |   }
 33 |   UNPROTECT(1);
 34 |   if (!R_compute_identical(PROTECT(GetArrayDimnames(hsk)), R_NilValue, 0)) {
 35 |     error("Arrays are not supported for argument 'haystack'.");
 36 |   }
 37 |   UNPROTECT(1);
 38 |   if (isMatrix(ndle) || isMatrix(hsk)) {
 39 |     ans = PROTECT(fposMatR(ndle, hsk, all, overlap));
 40 |     nprotect++;
 41 |   } else if (isVector(ndle) && isVector(hsk)) {
 42 |     ans = PROTECT(fposVectR(ndle, hsk, all, overlap));
 43 |     nprotect++;
 44 |   }
 45 |   UNPROTECT(nprotect);
 46 |   return ans;
 47 | }
 48 | 
 49 | SEXP fposMatR(SEXP ndle, SEXP hsk, SEXP all, SEXP overlap) {
 50 |   if (!IS_BOOL(all)) {
 51 |     error("Argument 'all' must be TRUE or FALSE and length 1.");
 52 |   }
 53 |   if (!IS_BOOL(overlap)) {
 54 |     error("Argument 'overlap' must be TRUE or FALSE and length 1.");
 55 |   }
 56 |   SEXPTYPE thsk = UTYPEOF(hsk);
 57 |   SEXPTYPE tndle = UTYPEOF(ndle);
 58 |   if (thsk != INTSXP && thsk != REALSXP && thsk != LGLSXP &&
 59 |       thsk != CPLXSXP && thsk != STRSXP) {
 60 |     error("Type %s for 'haystack' is not supported.", type2char(thsk));
 61 |   }
 62 |   if (tndle != INTSXP && tndle != REALSXP && tndle != LGLSXP &&
 63 |       tndle != CPLXSXP && tndle != STRSXP) {
 64 |     error("Type %s for 'needle' is not supported.", type2char(tndle));
 65 |   }
 66 |   const int n = nrows(hsk);
 67 |   const int m = ncols(hsk);
 68 |   const int k = nrows(ndle);
 69 |   const int l = ncols(ndle);
 70 |   if (k > n || l > m) {
 71 |     error("One of the dimension of the small matrix is greater than the large matrix.");
 72 |   }
 73 |   int nprotect = 0;
 74 |   if (thsk != tndle) {
 75 |     if (tndle == INTSXP && thsk == REALSXP) {
 76 |       ndle = PROTECT(coerceVector(ndle, thsk)); nprotect++;
 77 |       tndle = thsk;
 78 |     } else if (tndle == REALSXP && thsk == INTSXP) {
 79 |       hsk = PROTECT(coerceVector(hsk, tndle)); nprotect++;
 80 |       thsk = tndle;
 81 |     } else {
 82 |       error("Haystack type (%s) and needle type (%s) are different."
 83 |               " Please make sure that they have the same type.",
 84 |               type2char(thsk), type2char(tndle));
 85 |     }
 86 |   }
 87 |   const int lim_x = n - k + 1;
 88 |   const int lim_y = m - l + 1;
 89 |   const int sz = lim_x * lim_y;
 90 |   int i, j, p, q, id, tj = 0, ti = 0;
 91 |   int pos_h = 0, pos_n = 0, x = 0;
 92 |   SEXP col = PROTECT(allocVector(INTSXP, sz)); nprotect++;
 93 |   SEXP row = PROTECT(allocVector(INTSXP, sz)); nprotect++;
 94 |   int *restrict pcol = INTEGER(col);
 95 |   int *restrict prow = INTEGER(row);
 96 |   const int pall = !asLogical(all);
 97 |   const int poverlap = !asLogical(overlap);
 98 |   switch(thsk) {
 99 |   case LGLSXP: {
100 |     const int *restrict int_h = LOGICAL(hsk);
101 |     const int *restrict int_n = LOGICAL(ndle);
102 |     for (i = 0; i < lim_y; ++i) {
103 |       for (j = 0; j < lim_x; ++j) {
104 |         id = 1;
105 |         if (i < ti && j < tj) {
106 |           continue;
107 |         }
108 |         for (p = 0; p < l; ++p) {
109 |           pos_h = (i+p) * n  + j;
110 |           pos_n = p * k;
111 |           for (q = 0; q < k; ++q) {
112 |             if (int_h[pos_h + q] != int_n[pos_n + q]) {
113 |               id = 0;
114 |               break;
115 |             }
116 |           }
117 |           if (!id) {
118 |             break;
119 |           }
120 |         }
121 |         if (id) {
122 |           prow[x] = j + 1;
123 |           pcol[x++] = i + 1;
124 |           if (pall) {
125 |             goto label;
126 |           }
127 |           if (poverlap) {
128 |             ti = i + l;
129 |             tj = j + k;
130 |           }
131 |         }
132 |       }
133 |     }
134 |   } break;
135 |   case INTSXP: {
136 |     const int *restrict int_h = INTEGER(hsk);
137 |     const int *restrict int_n = INTEGER(ndle);
138 |     for (i = 0; i < lim_y; ++i) {
139 |       for (j = 0; j < lim_x; ++j) {
140 |         id = 1;
141 |         if (i < ti && j < tj) {
142 |           continue;
143 |         }
144 |         for (p = 0; p < l; ++p) {
145 |           pos_h = (i+p) * n  + j;
146 |           pos_n = p * k;
147 |           for (q = 0; q < k; ++q) {
148 |             if (int_h[pos_h + q] != int_n[pos_n + q]) {
149 |               id = 0;
150 |               break;
151 |             }
152 |           }
153 |           if (!id) {
154 |             break;
155 |           }
156 |         }
157 |         if (id) {
158 |           prow[x] = j + 1;
159 |           pcol[x++] = i + 1;
160 |           if (pall) {
161 |             goto label;
162 |           }
163 |           if (poverlap) {
164 |             ti = i + l;
165 |             tj = j + k;
166 |           }
167 |         }
168 |       }
169 |     }
170 |   } break;
171 |   case REALSXP: {
172 |     const double *restrict dbl_h = REAL(hsk);
173 |     const double *restrict dbl_n = REAL(ndle);
174 |     for (i = 0; i < lim_y; ++i) {
175 |       for (j = 0; j < lim_x; ++j) {
176 |         id = 1;
177 |         if (i < ti && j < tj) {
178 |           continue;
179 |         }
180 |         for (p = 0; p < l; ++p)
181 |         {
182 |           pos_h = (i+p) * n  + j;
183 |           pos_n = p * k;
184 |           for (q = 0; q < k; ++q) {
185 |             if (!REQUAL(dbl_h[pos_h + q], dbl_n[pos_n + q])) {
186 |               id = 0;
187 |               break;
188 |             }
189 |           }
190 |           if (!id) {
191 |             break;
192 |           }
193 |         }
194 |         if (id) {
195 |           prow[x] = j + 1;
196 |           pcol[x++] = i + 1;
197 |           if (pall) {
198 |             goto label;
199 |           }
200 |           if (poverlap) {
201 |             ti = i + l;
202 |             tj = j + k;
203 |           }
204 |         }
205 |       }
206 |     }
207 |   } break;
208 |   case CPLXSXP: {
209 |     const Rcomplex *restrict cpl_h = COMPLEX(hsk);
210 |     const Rcomplex *restrict cpl_n = COMPLEX(ndle);
211 |     for (i = 0; i < lim_y; ++i) {
212 |       for (j = 0; j < lim_x; ++j) {
213 |         id = 1;
214 |         if (i < ti && j < tj) {
215 |           continue;
216 |         }
217 |         for (p = 0; p < l; ++p) {
218 |           pos_h = (i+p) * n  + j;
219 |           pos_n = p * k;
220 |           for (q = 0; q < k; ++q) {
221 |             if (!CEQUAL(cpl_h[pos_h + q], cpl_n[pos_n + q])) {
222 |               id = 0;
223 |               break;
224 |             }
225 |           }
226 |           if (!id) {
227 |             break;
228 |           }
229 |         }
230 |         if (id) {
231 |           prow[x] = j + 1;
232 |           pcol[x++] = i + 1;
233 |           if (pall) {
234 |             goto label;
235 |           }
236 |           if (poverlap) {
237 |             ti = i + l;
238 |             tj = j + k;
239 |           }
240 |         }
241 |       }
242 |     }
243 |   } break;
244 |   case STRSXP: {
245 |     for (i = 0; i < lim_y; ++i) {
246 |     for (j = 0; j < lim_x; ++j) {
247 |       id = 1;
248 |       if (i < ti && j < tj) {
249 |         continue;
250 |       }
251 |       for (p = 0; p < l; ++p) {
252 |         pos_h = (i+p) * n  + j;
253 |         pos_n = p * k;
254 |         for (q = 0; q < k; ++q) {
255 |           if (RCHAR(hsk, pos_h + q) != RCHAR(ndle, pos_n + q)) {
256 |             id = 0;
257 |             break;
258 |           }
259 |         }
260 |         if (!id) {
261 |           break;
262 |         }
263 |       }
264 |       if (id) {
265 |         prow[x] = j + 1;
266 |         pcol[x++] = i + 1;
267 |         if (pall) {
268 |           goto label;
269 |         }
270 |         if (poverlap) {
271 |           ti = i + l;
272 |           tj = j + k;
273 |         }
274 |       }
275 |     }
276 |   }
277 |   } break;
278 |   }
279 |   label:;
280 |   if (x == 0) {
281 |     UNPROTECT(nprotect);
282 |     return R_NilValue;
283 |   }
284 |   SEXP ans = PROTECT(allocMatrix(INTSXP, x, 2)); nprotect++;
285 |   memcpy(INTEGER(ans), prow, (unsigned)x*sizeof(int));
286 |   memcpy(INTEGER(ans)+x, pcol, (unsigned)x*sizeof(int));
287 |   UNPROTECT(nprotect);
288 |   return ans;
289 | }
290 | 
291 | SEXP fposVectR(SEXP ndle, SEXP hsk, SEXP all, SEXP overlap) {
292 |   if (!IS_BOOL(all)) {
293 |     error("Argument 'all' must be TRUE or FALSE and length 1.");
294 |   }
295 |   if (!IS_BOOL(overlap)) {
296 |     error("Argument 'overlap' must be TRUE or FALSE and length 1.");
297 |   }
298 |   SEXPTYPE thsk = UTYPEOF(hsk);
299 |   SEXPTYPE tndle = UTYPEOF(ndle);
300 |   if (thsk != INTSXP && thsk != REALSXP && thsk != LGLSXP &&
301 |       thsk != CPLXSXP && thsk != STRSXP) {
302 |     error("Type %s for 'haystack' is not supported.", type2char(thsk));
303 |   }
304 |   if (tndle != INTSXP && tndle != REALSXP && tndle != LGLSXP &&
305 |       tndle != CPLXSXP && tndle != STRSXP) {
306 |     error("Type %s for 'needle' is not supported.", type2char(tndle));
307 |   }
308 |   const R_xlen_t n = xlength(hsk);
309 |   const R_xlen_t k = xlength(ndle);
310 |   if (k > n) {
311 |     error("The 'needle' vector length is greater than the 'haystack' vector length.");
312 |   }
313 |   int nprotect = 0;
314 |   if (thsk != tndle) {
315 |     if (tndle == INTSXP && thsk == REALSXP) {
316 |       ndle = PROTECT(coerceVector(ndle, thsk)); nprotect++;
317 |       tndle = thsk;
318 |     } else if (tndle == REALSXP && thsk == INTSXP) {
319 |       hsk = PROTECT(coerceVector(hsk, tndle)); nprotect++;
320 |       thsk = tndle;
321 |     } else {
322 |       error("Haystack type (%s) and needle type (%s) are different."
323 |               " Please make sure that they have the same type.",
324 |               type2char(thsk), type2char(tndle));
325 |     }
326 |   }
327 |   const R_xlen_t lim_x = n - k + 1;
328 |   R_xlen_t ti = 0, x = 0;
329 |   SEXP row = PROTECT(allocVector(INTSXP, lim_x)); nprotect++;
330 |   int *restrict prow = INTEGER(row);
331 |   const int pall = !asLogical(all);
332 |   const int poverlap = !asLogical(overlap);
333 |   switch(thsk) {
334 |   case LGLSXP: {
335 |     const int *restrict int_h = LOGICAL(hsk);
336 |     const int *restrict int_n = LOGICAL(ndle);
337 |     for (int i = 0; i < lim_x; ++i) {
338 |       if (i < ti) {
339 |         continue;
340 |       }
341 |       for (int j = 0; j < k; ++j) {
342 |         if (int_h[i+j] != int_n[j]) {
343 |           goto lbl;
344 |         }
345 |       }
346 |       prow[x++] = i + 1;
347 |       if (pall) {
348 |         break;
349 |       }
350 |       if (poverlap) {
351 |         ti = i + k;
352 |       }
353 |       lbl:;
354 |     }
355 |   } break;
356 |   case INTSXP: {
357 |     const int *restrict int_h = INTEGER(hsk);
358 |     const int *restrict int_n = INTEGER(ndle);
359 |     for (int i = 0; i < lim_x; ++i) {
360 |       if (i < ti) {
361 |         continue;
362 |       }
363 |       for (int j = 0; j < k; ++j) {
364 |         if (int_h[i+j] != int_n[j]) {
365 |           goto lbi;
366 |         }
367 |       }
368 |       prow[x++] = i + 1;
369 |       if (pall) {
370 |         break;
371 |       }
372 |       if (poverlap) {
373 |         ti = i + k;
374 |       }
375 |       lbi:;
376 |     }
377 |   } break;
378 |   case REALSXP: {
379 |     const double *restrict dbl_h = REAL(hsk);
380 |     const double *restrict dbl_n = REAL(ndle);
381 |     for (int i = 0; i < lim_x; ++i) {
382 |       if (i < ti) {
383 |         continue;
384 |       }
385 |       for (int j = 0; j < k; ++j) {
386 |         if (!REQUAL(dbl_h[i+j], dbl_n[j])) {
387 |           goto lbr;
388 |         }
389 |       }
390 |       prow[x++] = i + 1;
391 |       if (pall) {
392 |         break;
393 |       }
394 |       if (poverlap) {
395 |         ti = i + k;
396 |       }
397 |       lbr:;
398 |     }
399 |   } break;
400 |   case CPLXSXP: {
401 |     const Rcomplex *restrict cpl_h = COMPLEX(hsk);
402 |     const Rcomplex *restrict cpl_n = COMPLEX(ndle);
403 |     for (int i = 0; i < lim_x; ++i) {
404 |       if (i < ti) {
405 |         continue;
406 |       }
407 |       for (int j = 0; j < k; ++j) {
408 |         if (!CEQUAL(cpl_h[i+j], cpl_n[j])) {
409 |           goto lbc;
410 |         }
411 |       }
412 |       prow[x++] = i + 1;
413 |       if (pall) {
414 |         break;
415 |       }
416 |       if (poverlap) {
417 |         ti = i + k;
418 |       }
419 |       lbc:;
420 |     }
421 |   } break;
422 |   case STRSXP: {
423 |     for (int i = 0; i < lim_x; ++i) {
424 |       if (i < ti) {
425 |         continue;
426 |       }
427 |       for (int j = 0; j < k; ++j) {
428 |         if (RCHAR(hsk, i+j) != RCHAR(ndle, j)) {
429 |           goto lbs;
430 |         }
431 |       }
432 |       prow[x++] = i + 1;
433 |       if (pall) {
434 |         break;
435 |       }
436 |       if (poverlap) {
437 |         ti = i + k;
438 |       }
439 |       lbs:;
440 |     }
441 |   } break;
442 |   }
443 |   if (x == 0) {
444 |     UNPROTECT(nprotect);
445 |     return R_NilValue;
446 |   }
447 |   SEXP ans = PROTECT(allocVector(INTSXP, x)); nprotect++;
448 |   memcpy(INTEGER(ans), prow, (unsigned)x*sizeof(int));
449 |   UNPROTECT(nprotect);
450 |   return ans;
451 | }
452 | 


--------------------------------------------------------------------------------
/src/iif.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * kit : Useful R Functions Implemented in C
  3 |  * Copyright (C) 2020-2025  Morgan Jacob
  4 |  * 
  5 |  * This program is free software: you can redistribute it and/or modify
  6 |  * it under the terms of the GNU General Public License as published by
  7 |  * the Free Software Foundation, either version 3 of the License, or
  8 |  * (at your option) any later version.
  9 |  * 
 10 |  * This program is distributed in the hope that it will be useful,
 11 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
 12 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 13 |  * GNU General Public License for more details.
 14 |  * 
 15 |  * You should have received a copy of the GNU General Public License
 16 |  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 17 |  */
 18 | 
 19 | #include "kit.h"
 20 | 
 21 | #define IIF_LOOP(a, b, n)   OMP_PARALLEL_FOR(nth)                                                  \
 22 | for (ssize_t i=0; i<len_l; ++i) {                                                                  \
 23 |   pans[i] = pl[i]==0 ? b : (pl[i]==1 ? a : n);                                                     \
 24 | }                                                                                                  \
 25 | 
 26 | #define IIF_LOGIC(x) if(len_a>1) {                                                                 \
 27 | if(len_b>1) {                                                                                      \
 28 |   if(len_na>1) {                                                                                   \
 29 |     IIF_LOOP(pa[i], pb[i], pna[i])                                                                 \
 30 |   } else {                                                                                         \
 31 |     IIF_LOOP(pa[i], pb[i], pna[0])                                                                 \
 32 |   }                                                                                                \
 33 | } else {                                                                                           \
 34 |   if(len_na>1) {                                                                                   \
 35 |     IIF_LOOP(pa[i], pb[0], pna[i])                                                                 \
 36 |   } else {                                                                                         \
 37 |     IIF_LOOP(pa[i], pb[0], pna[0])                                                                 \
 38 |   }                                                                                                \
 39 | }                                                                                                  \
 40 | } else {                                                                                           \
 41 |   if(len_b>1) {                                                                                    \
 42 |     if(len_na>1) {                                                                                 \
 43 |       IIF_LOOP(pa[0], pb[i], pna[i])                                                               \
 44 |     } else {                                                                                       \
 45 |       IIF_LOOP(pa[0], pb[i], pna[0])                                                               \
 46 |     }                                                                                              \
 47 |   } else {                                                                                         \
 48 |     if(len_na>1) {                                                                                 \
 49 |       IIF_LOOP(pa[0], pb[0], pna[i])                                                               \
 50 |     } else {                                                                                       \
 51 |       IIF_LOOP(pa[0], pb[0], pna[0])                                                               \
 52 |     }                                                                                              \
 53 |   }                                                                                                \
 54 | }                                                                                                  \
 55 | } else {                                                                                           \
 56 |   if(len_a>1) {                                                                                    \
 57 |     if(len_b>1) {                                                                                  \
 58 |       IIF_LOOP(pa[i], pb[i], x)                                                                    \
 59 |     } else {                                                                                       \
 60 |       IIF_LOOP(pa[i], pb[0], x)                                                                    \
 61 |     }                                                                                              \
 62 |   } else {                                                                                         \
 63 |     if(len_b>1) {                                                                                  \
 64 |       IIF_LOOP(pa[0], pb[i], x)                                                                    \
 65 |     } else {                                                                                       \
 66 |       IIF_LOOP(pa[0], pb[0], x)                                                                    \
 67 |     }                                                                                              \
 68 |   }                                                                                                \
 69 | 
 70 | SEXP iifR(SEXP l, SEXP a, SEXP b, SEXP na, SEXP tprom, SEXP nthreads) {
 71 |   if (!isLogical(l)) {
 72 |     error("Argument 'test' must be logical.");
 73 |   }
 74 |   if (isS4(a) || isS4(b)) {
 75 |     error("S4 class objects are not supported.");
 76 |   }
 77 |   if (!IS_BOOL(tprom)) {
 78 |     error("Argument 'tprom' must be either FALSE or TRUE and length 1.");
 79 |   }
 80 |   int nth = asInteger(nthreads);
 81 |   nth = nth > max_thread ? max_thread : (nth < min_thread ? min_thread : nth); //revisit this
 82 |   const R_xlen_t len_l  = xlength(l);
 83 |   const R_xlen_t len_a  = xlength(a);
 84 |   const R_xlen_t len_b  = xlength(b);
 85 |   const R_xlen_t len_na = xlength(na);
 86 |   SEXPTYPE ta = UTYPEOF(a);
 87 |   SEXPTYPE tb = UTYPEOF(b);
 88 |   SEXPTYPE tn = UTYPEOF(na);
 89 |   const bool na_non_null = !isNull(na);
 90 |   const bool tp = LOGICAL(tprom)[0] == 0;
 91 |   int nprotect = 0;
 92 |   if (ta != tb) {
 93 |     if (tp) {
 94 |       error("'yes' is of type %s but 'no' is of type %s. Please make sure that both arguments have the same type.", type2char(ta), type2char(tb));
 95 |     } else {
 96 |       if(IS_VALID_TYPE(ta) && IS_VALID_TYPE(tb)) {
 97 |         if(ta < tb) {
 98 |           SEXP tmp = PROTECT(coerceVector(a, tb)); nprotect++;
 99 |           a = tmp;
100 |           ta = tb;
101 |           copyMostAttrib(b, a);
102 |         } else {
103 |           SEXP tmp = PROTECT(coerceVector(b, ta)); nprotect++;
104 |           b = tmp;
105 |           tb = ta;
106 |           copyMostAttrib(a, b);
107 |         }
108 |       } else {
109 |         if (!IS_VALID_TYPE(ta)) {
110 |           error("Type %s (argument 'yes') is not supported.", type2char(ta));
111 |         } else {
112 |           error("Type %s (argument 'no') is not supported.", type2char(tb));
113 |         }
114 |       }
115 |     }
116 |   }
117 |   if (len_a!=1 && len_a!=len_l) {
118 |     error("Length of 'yes' is %zu but must be 1 or length of 'test' (%zu).", len_a, len_l);
119 |   }
120 |   if (len_b!=1 && len_b!=len_l) {
121 |     error("Length of 'no' is %zu but must be 1 or length of 'test' (%zu).", len_b, len_l);
122 |   }
123 |   SEXP class_a = PROTECT(getAttrib(a, R_ClassSymbol)); nprotect++;
124 |   SEXP class_b = PROTECT(getAttrib(b, R_ClassSymbol)); nprotect++;
125 |   const bool same_class = !R_compute_identical(class_a, class_b, 0);
126 |   if (tp) {
127 |     if (same_class) {
128 |       error("'yes' has different class than 'no'. Please make sure that both arguments have the same class."); 
129 |     }
130 |   } else {
131 |     if (!same_class) {
132 |       copyMostAttrib(a, b);
133 |     }
134 |   }
135 |   if (na_non_null) {
136 |     if (len_na!=1 && len_na!=len_l) {
137 |       error("Length of 'na' is %zu but must be 1 or length of 'test' (%zu).", len_na, len_l);
138 |     }
139 |     if (tn != ta) {
140 |       if (tp) {
141 |         error("'yes' is of type %s but 'na' is of type %s. Please make sure that both arguments have the same type.", type2char(ta), type2char(tn));
142 |       } else if (tn > ta) {
143 |         error("Type of 'na' (%s) is higher than %s (highest type of 'yes' and 'no'). Please make sure that it is at lower or the same.", type2char(tn), type2char(ta));  
144 |       } else {
145 |         SEXP tmp = PROTECT(coerceVector(na, ta)); nprotect++;
146 |         na = tmp;
147 |         tn = ta;
148 |         copyMostAttrib(a, na);
149 |       }
150 |     }
151 |     SEXP class_na = PROTECT(getAttrib(na, R_ClassSymbol)); nprotect++;
152 |     if (tp) {
153 |       if (!R_compute_identical(class_a, class_na, 0)) {
154 |         error("'yes' has different class than 'na'. Please make sure that both arguments have the same class.");  
155 |       }
156 |     } else {
157 |       if (!R_compute_identical(class_a, class_na, 0)) {
158 |         copyMostAttrib(a, na); 
159 |       }
160 |     }
161 |   }
162 |   if (isFactor(a)) {
163 |     SEXP level_a = PROTECT(getAttrib(a, R_LevelsSymbol)); nprotect++;
164 |     SEXP level_b = PROTECT(getAttrib(b, R_LevelsSymbol)); nprotect++;
165 |     if (!R_compute_identical(level_a, level_b, 0)) {
166 |       error("'yes' and 'no' are both type factor but their levels are different.");
167 |     }
168 |     if (na_non_null) {
169 |       SEXP level_na = PROTECT(getAttrib(na, R_LevelsSymbol)); nprotect++;
170 |       if (!R_compute_identical(level_a, level_na, 0)) {
171 |         error("'yes' and 'na' are both type factor but their levels are different.");
172 |       }
173 |     }
174 |   }
175 |   const int *restrict pl = LOGICAL(l);
176 |   SEXP ans = PROTECT(allocVector(ta, len_l)); nprotect++;
177 |   copyMostAttrib(a, ans);
178 |   switch(ta) {
179 |   case LGLSXP: {
180 |     int *restrict pans = LOGICAL(ans);
181 |     const int *restrict pa = LOGICAL(a);
182 |     const int *restrict pb = LOGICAL(b);
183 |     if(na_non_null) {
184 |       const int *restrict pna = LOGICAL(na);
185 |       IIF_LOGIC(NA_LOGICAL)
186 |     }
187 |   } break;
188 |   case INTSXP: {
189 |     int *restrict pans = INTEGER(ans);
190 |     const int *restrict pa = INTEGER(a);
191 |     const int *restrict pb = INTEGER(b);
192 |     if(na_non_null) {
193 |       const int *restrict pna = INTEGER(na);
194 |       IIF_LOGIC(NA_INTEGER)
195 |     }
196 |   } break;
197 |   case REALSXP: {
198 |     double *restrict pans = REAL(ans);
199 |     const double *restrict pa = REAL(a);
200 |     const double *restrict pb = REAL(b);
201 |     if(na_non_null) {
202 |       const double *restrict pna = REAL(na);
203 |       IIF_LOGIC(NA_REAL)
204 |     }
205 |   } break;
206 |   case CPLXSXP : {
207 |     Rcomplex *restrict pans = COMPLEX(ans);
208 |     const Rcomplex *restrict pa = COMPLEX(a);
209 |     const Rcomplex *restrict pb = COMPLEX(b);
210 |     Rcomplex NA_CPLX; NA_CPLX.r = NA_REAL; NA_CPLX.i = NA_REAL; // deal with that across all functions
211 |     if(na_non_null) {
212 |       const Rcomplex *restrict pna = COMPLEX(na);
213 |       IIF_LOGIC(NA_CPLX)
214 |     }
215 |   } break;
216 |   case STRSXP : {
217 |     const ssize_t amask = len_a>1 ? SSIZE_MAX : 0;
218 |     const ssize_t bmask = len_b>1 ? SSIZE_MAX : 0;
219 |     const ssize_t namask = len_na>1 ? SSIZE_MAX : 0;
220 |     const SEXP *restrict pa = STRING_PTR_RO(a);
221 |     const SEXP *restrict pb = STRING_PTR_RO(b);
222 |     const SEXP *restrict pna = na_non_null ? STRING_PTR_RO(na) : NULL;
223 |     for (ssize_t i=0; i<len_l; ++i) {
224 |       SET_STRING_ELT(ans, i, pl[i] == 1 ? pa[i & amask] : (pl[i] == 0 ? pb[i & bmask] :(na_non_null ? pna[i & namask] : NA_STRING)));
225 |     }
226 |   } break;
227 |   case VECSXP : {
228 |     const ssize_t amask = len_a>1 ? SSIZE_MAX : 0;
229 |     const ssize_t bmask = len_b>1 ? SSIZE_MAX : 0;
230 |     const ssize_t namask = len_na>1 ? SSIZE_MAX : 0;
231 |     const SEXP *restrict pa  = SEXPPTR_RO(a);
232 |     const SEXP *restrict pb  = SEXPPTR_RO(b);
233 |     const SEXP *restrict pna = na_non_null ? SEXPPTR_RO(na) : NULL;
234 |     for (ssize_t i=0; i<len_l; ++i) {
235 |       if (pl[i]==NA_LOGICAL) {
236 |         if (na_non_null) {
237 |           SET_VECTOR_ELT(ans, i, pna[i & namask]);
238 |         }
239 |         continue;
240 |       }
241 |       SET_VECTOR_ELT(ans, i, pl[i]==0 ? pb[i & bmask] : pa[i & amask]);
242 |     }
243 |   } break;
244 |   default:
245 |     error("Type %s is not supported.", type2char(ta));
246 |   }
247 |   SEXP l_names = PROTECT(getAttrib(l, R_NamesSymbol)); nprotect++;
248 |   if (!isNull(l_names)) {
249 |     setAttrib(ans, R_NamesSymbol, l_names);
250 |   }
251 |   UNPROTECT(nprotect);
252 |   return ans;
253 | }
254 | 
255 | SEXP nifR(SEXP na, SEXP rho, SEXP args) {
256 |   R_len_t n=length(args);
257 |   if (n % 2) {
258 |     error("Received %d inputs; please supply an even number of arguments in ... "
259 |             "consisting of logical condition, resulting value pairs (in that order). "
260 |             "Note that argument 'default' must be named explicitly (e.g.: default=0)", n);
261 |   }
262 |   SEXP cons0 = PROTECT(eval(PTR_ETL(args,0),rho));
263 |   SEXP out0 = PROTECT(eval(PTR_ETL(args,1),rho));
264 |   if (isS4(out0)) {
265 |     error("S4 class objects are not supported.");
266 |   }
267 |   SEXP out0c = PROTECT(getAttrib(out0, R_ClassSymbol));
268 |   SEXP out0l = PROTECT(getAttrib(out0, R_LevelsSymbol));
269 |   SEXP cons = R_NilValue, outs = R_NilValue;
270 |   PROTECT_INDEX Icons, Iouts;
271 |   PROTECT_WITH_INDEX(cons, &Icons);
272 |   PROTECT_WITH_INDEX(outs, &Iouts);
273 |   const SEXPTYPE type0 = UTYPEOF(out0);
274 |   const R_xlen_t len0 = xlength(cons0);
275 |   const R_xlen_t len_na = xlength(na);
276 |   R_xlen_t len2 = len0;
277 |   const bool nonna = !isNull(na);
278 |   const bool ifact = isFactor(out0);
279 |   if (nonna) {
280 |     if (len_na != 1 && len_na != len0) {
281 |       error("Length of 'default' must either be 1 or length of logical condition."); // maybe improve message
282 |     }
283 |     SEXPTYPE tn = UTYPEOF(na);
284 |     if (tn != type0) {
285 |       error("Resulting value is of type %s but 'default' is of type %s. "
286 |               "Please make sure that both arguments have the same type.", type2char(type0), type2char(tn));
287 |     }
288 |     if (!R_compute_identical(out0c, PROTECT(getAttrib(na, R_ClassSymbol)), 0)) {
289 |       error("Resulting value has different class than 'default'. "
290 |               "Please make sure that both arguments have the same class.");
291 |     }
292 |     UNPROTECT(1);
293 |     if (ifact) {
294 |       if (!R_compute_identical(out0l, PROTECT(getAttrib(na, R_LevelsSymbol)), 0)) {
295 |         error("Resulting value and 'default' are both type factor but their levels are different.");
296 |       }
297 |       UNPROTECT(1);
298 |     }
299 |   }
300 |   SEXP ans = PROTECT(allocVector(type0, len0));
301 |   SEXP tracker = PROTECT(allocVector(INTSXP, len0));
302 |   int *restrict p = INTEGER(tracker);
303 |   copyMostAttrib(out0, ans);
304 |   n = n/2;
305 |   const int dn = n-1;
306 |   for (int i=0; i<n; ++i) {
307 |     const bool imask = i==0;
308 |     REPROTECT(cons = eval(PTR_ETL(args, 2*i), rho), Icons);
309 |     REPROTECT(outs = eval(PTR_ETL(args, 2*i+1), rho), Iouts);
310 |     if (isS4(outs)) {
311 |       error("S4 class objects are not supported.");
312 |     }
313 |     if (!isLogical(cons)) {
314 |       error("Argument #%d must be logical.", 2*i+1);
315 |     }
316 |     const int *restrict pcons = LOGICAL(cons);
317 |     R_xlen_t l = 0;
318 |     SEXPTYPE type1 = UTYPEOF(outs);
319 |     if (!imask) {
320 |       if (xlength(cons) != len0) {
321 |         error("Argument #%d has a different length than argument #1. "
322 |                 "Please make sure all logical conditions have the same length.",
323 |                 i*2+1);
324 |       }
325 |       if (type1 != type0) {
326 |         error("Argument #%d is of type %s, however argument #2 is of type %s. "
327 |                 "Please make sure all output values have the same type.",
328 |                 i*2+2, type2char(type1), type2char(type0));
329 |       }
330 |       if (!R_compute_identical(out0c, PROTECT(getAttrib(outs, R_ClassSymbol)), 0)) {
331 |         error("Argument #%d has different class than argument #2, "
332 |                 "Please make sure all output values have the same class.", i*2+2);
333 |       }
334 |       UNPROTECT(1);
335 |       if (ifact) {
336 |         if (!R_compute_identical(out0l, PROTECT(getAttrib(outs, R_LevelsSymbol)), 0)) {
337 |           error("Argument #2 and argument #%d are both factor but their levels are different.", i*2+2);
338 |         }
339 |         UNPROTECT(1);
340 |       }
341 |     }
342 |     const R_xlen_t len1 = xlength(outs);
343 |     if (len1 != len0 && len1 != 1) {
344 |       error("Length of output value #%d must either be 1 or length of logical condition.", i*2+2);
345 |     }
346 |     const ssize_t amask = len1>1 ? SSIZE_MAX : 0;
347 |     switch(type1) {
348 |     case LGLSXP: {
349 |       const int *restrict pouts = LOGICAL(outs);
350 |       int *restrict pans = LOGICAL(ans);
351 |       if (imask) {
352 |         for (ssize_t j=0; j<len2; ++j) {
353 |           if (pcons[j]==1) {
354 |             pans[j] = pouts[j & amask];
355 |           } else {
356 |             p[l++] = j;
357 |           }
358 |         }
359 |       } else {
360 |         for (ssize_t j=0; j<len2; ++j) {
361 |           if (pcons[p[j]]==1) {
362 |             pans[p[j]] = pouts[p[j] & amask];
363 |           } else {
364 |             p[l++] = p[j];
365 |           }
366 |         }
367 |       }
368 |       if (i==dn) {
369 |         const ssize_t bmask = len_na>1 ? SSIZE_MAX : 0;
370 |         const int *restrict pna = nonna ? LOGICAL(na) : NULL;
371 |         for (ssize_t j=0; j<l; ++j) {
372 |           pans[p[j]] = nonna ? pna[p[j] & bmask] : NA_LOGICAL;
373 |         }
374 |       }
375 |     } break;
376 |     case INTSXP: {
377 |       const int *restrict pouts = INTEGER(outs);
378 |       int *restrict pans = INTEGER(ans);
379 |       if (imask) {
380 |         for (ssize_t j=0; j<len2; ++j) {
381 |           if (pcons[j]==1) {
382 |             pans[j] = pouts[j & amask];
383 |           } else {
384 |             p[l++] = j;
385 |           }
386 |         }
387 |       } else {
388 |         for (ssize_t j=0; j<len2; ++j) {
389 |           if (pcons[p[j]]==1) {
390 |             pans[p[j]] = pouts[p[j] & amask];
391 |           } else {
392 |             p[l++] = p[j];
393 |           }
394 |         }
395 |       }
396 |       if (i==dn) {
397 |         const ssize_t bmask = len_na>1 ? SSIZE_MAX : 0;
398 |         const int *restrict pna = nonna ? INTEGER(na) : NULL;
399 |         for (ssize_t j=0; j<l; ++j) {
400 |           pans[p[j]] = nonna ? pna[p[j] & bmask] : NA_INTEGER;
401 |         }
402 |       }
403 |     } break;
404 |     case REALSXP: {
405 |       const double *restrict pouts = REAL(outs);
406 |       double *restrict pans = REAL(ans);
407 |       if (imask) {
408 |         for (ssize_t j=0; j<len2; ++j) {
409 |           if (pcons[j]==1) {
410 |             pans[j] = pouts[j & amask];
411 |           } else {
412 |             p[l++] = j;
413 |           }
414 |         }
415 |       } else {
416 |         for (ssize_t j=0; j<len2; ++j) {
417 |           if (pcons[p[j]]==1) {
418 |             pans[p[j]] = pouts[p[j] & amask];
419 |           } else {
420 |             p[l++] = p[j];
421 |           }
422 |         }
423 |       }
424 |       if (i==dn) {
425 |         const ssize_t bmask = len_na>1 ? SSIZE_MAX : 0;
426 |         const double *restrict pna = nonna ? REAL(na) : NULL;
427 |         for (ssize_t j=0; j<l; ++j) {
428 |           pans[p[j]] = nonna ? pna[p[j] & bmask] : NA_REAL;
429 |         }
430 |       }
431 |     } break;
432 |     case CPLXSXP: {
433 |       const Rcomplex *restrict pouts = COMPLEX(outs);
434 |       Rcomplex *restrict pans = COMPLEX(ans);
435 |       if (imask) {
436 |         for (ssize_t j=0; j<len2; ++j) {
437 |           if (pcons[j]==1) {
438 |             pans[j] = pouts[j & amask];
439 |           } else {
440 |             p[l++] = j;
441 |           }
442 |         }
443 |       } else {
444 |         for (ssize_t j=0; j<len2; ++j) {
445 |           if (pcons[p[j]]==1) {
446 |             pans[p[j]] = pouts[p[j] & amask];
447 |           } else {
448 |             p[l++] = p[j];
449 |           }
450 |         }
451 |       }
452 |       if (i==dn) {
453 |         const ssize_t bmask = len_na>1 ? SSIZE_MAX : 0;
454 |         Rcomplex NA_CPLX; NA_CPLX.r = NA_REAL; NA_CPLX.i = NA_REAL; // deal with that across all functions
455 |         const Rcomplex *restrict pna = nonna ? COMPLEX(na) : NULL;
456 |         for (ssize_t j=0; j<l; ++j) {
457 |           pans[p[j]] = nonna ? pna[p[j] & bmask] : NA_CPLX;
458 |         }
459 |       }
460 |     } break;
461 |     case STRSXP: {
462 |       const SEXP *restrict pouts = STRING_PTR_RO(outs);
463 |       if (imask) {
464 |         for (ssize_t j=0; j<len2; ++j) {
465 |           if (pcons[j]==1) {
466 |             SET_STRING_ELT(ans, j, pouts[j & amask]);
467 |           } else {
468 |             p[l++] = j;
469 |           }
470 |         }
471 |       } else {
472 |         for (ssize_t j=0; j<len2; ++j) {
473 |           if (pcons[p[j]]==1) {
474 |             SET_STRING_ELT(ans, p[j], pouts[p[j] & amask]);
475 |           } else {
476 |             p[l++] = p[j];
477 |           }
478 |         }
479 |       }
480 |       if (i==dn) {
481 |         const ssize_t bmask = len_na>1 ? SSIZE_MAX : 0;
482 |         const SEXP *restrict pna = nonna ? STRING_PTR_RO(na) : NULL;
483 |         for (ssize_t j=0; j<l; ++j) {
484 |           SET_STRING_ELT(ans, p[j], nonna ? pna[p[j] & bmask] : NA_STRING);
485 |         }
486 |       }
487 |     } break;
488 |     case VECSXP: {
489 |       const SEXP *restrict pouts = SEXPPTR_RO(outs);
490 |       if (imask) {
491 |         for (ssize_t j=0; j<len2; ++j) {
492 |           if (pcons[j]==1) {
493 |             SET_VECTOR_ELT(ans, j, pouts[j & amask]);
494 |           } else {
495 |             p[l++] = j;
496 |           }
497 |         }
498 |       } else {
499 |         for (ssize_t j=0; j<len2; ++j) {
500 |           if (pcons[p[j]]==1) {
501 |             SET_VECTOR_ELT(ans, p[j], pouts[p[j] & amask]);
502 |           } else {
503 |             p[l++] = p[j];
504 |           }
505 |         }
506 |       }
507 |       if (i==dn) {
508 |         const ssize_t bmask = len_na>1 ? SSIZE_MAX : 0;
509 |         const SEXP *restrict pna = nonna ? SEXPPTR_RO(na) : NULL;
510 |         for (ssize_t j=0; j<l; ++j) {
511 |           if (nonna) {
512 |             SET_VECTOR_ELT(ans, p[j], pna[p[j] & bmask]);
513 |           }
514 |         }
515 |       }
516 |     } break;
517 |     default:
518 |       error("Type %s is not supported.", type2char(type1));
519 |     }
520 |     if (l==0) {
521 |       break;
522 |     }
523 |     len2 = l;
524 |   }
525 |   UNPROTECT(8);
526 |   return ans;
527 | }
528 | 
529 | SEXP nifInternalR(SEXP na, SEXP args) { // # nocov start
530 |   R_len_t n=length(args);
531 |   if (n % 2) {
532 |     error("Received %d inputs; please supply an even number of arguments in ... "
533 |             "consisting of logical condition, resulting value pairs (in that order). "
534 |             "Note that argument 'default' must be named explicitly (e.g.: default=0)", n);
535 |   }
536 |   SEXP cons0 = PROTECT(PTR_ETL(args,0));
537 |   SEXP out0 = PROTECT(PTR_ETL(args,1));
538 |   if (isS4(out0)) {
539 |       error("S4 class objects are not supported.");
540 |   }
541 |   SEXP out0c = PROTECT(getAttrib(out0, R_ClassSymbol));
542 |   SEXP out0l = PROTECT(getAttrib(out0, R_LevelsSymbol));
543 |   SEXP cons = R_NilValue, outs = R_NilValue;
544 |   PROTECT_INDEX Icons, Iouts;
545 |   PROTECT_WITH_INDEX(cons, &Icons);
546 |   PROTECT_WITH_INDEX(outs, &Iouts);
547 |   const SEXPTYPE type0 = UTYPEOF(out0);
548 |   const R_xlen_t len0 = xlength(cons0);
549 |   const R_xlen_t len_na = xlength(na);
550 |   R_xlen_t len2 = len0;
551 |   const bool nonna = !isNull(na);
552 |   const bool ifact = isFactor(out0);
553 |   if (nonna) {
554 |     if (len_na != 1 && len_na != len0) {
555 |       error("Length of 'default' must either be 1 or length of logical condition."); // maybe improve message
556 |     }
557 |     SEXPTYPE tn = UTYPEOF(na);
558 |     if (tn != type0) {
559 |       error("Resulting value is of type %s but 'default' is of type %s. "
560 |             "Please make sure that both arguments have the same type.", type2char(type0), type2char(tn));
561 |     }
562 |     if (!R_compute_identical(out0c, PROTECT(getAttrib(na, R_ClassSymbol)), 0)) {
563 |       error("Resulting value has different class than 'default'. "
564 |             "Please make sure that both arguments have the same class.");
565 |     }
566 |     UNPROTECT(1);
567 |     if (ifact) {
568 |       if (!R_compute_identical(out0l, PROTECT(getAttrib(na, R_LevelsSymbol)), 0)) {
569 |         error("Resulting value and 'default' are both type factor but their levels are different.");
570 |       }
571 |       UNPROTECT(1);
572 |     }
573 |   }
574 |   SEXP ans = PROTECT(allocVector(type0, len0));
575 |   SEXP tracker = PROTECT(allocVector(INTSXP, len0));
576 |   int *restrict p = INTEGER(tracker);
577 |   copyMostAttrib(out0, ans);
578 |   n = n/2;
579 |   const int dn = n-1;
580 |   for (int i=0; i<n; ++i) {
581 |     const bool imask = i==0;
582 |     REPROTECT(cons = PTR_ETL(args, 2*i), Icons);
583 |     REPROTECT(outs = PTR_ETL(args, 2*i+1), Iouts);
584 |       if (isS4(outs)) {
585 |       error("S4 class objects are not supported.");
586 |     }
587 |     if (!isLogical(cons)) {
588 |       error("Argument #%d must be logical.", 2*i+1);
589 |     }
590 |     const int *restrict pcons = LOGICAL(cons);
591 |     R_xlen_t l = 0;
592 |     SEXPTYPE type1 = UTYPEOF(outs);
593 |     if (!imask) {
594 |       if (xlength(cons) != len0) {
595 |         error("Argument #%d has a different length than argument #1. "
596 |                 "Please make sure all logical conditions have the same length.",
597 |                 i*2+1);
598 |       }
599 |       if (type1 != type0) {
600 |         error("Argument #%d is of type %s, however argument #2 is of type %s. "
601 |                 "Please make sure all output values have the same type.",
602 |                 i*2+2, type2char(type1), type2char(type0));
603 |       }
604 |       if (!R_compute_identical(out0c, PROTECT(getAttrib(outs, R_ClassSymbol)), 0)) {
605 |         error("Argument #%d has different class than argument #2, "
606 |                 "Please make sure all output values have the same class.", i*2+2);
607 |       }
608 |       UNPROTECT(1);
609 |       if (ifact) {
610 |         if (!R_compute_identical(out0l, PROTECT(getAttrib(outs, R_LevelsSymbol)), 0)) {
611 |           error("Argument #2 and argument #%d are both factor but their levels are different.", i*2+2);
612 |         }
613 |         UNPROTECT(1);
614 |       }
615 |     }
616 |     const R_xlen_t len1 = xlength(outs);
617 |     if (len1 != len0 && len1 != 1) {
618 |       error("Length of output value #%d must either be 1 or length of logical condition.", i*2+2);
619 |     }
620 |     const ssize_t amask = len1>1 ? SSIZE_MAX : 0;
621 |     switch(type1) {
622 |     case LGLSXP: {
623 |       const int *restrict pouts = LOGICAL(outs);
624 |       int *restrict pans = LOGICAL(ans);
625 |       if (imask) {
626 |         for (ssize_t j=0; j<len2; ++j) {
627 |           if (pcons[j]==1) {
628 |             pans[j] = pouts[j & amask];
629 |           } else {
630 |             p[l++] = j;
631 |           }
632 |         }
633 |       } else {
634 |         for (ssize_t j=0; j<len2; ++j) {
635 |           if (pcons[p[j]]==1) {
636 |             pans[p[j]] = pouts[p[j] & amask];
637 |           } else {
638 |             p[l++] = p[j];
639 |           }
640 |         }
641 |       }
642 |       if (i==dn) {
643 |         const ssize_t bmask = len_na>1 ? SSIZE_MAX : 0;
644 |         const int *restrict pna = nonna ? LOGICAL(na) : NULL;
645 |         for (ssize_t j=0; j<l; ++j) {
646 |           pans[p[j]] = nonna ? pna[p[j] & bmask] : NA_LOGICAL;
647 |         }
648 |       }
649 |     } break;
650 |     case INTSXP: {
651 |       const int *restrict pouts = INTEGER(outs);
652 |       int *restrict pans = INTEGER(ans);
653 |       if (imask) {
654 |         for (ssize_t j=0; j<len2; ++j) {
655 |           if (pcons[j]==1) {
656 |             pans[j] = pouts[j & amask];
657 |           } else {
658 |             p[l++] = j;
659 |           }
660 |         }
661 |       } else {
662 |         for (ssize_t j=0; j<len2; ++j) {
663 |           if (pcons[p[j]]==1) {
664 |             pans[p[j]] = pouts[p[j] & amask];
665 |           } else {
666 |             p[l++] = p[j];
667 |           }
668 |         }
669 |       }
670 |       if (i==dn) {
671 |         const ssize_t bmask = len_na>1 ? SSIZE_MAX : 0;
672 |         const int *restrict pna = nonna ? INTEGER(na) : NULL;
673 |         for (ssize_t j=0; j<l; ++j) {
674 |           pans[p[j]] = nonna ? pna[p[j] & bmask] : NA_INTEGER;
675 |         }
676 |       }
677 |     } break;
678 |     case REALSXP: {
679 |       const double *restrict pouts = REAL(outs);
680 |       double *restrict pans = REAL(ans);
681 |       if (imask) {
682 |         for (ssize_t j=0; j<len2; ++j) {
683 |           if (pcons[j]==1) {
684 |             pans[j] = pouts[j & amask];
685 |           } else {
686 |             p[l++] = j;
687 |           }
688 |         }
689 |       } else {
690 |         for (ssize_t j=0; j<len2; ++j) {
691 |           if (pcons[p[j]]==1) {
692 |             pans[p[j]] = pouts[p[j] & amask];
693 |           } else {
694 |             p[l++] = p[j];
695 |           }
696 |         }
697 |       }
698 |       if (i==dn) {
699 |         const ssize_t bmask = len_na>1 ? SSIZE_MAX : 0;
700 |         const double *restrict pna = nonna ? REAL(na) : NULL;
701 |         for (ssize_t j=0; j<l; ++j) {
702 |           pans[p[j]] = nonna ? pna[p[j] & bmask] : NA_REAL;
703 |         }
704 |       }
705 |     } break;
706 |     case CPLXSXP: {
707 |       const Rcomplex *restrict pouts = COMPLEX(outs);
708 |       Rcomplex *restrict pans = COMPLEX(ans);
709 |       if (imask) {
710 |         for (ssize_t j=0; j<len2; ++j) {
711 |           if (pcons[j]==1) {
712 |             pans[j] = pouts[j & amask];
713 |           } else {
714 |             p[l++] = j;
715 |           }
716 |         }
717 |       } else {
718 |         for (ssize_t j=0; j<len2; ++j) {
719 |           if (pcons[p[j]]==1) {
720 |             pans[p[j]] = pouts[p[j] & amask];
721 |           } else {
722 |             p[l++] = p[j];
723 |           }
724 |         }
725 |       }
726 |       if (i==dn) {
727 |         const ssize_t bmask = len_na>1 ? SSIZE_MAX : 0;
728 |         Rcomplex NA_CPLX; NA_CPLX.r = NA_REAL; NA_CPLX.i = NA_REAL; // deal with that across all functions
729 |         const Rcomplex *restrict pna = nonna ? COMPLEX(na) : NULL;
730 |         for (ssize_t j=0; j<l; ++j) {
731 |           pans[p[j]] = nonna ? pna[p[j] & bmask] : NA_CPLX;
732 |         }
733 |       }
734 |     } break;
735 |     case STRSXP: {
736 |       const SEXP *restrict pouts = STRING_PTR_RO(outs);
737 |       if (imask) {
738 |         for (ssize_t j=0; j<len2; ++j) {
739 |           if (pcons[j]==1) {
740 |             SET_STRING_ELT(ans, j, pouts[j & amask]);
741 |           } else {
742 |             p[l++] = j;
743 |           }
744 |         }
745 |       } else {
746 |         for (ssize_t j=0; j<len2; ++j) {
747 |           if (pcons[p[j]]==1) {
748 |             SET_STRING_ELT(ans, p[j], pouts[p[j] & amask]);
749 |           } else {
750 |             p[l++] = p[j];
751 |           }
752 |         }
753 |       }
754 |       if (i==dn) {
755 |         const ssize_t bmask = len_na>1 ? SSIZE_MAX : 0;
756 |         const SEXP *restrict pna = nonna ? STRING_PTR_RO(na) : NULL;
757 |         for (ssize_t j=0; j<l; ++j) {
758 |           SET_STRING_ELT(ans, p[j], nonna ? pna[p[j] & bmask] : NA_STRING);
759 |         }
760 |       }
761 |     } break;
762 |     case VECSXP: {
763 |       const SEXP *restrict pouts = SEXPPTR_RO(outs);
764 |       if (imask) {
765 |         for (ssize_t j=0; j<len2; ++j) {
766 |           if (pcons[j]==1) {
767 |             SET_VECTOR_ELT(ans, j, pouts[j & amask]);
768 |           } else {
769 |             p[l++] = j;
770 |           }
771 |         }
772 |       } else {
773 |         for (ssize_t j=0; j<len2; ++j) {
774 |           if (pcons[p[j]]==1) {
775 |             SET_VECTOR_ELT(ans, p[j], pouts[p[j] & amask]);
776 |           } else {
777 |             p[l++] = p[j];
778 |           }
779 |         }
780 |       }
781 |       if (i==dn) {
782 |         const ssize_t bmask = len_na>1 ? SSIZE_MAX : 0;
783 |         const SEXP *restrict pna = nonna ? SEXPPTR_RO(na) : NULL;
784 |         for (ssize_t j=0; j<l; ++j) {
785 |           if (nonna) {
786 |             SET_VECTOR_ELT(ans, p[j], pna[p[j] & bmask]);
787 |           }
788 |         }
789 |       }
790 |     } break;
791 |     default:
792 |       error("Type %s is not supported.", type2char(type1));
793 |     }
794 |     if (l==0) {
795 |       break;
796 |     }
797 |     len2 = l;
798 |   }
799 |   UNPROTECT(8);
800 |   return ans;
801 | } // # nocov end
802 | 


--------------------------------------------------------------------------------
/src/init.c:
--------------------------------------------------------------------------------
 1 | #include "kit.h"
 2 | 
 3 | static const R_CallMethodDef CallEntries[] = {
 4 |   {"CcharToFactR",             (DL_FUNC) &charToFactR,             -1},
 5 |   {"CcountNAR",                (DL_FUNC) &countNAR,                -1},
 6 |   {"CcountOccurR",             (DL_FUNC) &countOccurR,             -1},
 7 |   {"CcountR",                  (DL_FUNC) &countR,                  -1},
 8 |   {"CcpsortR",                 (DL_FUNC) &cpsortR,                 -1},
 9 |   {"CdupR",                    (DL_FUNC) &dupR,                    -1},
10 |   {"CdupLenR",                 (DL_FUNC) &dupLenR,                 -1},
11 |   {"CfposR",                   (DL_FUNC) &fposR,                   -1},
12 |   {"CiifR",                    (DL_FUNC) &iifR,                    -1},
13 |   {"CnifR",                    (DL_FUNC) &nifR,                    -1},
14 |   {"CnswitchR",                (DL_FUNC) &nswitchR,                -1},
15 |   {"CompEnabledR",             (DL_FUNC) &ompEnabledR,             -1},
16 |   {"CpanyR",                   (DL_FUNC) &panyR,                   -1},
17 |   {"CpallR",                   (DL_FUNC) &pallR,                   -1},
18 |   {"CpcountR",                 (DL_FUNC) &pcountR,                 -1},
19 |   {"CpcountNAR",               (DL_FUNC) &pcountNAR,               -1},
20 |   {"CpfirstR",                 (DL_FUNC) &pfirstR,                 -1},
21 |   {"CpmeanR",                  (DL_FUNC) &pmeanR,                  -1},
22 |   {"CpprodR",                  (DL_FUNC) &pprodR,                  -1},
23 |   {"CpsumR",                   (DL_FUNC) &psumR,                   -1},
24 |   {"CsetlevelsR",              (DL_FUNC) &setlevelsR,              -1},
25 |   {"CtopnR",                   (DL_FUNC) &topnR,                   -1},
26 |   {"CvswitchR",                (DL_FUNC) &vswitchR,                -1},
27 |   {"CcreateMappingObjectR",    (DL_FUNC) &createMappingObjectR,    -1},
28 |   {"CgetMappingObjectR",       (DL_FUNC) &getMappingObjectR,       -1},
29 |   {"CclearMappingObjectR",     (DL_FUNC) &clearMappingObjectR,     -1},
30 |   {NULL,                       NULL,                               -1}
31 | };
32 | 
33 | void R_init_kit(DllInfo *dll) {
34 |   R_registerRoutines(dll, NULL, CallEntries, NULL, NULL);
35 |   R_useDynamicSymbols(dll, FALSE);
36 |   R_RegisterCCallable("kit", "CcharToFactR",             (DL_FUNC) &charToFactR);
37 |   R_RegisterCCallable("kit", "CcountNAR",                (DL_FUNC) &countNAR);
38 |   R_RegisterCCallable("kit", "CcountOccurR",             (DL_FUNC) &countOccurR);
39 |   R_RegisterCCallable("kit", "CcountR",                  (DL_FUNC) &countR);
40 |   R_RegisterCCallable("kit", "CcpsortR",                 (DL_FUNC) &cpsortR);
41 |   R_RegisterCCallable("kit", "CdupR",                    (DL_FUNC) &dupR);
42 |   R_RegisterCCallable("kit", "CdupLenR",                 (DL_FUNC) &dupLenR);
43 |   R_RegisterCCallable("kit", "CfposR",                   (DL_FUNC) &fposR);
44 |   R_RegisterCCallable("kit", "CiifR",                    (DL_FUNC) &iifR);
45 |   R_RegisterCCallable("kit", "CnifR",                    (DL_FUNC) &nifR);
46 |   R_RegisterCCallable("kit", "CnswitchR",                (DL_FUNC) &nswitchR);
47 |   R_RegisterCCallable("kit", "CpanyR",                   (DL_FUNC) &panyR);
48 |   R_RegisterCCallable("kit", "CpallR",                   (DL_FUNC) &pallR);
49 |   R_RegisterCCallable("kit", "CpcountR",                 (DL_FUNC) &pcountR);
50 |   R_RegisterCCallable("kit", "CpcountNAR",               (DL_FUNC) &pcountNAR);
51 |   R_RegisterCCallable("kit", "CpfirstR",                 (DL_FUNC) &pfirstR);
52 |   R_RegisterCCallable("kit", "CpmeanR",                  (DL_FUNC) &pmeanR);
53 |   R_RegisterCCallable("kit", "CpprodR",                  (DL_FUNC) &pprodR);
54 |   R_RegisterCCallable("kit", "CpsumR",                   (DL_FUNC) &psumR);
55 |   R_RegisterCCallable("kit", "CsetlevelsR",              (DL_FUNC) &setlevelsR);
56 |   R_RegisterCCallable("kit", "CtopnR",                   (DL_FUNC) &topnR);
57 |   R_RegisterCCallable("kit", "CvswitchR",                (DL_FUNC) &vswitchR);
58 |   R_RegisterCCallable("kit", "CcreateMappingObjectR",    (DL_FUNC) &createMappingObjectR);
59 |   R_RegisterCCallable("kit", "CgetMappingObjectR",       (DL_FUNC) &getMappingObjectR);
60 |   R_RegisterCCallable("kit", "CclearMappingObjectR",     (DL_FUNC) &clearMappingObjectR);
61 | }
62 | 


--------------------------------------------------------------------------------
/src/kit.h:
--------------------------------------------------------------------------------
  1 | #ifdef _OPENMP
  2 |   #include<omp.h>
  3 |   #define omp_enabled true
  4 |   #define max_thread omp_get_num_procs()
  5 |   #define min_thread 1
  6 |   #define OMP_PARALLEL_FOR(nth) _Pragma("omp parallel for num_threads(nth)")
  7 | #else
  8 |   #define omp_enabled false
  9 |   #define max_thread 1
 10 |   #define min_thread 1
 11 |   #define omp_get_thread_num() 0
 12 |   #define OMP_PARALLEL_FOR(n)
 13 | #endif
 14 | 
 15 | #include <R.h>
 16 | #include <R_ext/Rdynload.h>
 17 | #include <Rversion.h>
 18 | 
 19 | #if !defined(R_VERSION) || R_VERSION < R_Version(3, 5, 0)
 20 |   #define USE_RINTERNALS
 21 |   #define DATAPTR_RO(x) ((const void *)DATAPTR(x))
 22 | #endif
 23 | 
 24 | #include <Rinternals.h>
 25 | #include <stdlib.h>
 26 | #include <stdint.h>
 27 | #include <stdbool.h>
 28 | 
 29 | #if R_VERSION < R_Version(4, 5, 0)
 30 | # define isDataFrame(x) Rf_isFrame(x)
 31 | #endif
 32 | 
 33 | #if !defined SSIZE_MAX
 34 |   #define SSIZE_MAX LLONG_MAX
 35 | #endif
 36 | 
 37 | #ifdef WIN32
 38 |   #include <windows.h>
 39 | #else
 40 |   #include <sys/mman.h>
 41 |   #include <sys/stat.h>
 42 |   #include <sys/types.h>
 43 |   #include <fcntl.h>
 44 |   #include <unistd.h>
 45 |   #include <errno.h>
 46 | #endif
 47 | 
 48 | #define UTYPEOF(x) ((unsigned)TYPEOF(x))
 49 | #define IS_BOOL(x) (LENGTH(x)==1 && TYPEOF(x)==LGLSXP && LOGICAL(x)[0]!=NA_LOGICAL)
 50 | #define IS_VALID_TYPE(x) ((x) == LGLSXP || (x)==INTSXP || (x)==REALSXP || (x)==CPLXSXP || (x)==STRSXP || (x)==VECSXP)
 51 | #define PTR_ETL(x, y) (((const SEXP *)DATAPTR_RO(x))[y])
 52 | #define SEXPPTR_RO(x) ((const SEXP *)DATAPTR_RO(x))
 53 | #define ISNA_COMPLEX(x) (ISNA(x.r) || ISNA(x.i))
 54 | #define ISNAN_COMPLEX(x) (ISNAN(x.r) || ISNAN(x.i))
 55 | #define EQUAL_CPLX(x, y) (((x.r) == (y.r)) && ((x.i) == (y.i)))
 56 | #define RCHAR(x, y) CHAR(STRING_ELT(x, y))
 57 | #define SEXP_F ScalarLogical(FALSE)
 58 | #define SEXP_T ScalarLogical(TRUE)
 59 | #define HASH(key, K)  (3141592653U * (unsigned int)(key) >> (32 - (K)))
 60 | #define N_ISNAN(x, y) (!ISNAN(x) && !ISNAN(y))
 61 | #define B_IsNA(x, y)  (R_IsNA(x) && R_IsNA(y))
 62 | #define B_IsNaN(x, y) (R_IsNaN(x) && R_IsNaN(y))
 63 | #define B_ISNAN(x, y) (ISNAN(x) && ISNAN(y))
 64 | #define C_IsNA(x)     (R_IsNA(x.r) || R_IsNA(x.i))
 65 | #define C_IsNaN(x)    (R_IsNaN(x.r) || R_IsNaN(x.i))
 66 | #define C_ISNAN(x, y) (B_ISNAN(x, y) || (N_ISNAN(x, y) && x == y))
 67 | #define REQUAL(x, y)  (N_ISNAN(x, y) ? (x == y) : (B_IsNA(x, y) || B_IsNaN(x, y)))
 68 | #define CEQUAL(x, y) ((N_ISNAN(x.r, x.i) && N_ISNAN(y.r, y.i)) ? (x.r == y.r && x.i == y.i) : (C_IsNA(x) ? C_IsNA(y) : (C_IsNA(y) ? 0 : (C_ISNAN(x.r, y.r) && C_ISNAN(x.i, y.i)))))
 69 | #define STR_DF mkString("data.frame")
 70 | #define MAX(a,b) (((a)>(b))?(a):(b))
 71 | #define IS_LOGICAL(x) (isLogical(x) && LENGTH(x)==1)
 72 | 
 73 | extern SEXP addColToDataFrame(SEXP df, SEXP mcol, SEXP coln);
 74 | extern SEXP callToOrder (SEXP x, const char* method, bool desc, Rboolean na, SEXP env);
 75 | extern SEXP charToFactR(SEXP x, SEXP decreasingArg, SEXP nthread, SEXP nalast, SEXP env, SEXP addNA);
 76 | extern SEXP countR(SEXP x, SEXP y);
 77 | extern SEXP countNAR(SEXP x);
 78 | extern SEXP countOccurR(SEXP x);
 79 | extern SEXP countOccurDataFrameR(SEXP x);
 80 | extern SEXP cpsortR(SEXP x, SEXP decreasing, SEXP nthread, SEXP nalast, SEXP env, SEXP index, SEXP clocale);
 81 | extern SEXP dfToMatrix(SEXP df);
 82 | extern SEXP dupR(SEXP x, SEXP uniq, SEXP fromLast);
 83 | extern SEXP dupVecR(SEXP x, SEXP uniq, SEXP fromLast);
 84 | extern SEXP dupVecIndexOnlyR(SEXP x);
 85 | extern SEXP dupDataFrameR(SEXP x, SEXP uniq, SEXP fromLast);
 86 | extern SEXP dupMatrixR(SEXP x, SEXP uniq, Rboolean idx, SEXP fromLast);
 87 | extern SEXP dupLenR(SEXP x);
 88 | extern SEXP dupLenDataFrameR(SEXP x);
 89 | extern SEXP dupLenMatrixR(SEXP x);
 90 | extern SEXP dupLenVecR(SEXP x);
 91 | extern SEXP fposR(SEXP needle, SEXP haystack, SEXP all, SEXP overlap);
 92 | extern SEXP fposMatR(SEXP needle, SEXP haystack, SEXP all, SEXP overlap);
 93 | extern SEXP fposVectR(SEXP ndle, SEXP hsk, SEXP all, SEXP overlap);
 94 | extern SEXP iifR(SEXP l, SEXP a, SEXP b, SEXP na, SEXP tprom, SEXP nthreads);
 95 | extern SEXP nifR(SEXP na, SEXP rho, SEXP args);
 96 | extern SEXP nifInternalR(SEXP na, SEXP args);
 97 | extern SEXP nswitchR(SEXP x, SEXP na, SEXP nthreads, SEXP chkenc, SEXP args);
 98 | extern SEXP ompEnabledR(void);
 99 | extern SEXP pallR(SEXP na, SEXP args);
100 | extern SEXP panyR(SEXP na, SEXP args);
101 | extern SEXP pcountR(SEXP x, SEXP args);
102 | extern SEXP pcountNAR(SEXP args);
103 | extern SEXP pfirstR(SEXP last, SEXP args);
104 | extern SEXP pmeanR(SEXP na, SEXP args);
105 | extern SEXP pprodR(SEXP na, SEXP args);
106 | extern SEXP psumR(SEXP na, SEXP args);
107 | extern SEXP setlevelsR(SEXP x, SEXP old_lvl, SEXP new_lvl, SEXP skip_absent);
108 | extern SEXP subSetColDataFrame(SEXP df, SEXP str);
109 | extern SEXP subSetColMatrix(SEXP x, R_xlen_t idx);
110 | extern SEXP subSetRowDataFrame(SEXP df, SEXP rws);
111 | extern SEXP subSetRowMatrix(SEXP mat, SEXP rws);
112 | extern SEXP topnR(SEXP vec, SEXP n, SEXP dec, SEXP hasna, SEXP env);
113 | extern SEXP vswitchR(SEXP x, SEXP values, SEXP outputs, SEXP na, SEXP nthreads, SEXP chkenc);
114 | 
115 | extern SEXP createMappingObjectR(SEXP MapName, SEXP MapLength, SEXP DataObject, SEXP verboseArg);
116 | extern SEXP getMappingObjectR(SEXP MapName, SEXP MapLength, SEXP verboseArg);
117 | extern SEXP clearMappingObjectR(SEXP ext, SEXP verboseArg);
118 | 
119 | union uno { double d; unsigned int u[2]; };
120 | bool isMixEnc(SEXP x);
121 | SEXP enc2UTF8(SEXP x);
122 | 


--------------------------------------------------------------------------------
/src/nswitch.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * kit : Useful R Functions Implemented in C
  3 |  * Copyright (C) 2020-2025  Morgan Jacob
  4 |  * 
  5 |  * This program is free software: you can redistribute it and/or modify
  6 |  * it under the terms of the GNU General Public License as published by
  7 |  * the Free Software Foundation, either version 3 of the License, or
  8 |  * (at your option) any later version.
  9 |  * 
 10 |  * This program is distributed in the hope that it will be useful,
 11 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
 12 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 13 |  * GNU General Public License for more details.
 14 |  * 
 15 |  * You should have received a copy of the GNU General Public License
 16 |  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 17 |  */
 18 | 
 19 | #include "kit.h"
 20 | 
 21 | SEXP nswitchR(SEXP x, SEXP na, SEXP nthreads, SEXP chkenc, SEXP args) {
 22 |   if (!IS_BOOL(chkenc)) {
 23 |     error("Argument 'checkEnc' must be TRUE or FALSE and length 1.");
 24 |   }
 25 |   if (isS4(x)) {
 26 |     error("S4 class objects for argument 'x' are not supported.");
 27 |   }
 28 |   if (isS4(na)) {
 29 |     error("S4 class objects for argument 'na' are not supported.");
 30 |   }
 31 |   int nth = asInteger(nthreads);
 32 |   nth = nth > max_thread ? max_thread : (nth < min_thread ? min_thread : nth); //revisit this
 33 |   R_len_t n = length(args);
 34 |   if (n % 2) {
 35 |     error("Received %d inputs; please supply an even number of arguments in ... "
 36 |             "consisting of target value, resulting output pairs (in that order). "
 37 |             "Note that argument 'default' must be named explicitly (e.g.: default=0)", n);
 38 |   }
 39 |   n = n / 2;
 40 |   const bool pchkenc = asLogical(chkenc);
 41 |   const bool nonna = !isNull(na);
 42 |   const bool ifact = isFactor(PTR_ETL(args, 1));
 43 |   const R_xlen_t len_na = xlength(na);
 44 |   const R_xlen_t len_x = xlength(x);
 45 |   const SEXPTYPE type0 = UTYPEOF(PTR_ETL(args, 1));
 46 |   const SEXPTYPE type1 = UTYPEOF(PTR_ETL(args, 0));
 47 |   const SEXPTYPE type_x = UTYPEOF(x);
 48 |   if (type_x != type1) {
 49 |     error("Type of 'x' and 'values' are different. Please make sure they are the same.");
 50 |   }
 51 |   SEXP out0c = PROTECT(getAttrib(PTR_ETL(args, 1), R_ClassSymbol));
 52 |   SEXP out0l = PROTECT(getAttrib(PTR_ETL(args, 1), R_LevelsSymbol));
 53 |   if (nonna) {
 54 |     if (len_na != 1 && len_na != len_x) {
 55 |       error("Length of 'default' must either be 1 or length of 'x'."); // maybe improve message
 56 |     }
 57 |     SEXPTYPE tn = UTYPEOF(na);
 58 |     if (tn != type0) {
 59 |       error("Resulting value is of type %s but 'default' is of type %s. "
 60 |               "Please make sure that both arguments have the same type.",
 61 |               type2char(type0), type2char(tn));
 62 |     }
 63 |     if (!R_compute_identical(out0c, PROTECT(getAttrib(na, R_ClassSymbol)), 0)) {
 64 |       error("Resulting value has different class than 'default'. "
 65 |               "Please make sure that both arguments have the same class.");
 66 |     }
 67 |     UNPROTECT(1);
 68 |     if (ifact) {
 69 |       if (!R_compute_identical(out0l, PROTECT(getAttrib(na, R_LevelsSymbol)), 0)) {
 70 |         error("Resulting value and 'default' are both type factor but their levels are different.");
 71 |       }
 72 |       UNPROTECT(1);
 73 |     }
 74 |   }
 75 |   ssize_t amask[n];
 76 |   for (ssize_t i=0; i<n;++i) {
 77 |     if (isS4(PTR_ETL(args, 2*i+1))) {
 78 |       error("S4 class objects for argument '...' (item %zu) are not supported.", i+1);
 79 |     }
 80 |     if (type1 != UTYPEOF(PTR_ETL(args,2*i))) {
 81 |       error("Item 1 and %zu of '...' are not of the same type.",2*i+1);
 82 |     }
 83 |     if (type0 != UTYPEOF(PTR_ETL(args,2*i+1))) {
 84 |       error("Item 2 and %zu of '...' are not of the same type.",2*i+2);
 85 |     }
 86 |     R_xlen_t len_i = xlength(PTR_ETL(args, 2*i+1));
 87 |     if (len_i != len_x && len_i != 1) {
 88 |       error("Length of item %zu of '...' is different than 1 and length of 'x'. "
 89 |               "Please make sure that all items of 'output' have length 1 or length of 'x'(%zu).",
 90 |               2*i+2, len_x);
 91 |     }
 92 |     if (xlength(PTR_ETL(args, 2*i)) != 1) {
 93 |       error("Length of item %zu of '...' is different than 1. Please make sure it has length 1.",2*i+1);
 94 |     }
 95 |     if (!R_compute_identical(out0c, PROTECT(getAttrib(PTR_ETL(args, 2*i+1), R_ClassSymbol)), 0)) {
 96 |       error("Items 2 and  %zu of '...' must have same class.", 2*i+2);
 97 |     }
 98 |     UNPROTECT(1);
 99 |     if (ifact) {
100 |       if (!R_compute_identical(out0l, PROTECT(getAttrib(PTR_ETL(args, 2*i+1), R_LevelsSymbol)), 0)) {
101 |         error("Items 2 and  %zu of '...' are both factor but their levels are different.", 2*i+2);
102 |       }
103 |       UNPROTECT(1);
104 |     }
105 |     amask[i] = len_i>1 ? SSIZE_MAX : 0;
106 |   }
107 |   SEXP xans = R_NilValue, vans = R_NilValue;
108 |   int nprotect = 0;
109 |   bool utfcon = false;
110 |   if (pchkenc && type_x == STRSXP) {
111 |     if (!isMixEnc(x)) {
112 |       const cetype_t cx = getCharCE(STRING_PTR_RO(x)[0]);
113 |       for (ssize_t i = 0; i < n; ++i) {
114 |         if(cx != getCharCE(STRING_PTR_RO(PTR_ETL(args,2*i))[0])) {
115 |           utfcon = true;
116 |           break;
117 |         }
118 |       }
119 |     } else {
120 |       utfcon = true;
121 |     }
122 |     if (utfcon) {
123 |       xans = PROTECT(enc2UTF8(x));
124 |       vans = PROTECT(allocVector(STRSXP, n));
125 |       nprotect = 2;
126 |       for (ssize_t i = 0; i < n; ++i) {
127 |         SET_STRING_ELT(vans, i, STRING_PTR_RO(enc2UTF8(PTR_ETL(args,2*i)))[0]);
128 |       }
129 |     }
130 |   }
131 |   SEXP ans = PROTECT(allocVector(type0, len_x));
132 |   copyMostAttrib(PTR_ETL(args, 1), ans);
133 |   switch(type0) {
134 |   /*
135 |    *  This part is for LOGICAL
136 |    */
137 |   case LGLSXP:{
138 |     int *restrict pans = LOGICAL(ans);
139 |     const int *restrict pna = nonna ? LOGICAL(na) : NULL; 
140 |     ssize_t namask = len_na>1 ? SSIZE_MAX : 0;
141 |     OMP_PARALLEL_FOR(nth)
142 |     for (ssize_t j=0; j<len_x; ++j) {
143 |       pans[j]= nonna ? pna[j & namask] : NA_LOGICAL;
144 |     }
145 |     switch(type_x) {
146 |     case LGLSXP: {
147 |       const int *restrict px = LOGICAL(x);
148 |       OMP_PARALLEL_FOR(nth)
149 |       for (ssize_t i = 0; i < n; ++i) {
150 |         const int *restrict pto = LOGICAL(PTR_ETL(args, 2*i+1));
151 |         const int *restrict pvalues = LOGICAL(PTR_ETL(args, 2*i));
152 |         for (ssize_t j = 0; j < len_x; ++j) {
153 |           if (px[j] == pvalues[0]) {
154 |             pans[j] = pto[j & amask[i]];
155 |           }
156 |         }
157 |       }
158 |     } break;
159 |     case INTSXP: {
160 |       const int *restrict px = INTEGER(x);
161 |       OMP_PARALLEL_FOR(nth)
162 |       for (ssize_t i = 0; i < n; ++i) {
163 |         const int *restrict pto = LOGICAL(PTR_ETL(args, 2*i+1));
164 |         const int *restrict pvalues = INTEGER(PTR_ETL(args, 2*i));
165 |         for (ssize_t j = 0; j < len_x; ++j) {
166 |           if (px[j] == pvalues[0]) {
167 |             pans[j] = pto[j & amask[i]];
168 |           }
169 |         }
170 |       }
171 |     } break;
172 |     case REALSXP: {
173 |       const double *restrict px = REAL(x);
174 |       OMP_PARALLEL_FOR(nth)
175 |       for (ssize_t i = 0; i < n; ++i) {
176 |         const int *restrict pto = LOGICAL(PTR_ETL(args, 2*i+1));
177 |         const double *restrict pvalues = REAL(PTR_ETL(args, 2*i));
178 |         for (ssize_t j = 0; j < len_x; ++j) {
179 |           if (px[j] == pvalues[0]) {
180 |             pans[j] = pto[j & amask[i]];
181 |           }
182 |         }
183 |       }
184 |     } break;
185 |     case CPLXSXP: {
186 |       const Rcomplex *restrict px = COMPLEX(x);
187 |       OMP_PARALLEL_FOR(nth)
188 |       for (ssize_t i = 0; i < n; ++i) {
189 |         const int *restrict pto = LOGICAL(PTR_ETL(args, 2*i+1));
190 |         const Rcomplex *restrict pvalues = COMPLEX(PTR_ETL(args, 2*i));
191 |         for (ssize_t j = 0; j < len_x; ++j) {
192 |           if (EQUAL_CPLX(px[j], pvalues[0])) {
193 |             pans[j] = pto[j & amask[i]];
194 |           }
195 |         }
196 |       }
197 |     } break;
198 |     case STRSXP: {
199 |       const SEXP *restrict px = STRING_PTR_RO(utfcon ? xans : x);
200 |       OMP_PARALLEL_FOR(nth)
201 |       for (ssize_t i = 0; i < n; ++i) {
202 |         const int *restrict pto = LOGICAL(PTR_ETL(args, 2*i+1));
203 |         const SEXP *restrict pvalues = STRING_PTR_RO(utfcon ? vans : PTR_ETL(args, 2*i));
204 |         for (ssize_t j = 0; j < len_x; ++j) {
205 |           if (px[j] == pvalues[0]) {
206 |             pans[j] = pto[j & amask[i]];
207 |           }
208 |         }
209 |       }
210 |     } break;
211 |     case VECSXP: {
212 |       const SEXP *restrict px = SEXPPTR_RO(x);
213 |       for (ssize_t i = 0; i < n; ++i) {
214 |         const int *restrict pto = LOGICAL(PTR_ETL(args, 2*i+1));
215 |         const SEXP *restrict pvalues = SEXPPTR_RO(PTR_ETL(args, 2*i));
216 |         for (ssize_t j = 0; j < len_x; ++j) {
217 |           if (R_compute_identical(px[j],pvalues[0],0)) {
218 |             pans[j] = pto[i & amask[i]];
219 |           }
220 |         }
221 |       }    
222 |     } break;
223 |     default:
224 |       error("Type %s is not supported for argument 'x'.", type2char(type_x));
225 |     }
226 |   } break;
227 |     /*
228 |      *  This part is for INTEGER
229 |      */
230 |   case INTSXP:{
231 |     int *restrict pans = INTEGER(ans);
232 |     const int *restrict pna = nonna ? INTEGER(na) : NULL; 
233 |     ssize_t namask = len_na>1 ? SSIZE_MAX : 0;
234 |     OMP_PARALLEL_FOR(nth)
235 |     for (ssize_t j=0; j<len_x; ++j) {
236 |       pans[j]= nonna ? pna[j & namask] : NA_INTEGER;
237 |     }
238 |     switch(type_x) {
239 |     case LGLSXP: {
240 |       const int *restrict px = LOGICAL(x);
241 |       OMP_PARALLEL_FOR(nth)
242 |       for (ssize_t i = 0; i < n; ++i) {
243 |         const int *restrict pto = INTEGER(PTR_ETL(args, 2*i+1));
244 |         const int *restrict pvalues = LOGICAL(PTR_ETL(args, 2*i));
245 |         for (ssize_t j = 0; j < len_x; ++j) {
246 |           if (px[j] == pvalues[0]) {
247 |             pans[j] = pto[j & amask[i]];
248 |           }
249 |         }
250 |       }
251 |     } break;
252 |     case INTSXP: {
253 |       const int *restrict px = INTEGER(x);
254 |       OMP_PARALLEL_FOR(nth)
255 |       for (ssize_t i = 0; i < n; ++i) {
256 |         const int *restrict pto = INTEGER(PTR_ETL(args, 2*i+1));
257 |         const int *restrict pvalues = INTEGER(PTR_ETL(args, 2*i));
258 |         for (ssize_t j = 0; j < len_x; ++j) {
259 |           if (px[j] == pvalues[0]) {
260 |             pans[j] = pto[j & amask[i]];
261 |           }
262 |         }
263 |       }
264 |     } break;
265 |     case REALSXP: {
266 |       const double *restrict px = REAL(x);
267 |       OMP_PARALLEL_FOR(nth)
268 |       for (ssize_t i = 0; i < n; ++i) {
269 |         const int *restrict pto = INTEGER(PTR_ETL(args, 2*i+1));
270 |         const double *restrict pvalues = REAL(PTR_ETL(args, 2*i));
271 |         for (ssize_t j = 0; j < len_x; ++j) {
272 |           if (px[j] == pvalues[0]) {
273 |             pans[j] = pto[j & amask[i]];
274 |           }
275 |         }
276 |       }
277 |     } break;
278 |     case CPLXSXP: {
279 |       const Rcomplex *restrict px = COMPLEX(x);
280 |       OMP_PARALLEL_FOR(nth)
281 |       for (ssize_t i = 0; i < n; ++i) {
282 |         const int *restrict pto = INTEGER(PTR_ETL(args, 2*i+1));
283 |         const Rcomplex *restrict pvalues = COMPLEX(PTR_ETL(args, 2*i));
284 |         for (ssize_t j = 0; j < len_x; ++j) {
285 |           if (EQUAL_CPLX(px[j], pvalues[0])) {
286 |             pans[j] = pto[j & amask[i]];
287 |           }
288 |         }
289 |       }
290 |     } break;
291 |     case STRSXP: {
292 |       const SEXP *restrict px = STRING_PTR_RO(utfcon ? xans : x);
293 |       OMP_PARALLEL_FOR(nth)
294 |       for (ssize_t i = 0; i < n; ++i) {
295 |         const int *restrict pto = INTEGER(PTR_ETL(args, 2*i+1));
296 |         const SEXP *restrict pvalues = STRING_PTR_RO(utfcon ? vans : PTR_ETL(args, 2*i));
297 |         for (ssize_t j = 0; j < len_x; ++j) {
298 |           if (px[j] == pvalues[0]) {
299 |             pans[j] = pto[j & amask[i]];
300 |           }
301 |         }
302 |       }
303 |     } break;
304 |     case VECSXP: {
305 |       const SEXP *restrict px = SEXPPTR_RO(x);
306 |       for (ssize_t i = 0; i < n; ++i) {
307 |         const int *restrict pto = INTEGER(PTR_ETL(args, 2*i+1));
308 |         const SEXP *restrict pvalues = SEXPPTR_RO(PTR_ETL(args, 2*i));
309 |         for (ssize_t j = 0; j < len_x; ++j) {
310 |           if (R_compute_identical(px[j],pvalues[0],0)) {
311 |             pans[j] = pto[j & amask[i]];
312 |           }
313 |         }
314 |       }
315 |     } break;
316 |     default:
317 |       error("Type %s is not supported for argument 'x'.", type2char(type_x));
318 |     }
319 |   } break;
320 |     /*
321 |      *  This part is for REAL
322 |      */
323 |   case REALSXP:{
324 |     double *restrict pans = REAL(ans);
325 |     const double *restrict pna = nonna ? REAL(na) : NULL; 
326 |     ssize_t namask = len_na>1 ? SSIZE_MAX : 0;
327 |     OMP_PARALLEL_FOR(nth)
328 |     for (ssize_t j=0; j<len_x; ++j) {
329 |       pans[j]= nonna ? pna[j & namask] : NA_REAL;
330 |     }
331 |     switch(type_x) {
332 |     case LGLSXP: {
333 |       const int *restrict px = LOGICAL(x);
334 |       OMP_PARALLEL_FOR(nth)
335 |       for (ssize_t i = 0; i < n; ++i) {
336 |         const double *restrict pto = REAL(PTR_ETL(args, 2*i+1));
337 |         const int *restrict pvalues = LOGICAL(PTR_ETL(args, 2*i));
338 |         for (ssize_t j = 0; j < len_x; ++j) {
339 |           if (px[j] == pvalues[0]) {
340 |             pans[j] = pto[j & amask[i]];
341 |           }
342 |         }
343 |       }
344 |     } break;
345 |     case INTSXP: {
346 |       const int *restrict px = INTEGER(x);
347 |       OMP_PARALLEL_FOR(nth)
348 |       for (ssize_t i = 0; i < n; ++i) {
349 |         const double *restrict pto = REAL(PTR_ETL(args, 2*i+1));
350 |         const int *restrict pvalues = INTEGER(PTR_ETL(args, 2*i));
351 |         for (ssize_t j = 0; j < len_x; ++j) {
352 |           if (px[j] == pvalues[0]) {
353 |             pans[j] = pto[j & amask[i]];
354 |           }
355 |         }
356 |       }
357 |     } break;
358 |     case REALSXP: {
359 |       const double *restrict px = REAL(x);
360 |       OMP_PARALLEL_FOR(nth)
361 |       for (ssize_t i = 0; i < n; ++i) {
362 |         const double *restrict pto = REAL(PTR_ETL(args, 2*i+1));
363 |         const double *restrict pvalues = REAL(PTR_ETL(args, 2*i));
364 |         for (ssize_t j = 0; j < len_x; ++j) {
365 |           if (px[j] == pvalues[0]) {
366 |             pans[j] = pto[j & amask[i]];
367 |           }
368 |         }
369 |       }
370 |     } break;
371 |     case CPLXSXP: {
372 |       const Rcomplex *restrict px = COMPLEX(x);
373 |       OMP_PARALLEL_FOR(nth)
374 |       for (ssize_t i = 0; i < n; ++i) {
375 |         const double *restrict pto = REAL(PTR_ETL(args, 2*i+1));
376 |         const Rcomplex *restrict pvalues = COMPLEX(PTR_ETL(args, 2*i));
377 |         for (ssize_t j = 0; j < len_x; ++j) {
378 |           if (EQUAL_CPLX(px[j], pvalues[0])) {
379 |             pans[j] = pto[j & amask[i]];
380 |           }
381 |         }
382 |       }
383 |     } break;
384 |     case STRSXP: {
385 |       const SEXP *restrict px = STRING_PTR_RO(utfcon ? xans : x);
386 |       OMP_PARALLEL_FOR(nth)
387 |       for (ssize_t i = 0; i < n; ++i) {
388 |         const double *restrict pto = REAL(PTR_ETL(args, 2*i+1));
389 |         const SEXP *restrict pvalues = STRING_PTR_RO(utfcon ? vans : PTR_ETL(args, 2*i));
390 |         for (ssize_t j = 0; j < len_x; ++j) {
391 |           if (px[j] == pvalues[0]) {
392 |             pans[j] = pto[j & amask[i]];
393 |           }
394 |         }
395 |       }
396 |     } break;
397 |     case VECSXP: {
398 |       const SEXP *restrict px = SEXPPTR_RO(x);
399 |       for (ssize_t i = 0; i < n; ++i) {
400 |         const double *restrict pto = REAL(PTR_ETL(args, 2*i+1));
401 |         const SEXP *restrict pvalues = SEXPPTR_RO(PTR_ETL(args, 2*i));
402 |         for (ssize_t j = 0; j < len_x; ++j) {
403 |           if (R_compute_identical(px[j],pvalues[0],0)) {
404 |             pans[j] = pto[j & amask[i]];
405 |           }
406 |         }
407 |       }
408 |     } break;
409 |     default:
410 |       error("Type %s is not supported for argument 'x'.", type2char(type_x));
411 |     }
412 |   } break;
413 |     /*
414 |      *  This part is for COMPLEX
415 |      */
416 |   case CPLXSXP:{
417 |     Rcomplex *restrict pans = COMPLEX(ans);
418 |     const Rcomplex *restrict pna = nonna ? COMPLEX(na) : NULL; 
419 |     ssize_t namask = len_na>1 ? SSIZE_MAX : 0;
420 |     Rcomplex NA_CPLX; NA_CPLX.r = NA_REAL; NA_CPLX.i = NA_REAL; // deal with that across all functions
421 |     OMP_PARALLEL_FOR(nth)
422 |     for (ssize_t j=0; j<len_x; ++j) {
423 |       pans[j]= nonna ? pna[j & namask] : NA_CPLX;
424 |     }
425 |     switch(type_x) {
426 |     case LGLSXP: {
427 |       const int *restrict px = LOGICAL(x);
428 |       OMP_PARALLEL_FOR(nth)
429 |       for (ssize_t i = 0; i < n; ++i) {
430 |         const Rcomplex *restrict pto = COMPLEX(PTR_ETL(args, 2*i+1));
431 |         const int *restrict pvalues = LOGICAL(PTR_ETL(args, 2*i));
432 |         for (ssize_t j = 0; j < len_x; ++j) {
433 |           if (px[j] == pvalues[0]) {
434 |             pans[j] = pto[j & amask[i]];
435 |           }
436 |         }
437 |       }
438 |     } break;
439 |     case INTSXP: {
440 |       const int *restrict px = INTEGER(x);
441 |       OMP_PARALLEL_FOR(nth)
442 |       for (ssize_t i = 0; i < n; ++i) {
443 |         const Rcomplex *restrict pto = COMPLEX(PTR_ETL(args, 2*i+1));
444 |         const int *restrict pvalues = INTEGER(PTR_ETL(args, 2*i));
445 |         for (ssize_t j = 0; j < len_x; ++j) {
446 |           if (px[j] == pvalues[0]) {
447 |             pans[j] = pto[j & amask[i]];
448 |           }
449 |         }
450 |       }
451 |     } break;
452 |     case REALSXP: {
453 |       const double *restrict px = REAL(x);
454 |       OMP_PARALLEL_FOR(nth)
455 |       for (ssize_t i = 0; i < n; ++i) {
456 |         const Rcomplex *restrict pto = COMPLEX(PTR_ETL(args, 2*i+1));
457 |         const double *restrict pvalues = REAL(PTR_ETL(args, 2*i));
458 |         for (ssize_t j = 0; j < len_x; ++j) {
459 |           if (px[j] == pvalues[0]) {
460 |             pans[j] = pto[j & amask[i]];
461 |           }
462 |         }
463 |       }
464 |     } break;
465 |     case CPLXSXP: {
466 |       const Rcomplex *restrict px = COMPLEX(x);
467 |       OMP_PARALLEL_FOR(nth)
468 |       for (ssize_t i = 0; i < n; ++i) {
469 |         const Rcomplex *restrict pto = COMPLEX(PTR_ETL(args, 2*i+1));
470 |         const Rcomplex *restrict pvalues = COMPLEX(PTR_ETL(args, 2*i));
471 |         for (ssize_t j = 0; j < len_x; ++j) {
472 |           if (EQUAL_CPLX(px[j],pvalues[0])) {
473 |             pans[j] = pto[j & amask[i]];
474 |           }
475 |         }
476 |       }
477 |     } break;
478 |     case STRSXP: {
479 |       const SEXP *restrict px = STRING_PTR_RO(utfcon ? xans : x);
480 |       OMP_PARALLEL_FOR(nth)
481 |       for (ssize_t i = 0; i < n; ++i) {
482 |         const Rcomplex *restrict pto = COMPLEX(PTR_ETL(args, 2*i+1));
483 |         const SEXP *restrict pvalues = STRING_PTR_RO(utfcon ? vans : PTR_ETL(args, 2*i));
484 |         for (ssize_t j = 0; j < len_x; ++j) {
485 |           if (px[j] == pvalues[0]) {
486 |             pans[j] = pto[j & amask[i]];
487 |           }
488 |         }
489 |       }
490 |     } break;
491 |     case VECSXP: {
492 |       const SEXP *restrict px = SEXPPTR_RO(x);
493 |       for (ssize_t i = 0; i < n; ++i) {
494 |         const Rcomplex *restrict pto = COMPLEX(PTR_ETL(args, 2*i+1));
495 |         const SEXP *restrict pvalues = SEXPPTR_RO(PTR_ETL(args, 2*i));
496 |         for (ssize_t j = 0; j < len_x; ++j) {
497 |           if (R_compute_identical(px[j],pvalues[0],0)) {
498 |             pans[j] = pto[j & amask[i]];
499 |           }
500 |         }
501 |       }
502 |     } break;
503 |     default:
504 |       error("Type %s is not supported for argument 'x'.", type2char(type_x));
505 |     }
506 |   } break;
507 |     /*
508 |      *  This part is for STRING
509 |      */
510 |   case STRSXP:{
511 |     const SEXP *restrict pna = nonna ? STRING_PTR_RO(na) : NULL; 
512 |     ssize_t namask = len_na>1 ? SSIZE_MAX : 0;
513 |     for (ssize_t j=0; j<len_x; ++j) {
514 |       SET_STRING_ELT(ans, j, nonna ? pna[j & namask] : NA_STRING);
515 |     }
516 |     switch(type_x) {
517 |     case LGLSXP: {
518 |       const int *restrict px = LOGICAL(x);
519 |       for (ssize_t i = 0; i < n; ++i) {
520 |         const SEXP *restrict pto = STRING_PTR_RO(PTR_ETL(args, 2*i+1));
521 |         const int *restrict pvalues = LOGICAL(PTR_ETL(args, 2*i));
522 |         for (ssize_t j = 0; j < len_x; ++j) {
523 |           if (px[j] == pvalues[0]) {
524 |             SET_STRING_ELT(ans, j, pto[j & amask[i]]);
525 |           }
526 |         }
527 |       }
528 |     } break;
529 |     case INTSXP: {
530 |       const int *restrict px = INTEGER(x);
531 |       for (ssize_t i = 0; i < n; ++i) {
532 |         const SEXP *restrict pto = STRING_PTR_RO(PTR_ETL(args, 2*i+1));
533 |         const int *restrict pvalues = INTEGER(PTR_ETL(args, 2*i));
534 |         for (ssize_t j = 0; j < len_x; ++j) {
535 |           if (px[j] == pvalues[0]) {
536 |             SET_STRING_ELT(ans, j, pto[j & amask[i]]);
537 |           }
538 |         }
539 |       }
540 |     } break;
541 |     case REALSXP: {
542 |       const double *restrict px = REAL(x);
543 |       for (ssize_t i = 0; i < n; ++i) {
544 |         const SEXP *restrict pto = STRING_PTR_RO(PTR_ETL(args, 2*i+1));
545 |         const double *restrict pvalues = REAL(PTR_ETL(args, 2*i));
546 |         for (ssize_t j = 0; j < len_x; ++j) {
547 |           if (px[j] == pvalues[0]) {
548 |             SET_STRING_ELT(ans, j, pto[j & amask[i]]);
549 |           }
550 |         }
551 |       }
552 |     } break;
553 |     case CPLXSXP: {
554 |       const Rcomplex *restrict px = COMPLEX(x);
555 |       for (ssize_t i = 0; i < n; ++i) {
556 |         const SEXP *restrict pto = STRING_PTR_RO(PTR_ETL(args, 2*i+1));
557 |         const Rcomplex *restrict pvalues = COMPLEX(PTR_ETL(args, 2*i));
558 |         for (ssize_t j = 0; j < len_x; ++j) {
559 |           if (EQUAL_CPLX(px[j],pvalues[0])) {
560 |             SET_STRING_ELT(ans, j, pto[j & amask[i]]);
561 |           }
562 |         }
563 |       }
564 |     } break;
565 |     case STRSXP: {
566 |       const SEXP *restrict px = STRING_PTR_RO(utfcon ? xans : x);
567 |       for (ssize_t i = 0; i < n; ++i) {
568 |         const SEXP *restrict pto = STRING_PTR_RO(PTR_ETL(args, 2*i+1));
569 |         const SEXP *restrict pvalues = STRING_PTR_RO(utfcon ? vans : PTR_ETL(args, 2*i));
570 |         for (ssize_t j = 0; j < len_x; ++j) {
571 |           if (px[j] == pvalues[0]) {
572 |             SET_STRING_ELT(ans, j, pto[j & amask[i]]);
573 |           }
574 |         }
575 |       }
576 |     } break;
577 |     case VECSXP: {
578 |       const SEXP *restrict px = SEXPPTR_RO(x);
579 |       for (ssize_t i = 0; i < n; ++i) {
580 |         const SEXP *restrict pto = STRING_PTR_RO(PTR_ETL(args, 2*i+1));
581 |         const SEXP *restrict pvalues = SEXPPTR_RO(PTR_ETL(args, 2*i));
582 |         for (ssize_t j = 0; j < len_x; ++j) {
583 |           if (R_compute_identical(px[j],pvalues[0],0)) {
584 |             SET_STRING_ELT(ans, j, pto[j & amask[i]]);
585 |           }
586 |         }
587 |       }
588 |     } break;
589 |     default:
590 |       error("Type %s is not supported for argument 'x'.", type2char(type_x));
591 |     }
592 |   } break;
593 |     /*
594 |      *  This part is for LIST
595 |      */
596 |   case VECSXP:{
597 |     const SEXP *restrict pna = nonna ? SEXPPTR_RO(na) : NULL; 
598 |     ssize_t namask = len_na>1 ? SSIZE_MAX : 0;
599 |     for (ssize_t j=0; j<len_x; ++j) {
600 |       if (nonna) {
601 |         SET_VECTOR_ELT(ans, j, pna[j & namask]); 
602 |       }
603 |     }
604 |     switch(type_x) {
605 |     case LGLSXP: {
606 |       const int *restrict px = LOGICAL(x);
607 |       for (ssize_t i = 0; i < n; ++i) {
608 |         const SEXP *restrict pto = SEXPPTR_RO(PTR_ETL(args, 2*i+1));
609 |         const int *restrict pvalues = LOGICAL(PTR_ETL(args, 2*i));
610 |         for (ssize_t j = 0; j < len_x; ++j) {
611 |           if (px[j] == pvalues[0]) {
612 |             SET_VECTOR_ELT(ans, j, pto[j & amask[i]]);
613 |           }
614 |         }
615 |       }
616 |     } break;
617 |     case INTSXP: {
618 |       const int *restrict px = INTEGER(x);
619 |       for (ssize_t i = 0; i < n; ++i) {
620 |         const SEXP *restrict pto = SEXPPTR_RO(PTR_ETL(args, 2*i+1));
621 |         const int *restrict pvalues = INTEGER(PTR_ETL(args, 2*i));
622 |         for (ssize_t j = 0; j < len_x; ++j) {
623 |           if (px[j] == pvalues[0]) {
624 |             SET_VECTOR_ELT(ans, j, pto[j & amask[i]]);
625 |           }
626 |         }
627 |       }
628 |     } break;
629 |     case REALSXP: {
630 |       const double *restrict px = REAL(x);
631 |       for (ssize_t i = 0; i < n; ++i) {
632 |         const SEXP *restrict pto = SEXPPTR_RO(PTR_ETL(args, 2*i+1));
633 |         const double *restrict pvalues = REAL(PTR_ETL(args, 2*i));
634 |         for (ssize_t j = 0; j < len_x; ++j) {
635 |           if (px[j] == pvalues[0]) {
636 |             SET_VECTOR_ELT(ans, j, pto[j & amask[i]]);
637 |           }
638 |         }
639 |       }
640 |     } break;
641 |     case CPLXSXP: {
642 |       const Rcomplex *restrict px = COMPLEX(x);
643 |       for (ssize_t i = 0; i < n; ++i) {
644 |         const SEXP *restrict pto = SEXPPTR_RO(PTR_ETL(args, 2*i+1));
645 |         const Rcomplex *restrict pvalues = COMPLEX(PTR_ETL(args, 2*i));
646 |         for (ssize_t j = 0; j < len_x; ++j) {
647 |           if (EQUAL_CPLX(px[j], pvalues[0])) {
648 |             SET_VECTOR_ELT(ans, j, pto[j & amask[i]]);
649 |           }
650 |         }
651 |       }
652 |     } break;
653 |     case STRSXP: {
654 |       const SEXP *restrict px = STRING_PTR_RO(utfcon ? xans : x);
655 |       for (ssize_t i = 0; i < n; ++i) {
656 |         const SEXP *restrict pto = SEXPPTR_RO(PTR_ETL(args, 2*i+1));
657 |         const SEXP *restrict pvalues = STRING_PTR_RO(utfcon ? vans : PTR_ETL(args, 2*i));
658 |         for (ssize_t j = 0; j < len_x; ++j) {
659 |           if (px[j] == pvalues[0]) {
660 |             SET_VECTOR_ELT(ans, j, pto[j & amask[i]]);
661 |           }
662 |         }
663 |       }
664 |     } break;
665 |     case VECSXP: {
666 |       const SEXP *restrict px = SEXPPTR_RO(x);
667 |       for (ssize_t i = 0; i < n; ++i) {
668 |         const SEXP *restrict pto = SEXPPTR_RO(PTR_ETL(args, 2*i+1));
669 |         const SEXP *restrict pvalues = SEXPPTR_RO(PTR_ETL(args, 2*i));
670 |         for (ssize_t j = 0; j < len_x; ++j) {
671 |           if (R_compute_identical(px[j],pvalues[0],0)) {
672 |             SET_VECTOR_ELT(ans, j, pto[j & amask[i]]);
673 |           }
674 |         }
675 |       }
676 |     } break;
677 |     default:
678 |       error("Type %s is not supported for argument 'x'.", type2char(type_x));
679 |     }
680 |   } break;
681 |   default :
682 |     error("Type %s is not supported for argument 'outputs'", type2char(type0));
683 |   }
684 |   UNPROTECT(nprotect + 3);
685 |   return ans;
686 | }
687 | 


--------------------------------------------------------------------------------
/src/psort.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * kit : Useful R Functions Implemented in C
  3 |  * Copyright (C) 2020-2025  Morgan Jacob
  4 |  * 
  5 |  * This program is free software: you can redistribute it and/or modify
  6 |  * it under the terms of the GNU General Public License as published by
  7 |  * the Free Software Foundation, either version 3 of the License, or
  8 |  * (at your option) any later version.
  9 |  * 
 10 |  * This program is distributed in the hope that it will be useful,
 11 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
 12 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 13 |  * GNU General Public License for more details.
 14 |  * 
 15 |  * You should have received a copy of the GNU General Public License
 16 |  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 17 |  */
 18 | 
 19 | #include "kit.h"
 20 | 
 21 | #define HASHSTR(x) (HASH(((intptr_t) (x) & 0xffffffff), K))
 22 | #define LOOKUP_VAL(x) (lookupTable[getIndex((x), pvalSorted, lookupTable)]-1)
 23 | 
 24 | #define STR_SORT   install("sort")
 25 | #define STR_ORDER  install("order")
 26 | #define STR_METHOD install("method")
 27 | #define STR_NALAST install("na.last")
 28 | #define STR_DECREA install("decreasing")
 29 | #define STR_FACTOR mkChar("factor")
 30 | 
 31 | static int K = 8;
 32 | static size_t M = 256;
 33 | 
 34 | static void recursiveRadix(const SEXP *restrict pans, const size_t k, size_t *restrict pos,
 35 |                            size_t *restrict incr, uint8_t *restrict test, SEXP tmp,
 36 |                            const SEXP *restrict ptmp, size_t start, size_t *restrict newpos) {
 37 |   for (uint16_t i = 1; i < 257; ++i){
 38 |     if (pos[i] ==  1) {
 39 |       start++; continue;
 40 |     }
 41 |     if (pos[i] > 1) {
 42 |       const size_t ct = pos[i];
 43 |       SEXP *npans = (SEXP*)pans + start;
 44 |       memset(incr, 0, 257*sizeof(size_t));
 45 |       for(size_t j = 0; j < ct; ++j) {
 46 |         test[j] = (uint8_t)(CHAR(npans[j])[k]);
 47 |         incr[++test[j]]++;
 48 |       }
 49 |       if (incr[1] == ct) {
 50 |         start += ct;
 51 |         continue;
 52 |       }
 53 |       newpos[0] = incr[0];
 54 |       for (uint16_t j = 1;  j < 257; ++j){
 55 |         newpos[j] = incr[j] + newpos[j-1];
 56 |       }
 57 |       for (size_t j = 0; j < ct; ++j){
 58 |         SET_STRING_ELT(tmp, --newpos[test[j]], npans[j]);
 59 |       }
 60 |       memcpy(npans, ptmp, ct*sizeof(SEXP));
 61 |       size_t maxLen=0;
 62 |       for (uint16_t j = 2; j < 257; ++j){
 63 |         if(incr[j] > maxLen) {
 64 |           maxLen = incr[j];
 65 |         }
 66 |       }
 67 |       if (maxLen > 1) {
 68 |         size_t *restrict secpos = malloc(257*sizeof(size_t));
 69 |         recursiveRadix(pans, k+1, incr, newpos, test, tmp, ptmp, start,secpos);
 70 |         free(secpos);
 71 |       }
 72 |       start += ct;
 73 |     }
 74 |   }
 75 | }
 76 | 
 77 | static void recursiveRadixrev(const SEXP *restrict pans, const size_t k, size_t *restrict pos,
 78 |                               size_t *restrict incr, uint8_t *restrict test, SEXP tmp,
 79 |                               const SEXP *restrict ptmp, size_t start, size_t *restrict newpos) {
 80 |   for (uint16_t i = 256; i > 0; --i){
 81 |     if (pos[i] ==  1) {
 82 |       start++; continue;
 83 |     }
 84 |     if (pos[i] > 1) {
 85 |       const size_t ct = pos[i];
 86 |       SEXP *npans = (SEXP*)pans + start;
 87 |       memset(incr, 0, 257*sizeof(size_t));
 88 |       for(size_t j = 0; j < ct; ++j) {
 89 |         test[j] = (uint8_t)(CHAR(npans[j])[k]);
 90 |         incr[++test[j]]++;
 91 |       }
 92 |       if (incr[1] == ct) {
 93 |         start += ct;
 94 |         continue;
 95 |       }
 96 |       newpos[256] = incr[256];
 97 |       for (uint16_t j = 255;  j >= 1; --j){
 98 |         newpos[j] = incr[j] + newpos[j+1];
 99 |       }
100 |       for (size_t j = 0; j < ct; ++j){
101 |         SET_STRING_ELT(tmp, --newpos[test[j]], npans[j]);
102 |       }
103 |       memcpy(npans, ptmp, ct*sizeof(SEXP));
104 |       size_t maxLen=0;
105 |       for (uint16_t j = 2; j < 257; ++j){
106 |         if(incr[j] > maxLen) {
107 |           maxLen = incr[j];
108 |         }
109 |       }
110 |       if (maxLen > 1) {
111 |         size_t *restrict secpos = malloc(257*sizeof(size_t));
112 |         recursiveRadixrev(pans, k+1, incr, newpos, test, tmp, ptmp, start,secpos);
113 |         free(secpos);
114 |       }
115 |       start += ct;
116 |     }
117 |   }
118 | }
119 | 
120 | static SEXP rsort (SEXP x) {
121 |   const size_t len = LENGTH(x);
122 |   const SEXP *restrict px = STRING_PTR_RO(x);
123 |   SEXP ans = PROTECT(allocVector(STRSXP, len));
124 |   uint8_t nprotect = 1;
125 |   
126 |   uint8_t *restrict test = malloc(len * sizeof(uint8_t));
127 |   size_t *restrict pos = calloc(257, sizeof(size_t));
128 |   size_t *restrict incr = malloc(257*sizeof(size_t));
129 |   
130 |   for (size_t i = 0; i < len; ++i){
131 |     test[i] = (uint8_t)(CHAR(px[i])[0]);
132 |     pos[++test[i]]++;
133 |   }
134 |   
135 |   size_t maxLen=0;
136 |   for (uint16_t i = 2; i < 257; ++i){ // start at 2, is that correct?
137 |     if(pos[i] > maxLen) {
138 |       maxLen = pos[i];
139 |     }
140 |   }
141 |   
142 |   incr[0] = pos[0];
143 |   for (uint16_t i = 1; i < 257 ; ++i){
144 |     incr[i] = pos[i] + incr[i-1];
145 |   }
146 |   
147 |   for (size_t i = 0; i < len; ++i){
148 |     SET_STRING_ELT(ans, --incr[test[i]], px[i]);
149 |   }
150 |   
151 |   if (maxLen > 1) {
152 |     SEXP tmp = PROTECT(allocVector(STRSXP, maxLen));
153 |     nprotect++;
154 |     size_t start = 0;
155 |     size_t *restrict newpos = malloc(257*sizeof(size_t));
156 |     recursiveRadix(STRING_PTR_RO(ans), 1, pos, incr, test, tmp, STRING_PTR_RO(tmp), start,newpos);
157 |     free(newpos);
158 |   }
159 |   
160 |   free(pos);
161 |   free(test);
162 |   free(incr);
163 |   
164 |   UNPROTECT(nprotect);
165 |   return ans;
166 | }
167 | 
168 | static SEXP rsortrev (SEXP x) {
169 |   const size_t len = LENGTH(x);
170 |   const SEXP *restrict px = STRING_PTR_RO(x);
171 |   SEXP ans = PROTECT(allocVector(STRSXP, len));
172 |   uint8_t nprotect = 1;
173 |   
174 |   uint8_t *restrict test = malloc(len * sizeof(uint8_t));
175 |   size_t *restrict pos = calloc(257, sizeof(size_t));
176 |   size_t *restrict incr = malloc(257*sizeof(size_t));
177 |   
178 |   for (size_t i = 0; i < len; ++i){
179 |     test[i] = (uint8_t)(CHAR(px[i])[0]);
180 |     pos[++test[i]]++;
181 |   }
182 |   
183 |   size_t maxLen=0;
184 |   for (uint16_t i = 2; i < 257; ++i){ // start at 2, is that correct?
185 |     if(pos[i] > maxLen) {
186 |       maxLen = pos[i];
187 |     }
188 |   }
189 |   
190 |   incr[256] = pos[256];
191 |   for (uint16_t i = 255; i >= 1 ; --i){
192 |     incr[i] = pos[i] + incr[i+1];
193 |   }
194 |   
195 |   for (size_t i = 0; i < len; ++i){
196 |     SET_STRING_ELT(ans, --incr[test[i]], px[i]);
197 |   }
198 |   
199 |   if (maxLen > 1) {
200 |     SEXP tmp = PROTECT(allocVector(STRSXP, maxLen));
201 |     nprotect++;
202 |     size_t start = 0;
203 |     size_t *restrict newpos = malloc(257*sizeof(size_t));
204 |     recursiveRadixrev(STRING_PTR_RO(ans), 1, pos, incr, test, tmp, STRING_PTR_RO(tmp), start,newpos);
205 |     free(newpos);
206 |   }
207 |   
208 |   free(pos);
209 |   free(test);
210 |   free(incr);
211 |   
212 |   UNPROTECT(nprotect);
213 |   return ans;
214 | }
215 | 
216 | static SEXP dupVecSort(SEXP x) {
217 |   const R_xlen_t n = xlength(x);
218 |   int K =8;
219 |   size_t M = 256;
220 |   const size_t n2 = 2U * (size_t) n;
221 |   while (M < n2) {
222 |     M *= 2;
223 |     K++;
224 |   }
225 |   R_xlen_t count = 0;
226 |   size_t id = 0;
227 |   int *restrict h = (int*)calloc(M, sizeof(int));
228 |   int *restrict pans = (int*)calloc(n, sizeof(int));
229 |   const SEXP *restrict px = STRING_PTR_RO(x);
230 |   for (int i = 0; i < n; ++i) {
231 |     id = HASHSTR(px[i]);
232 |     while (h[id]) {
233 |       if (px[h[id] - 1]==px[i]) {
234 |         goto sbl;
235 |       }
236 |       id++;
237 |       id %= M;
238 |     }
239 |     h[id] = (int) i + 1;
240 |     pans[i]++;
241 |     count++;
242 |     sbl:;
243 |   }
244 |   free(h);
245 |   SEXP indx = PROTECT(allocVector(STRSXP, count));
246 |   R_xlen_t ct = 0;
247 |   for (int i = 0; ct < count; ++i) {
248 |     if (pans[i]) {
249 |       SET_STRING_ELT(indx, ct++, px[i]);
250 |     }
251 |   }
252 |   free(pans);
253 |   UNPROTECT(1);
254 |   return indx;
255 | }
256 | 
257 | static int *buildTable (SEXP x) {
258 |   const R_xlen_t n = xlength(x);
259 |   K = 8;
260 |   M = 256;
261 |   const size_t n2 = 2U * (size_t) n;
262 |   while (M < n2) { M *= 2; K++;}
263 |   size_t id = 0;
264 |   int *h = (int*)calloc(M, sizeof(int));
265 |   const SEXP *restrict px = STRING_PTR_RO(x);
266 |   for (int i = 0; i < n; ++i) {
267 |     id = HASHSTR(px[i]);
268 |     while (h[id]) {
269 |       if (px[h[id] - 1]==px[i]) {
270 |         goto bl; // # nocov
271 |       }
272 |       id++;
273 |       id %= M;
274 |     }
275 |     h[id] = (int) i + 1;
276 |     bl:;
277 |   }
278 |   return h;
279 | }
280 | 
281 | static inline int getIndex(SEXP ptr, const SEXP *restrict cmp, int *lkpTbl) {
282 |   size_t id = HASHSTR(ptr);
283 |   while(true) {
284 |     if (cmp[lkpTbl[id] - 1]==ptr) {
285 |       return id;
286 |     }
287 |     id++;
288 |     id %= M;
289 |   }
290 | }
291 | 
292 | static SEXP callToSort (SEXP x, const char* method, SEXP env) {
293 |   SEXP call = PROTECT(allocVector(LANGSXP, 4));
294 |   SETCAR(call, STR_SORT);
295 |   
296 |   SEXP s = CDR(call);
297 |   SETCAR(s, x);
298 |   SET_TAG(s, install("x"));
299 |   
300 |   s = CDR(s);
301 |   SETCAR(s, PROTECT(mkString(method)));
302 |   SET_TAG(s, STR_METHOD);
303 |   
304 |   s = CDR(s);
305 |   SETCAR(s, ScalarLogical(0));
306 |   SET_TAG(s, STR_NALAST);
307 |   
308 |   SEXP out = PROTECT(eval(call, env));
309 |   UNPROTECT(3);
310 |   return out;
311 | }
312 | 
313 | static SEXP callToSort2 (SEXP x, const char* method, const int desc, const int na, SEXP env) {
314 |   SEXP call = PROTECT(allocVector(LANGSXP, 5));
315 |   SETCAR(call, STR_SORT);
316 |   
317 |   SEXP s = CDR(call);
318 |   SETCAR(s, x);
319 |   SET_TAG(s, install("x"));
320 |   
321 |   s = CDR(s);
322 |   SETCAR(s, PROTECT(mkString(method)));
323 |   SET_TAG(s, STR_METHOD);
324 |   
325 |   s = CDR(s);
326 |   SETCAR(s, ScalarLogical(na));
327 |   SET_TAG(s, STR_NALAST);
328 |   
329 |   s = CDR(s);
330 |   SETCAR(s, ScalarLogical(desc));
331 |   SET_TAG(s, STR_DECREA);
332 |   
333 |   SEXP out = PROTECT(eval(call, env));
334 |   UNPROTECT(3);
335 |   return out;
336 | }
337 | 
338 | SEXP callToOrder (SEXP x, const char* method, bool desc, Rboolean na, SEXP env) {
339 |   SEXP call = PROTECT(allocVector(LANGSXP, 5));
340 |   SETCAR(call, STR_ORDER);
341 |   
342 |   SEXP s = CDR(call);
343 |   SETCAR(s, x);
344 |   SET_TAG(s, install("..."));
345 |   
346 |   s = CDR(s);
347 |   SETCAR(s, PROTECT(mkString(method)));
348 |   SET_TAG(s, STR_METHOD);
349 |   
350 |   s = CDR(s);
351 |   SETCAR(s, ScalarLogical(na));
352 |   SET_TAG(s, STR_NALAST);
353 |   
354 |   s = CDR(s);
355 |   SETCAR(s, ScalarLogical(desc));
356 |   SET_TAG(s, STR_DECREA);
357 |   
358 |   SEXP out = PROTECT(eval(call, env));
359 |   UNPROTECT(3);
360 |   return out;
361 | }
362 | 
363 | /*
364 |  *  Character sorting
365 |  */
366 | 
367 | SEXP cpsortR (SEXP x, SEXP decreasing, SEXP nthread, SEXP nalast, SEXP env, SEXP index, SEXP clocale) {
368 | 
369 |   if (!IS_BOOL(decreasing)) {
370 |     error("Argument 'decreasing' must be TRUE or FALSE.");
371 |   }
372 |   /*if (!IS_BOOL(index)) {
373 |     error("Argument 'index.return' must be TRUE or FALSE.");
374 |   }*/
375 |   if (!IS_LOGICAL(nalast)) {
376 |     error("Argument 'na.last' must be TRUE, FALSE or NA.");
377 |   }
378 |   if (TYPEOF(nthread) != INTSXP) {
379 |     error("Argument 'nThread' (%s) must be of type integer.",type2char(TYPEOF(nthread)));
380 |   }
381 |   if (!IS_BOOL(clocale)) {
382 |     error("Argument 'c.locale' must be TRUE or FALSE.");
383 |   }
384 |   
385 |   const int na_pos = asLogical(nalast);
386 |   const int cindex = asLogical(index);
387 |   const int dcr    = asLogical(decreasing);
388 |   const int cl     = asLogical(clocale);
389 |   const int xlen   = LENGTH(x);
390 |   
391 |   SEXP uVals = PROTECT(dupVecSort(x));
392 |   const int n = LENGTH(uVals);
393 |   
394 |   const int early = xlen == n;
395 |   SEXP valSorted = early ? (
396 |     cindex ? PROTECT(callToOrder(uVals, "shell", dcr, na_pos, env)) : 
397 |              (cl ? (dcr ? PROTECT(rsortrev(uVals)) : PROTECT(rsort(uVals))) :
398 |                    PROTECT(callToSort2(uVals, "quick", dcr, na_pos, env)))
399 |   ) : ( cl ? PROTECT(rsort(uVals)) : 
400 |              PROTECT(callToSort(uVals, "quick", env)));
401 | 
402 |   /*if (early && cindex) {
403 |     UNPROTECT(2);
404 |     return valSorted;
405 |   }*/
406 |   
407 |   SEXP *restrict pvalSorted = (SEXP*)STRING_PTR_RO(valSorted);
408 |   const int nlen = LENGTH(valSorted);
409 |   
410 |   int NAidx = -1;
411 |   for (int i = 0; i < nlen; ++i) {
412 |     if (pvalSorted[i] == NA_STRING) {
413 |       NAidx = i;
414 |       break;
415 |     }
416 |   }
417 |   if (cl) {
418 |     if ( ((na_pos != 0 && !dcr) || (na_pos == 0 && dcr && !early) || (na_pos != 0 && dcr && early)) && NAidx != nlen-1 ) {
419 |       if (NAidx >= 0) {
420 |         memmove(pvalSorted+NAidx, pvalSorted+NAidx+1, (nlen - (NAidx + 1))*sizeof(SEXP));
421 |         pvalSorted[nlen-1] = NA_STRING;
422 |       }
423 |     } else if ( (na_pos == 0 && !dcr) || (na_pos != 0 && dcr) || (na_pos == 0 && dcr && early)){
424 |       if (NAidx > 0 ) {
425 |         memmove(pvalSorted+1, pvalSorted, NAidx*sizeof(SEXP));
426 |         pvalSorted[0] = NA_STRING;
427 |       }
428 |     }
429 |   } else {
430 |     if ( ((na_pos != 0 && !dcr) || (na_pos == 0 && dcr)) && NAidx != nlen-1) {
431 |       if (NAidx >= 0) {
432 |         memmove(pvalSorted+NAidx, pvalSorted+NAidx+1, (nlen - (NAidx + 1))*sizeof(SEXP));
433 |         pvalSorted[nlen-1] = NA_STRING;
434 |       }
435 |     } else if ( (na_pos == 0 && !dcr) || (na_pos != 0 && dcr) ){
436 |       if (NAidx > 0 ) {
437 |         memmove(pvalSorted+1, pvalSorted, NAidx*sizeof(SEXP));
438 |         pvalSorted[0] = NA_STRING;
439 |       }
440 |     }
441 |   }
442 |   
443 |   if (early) {
444 |     if (na_pos == NA_LOGICAL && cl) {
445 |       const SEXP *restrict pa = STRING_PTR_RO(valSorted); // already used pvalSorted
446 |       int ct = 0;
447 |       for (int i = nlen-1; i >= 0; --i) {
448 |         if(pa[i] == NA_STRING) {
449 |           ct++;
450 |         } else {
451 |           break;
452 |         }
453 |       }
454 |       if (ct > 0) {
455 |         valSorted = Rf_xlengthgets(valSorted, nlen-ct);
456 |       }
457 |     }
458 |     UNPROTECT(2);
459 |     return valSorted;
460 |   }
461 |   
462 |   int *restrict lookupTable = buildTable(valSorted);
463 |   const SEXP *restrict px = STRING_PTR_RO(x);
464 |   int nth = asInteger(nthread);
465 |   nth = nth > max_thread ? max_thread : (nth < min_thread ? min_thread : nth); //revisit this
466 |   
467 |   SEXP ans = PROTECT(allocVector(cindex ? INTSXP : STRSXP, xlen));
468 |   if (!cindex) {
469 |     copyMostAttrib(x, ans);
470 |   }
471 |   
472 |   int *restrict pos = (int*)R_alloc(n, sizeof(int));
473 |   memset(pos, 0, n*sizeof(int));
474 |   int *restrict lv = (int*)R_alloc(xlen, sizeof(int));
475 |   
476 |   OMP_PARALLEL_FOR(nth)
477 |   for (int j = 0; j < xlen; ++j) {
478 |     lv[j] = LOOKUP_VAL(px[j]);
479 |   }
480 |   free(lookupTable);
481 |   
482 |   for (int j=0; j<xlen; ++j) {
483 |     pos[lv[j]]++;
484 |   }
485 |   
486 |   int cumul = 0, temp = 0;
487 |   if (dcr) {
488 |     for (int i=n-1; i>=0; --i) {
489 |       temp = pos[i];
490 |       pos[i] = cumul;
491 |       cumul += temp; 
492 |     }
493 |   } else {
494 |     for (int i=0; i<n; ++i) {
495 |       temp = pos[i];
496 |       pos[i] = cumul;
497 |       cumul += temp; 
498 |     }
499 |   }
500 |   
501 |   /*if (cindex) {
502 |     int *restrict pans = INTEGER(ans);
503 |     for (int j=0; j<xlen; ++j) {
504 |       pans[pos[lv[j]]++] = j + 1;
505 |     }
506 |     if (na_pos == NA_LOGICAL) {
507 |       int ct = 0;
508 |       for (int i = xlen-1; i >= 0; --i) {
509 |         if( px[pans[i]-1] == NA_STRING) {
510 |           ct++;
511 |         } else {
512 |           break;
513 |         }
514 |       }
515 |       if (ct > 0) {
516 |         ans = Rf_xlengthgets(ans, xlen-ct);
517 |       }
518 |     }
519 |   } else {*/
520 |     for (int j = 0; j < xlen; ++j) {
521 |       SET_STRING_ELT(ans, pos[lv[j]]++ , px[j]);
522 |     }
523 |     if (na_pos == NA_LOGICAL) {
524 |       const SEXP *restrict pa = STRING_PTR_RO(ans);
525 |       int ct = 0;
526 |       for (int i = xlen-1; i >= 0; --i) {
527 |         if(pa[i] == NA_STRING) {
528 |           ct++;
529 |         } else {
530 |           break;
531 |         }
532 |       }
533 |       if (ct > 0) {
534 |         ans = Rf_xlengthgets(ans, xlen-ct);
535 |         if (!cindex) {
536 |           copyMostAttrib(x, ans);
537 |         }
538 |       }
539 |     }
540 |   //}
541 |   UNPROTECT(3);
542 |   return ans;
543 | }
544 | 
545 | /*
546 |  *  Character to factor conversion
547 |  */
548 | 
549 | SEXP charToFactR (SEXP x, SEXP decreasing, SEXP nthread, SEXP nalast, SEXP env, SEXP addNA) {
550 |   
551 |   if (!IS_BOOL(decreasing)) {
552 |     error("Argument 'decreasing' must be TRUE or FALSE.");
553 |   }
554 |   if (!IS_BOOL(addNA)) {
555 |     error("Argument 'addNA' must be TRUE or FALSE.");
556 |   }
557 |   /*if (!IS_LOGICAL(nalast)) {
558 |     error("Argument 'na.last' must be TRUE, FALSE or NA.");
559 |   }*/
560 |   if (TYPEOF(x) != STRSXP) {
561 |     error("Argument 'x' must be of type character.");
562 |   }
563 |   if (TYPEOF(nthread) != INTSXP) {
564 |     error("Argument 'nThread' (%s) must be of type integer.",type2char(TYPEOF(nthread)));
565 |   }
566 |   const int na_pos = asLogical(nalast);
567 |   const int dcr = asLogical(decreasing);
568 |   const int addNAv = asLogical(addNA);
569 |   const int xlen = LENGTH(x);
570 |   SEXP uVals = PROTECT(dupVecSort(x));
571 |   const int n = LENGTH(uVals);
572 |   SEXP valSorted = PROTECT(callToSort2(uVals, "quick", dcr, 1, env));
573 |   SEXP *restrict pvalSorted = (SEXP*)STRING_PTR_RO(valSorted);
574 |   
575 |   int NAidx = -1;
576 |   for (int i = 0; i < n; ++i) {
577 |     if (pvalSorted[i] == NA_STRING) {
578 |       NAidx = i; break;
579 |     }
580 |   }
581 | 
582 |   if ( ((na_pos != 0 && !dcr) || (na_pos == 0 && dcr)) && NAidx != n-1) {
583 |     if (NAidx >= 0) {
584 |       memmove(pvalSorted+NAidx, pvalSorted+NAidx+1, (n - ((NAidx + 1)))*sizeof(SEXP));
585 |       pvalSorted[n-1] = NA_STRING;
586 |     }
587 |   } else if ( (na_pos == 0 && !dcr) || (na_pos != 0 && dcr) ){
588 |     if (NAidx > 0 ) {
589 |       memmove(pvalSorted+1, pvalSorted, NAidx*sizeof(SEXP));
590 |       pvalSorted[0] = NA_STRING;
591 |     }
592 |   }
593 | 
594 |   int *restrict lookupTable = buildTable(valSorted);
595 |   const SEXP *restrict px = STRING_PTR_RO(x);
596 |   int nth = asInteger(nthread);
597 |   nth = nth > max_thread ? max_thread : (nth < min_thread ? min_thread : nth); //revisit this
598 |   
599 |   SEXP ans = PROTECT(allocVector(INTSXP, xlen));
600 |   int *restrict pans = INTEGER(ans);
601 |   
602 |   if (addNAv == 0) {
603 |     OMP_PARALLEL_FOR(nth)
604 |     for (int j=0; j<xlen; ++j) {
605 |       pans[j] = px[j] == NA_STRING ? NA_INTEGER : LOOKUP_VAL(px[j]) + 1;
606 |     }
607 |   } else {
608 |     OMP_PARALLEL_FOR(nth)
609 |     for (int j=0; j<xlen; ++j) {
610 |       pans[j] = LOOKUP_VAL(px[j]) + 1;
611 |     }
612 |   }
613 |   
614 |   free(lookupTable);
615 |   if (addNAv == 0) {
616 |     valSorted = Rf_xlengthgets(valSorted, LENGTH(valSorted)-1);
617 |   }
618 |   setAttrib(ans, R_LevelsSymbol, valSorted);
619 |   SEXP classV = PROTECT(allocVector(STRSXP,1));
620 |   SET_STRING_ELT(classV, 0, STR_FACTOR);
621 |   classgets(ans, classV);
622 | 
623 |   UNPROTECT(4);
624 |   return ans;
625 | }
626 | 


--------------------------------------------------------------------------------
/src/share.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * kit : Useful R Functions Implemented in C
  3 |  * Copyright (C) 2020-2025  Morgan Jacob
  4 |  * 
  5 |  * This program is free software: you can redistribute it and/or modify
  6 |  * it under the terms of the GNU General Public License as published by
  7 |  * the Free Software Foundation, either version 3 of the License, or
  8 |  * (at your option) any later version.
  9 |  * 
 10 |  * This program is distributed in the hope that it will be useful,
 11 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
 12 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 13 |  * GNU General Public License for more details.
 14 |  * 
 15 |  * You should have received a copy of the GNU General Public License
 16 |  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 17 |  */
 18 | 
 19 | #include "kit.h"
 20 | 
 21 | /*
 22 |  *  Structure to hold Length and Address 
 23 |  *  of data to be shared in memory segment
 24 |  */
 25 | 
 26 | struct OBJECT {
 27 | #ifdef WIN32
 28 |   HANDLE hMapFile;
 29 |   HANDLE hMapLength;
 30 |   LPCTSTR lpMapAddress;
 31 |   LPCTSTR lpMapLength;
 32 | #else
 33 |   int fd_addr;
 34 |   int fd_length;
 35 |   size_t STORAGE_SIZE;
 36 |   void *addr;
 37 |   void *length;
 38 |   const char *STORAGE_ID;
 39 |   const char *LENGTH_ID;
 40 | #endif
 41 | };
 42 | 
 43 | /*
 44 |  *  Function to finalize memory map pointer
 45 |  */
 46 | 
 47 | static bool verbose_finalizer = false;
 48 | 
 49 | static void map_finalizer (SEXP ext) {
 50 |   if (verbose_finalizer) Rprintf("* Finalize...\n");
 51 |   if (NULL == R_ExternalPtrAddr(ext)) {
 52 |     return;
 53 |   }
 54 |   if (verbose_finalizer) Rprintf("* Clear external pointer...\n");
 55 |   struct OBJECT *ptr = (struct OBJECT*) R_ExternalPtrAddr(ext);
 56 | #ifdef WIN32  
 57 |   UnmapViewOfFile(ptr->lpMapAddress);
 58 |   CloseHandle(ptr->hMapFile);
 59 |   UnmapViewOfFile(ptr->lpMapLength);
 60 |   CloseHandle(ptr->hMapLength);
 61 | #else
 62 |   munmap(ptr->addr, ptr->STORAGE_SIZE);
 63 |   shm_unlink(ptr->STORAGE_ID);
 64 |   munmap(ptr->length, 256);
 65 |   shm_unlink(ptr->LENGTH_ID);
 66 | #endif
 67 |   R_Free(ptr);
 68 |   R_ClearExternalPtr(ext);
 69 |   if (verbose_finalizer) Rprintf("* Clear external pointer...OK\n");
 70 | }
 71 | 
 72 | /*
 73 |  *  Function to create data
 74 |  */
 75 | 
 76 | SEXP createMappingObjectR (SEXP MapObjectName, SEXP MapLengthName, SEXP DataObject, SEXP verboseArg) {
 77 |   if (TYPEOF(MapObjectName) != STRSXP || LENGTH(MapObjectName) != 1) {
 78 |     error("Argument 'MapObjectName' must be of type character and length 1.");
 79 |   }
 80 |   if (!IS_BOOL(verboseArg)) {
 81 |     error("Argument 'verbose' must be TRUE or FALSE.");
 82 |   }
 83 |   const bool verbose = asLogical(verboseArg);
 84 |   verbose_finalizer = verbose;
 85 |   const size_t len = LENGTH(DataObject);
 86 |   const size_t BUF_SIZE = len*sizeof(Rbyte);
 87 |   if (verbose) Rprintf("* Data object size: %zu\n",len*sizeof(Rbyte));
 88 |   if (verbose) Rprintf("* Start mapping object...OK\n");
 89 |   struct OBJECT *foo = R_Calloc(1, struct OBJECT);
 90 |   SEXP ext = PROTECT(R_MakeExternalPtr(foo, R_NilValue, R_NilValue));
 91 |   R_RegisterCFinalizerEx(ext, map_finalizer, TRUE);
 92 |   if (verbose) Rprintf("* Register finalizer...OK\n");
 93 | #ifdef WIN32 
 94 |   LPSTR pMN = (LPSTR) CHAR(STRING_PTR_RO(MapObjectName)[0]);
 95 |   LPSTR pML = (LPSTR) CHAR(STRING_PTR_RO(MapLengthName)[0]);
 96 |   foo->hMapFile = CreateFileMapping(INVALID_HANDLE_VALUE, NULL, PAGE_READWRITE, 0, BUF_SIZE, pMN);
 97 |   foo->hMapLength = CreateFileMapping(INVALID_HANDLE_VALUE, NULL, PAGE_READWRITE, 0, 256, pML);
 98 |   if (foo->hMapFile == INVALID_HANDLE_VALUE || foo->hMapLength == INVALID_HANDLE_VALUE) {
 99 | #else
100 |   const char *pMN = CHAR(STRING_PTR_RO(MapObjectName)[0]);
101 |   const char *pML = CHAR(STRING_PTR_RO(MapLengthName)[0]);
102 |   foo->STORAGE_ID = pMN; 
103 |   foo->LENGTH_ID = pML;
104 |   foo->STORAGE_SIZE = BUF_SIZE;
105 |   foo->fd_addr = shm_open(foo->STORAGE_ID, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR);
106 |   foo->fd_length = shm_open(foo->LENGTH_ID, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR);
107 |   if (foo->fd_addr == -1 || foo->fd_length == -1) {
108 |     Rprintf("shm_open error, errno(%d): %s\n", errno, strerror(errno));
109 | #endif
110 |     error("* Creating file mapping...ERROR");
111 |   }
112 |   if (verbose) Rprintf("* Creating file maping...OK\n");
113 | #ifdef WIN32  
114 | #else
115 |   struct stat mapstat;
116 |   if (-1 != fstat(foo->fd_addr, &mapstat) && mapstat.st_size == 0) {
117 |     if(ftruncate(foo->fd_addr, BUF_SIZE) == -1) {
118 |       error("* Extend shared memory object (1)...ERROR");
119 |     }
120 |   }
121 |   if (-1 != fstat(foo->fd_length, &mapstat) && mapstat.st_size == 0) {
122 |     if(ftruncate(foo->fd_length, 256) == -1) {
123 |       error("* Extend shared memory object (2)...ERROR");
124 |     }
125 |   }
126 |   if (verbose) Rprintf("* Extend shared memory object...OK\n");
127 | #endif  
128 |   
129 | #ifdef WIN32
130 |   foo->lpMapAddress = (LPCTSTR) MapViewOfFile (foo->hMapFile, FILE_MAP_ALL_ACCESS, 0, 0, BUF_SIZE);
131 |   foo->lpMapLength = (LPCTSTR) MapViewOfFile (foo->hMapLength, FILE_MAP_ALL_ACCESS, 0, 0, 256);
132 |   if (foo->lpMapAddress == NULL || foo->lpMapLength == NULL) {
133 | #else
134 |     foo->addr = mmap(NULL, BUF_SIZE, PROT_WRITE, MAP_SHARED, foo->fd_addr, 0);
135 |     foo->length = mmap(NULL, 256, PROT_WRITE, MAP_SHARED, foo->fd_length, 0);
136 |     if (foo->addr == MAP_FAILED || foo->length == MAP_FAILED) {
137 | #endif
138 |     error("* Map view file...ERROR");
139 |   }
140 |   if (verbose) Rprintf("* Map view file...OK\n");
141 | #ifdef WIN32
142 |   CopyMemory((LPVOID)foo->lpMapAddress, RAW(DataObject), BUF_SIZE);
143 |   CopyMemory((LPVOID)foo->lpMapLength, &len, sizeof(size_t));
144 | #else
145 |   memcpy(foo->addr, RAW(DataObject), BUF_SIZE);
146 |   memcpy(foo->length, &len, sizeof(size_t));
147 | #endif
148 |   if (verbose) Rprintf("* Copy memory...OK\n");
149 |   UNPROTECT(1);
150 |   return ext;
151 | }
152 | 
153 | /*
154 |  *  Function to retrieve data
155 |  */
156 | 
157 | SEXP getMappingObjectR (SEXP MapObjectName, SEXP MapLengthName, SEXP verboseArg) {
158 |   if (TYPEOF(MapObjectName) != STRSXP || LENGTH(MapObjectName) != 1) {
159 |     error("Argument 'MapObjectName' must be of type character and length 1.");
160 |   }
161 |   if (!IS_BOOL(verboseArg)) {
162 |     error("Argument 'verbose' must be TRUE or FALSE.");
163 |   }
164 |   const bool verbose = asLogical(verboseArg);
165 | #ifdef WIN32
166 |   LPSTR pMN = (LPSTR) CHAR(STRING_PTR_RO(MapObjectName)[0]);
167 |   LPSTR pML = (LPSTR) CHAR(STRING_PTR_RO(MapLengthName)[0]);
168 |   HANDLE hMapFile = OpenFileMapping(FILE_MAP_ALL_ACCESS, FALSE, pMN);
169 |   HANDLE hMapLength = OpenFileMapping(FILE_MAP_ALL_ACCESS, FALSE, pML);
170 |   if (hMapFile == INVALID_HANDLE_VALUE || hMapLength == INVALID_HANDLE_VALUE) {
171 | #else
172 |   const char *pMN = CHAR(STRING_PTR_RO(MapObjectName)[0]);
173 |   const char *pML = CHAR(STRING_PTR_RO(MapLengthName)[0]);
174 |   int fd_addr = shm_open(pMN, O_RDONLY, S_IRUSR | S_IWUSR);
175 |   int fd_length = shm_open(pML, O_RDONLY, S_IRUSR | S_IWUSR);
176 |   if (fd_addr == -1 || fd_length == -1) {
177 | #endif
178 |     error("* Creating file mapping...ERROR");
179 |   }
180 |   if (verbose) Rprintf("* Creating file maping...OK\n");
181 | #ifdef WIN32
182 |   LPCTSTR lpMapLength = (LPCTSTR) MapViewOfFile (hMapLength, FILE_MAP_ALL_ACCESS, 0, 0, 256);
183 |   if (lpMapLength == NULL) {
184 |     CloseHandle(hMapLength);
185 | #else
186 |   void *length = mmap(NULL, 256, PROT_READ, MAP_SHARED, fd_length, 0);
187 |   if (length == MAP_FAILED) {
188 |     shm_unlink(pML);
189 | #endif
190 |     error("* Map view file (length)...ERROR");
191 |   }
192 |   if (verbose) Rprintf("* Map view file (length)...OK\n");
193 | #ifdef WIN32
194 |   size_t len = *(size_t*)lpMapLength;
195 |   LPCTSTR lpMapAddress = (LPCTSTR) MapViewOfFile (hMapFile, FILE_MAP_ALL_ACCESS, 0, 0, len*sizeof(Rbyte));
196 |   if (lpMapAddress == NULL) {
197 |     CloseHandle(hMapFile);
198 | #else
199 |   size_t len = *(size_t*)length;  
200 |   void *addr = mmap(NULL, len*sizeof(Rbyte), PROT_READ, MAP_SHARED, fd_addr, 0);
201 |   if (addr == MAP_FAILED) {
202 |     shm_unlink(pMN);
203 | #endif
204 |     error("* Map view file (address)...ERROR");
205 |   }
206 |   if (verbose) Rprintf("* Map view file (address)...OK\n");
207 |   SEXP ans = PROTECT(allocVector(RAWSXP, len));
208 |   if (verbose) Rprintf("* Create RAW Vector...OK\n");
209 | #ifdef WIN32
210 |   CopyMemory(RAW(ans), (Rbyte*)lpMapAddress, len*sizeof(Rbyte));
211 | #else
212 |   memcpy(RAW(ans), (Rbyte*)addr, len*sizeof(Rbyte)); // maybe need +1
213 | #endif
214 |   if (verbose) Rprintf("* Copy map memory...OK\n");
215 |   
216 | #ifdef WIN32
217 |   if (!UnmapViewOfFile(lpMapLength)) {
218 | #else
219 |   if (munmap(length, 256) == -1) {
220 | #endif
221 |     error("* Closing mapping file (length)...ERROR");
222 |   }
223 |   if (verbose) Rprintf("* Closing mapping file (length)...OK\n");
224 | #ifdef WIN32
225 |   if (!CloseHandle(hMapLength)) {
226 | #else
227 |   if (shm_unlink(pML) == -1) {
228 | #endif
229 |     error("* Closing mapping handle (length)...ERROR");
230 |   }
231 |   if (verbose) Rprintf("* Closing mapping handle (length)...OK\n");
232 |   
233 | #ifdef WIN32
234 |   if (!UnmapViewOfFile(lpMapAddress)) {
235 | #else
236 |   if (munmap(length, len*sizeof(Rbyte)) == -1) {
237 | #endif
238 |     error("* Closing mapping file (address)...ERROR");
239 |   }
240 |   if (verbose) Rprintf("* Closing mapping file (address)...OK\n");
241 | #ifdef WIN32
242 |   if (!CloseHandle(hMapFile)) {
243 | #else
244 |   if (shm_unlink(pMN) == -1) {
245 | #endif
246 |     error("* Closing mapping handle (address)...ERROR");
247 |   }
248 |   if (verbose) Rprintf("* Closing mapping handle (address)...OK\n");
249 |   UNPROTECT(1);
250 |   return ans;
251 | }
252 | 
253 | /*
254 |  *  Function to clear mapping object
255 |  */
256 | 
257 | SEXP clearMappingObjectR  (SEXP ext, SEXP verboseArg) {
258 |   if (!IS_BOOL(verboseArg)) {
259 |     error("Argument 'verbose' must be TRUE or FALSE.");
260 |   }
261 |   verbose_finalizer = asLogical(verboseArg);
262 |   if (NULL == R_ExternalPtrAddr(ext)) {
263 |     return ScalarLogical(FALSE);
264 |   }
265 |   map_finalizer(ext);
266 |   return ScalarLogical(TRUE);
267 | }
268 | 


--------------------------------------------------------------------------------
/src/topn.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * kit : Useful R Functions Implemented in C
  3 |  * Copyright (C) 2020-2025  Morgan Jacob
  4 |  * 
  5 |  * This program is free software: you can redistribute it and/or modify
  6 |  * it under the terms of the GNU General Public License as published by
  7 |  * the Free Software Foundation, either version 3 of the License, or
  8 |  * (at your option) any later version.
  9 |  * 
 10 |  * This program is distributed in the hope that it will be useful,
 11 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
 12 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 13 |  * GNU General Public License for more details.
 14 |  * 
 15 |  * You should have received a copy of the GNU General Public License
 16 |  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 17 |  */
 18 | 
 19 | #include "kit.h"
 20 | 
 21 | SEXP topnR(SEXP vec, SEXP n, SEXP dec, SEXP hasna, SEXP env) {
 22 |   int len0 = asInteger(n);
 23 |   const R_xlen_t len1 = xlength(vec);
 24 |   if (isS4(vec)) {
 25 |     error("S4 class objects are not supported.");
 26 |   }
 27 |   if (len0 > len1) {
 28 |     warning("'n' is larger than length of 'vec'. 'n' will be set to length of 'vec'.");
 29 |     len0 = (int)len1;
 30 |   }
 31 |   if (len0 < 1) {
 32 |     error("Please enter a positive integer larger or equal to 1.");
 33 |   }
 34 |   if (!IS_BOOL(dec)) {
 35 |     error("Argument 'decreasing' must be TRUE or FALSE and length 1.");
 36 |   }
 37 |   if (!IS_BOOL(hasna)) {
 38 |     error("Argument 'hasna' must be TRUE or FALSE and length 1.");
 39 |   }
 40 |   const Rboolean dcr = asLogical(dec);
 41 |   const SEXPTYPE tvec = UTYPEOF(vec);
 42 |   const Rboolean vhasna = asLogical(hasna);
 43 |   if ( ((len0 > 2000 && vhasna == FALSE) || (len0 > 1500 && vhasna == TRUE)) && (tvec == INTSXP || tvec == REALSXP)) {
 44 |     SEXP prem = PROTECT(callToOrder(vec, "radix", dcr, TRUE, env));
 45 |     SEXP ans = PROTECT(allocVector(UTYPEOF(prem), len0));
 46 |     switch(UTYPEOF(prem)) {
 47 |     case INTSXP: {
 48 |       memcpy(INTEGER(ans), INTEGER(prem), len0 *sizeof(int));
 49 |     } break;
 50 |     case REALSXP: {
 51 |       memcpy(REAL(ans), REAL(prem), len0 *sizeof(double));
 52 |     } break;
 53 |     }
 54 |     UNPROTECT(2);
 55 |     return ans;
 56 |   }
 57 |   SEXP ans = PROTECT(allocVector(INTSXP, len0));
 58 |   int *restrict pans = INTEGER(ans);
 59 |   int tmp;
 60 |   if (dcr) {
 61 |     switch(tvec) {
 62 |     case INTSXP: {
 63 |       int i, j, idx = 0;
 64 |       const int *restrict pvec = INTEGER(vec);
 65 |       int min_value = pvec[0];
 66 |       if (vhasna) {
 67 |         for (i = 0; i < len0; ++i) {
 68 |           pans[i] = i;
 69 |           if (pvec[i] <= min_value || pvec[i] == NA_INTEGER) {
 70 |             min_value = pvec[i];
 71 |             idx = i;
 72 |           }
 73 |         }
 74 |         for (i = len0; i < len1; ++i) {
 75 |           if (pvec[i] == NA_INTEGER) {
 76 |             continue;
 77 |           }
 78 |           if (pvec[i] > min_value) {
 79 |             min_value = pvec[i];
 80 |             pans[idx] = i;
 81 |             for (j = 0; j <len0; ++j) {
 82 |               if ((min_value > pvec[pans[j]] || (min_value == pvec[pans[j]] && pans[idx] < pans[j])) || pvec[pans[j]] == NA_INTEGER) {
 83 |                 min_value = pvec[pans[j]];
 84 |                 idx = j;
 85 |               }
 86 |             }
 87 |           }
 88 |         }
 89 |         for (i = 0; i < len0; ++i) {
 90 |           tmp = pans[i];
 91 |           for (j = i; j > 0 && (pvec[tmp] > pvec[pans[j-1]] || (pvec[tmp] == pvec[pans[j-1]] && tmp < pans[j-1])); --j) {
 92 |             pans[j] = pans[j-1];
 93 |           }
 94 |           pans[j] = tmp;
 95 |         }
 96 |         for (i =0; i < len0; ++i) {
 97 |           pans[i]++;
 98 |         }
 99 |       } else {
100 |         for (i = 0; i < len0; ++i) {
101 |           pans[i] = i;
102 |           if (pvec[i] <= min_value) {
103 |             min_value = pvec[i];
104 |             idx = i;
105 |           }
106 |         }
107 |         for (i = len0; i < len1; ++i) {
108 |           if (pvec[i] > min_value) {
109 |             min_value = pvec[i];
110 |             pans[idx] = i;
111 |             for (j = 0; j <len0; ++j) {
112 |               if (min_value > pvec[pans[j]] || (min_value == pvec[pans[j]] && pans[idx] < pans[j])) {
113 |                 min_value = pvec[pans[j]];
114 |                 idx = j;
115 |               }
116 |             }
117 |           }
118 |         }
119 |         for (i = 0; i < len0; ++i) {
120 |           tmp = pans[i];
121 |           for (j = i; j > 0 && (pvec[tmp] > pvec[pans[j-1]] || (pvec[tmp] == pvec[pans[j-1]] && tmp < pans[j-1])); --j) {
122 |             pans[j] = pans[j-1];
123 |           }
124 |           pans[j] = tmp;
125 |         }
126 |         for (i =0; i < len0; ++i) {
127 |           pans[i]++;
128 |         }
129 |       }
130 |     } break;
131 |     case REALSXP: {
132 |       int i, j, idx = 0;
133 |       const double *restrict pvec = REAL(vec);
134 |       double min_value = pvec[0];
135 |       if (vhasna) {
136 |         for (i = 0; i < len0; ++i) {
137 |           pans[i] = i;
138 |           if (pvec[i] <= min_value || ISNAN(pvec[i])) {
139 |             min_value = pvec[i];
140 |             idx = i;
141 |           }
142 |         }
143 |         for (i = len0; i < len1; ++i) {
144 |           if (ISNAN(pvec[i])) {
145 |             continue;
146 |           }
147 |           if (pvec[i] > min_value || ISNAN(min_value)) {
148 |             min_value = pvec[i];
149 |             pans[idx] = i;
150 |             for (j = 0; j <len0; ++j) {
151 |               if ((min_value > pvec[pans[j]] || (min_value == pvec[pans[j]] && pans[idx] < pans[j])) || ISNAN(pvec[pans[j]])) {
152 |                 min_value = pvec[pans[j]];
153 |                 idx = j;
154 |               }
155 |             }
156 |           }
157 |         }
158 |         for (i = 0; i < len0; ++i) {
159 |           tmp = pans[i];
160 |           for (j = i; j > 0 && (pvec[tmp] > pvec[pans[j-1]] || (pvec[tmp] == pvec[pans[j-1]] && tmp < pans[j-1]) || (!ISNAN(pvec[tmp]) && ISNAN(pvec[pans[j-1]]))); --j) {
161 |             pans[j] = pans[j-1];
162 |           }
163 |           pans[j] = tmp;
164 |         }
165 |         for (i =0; i < len0; ++i) {
166 |           pans[i]++;
167 |         }
168 |       } else {
169 |         for (i = 0; i < len0; ++i) {
170 |           pans[i] = i;
171 |           if (pvec[i] <= min_value) {
172 |             min_value = pvec[i];
173 |             idx = i;
174 |           }
175 |         }
176 |         for (i = len0; i < len1; ++i) {
177 |           if (pvec[i] > min_value) {
178 |             min_value = pvec[i];
179 |             pans[idx] = i;
180 |             for (j = 0; j <len0; ++j) {
181 |               if (min_value > pvec[pans[j]] || (min_value == pvec[pans[j]] && pans[idx] < pans[j])) {
182 |                 min_value = pvec[pans[j]];
183 |                 idx = j;
184 |               }
185 |             }
186 |           }
187 |         }
188 |         for (i = 0; i < len0; ++i) {
189 |           tmp = pans[i];
190 |           for (j = i; j > 0 && (pvec[tmp] > pvec[pans[j-1]] || (pvec[tmp] == pvec[pans[j-1]] && tmp < pans[j-1])); --j) {
191 |             pans[j] = pans[j-1];
192 |           }
193 |           pans[j] = tmp;
194 |         }
195 |         for (i =0; i < len0; ++i) {
196 |           pans[i]++;
197 |         }
198 |       }
199 |     } break;
200 |     default:
201 |       error("Type %s is not supported.", type2char(tvec));
202 |     }
203 |   } else {
204 |     switch(tvec) {
205 |     case INTSXP: {
206 |       int i, j, idx = 0;
207 |       const int *restrict pvec = INTEGER(vec);
208 |       int min_value = pvec[0];
209 |       if (vhasna) {
210 |         for (i = 0; i < len0; ++i) {
211 |           pans[i] = i;
212 |           if ((pvec[i] >= min_value && min_value != NA_INTEGER) || pvec[i] == NA_INTEGER) {
213 |             min_value = pvec[i];
214 |             idx = i;
215 |           }
216 |         }
217 |         for (i = len0; i < len1; ++i) {
218 |           if (pvec[i] == NA_INTEGER) {
219 |             continue;
220 |           }
221 |           if (pvec[i] < min_value || min_value == NA_INTEGER) {
222 |             min_value = pvec[i];
223 |             pans[idx] = i;
224 |             for (j = 0; j <len0; ++j) {
225 |               if (((min_value < pvec[pans[j]] || (min_value == pvec[pans[j]] && pans[idx] < pans[j])) && min_value != NA_INTEGER) || pvec[pans[j]] == NA_INTEGER) {
226 |                 min_value = pvec[pans[j]];
227 |                 idx = j;
228 |               }
229 |             }
230 |           }
231 |         }
232 |         for (i = 0; i < len0; ++i) {
233 |           tmp = pans[i];
234 |           if (pvec[tmp] == NA_INTEGER) {
235 |             continue;
236 |           }
237 |           for (j = i; j > 0 && (pvec[tmp] < pvec[pans[j-1]] || (pvec[tmp] == pvec[pans[j-1]] && tmp < pans[j-1]) || pvec[pans[j-1]] == NA_INTEGER); --j) {
238 |             pans[j] = pans[j-1];
239 |           }
240 |           pans[j] = tmp;
241 |         }
242 |         for (i =0; i < len0; ++i) {
243 |           pans[i]++;
244 |         }
245 |       } else {
246 |         for (i = 0; i < len0; ++i) {
247 |           pans[i] = i;
248 |           if (pvec[i] >= min_value) {
249 |             min_value = pvec[i];
250 |             idx = i;
251 |           }
252 |         }
253 |         for (i = len0; i < len1; ++i) {
254 |           if (pvec[i] < min_value) {
255 |             min_value = pvec[i];
256 |             pans[idx] = i;
257 |             for (j = 0; j <len0; ++j) {
258 |               if (min_value < pvec[pans[j]] || (min_value == pvec[pans[j]] && pans[idx] < pans[j])) {
259 |                 min_value = pvec[pans[j]];
260 |                 idx = j;
261 |               }
262 |             }
263 |           }
264 |         }
265 |         for (i = 0; i < len0; ++i) {
266 |           tmp = pans[i];
267 |           for (j = i; j > 0 && (pvec[tmp] < pvec[pans[j-1]] || (pvec[tmp] == pvec[pans[j-1]] && tmp < pans[j-1])); --j) {
268 |             pans[j] = pans[j-1];
269 |           }
270 |           pans[j] = tmp;
271 |         }
272 |         for (i =0; i < len0; ++i) {
273 |           pans[i]++;
274 |         }
275 |       }
276 |     } break;
277 |     case REALSXP: {
278 |       int i, j, idx = 0;
279 |       const double *restrict pvec = REAL(vec);
280 |       double min_value = pvec[0];
281 |       if (vhasna) {
282 |         for (i = 0; i < len0; ++i) {
283 |           pans[i] = i;
284 |           if (pvec[i] >= min_value || ISNAN(pvec[i])) {
285 |             min_value = pvec[i];
286 |             idx = i;
287 |           }
288 |         }
289 |         for (i = len0; i < len1; ++i) {
290 |           if (ISNAN(pvec[i])) {
291 |             continue;
292 |           }
293 |           if (pvec[i] < min_value || ISNAN(min_value)) {
294 |             min_value = pvec[i];
295 |             pans[idx] = i;
296 |             for (j = 0; j <len0; ++j) {
297 |               if ((min_value < pvec[pans[j]] || (min_value == pvec[pans[j]] && pans[idx] < pans[j])) || ISNAN(pvec[pans[j]])) {
298 |                 min_value = pvec[pans[j]];
299 |                 idx = j;
300 |               }
301 |             }
302 |           }
303 |         }
304 |         for (i = 0; i < len0; ++i) {
305 |           tmp = pans[i];
306 |           for (j = i; j > 0 && (pvec[tmp] < pvec[pans[j-1]] || (pvec[tmp] == pvec[pans[j-1]] && tmp < pans[j-1]) || (!ISNAN(pvec[tmp]) && ISNAN(pvec[pans[j-1]]))); --j) {
307 |             pans[j] = pans[j-1];
308 |           }
309 |           pans[j] = tmp;
310 |         }
311 |         for (i =0; i < len0; ++i) {
312 |           pans[i]++;
313 |         }
314 |       } else {
315 |         for (i = 0; i < len0; ++i) {
316 |           pans[i] = i;
317 |           if (pvec[i] >= min_value) {
318 |             min_value = pvec[i];
319 |             idx = i;
320 |           }
321 |         }
322 |         for (i = len0; i < len1; ++i) {
323 |           if (pvec[i] < min_value) {
324 |             min_value = pvec[i];
325 |             pans[idx] = i;
326 |             for (j = 0; j <len0; ++j) {
327 |               if (min_value < pvec[pans[j]] || (min_value == pvec[pans[j]] && pans[idx] < pans[j])) {
328 |                 min_value = pvec[pans[j]];
329 |                 idx = j;
330 |               }
331 |             }
332 |           }
333 |         }
334 |         for (i = 0; i < len0; ++i) {
335 |           tmp = pans[i];
336 |           for (j = i; j > 0 && (pvec[tmp] < pvec[pans[j-1]] || (pvec[tmp] == pvec[pans[j-1]] && tmp < pans[j-1])); --j) {
337 |             pans[j] = pans[j-1];
338 |           }
339 |           pans[j] = tmp;
340 |         }
341 |         for (i =0; i < len0; ++i) {
342 |           pans[i]++;
343 |         }
344 |       }
345 |     } break;
346 |     default:
347 |       error("Type %s is not supported.", type2char(tvec));
348 |     }
349 |   }
350 |   UNPROTECT(1);
351 |   return ans;
352 | }
353 | 


--------------------------------------------------------------------------------
/src/utils.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * kit : Useful R Functions Implemented in C
  3 |  * Copyright (C) 2020-2025  Morgan Jacob
  4 |  * 
  5 |  * This program is free software: you can redistribute it and/or modify
  6 |  * it under the terms of the GNU General Public License as published by
  7 |  * the Free Software Foundation, either version 3 of the License, or
  8 |  * (at your option) any later version.
  9 |  * 
 10 |  * This program is distributed in the hope that it will be useful,
 11 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
 12 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 13 |  * GNU General Public License for more details.
 14 |  * 
 15 |  * You should have received a copy of the GNU General Public License
 16 |  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 17 |  */
 18 | 
 19 | #include "kit.h"
 20 | 
 21 | SEXP ompEnabledR(void) {
 22 |   return omp_enabled ? ScalarLogical(TRUE) : ScalarLogical(FALSE);
 23 | }
 24 | 
 25 | SEXP setlevelsR(SEXP x, SEXP old_lvl, SEXP new_lvl, SEXP skip_absent) {
 26 |   if (!IS_BOOL(skip_absent)) {
 27 |     error("Argument 'skip_absent' must be TRUE or FALSE and length 1.");
 28 |   }
 29 |   if (any_duplicated(old_lvl, FALSE)) {
 30 |     error("'old' has duplicated value. Please make sure no duplicated values are introduced.");
 31 |   }
 32 |   if (any_duplicated(new_lvl, FALSE)) {
 33 |     error("'new' has duplicated value. Please make sure no duplicated values are introduced.");
 34 |   }
 35 |   if (!isFactor(x)) {
 36 |     error("'setlevels' must be passed a factor.");
 37 |   }
 38 |   if (UTYPEOF(old_lvl) != STRSXP) {
 39 |     error("Type of 'old' must be character.");
 40 |   }
 41 |   if (UTYPEOF(new_lvl) != STRSXP) {
 42 |     error("Type of 'new' must be character.");
 43 |   }
 44 |   const R_xlen_t nlvl = xlength(old_lvl);
 45 |   if (nlvl != xlength(new_lvl)) {
 46 |     error("'old' and 'new' are not the same length.");
 47 |   }
 48 |   const bool absent = !LOGICAL(skip_absent)[0];
 49 |   SEXP xchar = PROTECT(getAttrib(x, R_LevelsSymbol));
 50 |   const R_xlen_t nx = xlength(xchar);
 51 |   for (ssize_t j=0; j<nlvl; ++j) {
 52 |     for (ssize_t i=0; i<nx; ++i) {
 53 |       if (STRING_ELT(xchar, i) == STRING_ELT(old_lvl, j)) {
 54 |         SET_STRING_ELT(xchar, i, STRING_ELT(new_lvl, j));
 55 |         goto label;
 56 |       }
 57 |     }
 58 |     if (absent) {
 59 |       error("Element '%s' of 'old' does not exist in 'x'.", RCHAR(old_lvl, j));
 60 |     }
 61 |     label:;
 62 |   }
 63 |   SEXP ans = PROTECT(duplicate(x));
 64 |   setAttrib(ans, R_LevelsSymbol, xchar);
 65 |   UNPROTECT(2);
 66 |   return ans;
 67 | }
 68 | 
 69 | SEXP countR(SEXP x, SEXP y) {
 70 |   const R_xlen_t len_x = xlength(x);
 71 |   const R_xlen_t len_y = xlength(y);
 72 |   if (len_y != 1 || isNull(y)) {
 73 |     error("Argument 'value' must be non NULL and length 1.");
 74 |   }
 75 |   SEXPTYPE tx = UTYPEOF(x);
 76 |   SEXPTYPE ty = UTYPEOF(y);
 77 |   if (tx != ty) {
 78 |     error("Type of 'value' (%s) is different than type of 'x' (%s). Please make sure both have the same type.", type2char(ty), type2char(tx));
 79 |   }
 80 |   if(!R_compute_identical(PROTECT(getAttrib(x, R_ClassSymbol)), PROTECT(getAttrib(y, R_ClassSymbol)), 0)) {
 81 |     error("'x' has different class than 'y'. Please make sure that both arguments have the same class.");
 82 |   }
 83 |   UNPROTECT(2);
 84 |   if (isFactor(x)) {
 85 |     if (!R_compute_identical(PROTECT(getAttrib(x, R_LevelsSymbol)), PROTECT(getAttrib(y, R_LevelsSymbol)), 0)) {
 86 |       error("'x' and 'y' are both type factor but their levels are different.");
 87 |     }
 88 |     UNPROTECT(2);
 89 |   }
 90 |   R_xlen_t cnt = 0;
 91 |   switch(tx) {
 92 |   case LGLSXP: {
 93 |     const int *restrict px = LOGICAL(x);
 94 |     const int py = LOGICAL(y)[0];
 95 |     for (ssize_t i=0; i<len_x; ++i) {
 96 |       if (px[i]== py) {
 97 |         cnt++;
 98 |       }
 99 |     }
100 |   } break;
101 |   case INTSXP: {
102 |     const int *restrict px = INTEGER(x);
103 |     const int py = INTEGER(y)[0];
104 |     for (ssize_t i=0; i<len_x; ++i) {
105 |       if (px[i]== py) {
106 |         cnt++;
107 |       }
108 |     }
109 |   } break;
110 |   case REALSXP: {
111 |     const double *restrict px = REAL(x);
112 |     const double py = REAL(y)[0];
113 |     for (ssize_t i=0; i<len_x; ++i) {
114 |       if (px[i]== py) {
115 |         cnt++;
116 |       }
117 |     }
118 |   } break;
119 |   case CPLXSXP: {
120 |     const Rcomplex *restrict px = COMPLEX(x);
121 |     const Rcomplex py = COMPLEX(y)[0];
122 |     for (ssize_t i=0; i<len_x; ++i) {
123 |       if (EQUAL_CPLX(px[i], py)) {
124 |         cnt++;
125 |       }
126 |     }
127 |   } break;
128 |   case STRSXP: {
129 |     const char* py = RCHAR(y, 0);
130 |     for (ssize_t i=0; i<len_x; ++i) {
131 |       if (RCHAR(x, i) == py) {
132 |         cnt++;
133 |       }
134 |     }
135 |   } break;
136 |   default:
137 |     error("Type %s is not supported.", type2char(tx));
138 |   }
139 |   return cnt > INT_MAX ? ScalarReal((double)cnt) : ScalarInteger((int)cnt);
140 | }
141 | 
142 | SEXP countNAR(SEXP x) {
143 |   const R_xlen_t len_x = xlength(x);
144 |   SEXPTYPE tx = UTYPEOF(x);
145 |   R_xlen_t cnt = 0;
146 |   switch(tx) {
147 |   case NILSXP: break;
148 |   case LGLSXP: {
149 |     const int *restrict px = LOGICAL(x);
150 |     for (ssize_t i=0; i<len_x; ++i) {
151 |       if (px[i]== NA_LOGICAL) {
152 |         cnt++;
153 |       }
154 |     }
155 |   } break;
156 |   case INTSXP: {
157 |     const int *restrict px = INTEGER(x);
158 |     for (ssize_t i=0; i<len_x; ++i) {
159 |       if (px[i]== NA_INTEGER) {
160 |         cnt++;
161 |       }
162 |     }
163 |   } break;
164 |   case REALSXP: {
165 |     const double *restrict px = REAL(x);
166 |     for (ssize_t i=0; i<len_x; ++i) {
167 |       if (ISNAN(px[i])) {
168 |         cnt++;
169 |       }
170 |     }
171 |   } break;
172 |   case CPLXSXP: {
173 |     const Rcomplex *restrict px = COMPLEX(x);
174 |     for (ssize_t i=0; i<len_x; ++i) {
175 |       if (ISNAN_COMPLEX(px[i])) {
176 |         cnt++;
177 |       }
178 |     }
179 |   } break;
180 |   case STRSXP: {
181 |     const SEXP *restrict px = STRING_PTR_RO(x);
182 |     for (ssize_t i=0; i<len_x; ++i) {
183 |       if (px[i] == NA_STRING) {
184 |         cnt++;
185 |       }
186 |     }
187 |   } break;
188 |   case VECSXP: {
189 |     const SEXP *restrict px = SEXPPTR_RO(x);
190 |     SEXP ans = PROTECT(allocVector(VECSXP, len_x));
191 |     for (ssize_t i=0; i<len_x; ++i) {
192 |       SET_VECTOR_ELT(ans, i, countNAR(px[i]));
193 |     }
194 |     UNPROTECT(1);
195 |     return ans;
196 |   } break;
197 |   default:
198 |     error("Type %s is not supported.", type2char(tx));
199 |   }
200 |   return cnt > INT_MAX ? ScalarReal((double)cnt) : ScalarInteger((int)cnt);
201 | }
202 | 
203 | SEXP subSetRowDataFrame(SEXP df, SEXP rws) {
204 |   const SEXP *restrict pdf = SEXPPTR_RO(df);
205 |   const int *restrict prws = INTEGER(rws);
206 |   const R_xlen_t len_df = xlength(df);
207 |   const R_xlen_t len_rws = xlength(rws);
208 |   SEXP dfo = PROTECT(allocVector(VECSXP, len_df));
209 |   DUPLICATE_ATTRIB(dfo, df);
210 |   setAttrib(dfo, R_NamesSymbol, PROTECT(getAttrib(df, R_NamesSymbol)));
211 |   SEXP rownam = PROTECT(allocVector(INTSXP, 2));
212 |   INTEGER(rownam)[0] = NA_INTEGER;
213 |   INTEGER(rownam)[1] = -(int)len_rws;
214 |   setAttrib(dfo, R_RowNamesSymbol, rownam);
215 |   for (R_xlen_t i = 0; i < len_df; ++i) {
216 |     switch(UTYPEOF(pdf[i])) {
217 |     case LGLSXP : {
218 |       const int *restrict ptmp = LOGICAL(pdf[i]);
219 |       SEXP TYPECOL = PROTECT(allocVector(LGLSXP, len_rws));
220 |       int *restrict pc = LOGICAL(TYPECOL);
221 |       for (R_xlen_t j = 0; j < len_rws; ++j) {
222 |         pc[j] = ptmp[prws[j]];
223 |       }
224 |       copyMostAttrib(pdf[i], TYPECOL);
225 |       SET_VECTOR_ELT(dfo, i, TYPECOL);
226 |       UNPROTECT(1);
227 |     } break;
228 |     case INTSXP : {
229 |       const int *restrict ptmp = INTEGER(pdf[i]);
230 |       SEXP TYPECOL = PROTECT(allocVector(INTSXP, len_rws));
231 |       int *restrict pc = INTEGER(TYPECOL);
232 |       for (R_xlen_t j = 0; j < len_rws; ++j) {
233 |         pc[j] = ptmp[prws[j]];
234 |       }
235 |       copyMostAttrib(pdf[i], TYPECOL);
236 |       SET_VECTOR_ELT(dfo, i, TYPECOL);
237 |       UNPROTECT(1);
238 |     } break;
239 |     case REALSXP : {
240 |       const double *restrict ptmp = REAL(pdf[i]);
241 |       SEXP TYPECOL = PROTECT(allocVector(REALSXP, len_rws));
242 |       double *restrict pc = REAL(TYPECOL);
243 |       for (R_xlen_t j = 0; j < len_rws; ++j) {
244 |         pc[j] = ptmp[prws[j]];
245 |       }
246 |       copyMostAttrib(pdf[i], TYPECOL);
247 |       SET_VECTOR_ELT(dfo, i, TYPECOL);
248 |       UNPROTECT(1);
249 |     } break;
250 |     case CPLXSXP : {
251 |       const Rcomplex *restrict ptmp = COMPLEX(pdf[i]);
252 |       SEXP TYPECOL = PROTECT(allocVector(CPLXSXP, len_rws));
253 |       Rcomplex *restrict pc = COMPLEX(TYPECOL);
254 |       for (R_xlen_t j = 0; j < len_rws; ++j) {
255 |         pc[j] = ptmp[prws[j]];
256 |       }
257 |       copyMostAttrib(pdf[i], TYPECOL);
258 |       SET_VECTOR_ELT(dfo, i, TYPECOL);
259 |       UNPROTECT(1);
260 |     } break;
261 |     case STRSXP : {
262 |       const SEXP *restrict ptmp = STRING_PTR_RO(pdf[i]);
263 |       SEXP TYPECOL = PROTECT(allocVector(STRSXP, len_rws));
264 |       for (R_xlen_t j = 0; j < len_rws; ++j) {
265 |         SET_STRING_ELT(TYPECOL,j, ptmp[prws[j]]);
266 |       }
267 |       copyMostAttrib(pdf[i], TYPECOL);
268 |       SET_VECTOR_ELT(dfo, i, TYPECOL);
269 |       UNPROTECT(1);
270 |     } break;
271 |     default: // # nocov
272 |       error("Type %s is not supported.", type2char(UTYPEOF(pdf[i]))); // # nocov
273 |     }
274 |   }
275 |   UNPROTECT(3);
276 |   return dfo;
277 | }
278 | 
279 | SEXP subSetRowMatrix(SEXP mat, SEXP rws) {
280 |   const int *restrict prws = INTEGER(rws);
281 |   const int col_mat = ncols(mat);
282 |   const int row_mat = nrows(mat);
283 |   const int len_rws = length(rws);
284 |   SEXP mato;
285 |   switch(UTYPEOF(mat)) {
286 |   case LGLSXP : {
287 |     mato = PROTECT(allocMatrix(LGLSXP, len_rws, col_mat));
288 |     const int *restrict pmat = LOGICAL(mat);
289 |     int *restrict pmato = LOGICAL(mato);
290 |     for (int i = 0; i < col_mat; ++i) {
291 |       for (int j = 0; j < len_rws; ++j) {
292 |         pmato[j+len_rws*i] = pmat[prws[j]+row_mat*i];
293 |       }
294 |     }
295 |   } break;
296 |   case INTSXP : {
297 |     mato = PROTECT(allocMatrix(INTSXP, len_rws, col_mat));
298 |     const int *restrict pmat = INTEGER(mat);
299 |     int *restrict pmato = INTEGER(mato);
300 |     for (int i = 0; i < col_mat; ++i) {
301 |       for (int j = 0; j < len_rws; ++j) {
302 |         pmato[j+len_rws*i] = pmat[prws[j]+row_mat*i];
303 |       }
304 |     }
305 |   } break;
306 |   case REALSXP : {
307 |     mato = PROTECT(allocMatrix(REALSXP, len_rws, col_mat));
308 |     const double *restrict pmat = REAL(mat);
309 |     double *restrict pmato = REAL(mato);
310 |     for (int i = 0; i < col_mat; ++i) {
311 |       for (int j = 0; j < len_rws; ++j) {
312 |         pmato[j+len_rws*i] = pmat[prws[j]+row_mat*i];
313 |       }
314 |     }
315 |   } break;
316 |   case CPLXSXP : {
317 |     mato = PROTECT(allocMatrix(CPLXSXP, len_rws, col_mat));
318 |     const Rcomplex *restrict pmat = COMPLEX(mat);
319 |     Rcomplex *restrict pmato = COMPLEX(mato);
320 |     for (int i = 0; i < col_mat; ++i) {
321 |       for (int j = 0; j < len_rws; ++j) {
322 |         pmato[j+len_rws*i] = pmat[prws[j]+row_mat*i];
323 |       }
324 |     }
325 |   } break;
326 |   case STRSXP : {
327 |     mato = PROTECT(allocMatrix(STRSXP, len_rws, col_mat));
328 |     const SEXP *restrict pmat = STRING_PTR_RO(mat);
329 |     for (int i = 0; i < col_mat; ++i) {
330 |       for (int j = 0; j < len_rws; ++j) {
331 |         SET_STRING_ELT(mato, j+len_rws*i, pmat[prws[j]+row_mat*i]);
332 |       }
333 |     }
334 |   } break;
335 |   default: // # nocov
336 |     error("Type %s is not supported.", type2char(UTYPEOF(mat))); // # nocov
337 |   }
338 |   UNPROTECT(1);
339 |   return mato;
340 | }
341 | 
342 | // No checks in this functions (not used for now)
343 | SEXP subSetColDataFrame(SEXP df, SEXP str) { // # nocov start
344 |   SEXP nm = PROTECT(getAttrib(df, R_NamesSymbol));
345 |   const int len = length(str);
346 |   const int cnm = length(nm);
347 |   if (len == 1) {
348 |     int i = 0;
349 |     const SEXP pstr = STRING_ELT(str, 0);
350 |     for (; i < cnm; ++i) {
351 |       if (STRING_ELT(nm, i) == pstr) {
352 |         break;
353 |       }
354 |     }
355 |     if (i == cnm) {
356 |       error("Column '%s' is not in data.frame.", RCHAR(str, 0));
357 |     }
358 |     UNPROTECT(1);
359 |     return VECTOR_ELT(df, i);
360 |   }
361 |   SEXP dfo = PROTECT(allocVector(VECSXP, len));
362 |   int ct = 0;
363 |   for (int i = 0; ct < len; ++i) {
364 |     if (i == cnm) {
365 |       error("Column '%s' is not in data.frame.", RCHAR(str, ct));
366 |     }
367 | 	  if (STRING_ELT(nm, i) == STRING_ELT(str, ct)) {
368 |       SET_VECTOR_ELT(dfo, ct++, VECTOR_ELT(df, i));
369 | 	    i = -1;
370 | 	  }
371 |   }
372 |   DUPLICATE_ATTRIB(dfo, df);
373 |   namesgets(dfo, str);
374 |   SEXP rownam = PROTECT(allocVector(INTSXP, 2));
375 |   INTEGER(rownam)[0] = NA_INTEGER;
376 |   INTEGER(rownam)[1] = -(int)length(VECTOR_ELT(df, 0));
377 |   setAttrib(dfo, R_RowNamesSymbol, rownam);
378 |   UNPROTECT(3);
379 |   return dfo;
380 | } // # nocov end
381 | 
382 | // No checks in this functions (subset just one column) (not used for now)
383 | SEXP subSetColMatrix(SEXP x, R_xlen_t idx) { // # nocov start
384 |   const R_xlen_t len_i = nrows(x);
385 |   SEXPTYPE xt = UTYPEOF(x);
386 |   SEXP ans = PROTECT(allocVector(xt, len_i));
387 |   const R_xlen_t pidx = idx * len_i;
388 |   switch(xt) {
389 |   case LGLSXP : {
390 |     memcpy(LOGICAL(ans), LOGICAL(x)+pidx, (unsigned)len_i*sizeof(*LOGICAL(ans)));
391 |   } break;
392 |   case INTSXP : {
393 |     memcpy(INTEGER(ans), INTEGER(x)+pidx, (unsigned)len_i*sizeof(int));
394 |   } break;
395 |   case REALSXP : {
396 |     memcpy(REAL(ans), REAL(x)+pidx, (unsigned)len_i*sizeof(double));
397 |   } break;
398 |   case CPLXSXP : {
399 |     memcpy(COMPLEX(ans), COMPLEX(x)+pidx, (unsigned)len_i*sizeof(Rcomplex));
400 |   } break;
401 |   case STRSXP : {
402 |     const SEXP *restrict px = STRING_PTR_RO(x);
403 |     for (R_xlen_t i = 0; i < len_i; ++i) {
404 |       SET_STRING_ELT(ans, i, px[i + pidx]);
405 |     }
406 |   } break;
407 |   default:
408 |     error("Matrix of type %s are not supported.", type2char(xt));
409 |   }
410 |   UNPROTECT(1);
411 |   return ans;
412 | } // # nocov end
413 | 
414 | // This function does not do any check
415 | SEXP addColToDataFrame(SEXP df, SEXP mcol, SEXP coln) {
416 |   const R_xlen_t len_df = xlength(df);
417 |   const R_xlen_t len_col = xlength(mcol);
418 |   SEXP dfo = R_NilValue;
419 |   if (UTYPEOF(mcol) != VECSXP) {
420 |     dfo = PROTECT(allocVector(VECSXP, len_df + 1));
421 | 	  for (int i = 0; i < len_df; ++i) {
422 |       SET_VECTOR_ELT(dfo, i, VECTOR_ELT(df, i));
423 |     }
424 |     SET_VECTOR_ELT(dfo, len_df, mcol);
425 |     classgets(dfo, STR_DF);
426 |     SEXP nam = PROTECT(allocVector(STRSXP, len_df + 1));
427 |     SEXP oldnam = PROTECT(getAttrib(df, R_NamesSymbol));
428 |     for (int i = 0; i < len_df; ++i) {
429 |       SET_STRING_ELT(nam, i, STRING_ELT(oldnam, i));
430 |     }
431 |     SET_STRING_ELT(nam, len_df, STRING_ELT(coln, 0));
432 |     namesgets(dfo, nam);
433 |     SEXP rownam = PROTECT(allocVector(INTSXP, 2));
434 |     INTEGER(rownam)[0] = NA_INTEGER;
435 |     INTEGER(rownam)[1] = -(int)len_col;
436 | 	  setAttrib(dfo, R_RowNamesSymbol, rownam);
437 |   } else {
438 |     const R_xlen_t len_row = xlength(VECTOR_ELT(df, 0));// # nocov start
439 | 	  dfo = PROTECT(allocVector(VECSXP, len_df + len_col));
440 |     for (int i = 0; i < len_df; ++i) {
441 |       SET_VECTOR_ELT(dfo, i, VECTOR_ELT(df, i));
442 |     }
443 |     for (int i = 0; i < len_col; ++i) {
444 |       SET_VECTOR_ELT(dfo, len_df + i, VECTOR_ELT(mcol, i));
445 |     }
446 |     classgets(dfo, STR_DF);
447 |     SEXP nam = PROTECT(allocVector(STRSXP, len_df + len_col));
448 |     SEXP oldnam = PROTECT(getAttrib(df, R_NamesSymbol));
449 |     for (int i = 0; i < len_df; ++i) {
450 |       SET_STRING_ELT(nam, i, STRING_ELT(oldnam, i));
451 |     }
452 |     for (int i = 0; i < len_col; ++i) {
453 |       SET_STRING_ELT(nam, len_df + i, STRING_ELT(coln, i));
454 |     }
455 |     namesgets(dfo, nam);
456 |     SEXP rownam = PROTECT(allocVector(INTSXP, 2));
457 |     INTEGER(rownam)[0] = NA_INTEGER;
458 |     INTEGER(rownam)[1] = -(int)len_row;
459 | 	  setAttrib(dfo, R_RowNamesSymbol, rownam);// # nocov end
460 |   }
461 |   UNPROTECT(4);
462 |   return dfo;
463 | }
464 | 
465 | // Try to improve this by removing element in the loop or initilising at 0 
466 | SEXP countOccurR(SEXP x) { // can be improved for factors
467 |   if (isDataFrame(x)) {
468 |     SEXP ans = PROTECT(countOccurDataFrameR(x));
469 |     UNPROTECT(1);
470 |     return ans;
471 |   }
472 |   if (isArray(x)) {
473 |     error("Array are not yet supported.");
474 |   }
475 |   const R_xlen_t n = xlength(x);
476 |   const SEXPTYPE tx = UTYPEOF(x);
477 |   int K;
478 |   size_t M;
479 |   if (tx == INTSXP || tx == STRSXP || tx == REALSXP || tx == CPLXSXP ) {
480 |     if(n >= 1073741824) {
481 |       error("Length of 'x' is too large. (Long vector not supported yet)"); // # nocov
482 |     }
483 |     const size_t n2 = 2U * (size_t) n;
484 |     M = 256;
485 |     K = 8;
486 |     while (M < n2) {
487 |       M *= 2;
488 |       K++;
489 |     }
490 |   } else if (tx == LGLSXP) {
491 |     M = 4;
492 |     K = 2;
493 |   } else {
494 |     error("Type %s is not supported.", type2char(tx));
495 |   }
496 |   R_xlen_t count = 0;
497 |   int *restrict h = (int*)calloc(M, sizeof(int));
498 |   SEXP ans_ct = PROTECT(allocVector(INTSXP, n));
499 |   int *restrict pans_l = (int*)calloc(n, sizeof(int));
500 |   int *restrict pans_ct = INTEGER(ans_ct);
501 |   SEXP ans_f = PROTECT(allocVector(VECSXP, 2));
502 |   switch (tx) {
503 |   case LGLSXP: {
504 |     const int *restrict px = LOGICAL(x);
505 |     size_t id = 0;
506 |     for (int i = 0; i < n; ++i) {
507 |       id = (px[i] == NA_LOGICAL) ? 2U : (size_t) px[i];
508 |       while (h[id]) {
509 |         if (px[h[id]-1]==px[i]) {
510 |           pans_ct[h[id]-1]++;
511 |           goto lbl;
512 |         }
513 |         id++; id %= M; // # nocov
514 |       }
515 |       h[id] = (int) i + 1;
516 |       pans_l[i]++;
517 |       pans_ct[i] = 1;
518 |       count++;
519 |       lbl:;
520 |     }
521 |     SET_VECTOR_ELT(ans_f, 0, PROTECT(allocVector(tx, count)));
522 |     SET_VECTOR_ELT(ans_f, 1, PROTECT(allocVector(INTSXP, count)));
523 |     R_xlen_t ct = 0;
524 |     int *restrict py = LOGICAL(PTR_ETL(ans_f, 0));
525 |     int *restrict pw = INTEGER(PTR_ETL(ans_f, 1));
526 |     for (int i = 0; ct < count; ++i) {
527 |       if (pans_l[i]) {
528 |         pw[ct] = pans_ct[i];
529 |         py[ct++] = px[i];
530 |       }
531 |     }
532 |   } break;
533 |   case INTSXP: { // think about factor and levels number
534 |     const int *restrict px = INTEGER(x);
535 |     size_t id = 0;
536 |     for (int i = 0; i < n; ++i) {
537 |       id = (px[i] == NA_INTEGER) ? 0 : HASH(px[i], K);
538 |       while (h[id]) {
539 |         if (px[h[id]-1]==px[i]) {
540 |           pans_ct[h[id]-1]++;
541 |           goto ibl;
542 |         }
543 |         id++; id %= M; // # nocov
544 |       }
545 |       h[id] = (int) i + 1;
546 |       pans_l[i]++;
547 |       pans_ct[i] = 1;
548 |       count++;
549 |       ibl:;
550 |     }
551 |     SET_VECTOR_ELT(ans_f, 0, PROTECT(allocVector(tx, count)));
552 |     SET_VECTOR_ELT(ans_f, 1, PROTECT(allocVector(INTSXP, count)));
553 |     R_xlen_t ct = 0;
554 |     int *restrict py = INTEGER(PTR_ETL(ans_f, 0));
555 |     int *restrict pw = INTEGER(PTR_ETL(ans_f, 1));
556 |     for (int i = 0; ct < count; ++i) {
557 |       if (pans_l[i]) {
558 |         pw[ct] = pans_ct[i];
559 |         py[ct++] = px[i];
560 |       }
561 |     }
562 |   } break;
563 |   case REALSXP: {
564 |     const double *restrict px = REAL(x);
565 |     size_t id = 0;
566 |     union uno tpv;
567 |     for (int i = 0; i < n; ++i) {
568 |       tpv.d = R_IsNA(px[i]) ? NA_REAL : (R_IsNaN(px[i]) ? R_NaN :px[i]);
569 |       id = HASH(tpv.u[0] + tpv.u[1], K);
570 |       while (h[id]) {
571 |         if (REQUAL(px[h[id] - 1], px[i])) {
572 |           pans_ct[h[id]-1]++;
573 |           goto rbl;
574 |         }
575 |         id++; id %= M; // # nocov
576 |       }
577 |       h[id] = (int) i + 1;
578 |       pans_l[i]++;
579 |       pans_ct[i] = 1;
580 |       count++;
581 |       rbl:;
582 |     }
583 |     SET_VECTOR_ELT(ans_f, 0, PROTECT(allocVector(tx, count)));
584 |     SET_VECTOR_ELT(ans_f, 1, PROTECT(allocVector(INTSXP, count)));
585 |     R_xlen_t ct = 0;
586 |     double *restrict py = REAL(PTR_ETL(ans_f, 0));
587 |     int *restrict pw = INTEGER(PTR_ETL(ans_f, 1));
588 |     for (int i = 0; ct < count; ++i) {
589 |       if (pans_l[i]) {
590 |         pw[ct] = pans_ct[i];
591 |         py[ct++] = px[i];
592 |       }
593 |     }
594 |   } break;
595 |   case CPLXSXP: {
596 |     const Rcomplex *restrict px = COMPLEX(x);
597 |     size_t id = 0;
598 |     unsigned int u;
599 |     union uno tpv;
600 |     Rcomplex tmp;
601 |     for (int i = 0; i < n; ++i) {
602 |       tmp.r = (px[i].r == 0.0) ? 0.0 : px[i].r;
603 |       tmp.i = (px[i].i == 0.0) ? 0.0 : px[i].i;
604 |       if (C_IsNA(tmp)) {
605 |         tmp.r = tmp.i = NA_REAL;
606 |       } else if (C_IsNaN(tmp)) {
607 |         tmp.r = tmp.i = R_NaN;
608 |       }
609 |       tpv.d = tmp.r;
610 |       u = tpv.u[0] ^ tpv.u[1];
611 |       tpv.d = tmp.i;
612 |       u ^= tpv.u[0] ^ tpv.u[1];
613 |       id = HASH(u, K);
614 |       while (h[id]) {
615 |         if (CEQUAL(px[h[id]-1],px[i])) {
616 |           pans_ct[h[id]-1]++;
617 |           goto cbl;
618 |         }
619 |         id++; id %= M;
620 |       }
621 |       h[id] = (int) i + 1;
622 |       pans_l[i]++;
623 |       pans_ct[i] = 1;
624 |       count++;
625 |       cbl:;
626 |     }
627 |     SET_VECTOR_ELT(ans_f, 0, PROTECT(allocVector(tx, count)));
628 |     SET_VECTOR_ELT(ans_f, 1, PROTECT(allocVector(INTSXP, count)));
629 |     R_xlen_t ct = 0;
630 |     Rcomplex *restrict py = COMPLEX(PTR_ETL(ans_f, 0));
631 |     int *restrict pw = INTEGER(PTR_ETL(ans_f, 1));
632 |     for (int i = 0; ct < count; ++i) {
633 |       if (pans_l[i]) {
634 |         pw[ct] = pans_ct[i];
635 |         py[ct++] = px[i];
636 |       }
637 |     }
638 |   } break;
639 |   case STRSXP: {
640 |     const SEXP *restrict px = STRING_PTR_RO(x);
641 |     size_t id = 0;
642 |     for (int i = 0; i < n; ++i) {
643 |       id = HASH(((intptr_t) px[i] & 0xffffffff), K);
644 |       while (h[id]) {
645 |         if (px[h[id]-1]==px[i]) {
646 |           pans_ct[h[id]-1]++;
647 |           goto sbl;
648 |         }
649 |         id++; id %= M; // # nocov
650 |       }
651 |       h[id] = (int) i + 1;
652 |       pans_l[i]++;
653 |       pans_ct[i] = 1;
654 |       count++;
655 |       sbl:;
656 |     }
657 |     SET_VECTOR_ELT(ans_f, 0, PROTECT(allocVector(tx, count)));
658 |     SET_VECTOR_ELT(ans_f, 1, PROTECT(allocVector(INTSXP, count)));
659 |     R_xlen_t ct = 0;
660 |     int *restrict pw = INTEGER(PTR_ETL(ans_f, 1));
661 |     SEXP p0 = PTR_ETL(ans_f, 0);
662 |     for (int i = 0; ct < count; ++i) {
663 |       if (pans_l[i]) {
664 |         pw[ct] = pans_ct[i];
665 |         SET_STRING_ELT(p0, ct++, px[i]);
666 |       }
667 |     }
668 |   } break;
669 |   }
670 |   free(pans_l);
671 |   free(h);
672 |   copyMostAttrib(x, PTR_ETL(ans_f, 0));
673 |   classgets(ans_f, STR_DF);
674 |   SEXP nam = PROTECT(allocVector(STRSXP, 2));
675 |   SET_STRING_ELT(nam, 0, mkChar("Variable"));
676 |   SET_STRING_ELT(nam, 1, mkChar("Count"));
677 |   namesgets(ans_f, nam);
678 |   SEXP rownam = PROTECT(allocVector(INTSXP, 2));
679 |   INTEGER(rownam)[0] = NA_INTEGER;
680 |   INTEGER(rownam)[1] = -(int)count;
681 |   setAttrib(ans_f, R_RowNamesSymbol, rownam);
682 |   UNPROTECT(6);
683 |   return ans_f;
684 | }
685 | 
686 | SEXP countOccurDataFrameR(SEXP x) { // move to matrix if possible (change hash algo)
687 |   const SEXP *restrict px = SEXPPTR_RO(x);
688 |   const R_xlen_t len_x = xlength(x);
689 |   const R_xlen_t len_i = xlength(px[0]);
690 |   SEXP mlv = PROTECT(allocMatrix(INTSXP, (int)len_i, (int)len_x));
691 |   for (R_xlen_t i = 0; i < len_x; ++i) {
692 |     memcpy(INTEGER(mlv)+i*len_i, INTEGER(PROTECT(dupVecIndexOnlyR(px[i]))), (unsigned)len_i*sizeof(int));
693 |   }
694 |   UNPROTECT((int)len_x);
695 |   const size_t n2 = 2U * (size_t) len_i;
696 |   size_t M = 256;
697 |   int K = 8;
698 |   while (M < n2) {
699 |     M *= 2;
700 |     K++;
701 |   }
702 |   R_xlen_t count = 0;
703 |   int *restrict h = (int*)calloc(M, sizeof(int));
704 |   SEXP ans_ct = PROTECT(allocVector(INTSXP, len_i));
705 |   int *restrict pans_l = (int*)calloc(len_i, sizeof(int));
706 |   int *restrict pans_ct = INTEGER(ans_ct);
707 |   const int *restrict v = INTEGER(mlv);
708 |   size_t id = 0;
709 |   for (R_xlen_t i = 0; i < len_i; ++i) {
710 |     R_xlen_t key = 0;
711 |     for (R_xlen_t j = 0; j < len_x; ++j) {
712 |       key ^= HASH(v[i+j*len_i],K)*97*(j+1);
713 |     }
714 |     id = HASH(key, K);
715 |     while (h[id]) {
716 |       for (R_xlen_t j = 0; j < len_x; ++j) {
717 |         if (v[h[id]-1+j*len_i] != v[i+j*len_i]) {
718 |           goto label1;
719 |         }
720 |       }
721 |       pans_ct[h[id]-1]++;
722 |       goto label2;
723 |       label1:;
724 |       id++; id %= M;
725 |     }
726 |     h[id] = (int) i + 1;
727 |     pans_l[i]++;
728 |     pans_ct[i] = 1;
729 |     count++;
730 |     label2:;
731 |   }
732 |   free(h);
733 |   SEXP indx = PROTECT(allocVector(INTSXP, count));
734 |   SEXP cntr = PROTECT(allocVector(INTSXP, count));
735 |   R_xlen_t ct = 0;
736 |   int *restrict py = INTEGER(indx);
737 |   int *restrict pw = INTEGER(cntr);
738 |   for (int i = 0; ct < count; ++i) {
739 |     if (pans_l[i]) {
740 |       pw[ct] = pans_ct[i];
741 |       py[ct++] = i;
742 |     }
743 |   }
744 |   free(pans_l);
745 |   SEXP output = PROTECT(addColToDataFrame(PROTECT(subSetRowDataFrame(x, indx)),cntr, PROTECT(mkString("Count"))));
746 |   UNPROTECT(7);
747 |   return output;
748 | }
749 | 
750 | // All columns must be of the same type
751 | SEXP dfToMatrix(SEXP df) {
752 |   const SEXP *restrict px = SEXPPTR_RO(df);
753 |   const R_xlen_t len_x = xlength(df);
754 |   const R_xlen_t len_i = xlength(px[0]);
755 |   const SEXPTYPE tx = UTYPEOF(px[0]);
756 |   SEXP mlv = PROTECT(allocMatrix(tx, (int)len_i, (int)len_x));
757 |   switch(tx) {
758 |   case LGLSXP :{
759 |     int *restrict pmlv = LOGICAL(mlv);
760 |     for (int i = 0; i < len_x; ++i) {
761 |       const int *restrict ppx = LOGICAL(px[i]);
762 |       const int ct = i*len_i;
763 |       for (int j = 0; j < len_i; ++j) {
764 |         pmlv[j+ct] = ppx[j];
765 |       }
766 |     }
767 |   } break;
768 |   case INTSXP :{
769 |     int *restrict pmlv = INTEGER(mlv);
770 |     for (int i = 0; i < len_x; ++i) {
771 |       const int *restrict ppx = INTEGER(px[i]);
772 |       const int ct = i*len_i;
773 |       for (int j = 0; j < len_i; ++j) {
774 |         pmlv[j+ct] = ppx[j];
775 |       }
776 |     }
777 |   } break;
778 |   case REALSXP :{
779 |     double *restrict pmlv = REAL(mlv);
780 |     for (int i = 0; i < len_x; ++i) {
781 |       const double *restrict ppx = REAL(px[i]);
782 |       const int ct = i*len_i;
783 |       for (int j = 0; j < len_i; ++j) {
784 |         pmlv[j+ct] = ppx[j];
785 |       }
786 |     }
787 |   } break;
788 |   case CPLXSXP :{
789 |     Rcomplex *restrict pmlv = COMPLEX(mlv);
790 |     for (int i = 0; i < len_x; ++i) {
791 |       const Rcomplex *restrict ppx = COMPLEX(px[i]);
792 |       const int ct = i*len_i;
793 |       for (int j = 0; j < len_i; ++j) {
794 |         pmlv[j+ct] = ppx[j];
795 |       }
796 |     }
797 |   } break;
798 |   case STRSXP :{
799 |     for (int i = 0; i < len_x; ++i) {
800 |       const SEXP *restrict ppx = STRING_PTR_RO(px[i]);
801 |       const int ct = i*len_i;
802 |       for (int j = 0; j < len_i; ++j) {
803 |         SET_STRING_ELT(mlv, j+ct, ppx[j]);
804 |       }
805 |     }
806 |   } break;
807 |   }
808 |   UNPROTECT(1);
809 |   return mlv;
810 | }
811 | 
812 | bool isMixEnc(SEXP x) {
813 |   const R_xlen_t len = xlength(x);
814 |   const SEXP *px = STRING_PTR_RO(x);
815 |   const cetype_t ces = getCharCE(px[0]);
816 |   for (R_xlen_t i = 1; i < len; ++i)
817 |     if(getCharCE(px[i]) != ces)
818 |       return true;
819 |   return false;
820 | }
821 | 
822 | SEXP enc2UTF8(SEXP x) {
823 |   const SEXP *px = STRING_PTR_RO(x);
824 |   const R_xlen_t len = xlength(x);
825 |   if (getCharCE(px[0]) != CE_UTF8) {
826 |     SEXP ans = PROTECT(allocVector(STRSXP, len));
827 |     for (R_xlen_t i = 0; i < len; ++i) {
828 |       SET_STRING_ELT(ans, i, mkCharCE(translateCharUTF8(px[i]), CE_UTF8));
829 |     }
830 |     UNPROTECT(1);
831 |     return ans;
832 |   }
833 |   return x;
834 | }
835 | 


--------------------------------------------------------------------------------