├── .Rbuildignore ├── .github ├── .gitignore └── workflows │ ├── R-CMD-check.yaml │ └── test-coverage.yaml ├── DESCRIPTION ├── LICENSE ├── NAMESPACE ├── R └── call.R ├── README.md ├── cleanup ├── configure ├── inst └── NEWS.Rd ├── man ├── charToFact.Rd ├── count.Rd ├── fpos.Rd ├── funique.Rd ├── iif.Rd ├── nif.Rd ├── psort.Rd ├── psum.Rd ├── setlevels.Rd ├── shareData.Rd ├── topn.Rd └── vswitch.Rd ├── src ├── Makevars.in ├── Makevars.win ├── dup.c ├── dupLen.c ├── fpos.c ├── iif.c ├── init.c ├── kit.h ├── nswitch.c ├── psort.c ├── psum.c ├── share.c ├── topn.c ├── utils.c └── vswitch.c └── tests ├── test_kit.R └── test_kit.Rout.save /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^\.github$ 2 | ^\.appveyor\.yml$ 3 | ^README\.md$ 4 | LICENSE 5 | -------------------------------------------------------------------------------- /.github/.gitignore: -------------------------------------------------------------------------------- 1 | *.html -------------------------------------------------------------------------------- /.github/workflows/R-CMD-check.yaml: -------------------------------------------------------------------------------- 1 | # For help debugging build failures open an issue on the RStudio community with the 'github-actions' tag. 2 | # https://community.rstudio.com/new-topic?category=Package%20development&tags=github-actions 3 | on: 4 | push: 5 | branches: [main, master] 6 | pull_request: 7 | branches: [main, master] 8 | 9 | name: R-CMD-check.yaml 10 | 11 | permissions: read-all 12 | 13 | jobs: 14 | R-CMD-check: 15 | runs-on: ${{ matrix.config.os }} 16 | 17 | name: ${{ matrix.config.os }} (${{ matrix.config.r }}) 18 | 19 | strategy: 20 | fail-fast: false 21 | matrix: 22 | config: 23 | - {os: macos-latest, r: 'release'} 24 | - {os: windows-latest, r: 'release'} 25 | - {os: windows-latest, r: 'oldrel-4'} 26 | - {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'} 27 | - {os: ubuntu-latest, r: 'release'} 28 | - {os: ubuntu-latest, r: 'oldrel-1'} 29 | - {os: ubuntu-latest, r: 'oldrel-2'} 30 | - {os: ubuntu-latest, r: 'oldrel-3'} 31 | - {os: ubuntu-latest, r: 'oldrel-4'} 32 | 33 | env: 34 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 35 | R_KEEP_PKG_SOURCE: yes 36 | 37 | steps: 38 | - uses: actions/checkout@v4 39 | 40 | - uses: r-lib/actions/setup-pandoc@v2 41 | 42 | - uses: r-lib/actions/setup-r@v2 43 | with: 44 | r-version: ${{ matrix.config.r }} 45 | http-user-agent: ${{ matrix.config.http-user-agent }} 46 | use-public-rspm: true 47 | 48 | - uses: r-lib/actions/setup-r-dependencies@v2 49 | with: 50 | extra-packages: any::rcmdcheck 51 | needs: check 52 | 53 | - uses: r-lib/actions/check-r-package@v2 54 | with: 55 | upload-snapshots: true 56 | build_args: 'c("--no-manual")' 57 | -------------------------------------------------------------------------------- /.github/workflows/test-coverage.yaml: -------------------------------------------------------------------------------- 1 | on: 2 | push: 3 | branches: 4 | - main 5 | - master 6 | pull_request: 7 | branches: 8 | - main 9 | - master 10 | 11 | name: test-coverage 12 | 13 | jobs: 14 | test-coverage: 15 | runs-on: windows-latest 16 | env: 17 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 18 | steps: 19 | - uses: actions/checkout@v4 20 | 21 | - uses: r-lib/actions/setup-r@v2 22 | 23 | - uses: r-lib/actions/setup-pandoc@v1 24 | 25 | - name: Query dependencies 26 | run: | 27 | install.packages('remotes') 28 | saveRDS(remotes::dev_package_deps(dependencies = TRUE), ".github/depends.Rds", version = 2) 29 | writeLines(sprintf("R-%i.%i", getRversion()$major, getRversion()$minor), ".github/R-version") 30 | shell: Rscript {0} 31 | 32 | - name: Restore R package cache 33 | uses: actions/cache@v4 34 | with: 35 | path: ${{ env.R_LIBS_USER }} 36 | key: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1-${{ hashFiles('.github/depends.Rds') }} 37 | restore-keys: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1- 38 | 39 | - name: Install dependencies 40 | run: | 41 | install.packages(c("remotes")) 42 | remotes::install_deps(dependencies = TRUE) 43 | remotes::install_cran("covr") 44 | shell: Rscript {0} 45 | 46 | - name: Test coverage 47 | run: covr::codecov() 48 | shell: Rscript {0} 49 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: kit 2 | Type: Package 3 | Title: Data Manipulation Functions Implemented in C 4 | Version: 0.0.20 5 | Date: 2025-04-17 6 | Authors@R: c(person("Morgan", "Jacob", role = c("aut", "cre", "cph"), email = "morgan.emailbox@gmail.com"), 7 | person("Sebastian", "Krantz", role = "ctb")) 8 | Author: Morgan Jacob [aut, cre, cph], Sebastian Krantz [ctb] 9 | Maintainer: Morgan Jacob 10 | Description: Basic functions, implemented in C, for large data manipulation. Fast vectorised ifelse()/nested if()/switch() functions, psum()/pprod() functions equivalent to pmin()/pmax() plus others which are missing from base R. Most of these functions are callable at C level. 11 | License: GPL-3 12 | Depends: R (>= 3.1.0) 13 | Encoding: UTF-8 14 | BugReports: https://github.com/2005m/kit/issues 15 | NeedsCompilation: yes 16 | ByteCompile: TRUE 17 | Repository: CRAN 18 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | useDynLib(kit, .registration = TRUE) 2 | 3 | export( 4 | charToFact, count, countNA, countOccur, fduplicated, fpos, funique, iif, nif, nswitch, 5 | pall, pallNA, pallv, pany, panyNA, panyv, pcount, pcountNA, pfirst, plast, pmean, pprod, psum, setlevels, topn, uniqLen, vswitch, psort, 6 | getData, shareData, clearData 7 | ) 8 | -------------------------------------------------------------------------------- /R/call.R: -------------------------------------------------------------------------------- 1 | # Function calls 2 | charToFact = function(x, decreasing=FALSE, addNA=TRUE, nThread=getOption("kit.nThread")) .Call(CcharToFactR, x, decreasing, nThread, NA, parent.frame(), addNA) 3 | clearData = function(x, verbose=FALSE) .Call("CclearMappingObjectR", x, verbose) 4 | count = function(x, value) .Call(CcountR, x, value) 5 | countNA = function(x) .Call(CcountNAR, x) 6 | countOccur = function(x) .Call(CcountOccurR, x) 7 | fduplicated = function(x, fromLast = FALSE) .Call(CdupR, x, FALSE, fromLast) 8 | fpos = function(needle, haystack, all=TRUE, overlap=TRUE) .Call(CfposR, needle, haystack, all, overlap) 9 | funique = function(x, fromLast = FALSE) .Call(CdupR, x, TRUE, fromLast) 10 | iif = function(test, yes, no, na=NULL, tprom=FALSE, nThread=getOption("kit.nThread")) .Call(CiifR, test, yes, no, na, tprom, nThread) 11 | nif = function(..., default=NULL) .Call(CnifR, default, parent.frame(), as.list(substitute(...()))) 12 | nswitch = function(x, ..., default=NULL, nThread=getOption("kit.nThread"), checkEnc = TRUE) .Call(CnswitchR, x, default, nThread, checkEnc, list(...)) 13 | pall = function(..., na.rm=FALSE) .Call(CpallR, na.rm, if (...length() == 1L && is.list(..1)) ..1 else list(...)) 14 | pallNA = function(...) { x = if (...length() == 1L && is.list(..1)) unclass(..1) else list(...); .Call(CpcountNAR, x) == length(x) } 15 | pallv = function(..., value) { x = if (...length() == 1L && is.list(..1)) unclass(..1) else list(...); pcount(..., value = value) == length(x) } 16 | pany = function(..., na.rm=FALSE) .Call(CpanyR, na.rm, if (...length() == 1L && is.list(..1)) ..1 else list(...)) 17 | panyNA = function(...) as.logical(pcountNA(...)) 18 | panyv = function(..., value) as.logical(pcount(..., value = value)) 19 | pcountNA = function(...) .Call(CpcountNAR, if (...length() == 1L && is.list(..1)) ..1 else list(...)) 20 | pfirst = function(...) .Call(CpfirstR, FALSE, if (...length() == 1L && is.list(..1)) ..1 else list(...)) 21 | plast = function(...) .Call(CpfirstR, TRUE, if (...length() == 1L && is.list(..1)) ..1 else list(...)) 22 | pmean = function(..., na.rm=FALSE) .Call(CpmeanR, na.rm, if (...length() == 1L && is.list(..1)) ..1 else list(...)) 23 | pprod = function(..., na.rm=FALSE) .Call(CpprodR, na.rm, if (...length() == 1L && is.list(..1)) ..1 else list(...)) 24 | psum = function(..., na.rm=FALSE) .Call(CpsumR, na.rm, if (...length() == 1L && is.list(..1)) ..1 else list(...)) 25 | setlevels = function(x, old = levels(x), new, skip_absent=FALSE) invisible(.Call(CsetlevelsR, x, old, new, skip_absent)) 26 | topn = function(vec, n=6L, decreasing=TRUE, hasna=TRUE,index=TRUE) if(index) .Call(CtopnR, vec, n, decreasing, hasna, parent.frame()) else vec[.Call(CtopnR, vec, n, decreasing, hasna, parent.frame())] 27 | uniqLen = function(x) .Call(CdupLenR, x) 28 | vswitch = function(x, values, outputs, default=NULL, nThread=getOption("kit.nThread"), checkEnc = TRUE) .Call(CvswitchR, x, values, outputs, default, nThread, checkEnc) 29 | 30 | .onUnload = function(libpath) library.dynam.unload("kit", libpath) #nocov 31 | 32 | .onAttach = function(libname, pkgname) { #nocov 33 | omp = if(.Call(CompEnabledR)) "enabled" else "disabled" #nocov 34 | nth = getOption("kit.nThread") #nocov 35 | thd = if (nth > 1L) " threads)" else " thread)" #nocov 36 | packageStartupMessage(paste0("Attaching kit 0.0.20 (OPENMP ",omp," using ",nth,thd)) #nocov 37 | } #nocov 38 | 39 | .onLoad = function(libname, pkgname) { #nocov 40 | if (!("kit.nThread" %in% names(options()))) { #nocov 41 | options("kit.nThread"=1L) #nocov 42 | } #nocov 43 | } #nocov 44 | 45 | pcount = function(..., value) { 46 | if(is.na(value[1])) { 47 | .Call(CpcountNAR, if (...length() == 1L && is.list(..1)) ..1 else list(...)) 48 | } else { 49 | .Call(CpcountR, value, if (...length() == 1L && is.list(..1)) ..1 else list(...)) 50 | } 51 | } 52 | 53 | psort = function(x, decreasing = FALSE, na.last = NA, nThread=getOption("kit.nThread"), c.locale = TRUE) { 54 | if (typeof(x) == "character") { 55 | return(.Call(CcpsortR, x, decreasing, nThread, na.last,parent.frame(), FALSE, c.locale)) 56 | } 57 | warning("Function 'psort' was only implemented for character vectors. Defaulting to base::sort.") 58 | sort(x, decreasing = decreasing, na.last = na.last,method = if(c.locale) "radix" else "quick") 59 | } 60 | 61 | shareData = function(data, map_name, verbose=FALSE) { 62 | conn = rawConnection(raw(0L), "w") 63 | serialize(data, conn) 64 | seek(conn, 0L) 65 | if (grepl('SunOS',Sys.info()['sysname'])) map_name = paste0("/",map_name) 66 | x = .Call( 67 | "CcreateMappingObjectR", map_name, paste0(map_name,"_key"), 68 | rawConnectionValue(conn), verbose 69 | ) 70 | close(conn) 71 | x 72 | } 73 | 74 | getData = function(map_name, verbose=FALSE) { 75 | if (grepl('SunOS',Sys.info()['sysname'])) map_name = paste0("/",map_name) 76 | output = .Call("CgetMappingObjectR", map_name, paste0(map_name,"_key"), verbose) 77 | conn = rawConnection(output,"r") 78 | obj = unserialize(conn) 79 | close(conn) 80 | obj 81 | } 82 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # kit 2 | R Package: Basic functions implemented in C (and for some missing from base R) 3 | 4 | [![CRAN](https://www.r-pkg.org/badges/version-last-release/kit?color=blue)](https://cran.r-project.org/package=kit) 5 | [![CRAN](https://badges.cranchecks.info/flavor/release/kit.svg)](https://cran.r-project.org/web/checks/check_results_kit.html) 6 | [![License: GPL v3](https://img.shields.io/github/license/2005m/kit)](https://www.gnu.org/licenses/gpl-3.0) 7 | [![R-CMD-check](https://github.com/2005m/kit/workflows/R-CMD-check/badge.svg)](https://github.com/2005m/kit/actions) 8 | [![Coverage Status](https://codecov.io/gh/2005m/kit/graph/badge.svg)](https://codecov.io/github/2005m/kit?branch=master) 9 | [![downloads](https://cranlogs.r-pkg.org/badges/kit)](https://www.r-pkg.org/pkg/kit) 10 | [![kit status badge](https://fastverse.r-universe.dev/badges/kit)](https://fastverse.r-universe.dev) 11 | -------------------------------------------------------------------------------- /cleanup: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | rm -f src/Makevars 3 | -------------------------------------------------------------------------------- /configure: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | CC=`${R_HOME}/bin/R CMD config CC` 3 | CFLAGS=`${R_HOME}/bin/R CMD config CFLAGS` 4 | 5 | printf "#include \nint main () { return omp_get_num_threads(); }" | ${CC} ${CFLAGS} -fopenmp -xc ->/dev/null 2>&1 || R_NO_OPENMP=1; 6 | rm a.out >/dev/null 2>&1 7 | 8 | if [ "$R_NO_OPENMP" = "1" ]; then 9 | printf "#include \nint main () { return omp_get_num_threads(); }" | ${CC} ${CFLAGS} -Xpreprocessor -fopenmp -lomp -xc ->/dev/null 2>&1 || R_NO_OPENMP=2; 10 | rm a.out >/dev/null 2>&1 11 | if [ "$R_NO_OPENMP" = "2" ]; then 12 | if [ "`uname`" = "Linux" ] ;then 13 | echo "*** On Linux and OpenMP not supported!" 14 | sed -e "s|@openmp_cflags@||g; s|@openmp_libslags@||g; s|@realtime_libslags@|-lrt|g;" src/Makevars.in > src/Makevars 15 | else 16 | echo "*** Not on Linux and OpenMP not supported!" 17 | sed -e "s|@openmp_cflags@||g; s|@openmp_libslags@||g; s|@realtime_libslags@||g;" src/Makevars.in > src/Makevars 18 | fi 19 | else 20 | echo "*** OpenMP supported (-Xpreprocessor)!" 21 | sed -e "s|@openmp_cflags@|-Xpreprocessor -fopenmp|g; s|@openmp_libslags@|-lomp|g; s|@realtime_libslags@||g;" src/Makevars.in > src/Makevars 22 | fi 23 | else 24 | if [ "`uname`" = "Darwin" ] ;then 25 | if gcc --version | grep -i "gcc" > /dev/null; then 26 | echo "*** On Darwin and GCC OpenMP supported!" 27 | sed -e "s|@openmp_cflags@|-Xpreprocessor -fopenmp|g; s|@openmp_libslags@|-lgomp|g; s|@realtime_libslags@||g;" src/Makevars.in > src/Makevars 28 | elif clang --version | grep -i "clang" > /dev/null; then 29 | echo "*** On Darwin and CLANG OpenMP supported!" 30 | sed -e "s|@openmp_cflags@|-Xpreprocessor -fopenmp|g; s|@openmp_libslags@|-lomp|g; s|@realtime_libslags@||g;" src/Makevars.in > src/Makevars 31 | fi 32 | else 33 | echo "*** On Linux and OpenMP supported!" 34 | sed -e "s|@openmp_cflags@|\$(SHLIB_OPENMP_CFLAGS)|g; s|@openmp_libslags@|\$(SHLIB_OPENMP_CFLAGS)|g; s|@realtime_libslags@|-lrt|g;" src/Makevars.in > src/Makevars 35 | fi 36 | fi 37 | 38 | exit 0 39 | -------------------------------------------------------------------------------- /inst/NEWS.Rd: -------------------------------------------------------------------------------- 1 | \name{NEWS} 2 | \title{News for \R Package \pkg{kit}} 3 | \encoding{UTF-8} 4 | 5 | \newcommand{\CRANpkg}{\href{https://CRAN.R-project.org/package=#1}{\pkg{#1}}} 6 | 7 | \section{version 0.0.20 (2025-04-17)}{ 8 | \subsection{Notes}{ 9 | \itemize{ 10 | \item Update copyright date in c files 11 | 12 | \item Fix note on CRAN regarding Rf_isFrame 13 | } 14 | } 15 | } 16 | 17 | \section{version 0.0.19 (2024-09-07)}{ 18 | \subsection{Bug Fixes}{ 19 | \itemize{ 20 | \item Fix multiple warnings in C code. 21 | } 22 | } 23 | } 24 | 25 | \section{version 0.0.18 (2024-06-06)}{ 26 | \subsection{Bug Fixes}{ 27 | \itemize{ 28 | \item Fix \code{iif} tests for new version of R. 29 | } 30 | } 31 | } 32 | 33 | \section{version 0.0.17 (2024-05-03)}{ 34 | \subsection{Bug Fixes}{ 35 | \itemize{ 36 | \item Fix \code{nswitch}. Thanks to Sebastian Krantz for raising an issue. 37 | } 38 | } 39 | \subsection{Notes}{ 40 | \itemize{ 41 | \item Update copyright date in c files 42 | 43 | \item Fix note on CRAN regarding SETLENGTH 44 | } 45 | } 46 | } 47 | 48 | \section{version 0.0.16 (2024-03-01)}{ 49 | \subsection{Notes}{ 50 | \itemize{ 51 | \item Check if \code{"kit.nThread"} is defined before setting it to \code{1L} 52 | } 53 | } 54 | } 55 | 56 | \section{version 0.0.15 (2023-10-01)}{ 57 | \subsection{Notes}{ 58 | \itemize{ 59 | \item Correct typo in configure file 60 | } 61 | } 62 | } 63 | 64 | \section{version 0.0.14 (2023-08-12)}{ 65 | \subsection{Notes}{ 66 | \itemize{ 67 | \item Update configure file to extend support for GCC 68 | 69 | \item Correct warnings in NEWS.Rd (strong) 70 | 71 | \item Correct typo in funique.Rd thanks to @davidbudzynski 72 | } 73 | } 74 | } 75 | 76 | \section{version 0.0.13 (2023-02-24)}{ 77 | \subsection{Notes}{ 78 | \itemize{ 79 | \item Function \code{pprod} now returns double output even if inputs are integer - in line with \code{base::prod} - to avoid integer overflows. 80 | 81 | \item Update configure file 82 | } 83 | } 84 | } 85 | 86 | \section{version 0.0.12 (2022-10-26)}{ 87 | \subsection{New Features}{ 88 | \itemize{ 89 | \item Function \code{pcountNA} is equivalent to \code{pcount(..., value = NA)}. 90 | 91 | \item Function \code{pcountNA} and \code{pcount(..., value = NA)} allow \code{NA} counting with mixed data type (including \code{data.frame}). \code{pcountNA} also supports list-vectors as inputs and counts empty or \code{NULL} elements as \code{NA}. 92 | 93 | \item Functions \code{panyv}, \code{panyNA}, \code{pallv} and \code{pallNA} are added as efficient wrappers around \code{pcount} and \code{pcountNA}. They are parallel equivalents of scalar functions \code{base::anyNA} and \code{anyv}, \code{allv} and \code{allNA} in the 'collapse' R package. 94 | 95 | \item Functions \code{pfirst} and \code{plast} are added to efficiently obtain the row-wise first and last non-missing value or non-empty element of lists. They are parallel equivalents to the (column-wise) \code{ffirst} and \code{flast} functions in the 'collapse' R package. Implemented by @SebKrantz. 96 | 97 | \item Functions \code{psum/pprod/pmean} also support logical vectors as input. Implemented by @SebKrantz. 98 | } 99 | } 100 | \subsection{Bug Fixes}{ 101 | \itemize{ 102 | \item Function \code{charToFact} was not returning proper results. Thanks to @alex-raw for raising an issue. 103 | } 104 | } 105 | \subsection{Notes}{ 106 | \itemize{ 107 | \item Function \code{pprod} now returns double output even if inputs are integer - in line with \code{base::prod} - to avoid integer overflows. 108 | 109 | \item C compiler warnings on CRAN R-devel caused by compilation with -Wstrict-prototypes are now fixed. Declaration of functions without prototypes is depreciated in all versions of C. Thanks to Sebastian Krantz for the PR. 110 | } 111 | } 112 | } 113 | 114 | \section{version 0.0.11 (2022-03-19)}{ 115 | \subsection{New Features}{ 116 | \itemize{ 117 | \item Function \code{pcount} now supports data.frame. 118 | } 119 | } 120 | \subsection{Bug Fixes}{ 121 | \itemize{ 122 | \item Function \code{pcount} now works with specific NA values, i.e. NA_real_, NA_character_ etc... 123 | } 124 | } 125 | } 126 | 127 | \section{version 0.0.10 (2021-11-28)}{ 128 | \subsection{New Features}{ 129 | \itemize{ 130 | \item Function \code{psum}, \code{pmean}, \code{pprod}, \code{pany} and \code{pall} now support lists. Thanks to Sebastian Krantz for the request and code suggestion. 131 | } 132 | } 133 | \subsection{Bug Fixes}{ 134 | \itemize{ 135 | \item Function \code{topn} should now work for ALTREP object. Thanks to @ben-schwen for raising an issue. 136 | } 137 | } 138 | } 139 | 140 | \section{version 0.0.9 (2021-09-12)}{ 141 | \subsection{Notes}{ 142 | \itemize{ 143 | \item Re-organise header to prevent compilation errors with new version of Clang due to conflicts between R C headers and OpenMP. 144 | } 145 | } 146 | } 147 | 148 | \section{version 0.0.8 (2021-08-21)}{ 149 | \subsection{New Features}{ 150 | \itemize{ 151 | \item Function \code{funique} now preserves the attributes if the input is a 152 | \code{data.table}, \code{tibble} or similar objects. Thanks to Sebastian Krantz for the request. 153 | 154 | \item Function \code{topn} now defaults to base R \code{order} for large value of \code{n}. 155 | Please see updated documentation for more information \code{?kit::topn}. 156 | 157 | \item Function \code{charToFact} gains a new argument \code{addNA=TRUE} to be used 158 | to include (or not) \code{NA} in levels of the output. 159 | 160 | \item Function \code{shareData}, \code{getData} and \code{clearData} implemented 161 | to share data objects between \R sessions. These functions are experimental and might change in the future. 162 | Feedback is welcome. Please see \code{?kit::shareData} for more information. 163 | } 164 | } 165 | \subsection{Notes}{ 166 | \itemize{ 167 | \item Few \code{calloc} functions at C level have been replaced by R C API function 168 | \code{Calloc} to avoid valgrind errors/warnings in Travis CI. 169 | 170 | \item Errors reported by \code{rchk} on CRAN have been fixed. 171 | } 172 | } 173 | } 174 | 175 | \section{version 0.0.7 (2021-03-07)}{ 176 | \subsection{New Features}{ 177 | \itemize{ 178 | \item Function \code{charToFact} gains a new argument \code{decreasing=FALSE} to be used 179 | to order levels of the output in decreasing or increasing order. 180 | 181 | \item Function \code{topn} gains a new argument \code{index=TRUE} to be used return 182 | index (\code{TRUE}) or values (\code{FALSE}) of input vector. 183 | } 184 | } 185 | \subsection{Bug Fixes}{ 186 | \itemize{ 187 | \item Some tests of memory access errors using valgrind and AddressSanitizer were reported by CRAN. 188 | An attempt to fix these errors has been submitted as part of this package version. It also seems that 189 | these same errors were causing some tests to fail for \code{funique} and \code{psort} on some platforms. 190 | } 191 | } 192 | \subsection{Notes}{ 193 | \itemize{ 194 | \item Functions \code{pmean}, \code{pprod} and \code{psum} will result 195 | in error if used with factors. Documentation has been updated. 196 | } 197 | } 198 | } 199 | 200 | \section{version 0.0.6 (2021-02-21)}{ 201 | \subsection{New Features}{ 202 | \itemize{ 203 | \item Function \code{funique} and \code{fduplicated} gain an additional argument 204 | \code{fromLast=FALSE} to indicate whether the search should start from the end or beginning 205 | \href{https://github.com/2005m/kit/pull/11}{PR#11}. 206 | 207 | \item Functions \code{pall}, \code{pany}, \code{pmean}, 208 | \code{pprod} and \code{psum} accept \code{data.frame} as input 209 | \href{https://github.com/2005m/kit/pull/15}{PR#15}. Please see documentation for more 210 | information. 211 | 212 | \item Function \code{charToFact} is equivalent to to base R \code{as.factor} but is much 213 | quicker and only converts character vector to factor. Note that it is parallelised. For more 214 | details and benchmark please see \code{?kit::charToFact}. 215 | 216 | \item Function \code{psort} is experimental and equivalent to to base R \code{sort} 217 | but is only for character vector. It can sort by "C locale" or by "R session locale". 218 | For more details and benchmark please see \code{?kit::psort}. 219 | } 220 | } 221 | \subsection{Notes}{ 222 | \itemize{ 223 | \item A few OpenMP directives were missing for functions \code{vswitch} and 224 | \code{nswitch} for character vectors. These have been added in 225 | \href{https://github.com/2005m/kit/pull/12}{PR#12}. 226 | 227 | \item Function \code{funique} was not preserving attributes for character, logical and 228 | complex vectors/data.frames. Thanks to Sebastian Krantz (@SebKrantz) for bringing that to my 229 | attention. This has been fixed in \href{https://github.com/2005m/kit/pull/13}{PR#13}. 230 | 231 | \item Functions \code{funique} and \code{uniqLen} should now be faster for 232 | \code{factor} and \code{logical} vectors \href{https://github.com/2005m/kit/pull/14}{PR#14}. 233 | } 234 | } 235 | } 236 | 237 | \section{version 0.0.5 (2020-11-21)}{ 238 | \subsection{New Features}{ 239 | \itemize{ 240 | \item Function \code{uniqLen(x)} is equivalent to base R \code{length(unique(x))} and 241 | \code{uniqueN} in package \CRANpkg{data.table}. Function \code{uniqLen}, implemented in C, supports 242 | vectors, \code{data.frame} and \code{matrix}. It should be faster than these functions. For more 243 | details and benchmark please see \code{?kit::uniqLen}. 244 | 245 | \item Function \code{vswitch} now supports mixed encoding and gains an additional argument 246 | \code{checkEnc=TRUE}. Thanks to Xianying Tan (@shrektan) for the request and review 247 | \href{https://github.com/2005m/kit/pull/7}{PR#7}. 248 | 249 | \item Function \code{nswitch} is a nested version of function \code{vswitch} 250 | and also supports mixed encoding. Please see please see \code{?kit::nswitch} for further details. 251 | Thanks to Xianying Tan (@shrektan) for the request and review \href{https://github.com/2005m/kit/pull/10}{PR#10}. 252 | } 253 | } 254 | \subsection{Notes}{ 255 | \itemize{ 256 | \item Small algorithmic improvement for functions \code{fduplicated}, \code{funique} 257 | and \code{countOccur} for \code{vectors}, \code{data.frame} and \code{matrix}. 258 | 259 | \item A tests folder has been added to the source package to track coverage and bugs. 260 | } 261 | } 262 | \subsection{C-Level Facilities}{ 263 | \itemize{ 264 | \item Function \code{nif} has been split into two distinctive functions at C level, 265 | one has its arguments evaluated in a lazy way and is for R users and the other one (nifInternalR) 266 | is not lazy and is intended for usage at C level. 267 | } 268 | } 269 | } 270 | 271 | \section{version 0.0.4 (2020-07-21)}{ 272 | \subsection{New Features}{ 273 | \itemize{ 274 | \item Function \code{countOccur(x)}, implemented in C, is comparable to \code{base} 275 | \R function \code{table}. It returns a \code{data.frame} and is between 3 to 50 times faster. 276 | For more details, please see \code{?kit::countOccur}. 277 | 278 | \item Functions \code{funique} and \code{fduplicated} now support matrices. 279 | Additionally, these two functions should also have better performance compare to previous release. 280 | 281 | \item Functions \code{topn} has an additional argument \code{hasna=TRUE} to indicates whether 282 | data contains \code{NA} value or not. If the data does not contain \code{NA} values, the function 283 | should be faster. 284 | } 285 | } 286 | \subsection{C-Level Facilities}{ 287 | \itemize{ 288 | \item A few C functions have been added to subset \code{data.frame} and \code{matrix} as well as 289 | do other operations. These functions are not exported or visible to the user but might become 290 | available and callable at C level in the future. 291 | } 292 | } 293 | \subsection{Bug Fixes}{ 294 | \itemize{ 295 | \item Function \code{fpos} was not properly handling \code{NaN} and \code{NA} for complex 296 | and double. This should now be fixed. The function has also been changed in case the 'needle' and 297 | 'haysatck' are vectors so that a vector is returned. 298 | 299 | \item Functions \code{funique} and \code{fduplicated} were not properly handling 300 | data containing \code{POSIX} data. This has now been fixed. 301 | } 302 | } 303 | } 304 | 305 | \section{version 0.0.3 (2020-06-21)}{ 306 | \subsection{New Features}{ 307 | \itemize{ 308 | \item Functions \code{fduplicated(x)} and \code{funique(x)}, implemented in C, 309 | are comparable to \code{base} \R functions \code{duplicated} and \code{unique}. For more details, 310 | please see \code{?kit::funique}. 311 | 312 | \item Functions \code{psum} and \code{pprod} have now better performance for 313 | type double and complex. 314 | } 315 | } 316 | \subsection{Bug Fixes}{ 317 | \itemize{ 318 | \item Function \code{count(x, y)} now checks that \code{x} and \code{y} have the same class and 319 | levels. So does \code{pcount}. 320 | 321 | \item Function \code{pmean} was not callable at C level because of a typo. This is now fixed. 322 | } 323 | } 324 | } 325 | 326 | \section{version 0.0.2 (2020-05-22)}{ 327 | \subsection{New Features}{ 328 | \itemize{ 329 | \item Function \code{count(x, value)}, implemented in C, to simply count the number of times 330 | an element \code{value} occurs in a vector or in a list \code{x}. For more details, please see 331 | \code{?kit::count}. 332 | 333 | \item Function \code{pmean(..., na.rm=FALSE)}, \code{pall(..., na.rm=FALSE)}, 334 | \code{pany(..., na.rm=FALSE)} and \code{pcount(..., value)}, implemented in C, 335 | are similar to already available function \code{psum} and \code{pprod}. These 336 | functions respectively apply base \R functions \code{mean}, \code{all} and \code{any} element-wise. 337 | For more details, benchmarks and help, please see \code{?kit::pmean}. 338 | } 339 | } 340 | \subsection{Bug Fixes}{ 341 | \itemize{ 342 | \item Fix Solaris Unicode warnings for NEWS file. Benchmarks have been moved from the NEWS file to 343 | each function Rd file. 344 | 345 | \item Fix some \code{NA} edge cases for \code{pprod} and \code{psum} so these 346 | functions behave more like base \R function \code{prod} and \code{sum}. 347 | 348 | \item Fix installation errors for version of R (<3.5.0). 349 | } 350 | } 351 | } 352 | 353 | \section{version 0.0.1 (2020-05-03)}{ 354 | \subsection{Initial Release}{ 355 | \itemize{ 356 | \item Function \code{fpos(needle, haystack, all=TRUE, overlap=TRUE)}, implemented in C, is 357 | inspired by base function \code{which} when used in the following form 358 | \code{which(x == y, arr.ind =TRUE}). Function \code{fpos} returns the index(es) or position(s) 359 | of a matrix/vector within a larger matrix/vector. Please see \code{?kit::fpos} for more 360 | details. 361 | 362 | \item Function \code{iif(test, yes, no, na=NULL, tprom=FALSE, nThread=getOption("kit.nThread"))}, 363 | originally contributed as \code{fifelse} in package \CRANpkg{data.table}, was moved to package kit 364 | to be developed independently. Unlike the current version of \code{fifelse}, \code{iif} allows 365 | type promotion like base function \code{ifelse}. For further details about the differences 366 | with \code{fifelse}, as well as \code{hutils::if_else} and \code{dplyr::if_else}, please see 367 | \code{?kit::iif}. 368 | 369 | \item Function \code{nif(..., default=NULL)}, implemented in C, is inspired by 370 | \emph{SQL CASE WHEN}. It is comparable to \CRANpkg{dplyr} function \code{case_when} however it 371 | evaluates it arguments in a lazy way (i.e only when needed). Function \code{nif} was 372 | originally contributed as function \code{fcase} in the \CRANpkg{data.table} package but then moved 373 | to package kit so its development may resume independently. Please see \code{?kit::nif} for 374 | more details. 375 | 376 | \item Function \code{pprod(..., na.rm=FALSE)} and \code{psum(..., na.rm=FALSE)}, 377 | implemented in C, are inspired by base function \code{pmin} and \code{pmax}. These new 378 | functions work only for integer, double and complex types and do not recycle vectors. Please 379 | see \code{?kit::psum} for more details. 380 | 381 | \item Function \code{setlevels(x, old, new, skip_absent=FALSE)}, implemented in C, 382 | may be used to set levels of a factor object. Please see \code{?kit::setlevels} for more details. 383 | 384 | \item Function \code{topn(vec, n=6L, decreasing=TRUE)}, implemented in C, returns the top 385 | largest or smallest \code{n} values for a given numeric vector \code{vec}. It is inspired by 386 | \code{dplyr::top_n} and equivalent to base functions order and sort in specific cases as shown 387 | in the documentation. Please see \code{?kit::topn} for more details. 388 | 389 | \item Function \code{vswitch(x, values, outputs, default=NULL, nThread=getOption("kit.nThread"))} 390 | , implemented in C, is a vectorised version of \code{base} \R function \code{switch}. This 391 | function can also be seen as a particular case of function \code{nif}. Please see 392 | \code{?kit::switch} for more details. 393 | } 394 | } 395 | } 396 | -------------------------------------------------------------------------------- /man/charToFact.Rd: -------------------------------------------------------------------------------- 1 | \name{charToFact} 2 | \alias{charToFact} 3 | \title{Convert Character Vector to Factor} 4 | \description{ 5 | Similar to \code{base::as.factor} but much faster and only for converting character vector to factor. 6 | } 7 | \usage{ 8 | charToFact(x, decreasing=FALSE, addNA=TRUE, 9 | nThread=getOption("kit.nThread")) 10 | } 11 | \arguments{ 12 | \item{x}{ A vector of type character} 13 | \item{decreasing}{ A boolean. Whether to order levels in decreasing order or not. Default is \code{FALSE}.} 14 | \item{addNA}{ A boolean. Whether to include \code{NA} in levels of the output or not. Default is \code{TRUE}.} 15 | \item{nThread}{ Number of thread to use.} 16 | } 17 | \value{ 18 | The character vector input as a factor. Please note that, unlike \code{as.factor}, \code{NA} levels are preserved by default, however this can be changed by setting argument \code{addNA} to \code{FALSE}. 19 | } 20 | \examples{ 21 | x = c("b","A","B","a","\xe4","a") 22 | Encoding(x) = "latin1" 23 | identical(charToFact(x), as.factor(x)) 24 | identical(charToFact(c("a","b",NA,"a")), addNA(as.factor(c("a","b",NA,"a")))) 25 | identical(charToFact(c("a","b",NA,"a"), addNA=FALSE), as.factor(c("a","b",NA,"a"))) 26 | 27 | # Benchmarks 28 | # ---------- 29 | # x = sample(letters,3e7,TRUE) 30 | # microbenchmark::microbenchmark( 31 | # kit=kit::charToFact(x,nThread = 1L), 32 | # base=as.factor(x), 33 | # times = 5L 34 | # ) 35 | # Unit: milliseconds 36 | # expr min lq mean median uq max neval 37 | # kit 188 190 196 194 200 208 5 38 | # base 1402 1403 1455 1414 1420 1637 5 39 | } 40 | -------------------------------------------------------------------------------- /man/count.Rd: -------------------------------------------------------------------------------- 1 | \name{count} 2 | \alias{count} 3 | \alias{countNA} 4 | \alias{countOccur} 5 | \title{ count, countNA and countOccur} 6 | \description{ 7 | Simple functions to count the number of times an element occurs. 8 | } 9 | \usage{ 10 | count(x, value) 11 | countNA(x) 12 | countOccur(x) 13 | } 14 | \arguments{ 15 | \item{x}{ A vector or list for \code{countNA}. A vector for \code{count} and a vector or \code{data.frame} for \code{countOccur}.} 16 | \item{value}{ An element to look for. Must be non \code{NULL}, of length 1 and same type as \code{x}.} 17 | } 18 | \value{ 19 | For a vector \code{countNA} will return the total number of \code{NA} value. For a list, \code{countNA} will return a list with the number of \code{NA} in each item of the list. 20 | This is a major difference with \code{sum(is.na(x))} which will return the aggregated number of \code{NA}. 21 | Also, please note that every item of a list can be of different type and \code{countNA} will take them into account whether they are of type logical (\code{NA}), integer (\code{NA_integer_}), double (\code{NA_real_}), complex (\code{NA_complex_}) or character (\code{NA_character_}). 22 | As opposed to \code{countNA}, \code{count} does not support list type and requires \code{x} and \code{value} to be of the same type. 23 | Function \code{countOccur} takes vectors or data.frame as inputs and returns a \code{data.frame} with the number of times each value in the vector occurs or number of times each row in a \code{data.frame} occurs. 24 | } 25 | \seealso{ 26 | \code{\link{pcount}} 27 | } 28 | \author{Morgan Jacob} 29 | \examples{ 30 | x = c(1, 3, NA, 5) 31 | count(x, 3) 32 | 33 | countNA(x) 34 | countNA(as.list(x)) 35 | 36 | countOccur(x) 37 | 38 | # Benchmarks countNA 39 | # ------------------ 40 | # x = sample(c(TRUE,NA,FALSE),1e8,TRUE) # 382 Mb 41 | # microbenchmark::microbenchmark( 42 | # countNA(x), 43 | # sum(is.na(x)), 44 | # times=5L 45 | # ) 46 | # Unit: milliseconds 47 | # expr min lq mean median uq max neval 48 | # countNA(x) 98.7 99.2 101.2 100.1 101.4 106.4 5 49 | # sum(is.na(x)) 405.4 441.3 478.9 461.1 523.9 562.6 5 50 | # 51 | # Benchmarks countOccur 52 | # --------------------- 53 | # x = rnorm(1e6) 54 | # y = data.table::data.table(x) 55 | # microbenchmark::microbenchmark( 56 | # kit= countOccur(x), 57 | # data.table = y[, .N, keyby = x], 58 | # table(x), 59 | # times = 10L 60 | # ) 61 | # Unit: milliseconds 62 | # expr min lq mean median uq max neval 63 | # kit 62.26 63.88 89.29 75.49 95.17 162.40 10 64 | # data.table 189.17 194.08 235.30 227.43 263.74 337.74 10 # setDTthreads(1L) 65 | # data.table 140.15 143.91 190.04 182.85 234.48 261.43 10 # setDTthreads(2L) 66 | # table(x) 3560.77 3705.06 3843.47 3807.12 4048.40 4104.11 10 67 | } 68 | -------------------------------------------------------------------------------- /man/fpos.Rd: -------------------------------------------------------------------------------- 1 | \name{fpos} 2 | \alias{fpos} 3 | \title{ Find a matrix position inside a larger matrix } 4 | \description{ 5 | The function \code{fpos} returns the locations (row and column index) where a small matrix may be found in a larger matrix. The function also works with vectors. 6 | } 7 | \usage{ 8 | fpos(needle, haystack, all=TRUE, overlap=TRUE) 9 | } 10 | \arguments{ 11 | \item{needle}{ A matrix or vector to search for in the larger matrix or vector \code{haystack}. Note that the \code{needle} dimensions (row and column size) must be smaller than the \code{haystack} dimensions. } 12 | \item{haystack}{ A matrix or vector to look into.} 13 | \item{all}{ A logical value to indicate whether to return all occurrences (\code{TRUE}) or only the first one (\code{FALSE}). Default value is \code{TRUE}.} 14 | \item{overlap}{ A logical value to indicate whether to allow the small matrix occurrences to overlap or not. Default value is \code{TRUE}.} 15 | } 16 | \value{ 17 | A two columns matrix that contains the position or index where the small matrix (needle) can be found in the larger matrix. The first column refers to rows and the second to columns. In case both the needle and haystack are vectors, a vector is returned. 18 | } 19 | \author{Morgan Jacob} 20 | \examples{ 21 | # Example 1: find a matrix inside a larger one 22 | big_matrix = matrix(c(1:30), nrow = 10) 23 | small_matrix = matrix(c(14, 15, 24, 25), nrow = 2) 24 | 25 | fpos(small_matrix, big_matrix) 26 | 27 | # Example 2: find a vector inside a larger one 28 | fpos(14:15, 1:30) 29 | 30 | # Example 3: 31 | big_matrix = matrix(c(1:5), nrow = 10, ncol = 5) 32 | small_matrix = matrix(c(2:3), nrow = 2, ncol = 2) 33 | 34 | # return all occurences 35 | fpos(small_matrix, big_matrix) 36 | 37 | # return only the first 38 | fpos(small_matrix, big_matrix, all = FALSE) 39 | 40 | # return non overlapping occurences 41 | fpos(small_matrix, big_matrix, overlap = FALSE) 42 | 43 | # Benchmarks 44 | # ---------- 45 | # x = matrix(1:5, nrow=1e4, ncol=5e3) # 191Mb 46 | # microbenchmark::microbenchmark( 47 | # fpos=kit::fpos(1L, x), 48 | # which=which(x==1L, arr.ind=TRUE), 49 | # times=10L 50 | # ) 51 | # Unit: milliseconds 52 | # expr min lq mean median uq max neval 53 | # fpos 202 206 220 221 231 241 10 54 | # which 612 637 667 653 705 724 10 55 | } 56 | -------------------------------------------------------------------------------- /man/funique.Rd: -------------------------------------------------------------------------------- 1 | \name{fduplicated/funique} 2 | \alias{fduplicated} 3 | \alias{funique} 4 | \alias{uniqLen} 5 | \title{ Fast duplicated and unique} 6 | \description{ 7 | Similar to base R functions \code{duplicated} and \code{unique}, \code{fduplicated} and \code{funique} are slightly faster for vectors and much faster for \code{data.frame}. Function \code{uniqLen} is equivalent to base R \code{length(unique)} or \code{data.table::uniqueN}. 8 | } 9 | \usage{ 10 | fduplicated(x, fromLast = FALSE) 11 | funique(x, fromLast = FALSE) 12 | uniqLen(x) 13 | } 14 | \arguments{ 15 | \item{x}{ A vector, data.frame or matrix.} 16 | \item{fromLast}{ A logical value to indicate whether the search should start from the end or beginning. Default is \code{FALSE}.} 17 | } 18 | \value{ 19 | Function \code{fduplicated} returns a logical vector and \code{funique} returns a vector of the same type as \code{x} without the duplicated value. Function \code{uniqLen} returns an integer. 20 | } 21 | \author{Morgan Jacob} 22 | \examples{ 23 | # Example 1: fduplicated 24 | fduplicated(iris$Species) 25 | 26 | # Example 2: funique 27 | funique(iris$Species) 28 | 29 | # Example 3: uniqLen 30 | uniqLen(iris$Species) 31 | 32 | # Benchmarks 33 | # ---------- 34 | # x = sample(c(1:10,NA_integer_),1e8,TRUE) # 382 Mb 35 | # microbenchmark::microbenchmark( 36 | # duplicated(x), 37 | # fduplicated(x), 38 | # times = 5L 39 | # ) 40 | # Unit: seconds 41 | # expr min lq mean median uq max neval 42 | # duplicated(x) 2.21 2.21 2.48 2.21 2.22 3.55 5 43 | # fduplicated(x) 0.38 0.39 0.45 0.48 0.49 0.50 5 44 | # 45 | # vs data.table 46 | # ------------- 47 | # df = iris[,5:1] 48 | # for (i in 1:16) df = rbind(df, df) # 338 Mb 49 | # dt = data.table::as.data.table(df) 50 | # microbenchmark::microbenchmark( 51 | # kit = funique(df), 52 | # data.table = unique(dt), 53 | # times = 5L 54 | # ) 55 | # Unit: seconds 56 | # expr min lq mean median uq max neval 57 | # kit 1.22 1.27 1.33 1.27 1.36 1.55 5 58 | # data.table 6.20 6.24 6.43 6.33 6.46 6.93 5 # (setDTthreads(1L)) 59 | # data.table 4.20 4.25 4.47 4.26 4.32 5.33 5 # (setDTthreads(2L)) 60 | # 61 | # microbenchmark::microbenchmark( 62 | # kit=uniqLen(x), 63 | # data.table=uniqueN(x), 64 | # times = 5L, unit = "s" 65 | # ) 66 | # Unit: seconds 67 | # expr min lq mean median uq max neval 68 | # kit 0.17 0.17 0.17 0.17 0.17 0.17 5 69 | # data.table 1.66 1.68 1.70 1.71 1.71 1.72 5 # (setDTthreads(1L)) 70 | # data.table 1.13 1.15 1.16 1.16 1.18 1.18 5 # (setDTthreads(2L)) 71 | } 72 | -------------------------------------------------------------------------------- /man/iif.Rd: -------------------------------------------------------------------------------- 1 | \name{iif} 2 | \alias{iif} 3 | \title{ Fast if else } 4 | \description{ 5 | \code{iif} is a faster and more robust replacement of \code{\link[base]{ifelse}}. It is comparable to \code{dplyr::if_else}, \code{hutils::if_else} and \code{data.table::fifelse}. It returns a value with the same length as \code{test} filled with corresponding values from \code{yes}, \code{no} or eventually \code{na}, depending on \code{test}. It does not support S4 classes. 6 | } 7 | \usage{ 8 | iif(test, yes, no, na=NULL, tprom=FALSE, nThread=getOption("kit.nThread")) 9 | } 10 | \arguments{ 11 | \item{test}{ A logical vector. } 12 | \item{yes, no}{ Values to return depending on \code{TRUE}/\code{FALSE} element of \code{test}. They must be the same type and be either length \code{1} or the same length of \code{test}. } 13 | \item{na}{ Value to return if an element of \code{test} is missing. It must be the same type as \code{yes}/\code{no} and be either length \code{1} or the same length of \code{test}. Please note that \code{NA} is treated as logical value of length 1 as per the R documentation. \code{NA_integer_}, \code{NA_real_}, \code{NA_complex_} and \code{NA_character_} are equivalent to \code{NA} but for integer, double, complex and character. Default value for argument \code{na} is \code{NULL} and will automatically default to the equivalent NA type of argument \code{yes}.} 14 | \item{tprom}{ Argument to indicate whether type promotion of \code{yes} and \code{no} is allowed or not. Either \code{FALSE} or \code{TRUE}, default is \code{FALSE} to not allow type promotion. } 15 | \item{nThread}{ A integer for the number of threads to use with \emph{openmp}. Default value is \code{getOption("kit.nThread")}.} 16 | } 17 | \details{ 18 | In contrast to \code{\link[base]{ifelse}} attributes are copied from \code{yes} to the output. This is useful when returning \code{Date}, \code{factor} or other classes. 19 | Like \code{dplyr::if_else} and \code{hutils::if_else}, the \code{na} argument is by default set to \code{NULL}. This argument is set to \code{NA} in data.table::fifelse. 20 | Similarly to \code{dplyr::if_else} and when \code{tprom=FALSE}, \code{iif} requires same type for arguments \code{yes} and \code{no}. This is not strictly the case for \code{data.table::fifelse} which will coerce integer to double. 21 | When \code{tprom=TRUE}, \code{iif} behavior is similar to \code{base::ifelse} in the sense that it will promote or coerce \code{yes} and \code{no}to the "highest" used type. Note, however, that unlike \code{base::ifelse} attributes are still conserved. 22 | } 23 | \value{ 24 | A vector of the same length as \code{test} and attributes as \code{yes}. Data values are taken from the values of \code{yes} and \code{no}, eventually \code{na}. 25 | } 26 | \seealso{ 27 | \code{\link{nif}} 28 | \code{\link{vswitch}} 29 | } 30 | \author{Morgan Jacob} 31 | \examples{ 32 | x = c(1:4, 3:2, 1:4) 33 | iif(x > 2L, x, x - 1L) 34 | 35 | # unlike ifelse, iif preserves attributes, taken from the 'yes' argument 36 | dates = as.Date(c("2011-01-01","2011-01-02","2011-01-03","2011-01-04","2011-01-05")) 37 | ifelse(dates == "2011-01-01", dates - 1, dates) 38 | iif(dates == "2011-01-01", dates - 1, dates) 39 | yes = factor(c("a","b","c")) 40 | no = yes[1L] 41 | ifelse(c(TRUE,FALSE,TRUE), yes, no) 42 | iif(c(TRUE,FALSE,TRUE), yes, no) 43 | 44 | # Example of using the 'na' argument 45 | iif(test = c(-5L:5L < 0L, NA), yes = 1L, no = 0L, na = 2L) 46 | 47 | # Example of using the 'tprom' argument 48 | iif(test = c(-5L:5L < 0L, NA), yes = 1L, no = "0", na = 2L, tprom = TRUE) 49 | } 50 | -------------------------------------------------------------------------------- /man/nif.Rd: -------------------------------------------------------------------------------- 1 | \name{nif} 2 | \alias{nif} 3 | \title{Nested if else} 4 | \description{ 5 | \code{nif} is a fast implementation of SQL \code{CASE WHEN} statement for R. Conceptually, \code{nif} is a nested version of \code{\link{iif}} (with smarter implementation than manual nesting). It is not the same but it is comparable to \code{dplyr::case_when} and \code{data.table::fcase}. 6 | } 7 | \usage{ 8 | nif(..., default=NULL) 9 | } 10 | \arguments{ 11 | \item{...}{ A sequence consisting of logical condition (\code{when})-resulting value (\code{value}) \emph{pairs} in the following order \code{when1, value1, when2, value2, ..., whenN, valueN}. Logical conditions \code{when1, when2, ..., whenN} must all have the same length, type and attributes. Each \code{value} may either share length with \code{when} or be length 1. Please see Examples section for further details.} 12 | \item{default}{ Default return value, \code{NULL} by default, for when all of the logical conditions \code{when1, when2, ..., whenN} are \code{FALSE} or missing for some entries. Argument \code{default} can be a vector either of length 1 or length of logical conditions \code{when1, when2, ..., whenN}. Note that argument 'default' must be named explicitly.} 13 | } 14 | \value{ 15 | Vector with the same length as the logical conditions (\code{when}) in \code{...}, filled with the corresponding values (\code{value}) from \code{...}, or eventually \code{default}. Attributes of output values \code{value1, value2, ...valueN} in \code{...} are preserved. 16 | } 17 | \details{ 18 | Unlike \code{data.table::fcase}, the \code{default} argument is set to \code{NULL}. In addition, \code{nif} can be called by other packages at C level. Note that at C level, the function has an additional argument \code{SEXP md} which is either \code{TRUE} for lazy evaluation or \code{FALSE} for non lazy evaluation. This argument is not exposed to R users and is more for C users. 19 | } 20 | \seealso{ 21 | \code{\link{iif}} 22 | \code{\link{vswitch}} 23 | } 24 | \author{Morgan Jacob} 25 | \examples{ 26 | x = 1:10 27 | nif( 28 | x < 5L, 1L, 29 | x > 5L, 3L 30 | ) 31 | 32 | nif( 33 | x < 5L, 1L:10L, 34 | x > 5L, 3L:12L 35 | ) 36 | 37 | # Lazy evaluation example 38 | nif( 39 | x < 5L, 1L, 40 | x >= 5L, 3L, 41 | x == 5L, stop("provided value is an unexpected one!") 42 | ) 43 | 44 | # nif preserves attributes, example with dates 45 | nif( 46 | x < 5L, as.Date("2019-10-11"), 47 | x > 5L, as.Date("2019-10-14") 48 | ) 49 | 50 | # nif example with factor; note the matching levels 51 | nif( 52 | x < 5L, factor("a", levels=letters[1:3]), 53 | x > 5L, factor("b", levels=letters[1:3]) 54 | ) 55 | 56 | # Example of using the 'default' argument 57 | nif( 58 | x < 5L, 1L, 59 | x > 5L, 3L, 60 | default = 5L 61 | ) 62 | 63 | nif( 64 | x < 5L, 1L, 65 | x > 5L, 3L, 66 | default = rep(5L, 10L) 67 | ) 68 | } 69 | -------------------------------------------------------------------------------- /man/psort.Rd: -------------------------------------------------------------------------------- 1 | \name{psort} 2 | \alias{psort} 3 | \title{Parallel Sort} 4 | \description{ 5 | Similar to \code{base::sort} but just for character vector and partially using parallelism. 6 | It is currently experimental and might change in the future. Use with caution. 7 | } 8 | \usage{ 9 | psort(x, decreasing=FALSE, na.last=NA, 10 | nThread=getOption("kit.nThread"),c.locale=TRUE) 11 | } 12 | \arguments{ 13 | \item{x}{ A vector of type character. If other, it will default to \code{base::sort}} 14 | \item{na.last}{ For controlling the treatment of \code{NA}s. If \code{TRUE}, missing values in the data are put last; if \code{FALSE}, they are put first; if \code{NA}, they are removed. } 15 | \item{decreasing}{ A boolean indicating where to sort the data in decreasing way. Default is \code{FALSE}. } 16 | \item{nThread}{ Number of thread to use. Default value is \code{1L}.} 17 | \item{c.locale}{ A boolean, whether to use C Locale or R session locale. Default TRUE.} 18 | } 19 | \value{ 20 | Returns the input \code{x} in sorted order similar to \code{base::sort} but usually faster. If \code{c.locale=FALSE}, \code{psort} will return the same output as \code{base::sort} with \code{method="quick"}, i.e. using R session locale. If \code{c.locale=TRUE}, \code{psort} will return the same output as \code{base::sort} with \code{method="radix"}, i.e. using C locale. See example below. 21 | } 22 | \author{Morgan Jacob} 23 | \examples{ 24 | x = c("b","A","B","a","\xe4") 25 | Encoding(x) = "latin1" 26 | identical(psort(x, c.locale=FALSE), sort(x)) 27 | identical(psort(x, c.locale=TRUE), sort(x, method="radix")) 28 | 29 | # Benchmarks 30 | # ---------- 31 | # strings = as.character(as.hexmode(1:1000)) 32 | # x = sample(strings, 1e8, replace=TRUE) 33 | # system.time({kit::psort(x, na.last = TRUE, nThread = 1L)}) 34 | # user system elapsed 35 | # 2.833 0.434 3.277 36 | # system.time({sort(x,method="radix",na.last = TRUE)}) 37 | # user system elapsed 38 | # 5.597 0.559 6.176 39 | # system.time({x[order(x,method="radix",na.last = TRUE)]}) 40 | # user system elapsed 41 | # 5.561 0.563 6.143 42 | } 43 | -------------------------------------------------------------------------------- /man/psum.Rd: -------------------------------------------------------------------------------- 1 | \name{parallel-funs} 2 | \alias{psum} 3 | \alias{pprod} 4 | \alias{pall} 5 | \alias{pallNA} 6 | \alias{pallv} 7 | \alias{pany} 8 | \alias{panyNA} 9 | \alias{panyv} 10 | \alias{pmean} 11 | \alias{pcount} 12 | \alias{pcountNA} 13 | \alias{pfirst} 14 | \alias{plast} 15 | \title{Parallel (Statistical) Functions} 16 | \description{ 17 | Vector-valued (statistical) functions operating in parallel over vectors passed as arguments, or a single list of vectors (such as a data frame). Similar to \code{\link{pmin}} and \code{\link{pmax}}, except that these functions do not recycle vectors. 18 | } 19 | \usage{ 20 | psum(..., na.rm = FALSE) 21 | pprod(..., na.rm = FALSE) 22 | pmean(..., na.rm = FALSE) 23 | pfirst(...) # (na.rm = TRUE) 24 | plast(...) # (na.rm = TRUE) 25 | pall(..., na.rm = FALSE) 26 | pallNA(...) 27 | pallv(..., value) 28 | pany(..., na.rm = FALSE) 29 | panyNA(...) 30 | panyv(..., value) 31 | pcount(..., value) 32 | pcountNA(...) 33 | } 34 | \arguments{ 35 | \item{...}{ suitable (atomic) vectors of the same length, or a single list of vectors (such as a \code{data.frame}). See Details on the allowed data types for each function, and Examples.} 36 | \item{na.rm}{ A logical indicating whether missing values should be removed. Default value is \code{FALSE}, except for \code{pfirst} and \code{plast}.} 37 | \item{value}{ A non \code{NULL} value of length 1. } % \code{pcount} will count how many times it occurs. 38 | } 39 | \details{ 40 | Functions \code{psum}, \code{pprod} work for integer, logical, double and complex types. \code{pmean} only supports integer, logical and double types. All 3 functions will error if used with factors. 41 | 42 | \code{pfirst}/\code{plast} select the first/last non-missing value (or non-empty or \code{NULL} value for list-vectors). They accept all vector types with defined missing values + lists, but can only jointly handle integer and double types (not numeric and complex or character and factor). If factors are passed, they all need to have identical levels. 43 | 44 | \code{pany} and \code{pall} are derived from base functions \code{all} and \code{any} and only allow logical inputs. 45 | 46 | \code{pcount} counts the occurrence of \code{value}, and expects arguments of the same data type (except for \code{value = NA}). \code{pcountNA} is equivalent to \code{pcount} with \code{value = NA}, and they both allow \code{NA} counting in mixed-type data. \code{pcountNA} additionally supports list vectors and counts empty or \code{NULL} elements as \code{NA}. 47 | 48 | Functions \code{panyv/pallv} are wrappers around \code{pcount}, and \code{panyNA/pallNA} are wrappers around \code{pcountNA}. They return a logical vector instead of the integer count. 49 | 50 | None of these functions recycle vectors i.e. all input vectors need to have the same length. All functions support long vectors with up to \code{2^64-1} elements. 51 | } 52 | \value{ 53 | \code{psum/pprod/pmean} return the sum, product or mean of all arguments. The value returned will be of the highest argument type (integer < double < complex). \code{pprod} only returns double or complex. \code{pall[v/NA]} and \code{pany[v/NA]} return a logical vector. \code{pcount[NA]} returns an integer vector. \code{pfirst/plast} return a vector of the same type as the inputs. 54 | } 55 | \seealso{ 56 | Package 'collapse' provides column-wise and scalar-valued analogues to many of these functions. 57 | } 58 | \author{Morgan Jacob and Sebastian Krantz} 59 | \examples{ 60 | x = c(1, 3, NA, 5) 61 | y = c(2, NA, 4, 1) 62 | z = c(3, 4, 4, 1) 63 | 64 | # Example 1: psum 65 | psum(x, y, z, na.rm = FALSE) 66 | psum(x, y, z, na.rm = TRUE) 67 | 68 | # Example 2: pprod 69 | pprod(x, y, z, na.rm = FALSE) 70 | pprod(x, y, z, na.rm = TRUE) 71 | 72 | # Example 3: pmean 73 | pmean(x, y, z, na.rm = FALSE) 74 | pmean(x, y, z, na.rm = TRUE) 75 | 76 | # Example 4: pfirst and plast 77 | pfirst(x, y, z) 78 | plast(x, y, z) 79 | 80 | # Adjust x, y, and z to use in pall and pany 81 | x = c(TRUE, FALSE, NA, FALSE) 82 | y = c(TRUE, NA, TRUE, TRUE) 83 | z = c(TRUE, TRUE, FALSE, NA) 84 | 85 | # Example 5: pall 86 | pall(x, y, z, na.rm = FALSE) 87 | pall(x, y, z, na.rm = TRUE) 88 | 89 | # Example 6: pany 90 | pany(x, y, z, na.rm = FALSE) 91 | pany(x, y, z, na.rm = TRUE) 92 | 93 | # Example 7: pcount 94 | pcount(x, y, z, value = TRUE) 95 | pcountNA(x, y, z) 96 | 97 | # Example 8: list/data.frame as an input 98 | pprod(iris[,1:2]) 99 | psum(iris[,1:2]) 100 | pmean(iris[,1:2]) 101 | 102 | # Benchmarks 103 | # ---------- 104 | # n = 1e8L 105 | # x = rnorm(n) # 763 Mb 106 | # y = rnorm(n) 107 | # z = rnorm(n) 108 | # 109 | # microbenchmark::microbenchmark( 110 | # kit=psum(x, y, z, na.rm = TRUE), 111 | # base=rowSums(do.call(cbind,list(x, y, z)), na.rm=TRUE), 112 | # times = 5L, unit = "s" 113 | # ) 114 | # Unit: Second 115 | # expr min lq mean median uq max neval 116 | # kit 0.52 0.52 0.65 0.55 0.83 0.84 5 117 | # base 2.16 2.27 2.34 2.35 2.43 2.49 5 118 | # 119 | # x = sample(c(TRUE, FALSE, NA), n, TRUE) # 382 Mb 120 | # y = sample(c(TRUE, FALSE, NA), n, TRUE) 121 | # z = sample(c(TRUE, FALSE, NA), n, TRUE) 122 | # 123 | # microbenchmark::microbenchmark( 124 | # kit=pany(x, y, z, na.rm = TRUE), 125 | # base=sapply(1:n, function(i) any(x[i],y[i],z[i],na.rm=TRUE)), 126 | # times = 5L 127 | # ) 128 | # Unit: Second 129 | # expr min lq mean median uq max neval 130 | # kit 1.07 1.09 1.15 1.10 1.23 1.23 5 131 | # base 111.31 112.02 112.78 112.97 113.55 114.03 5 132 | } 133 | -------------------------------------------------------------------------------- /man/setlevels.Rd: -------------------------------------------------------------------------------- 1 | \name{setlevels} 2 | \alias{setlevels} 3 | \title{ Set levels of a factor object } 4 | \description{ 5 | A function to set levels of a factor object. 6 | } 7 | \usage{ 8 | setlevels(x, old=levels(x), new, skip_absent=FALSE) 9 | } 10 | \arguments{ 11 | \item{x}{ A factor object. } 12 | \item{old}{ A character vector containing the factor levels to be changed. Default is levels of \code{x}. } 13 | \item{new}{ The new character vector containing the factor levels to be added. } 14 | \item{skip_absent}{ Skip items in \code{old} that are missing (i.e. absent) in `names(x)`. Default \code{FALSE} halts with error if any are missing. } 15 | } 16 | \value{ 17 | Returns an invisible and modified factor object. 18 | } 19 | \author{Morgan Jacob} 20 | \examples{ 21 | x = factor(c("A", "A", "B", "B", "B", "C")) # factor vector with levels A B C 22 | setlevels(x, new = c("X", "Y", "Z")) # set factor levels to: X Y Z 23 | setlevels(x, old = "X", new = "A") # set factor levels X to A 24 | } 25 | -------------------------------------------------------------------------------- /man/shareData.Rd: -------------------------------------------------------------------------------- 1 | \name{shareData/getData/clearData} 2 | \alias{shareData} 3 | \alias{getData} 4 | \alias{clearData} 5 | \title{ Share Data between R Sessions} 6 | \description{ 7 | Experimental functions that enable the user to share a R object between 2 \R sessions. 8 | } 9 | \usage{ 10 | shareData(data, map_name, verbose=FALSE) 11 | getData(map_name, verbose=FALSE) 12 | clearData(x, verbose=FALSE) 13 | } 14 | \arguments{ 15 | \item{data}{ A \R object like a vector or a \code{data.frame}.} 16 | \item{map_name}{ A character. A name for the memory map location where to store the data.} 17 | \item{x}{ An external pointer like the one returned by function \code{shareData}.} 18 | \item{verbose}{ A logical value \code{TRUE} or \code{FALSE} to provide or not information to the user.} 19 | } 20 | \value{ 21 | \code{shareData} returns a external pointer. 22 | \code{getData} returns an \R object stored in the memory location \code{map_name}. 23 | \code{clearData} returns \code{TRUE} or \code{FALSE} depending on whether the data have been cleared in memory. 24 | } 25 | \author{Morgan Jacob} 26 | \examples{ 27 | # In R session 1: share data in memory 28 | # > x = shareData(mtcars,"share1") 29 | # 30 | # In R session 2: get data from session 1 31 | # > getData("share1") 32 | # 33 | # In R session 1: clear data in memory 34 | # > clearData(x) 35 | } 36 | -------------------------------------------------------------------------------- /man/topn.Rd: -------------------------------------------------------------------------------- 1 | \name{topn} 2 | \alias{topn} 3 | \title{ Top N values index} 4 | \description{ 5 | \code{topn} is used to get the indices of the few values of an input. This is an extension of \code{\link{which.max}}/\code{\link{which.min}} which provide \emph{only} the first such index. 6 | 7 | The output is the same as \code{order(vec)[1:n]}, but internally optimized not to sort the irrelevant elements of the input (and therefore much faster, for small \code{n} relative to input size). 8 | } 9 | \usage{ 10 | topn(vec, n=6L, decreasing=TRUE, hasna=TRUE, index=TRUE) 11 | } 12 | \arguments{ 13 | \item{vec}{ A numeric vector of type numeric or integer. Other types are not supported yet. } 14 | \item{n}{ A positive integer value greater or equal to 1. } 15 | \item{decreasing}{ A logical value (default \code{TRUE}) to indicate whether to sort \code{vec} in decreasing or increasing order. Equivalent to argument \code{decreasing} in function \code{base::order}. Please note that unlike \code{topn} default value in \code{base::order} is \code{FALSE}.} 16 | \item{hasna}{ A logical value (default \code{TRUE}) to indicate whether \code{vec} contains \code{NA} values. } 17 | \item{index}{ A logical value (default \code{TRUE}) to indicate whether indexes or values of \code{vec}. } 18 | } 19 | \value{ 20 | \code{integer} vector of indices of the most extreme (according to \code{decreasing}) \code{n} values in vector \code{vec}. Please note that for large value of \code{n}, i.e. 1500 or 2000 (depending on the value of \code{hasna}), \code{topn} will default to base R function \code{order}. 21 | } 22 | \author{Morgan Jacob} 23 | \examples{ 24 | x = rnorm(1e4) 25 | 26 | # Example 1: index of top 6 negative values 27 | topn(x, 6L, decreasing=FALSE) 28 | order(x)[1:6] 29 | 30 | # Example 2: index of top 6 positive values 31 | topn(x, 6L, decreasing = TRUE) 32 | order(x, decreasing=TRUE)[1:6] 33 | 34 | # Example 3: top 6 negative values 35 | topn(x, 6L, decreasing=FALSE, index=FALSE) 36 | sort(x)[1:6] 37 | 38 | # Benchmarks 39 | # ---------- 40 | # x = rnorm(1e7) # 76Mb 41 | # microbenchmark::microbenchmark( 42 | # topn=kit::topn(x, 6L), 43 | # order=order(x, decreasing=TRUE)[1:6], 44 | # times=10L 45 | # ) 46 | # Unit: milliseconds 47 | # expr min lq mean median uq max neval 48 | # topn 11 11 13 11 12 18 10 49 | # order 563 565 587 566 602 661 10 50 | # 51 | # microbenchmark::microbenchmark( 52 | # topn=kit::topn(x, 6L, decreasing=FALSE, index=FALSE), 53 | # sort=sort(x, partial=1:6)[1:6], 54 | # times=10L 55 | # ) 56 | # Unit: milliseconds 57 | # expr min lq mean median uq max neval 58 | # topn 11 11 11 11 12 12 10 59 | # sort 167 175 197 178 205 303 10 60 | } 61 | -------------------------------------------------------------------------------- /man/vswitch.Rd: -------------------------------------------------------------------------------- 1 | \name{vswitch/nswitch} 2 | \alias{vswitch} 3 | \alias{nswitch} 4 | \title{ Vectorised switch } 5 | \description{ 6 | \code{vswitch}/ \code{nswitch} is a vectorised version of \code{base} function \code{switch}. This function can also be seen as a particular case of function \code{nif}, as shown in examples below, and should also be faster. 7 | } 8 | \usage{ 9 | vswitch(x, values, outputs, default=NULL, 10 | nThread=getOption("kit.nThread"), 11 | checkEnc=TRUE) 12 | nswitch(x, ..., default=NULL, 13 | nThread=getOption("kit.nThread"), 14 | checkEnc=TRUE) 15 | } 16 | \arguments{ 17 | \item{x}{A vector or list.} 18 | \item{values}{A vector or list with values from \code{x} to match. Note that \code{x} and \code{values} must have the same class and attributes.} 19 | \item{outputs}{A list or vector with the outputs to return for every matching values. Each item of the list must be of length 1 or length of \code{x}. Note that if all list items are of length 1 then it might be simpler to use a vector.} 20 | \item{...}{A sequence of values and outputs in the following order \code{value1, output1, value2, output2, ..., valueN, outputN}. Values \code{value1, value2, ..., valueN} must all have length1, same type and attributes. Each \code{output} may either share length with \code{x} or be length 1. Please see Examples section for further details.} 21 | \item{default}{Values to return is no match. Must be a vector or list of length 1 or same length as \code{x}. Also, \code{default} must have the same type, class and attributes as items from \code{outputs}.} 22 | \item{nThread}{ A integer for the number of threads to use with \emph{openmp}. Default value is \code{getOption("kit.nThread")}.} 23 | \item{checkEnc}{ A logical value whether or not to check if \code{x} and \code{values} have comparable and consistent encoding. Default is \code{TRUE}.} 24 | } 25 | \value{ 26 | A vector or list of the same length as \code{x} with values from \code{outputs} items and from \code{default} if missing. 27 | } 28 | \seealso{ 29 | \code{\link{iif}} 30 | \code{\link{nif}} 31 | } 32 | \author{Morgan Jacob} 33 | \examples{ 34 | x = sample(c(10L, 20L, 30L, 40L, 50L, 60L), 3e2, replace=TRUE) 35 | 36 | # The below example of 'vswitch' is 37 | a1 = vswitch( 38 | x = x, 39 | values = c(10L,20L,30L,40L,50L), 40 | outputs = c(11L,21L,31L,41L,51L), 41 | default = NA_integer_ 42 | ) 43 | 44 | # equivalent to the following 'nif' example. 45 | # However for large vectors 'vswitch' should be faster. 46 | b1 = nif( 47 | x==10L, 11L, 48 | x==20L, 21L, 49 | x==30L, 31L, 50 | x==40L, 41L, 51 | x==50L, 51L, 52 | default = NA_integer_ 53 | ) 54 | identical(a1, b1) 55 | 56 | # nswitch can also be used as follows: 57 | c1 = nswitch(x, 58 | 10L, 11L, 59 | 20L, 21L, 60 | 30L, 31L, 61 | 40L, 41L, 62 | 50L, 51L, 63 | default = NA_integer_ 64 | ) 65 | identical(a1, c1) 66 | 67 | # Example with list in 'outputs' argument 68 | y = c(1, 0, NA_real_) 69 | a2 = vswitch( 70 | x = y, 71 | values = c(1, 0), 72 | outputs = list(c(2, 3, 4), c(5, 6, 7)), 73 | default = 8 74 | ) 75 | 76 | b2 = nif( 77 | y==1, c(2, 3, 4), 78 | y==0, c(5, 6, 7), 79 | default = 8 80 | ) 81 | 82 | identical(a2, b2) 83 | 84 | c2 = nswitch(y, 85 | 1, c(2, 3, 4), 86 | 0, c(5, 6, 7), 87 | default = 8 88 | ) 89 | 90 | identical(a2, c2) 91 | 92 | # Benchmarks 93 | # ---------- 94 | # x = sample(1:100, 3e8, TRUE) # 1.1Gb 95 | # microbenchmark::microbenchmark( 96 | # nif=kit::nif( 97 | # x==10L, 0L, 98 | # x==20L, 10L, 99 | # x==30L, 20L, 100 | # default= 30L 101 | # ), 102 | # vswitch=kit::vswitch( 103 | # x, c( 10L, 20L, 30L), list(0L, 10L, 20L), 30L 104 | # ), 105 | # times=10L 106 | # ) 107 | # Unit: seconds 108 | # expr min lq mean median uq max neval 109 | # nif 4.27 4.37 4.43 4.42 4.52 4.53 10 110 | # vswitch 1.08 1.09 1.20 1.10 1.43 1.44 10 # 1 thread 111 | # vswitch 0.46 0.57 0.57 0.58 0.58 0.60 10 # 2 threads 112 | } 113 | -------------------------------------------------------------------------------- /src/Makevars.in: -------------------------------------------------------------------------------- 1 | PKG_CFLAGS = @openmp_cflags@ 2 | PKG_LIBS = @openmp_libslags@ @realtime_libslags@ 3 | -------------------------------------------------------------------------------- /src/Makevars.win: -------------------------------------------------------------------------------- 1 | PKG_CFLAGS = $(SHLIB_OPENMP_CFLAGS) 2 | PKG_LIBS = $(SHLIB_OPENMP_CFLAGS) 3 | -------------------------------------------------------------------------------- /src/dupLen.c: -------------------------------------------------------------------------------- 1 | /* 2 | * kit : Useful R Functions Implemented in C 3 | * Copyright (C) 2020-2025 Morgan Jacob 4 | * 5 | * This program is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU General Public License 16 | * along with this program. If not, see . 17 | */ 18 | 19 | #include "kit.h" 20 | 21 | /* 22 | * Main Function 23 | */ 24 | 25 | SEXP dupLenR(SEXP x) { 26 | if (isDataFrame(x)) { 27 | SEXP ans = PROTECT(dupLenDataFrameR(x)); 28 | UNPROTECT(1); 29 | return ans; 30 | } 31 | if (isMatrix(x)) { 32 | SEXP ans = PROTECT(dupLenMatrixR(x)); 33 | UNPROTECT(1); 34 | return ans; 35 | } 36 | if (isArray(x)) { 37 | error("Arrays are not yet supported. (please raise a feature request if needed)"); 38 | } 39 | SEXP ans = PROTECT(dupLenVecR(x)); 40 | UNPROTECT(1); 41 | return ans; 42 | } 43 | 44 | /* 45 | * Data.Frame 46 | */ 47 | 48 | SEXP dupLenDataFrameR(SEXP x) { 49 | const SEXP *restrict px = SEXPPTR_RO(x); 50 | const R_xlen_t len_x = xlength(x); 51 | bool allT = true; 52 | const SEXPTYPE t0 = UTYPEOF(px[0]); 53 | for (int i = 1; i < len_x; ++i) { 54 | if (UTYPEOF(px[i]) != t0) { 55 | allT = false; 56 | break; 57 | } 58 | } 59 | if (allT) { 60 | SEXP output = PROTECT(dupLenMatrixR(PROTECT(dfToMatrix(x)))); 61 | UNPROTECT(2); 62 | return output; 63 | } 64 | const R_xlen_t len_i = xlength(px[0]); 65 | SEXP mlv = PROTECT(allocMatrix(INTSXP, (int)len_i, (int)len_x)); 66 | for (R_xlen_t i = 0; i < len_x; ++i) { 67 | memcpy(INTEGER(mlv)+i*len_i, INTEGER(PROTECT(dupVecIndexOnlyR(px[i]))), (unsigned)len_i*sizeof(int)); 68 | } 69 | UNPROTECT((int)len_x); 70 | const size_t n2 = 2U * (size_t) len_i; 71 | size_t M = 256; 72 | int K = 8; 73 | while (M < n2) { 74 | M *= 2; 75 | K++; 76 | } 77 | R_xlen_t count = 0; 78 | int *restrict h = (int*) R_Calloc(M, int); 79 | const int *restrict v = INTEGER(mlv); 80 | size_t id = 0; 81 | for (R_xlen_t i = 0; i < len_i; ++i) { 82 | R_xlen_t key = 0; 83 | for (R_xlen_t j = 0; j < len_x; ++j) { 84 | key ^= HASH(v[i+j*len_i],K)*97*(j+1); 85 | } 86 | id = HASH(key, K); 87 | while (h[id]) { 88 | for (R_xlen_t j = 0; j < len_x; ++j) { 89 | if (v[h[id]-1+j*len_i] != v[i+j*len_i]) { 90 | goto label1; 91 | } 92 | } 93 | goto label2; 94 | label1:; 95 | id++; id %= M; 96 | } 97 | h[id] = (int) i + 1; 98 | count++; 99 | label2:; 100 | } 101 | R_Free(h); 102 | UNPROTECT(1); 103 | return ScalarInteger(count); 104 | } 105 | 106 | /* 107 | * Matrix 108 | */ 109 | 110 | SEXP dupLenMatrixR(SEXP x) { 111 | const R_xlen_t len_x = ncols(x); 112 | const R_xlen_t len_i = nrows(x); 113 | const size_t n2 = 2U * (size_t) len_i; 114 | size_t M = 256; 115 | int K = 8; 116 | while (M < n2) { 117 | M *= 2; 118 | K++; 119 | } 120 | R_xlen_t count = 0; 121 | int *restrict h = (int*) R_Calloc(M, int); 122 | size_t id = 0; 123 | switch(UTYPEOF(x)) { 124 | case LGLSXP : { 125 | const int *restrict px = LOGICAL(x); 126 | for (R_xlen_t i = 0; i < len_i; ++i) { 127 | id = 0; 128 | for (R_xlen_t j = 0; j < len_x; ++j) { 129 | id ^= ((unsigned)(j+1) * ((px[i+j*len_i] == NA_LOGICAL) ? 2U : (size_t) px[i+j*len_i]))*97*(j+1); 130 | } 131 | id = HASH(id, K); 132 | while (h[id]) { 133 | for (R_xlen_t j = 0; j < len_x; ++j) { 134 | if (px[h[id]-1+j*len_i] != px[i+j*len_i]) { 135 | goto labelml1; // # nocov 136 | } 137 | } 138 | goto labelml2; 139 | labelml1:;// # nocov 140 | id++; id %= M; // # nocov 141 | } 142 | h[id] = (int) i + 1; 143 | count++; 144 | labelml2:; 145 | } 146 | } break; 147 | case INTSXP : { 148 | const int *restrict px = INTEGER(x); 149 | for (R_xlen_t i = 0; i < len_i; ++i) { 150 | R_xlen_t key = 0; 151 | for (R_xlen_t j = 0; j < len_x; ++j) { 152 | key ^= HASH(((px[i+j*len_i] == NA_INTEGER) ? 0 : px[i+j*len_i]),K)*97*(j+1); 153 | } 154 | id = HASH(key, K); 155 | while (h[id]) { 156 | for (R_xlen_t j = 0; j < len_x; ++j) { 157 | if (px[h[id]-1+j*len_i] != px[i+j*len_i]) { 158 | goto labelmi1; // # nocov 159 | } 160 | } 161 | goto labelmi2; 162 | labelmi1:; 163 | id++; id %= M; // # nocov 164 | } 165 | h[id] = (int) i + 1; 166 | count++; 167 | labelmi2:; 168 | } 169 | } break; 170 | case REALSXP : { 171 | const double *restrict px = REAL(x); 172 | union uno tpv; 173 | for (R_xlen_t i = 0; i < len_i; ++i) { 174 | R_xlen_t key = 0; 175 | for (R_xlen_t j = 0; j < len_x; ++j) { 176 | tpv.d = px[i+j*len_i]; 177 | key ^= HASH(tpv.u[0] + tpv.u[1],K)*97*(j+1); 178 | } 179 | tpv.d = key; 180 | id = HASH(tpv.u[0] + tpv.u[1], K); 181 | while (h[id]) { 182 | for (R_xlen_t j = 0; j < len_x; ++j) { 183 | if (!REQUAL(px[h[id]-1+j*len_i], px[i+j*len_i])) { 184 | goto labelmr1; 185 | } 186 | } 187 | goto labelmr2; 188 | labelmr1:; 189 | id++; id %= M; 190 | } 191 | h[id] = (int) i + 1; 192 | count++; 193 | labelmr2:; 194 | } 195 | } break; 196 | case CPLXSXP : { 197 | const Rcomplex *restrict px = COMPLEX(x); 198 | unsigned int u; 199 | union uno tpv; 200 | Rcomplex tmp; 201 | for (R_xlen_t i = 0; i < len_i; ++i) { 202 | R_xlen_t key = 0; 203 | for (R_xlen_t j = 0; j < len_x; ++j) { 204 | tmp.r = (px[i+j*len_i].r == 0.0) ? 0.0 : px[i+j*len_i].r; 205 | tmp.i = (px[i+j*len_i].i == 0.0) ? 0.0 : px[i+j*len_i].i; 206 | if (C_IsNA(tmp)) { 207 | tmp.r = tmp.i = NA_REAL; 208 | } else if (C_IsNaN(tmp)) { 209 | tmp.r = tmp.i = R_NaN; 210 | } 211 | tpv.d = tmp.r; 212 | u = tpv.u[0] ^ tpv.u[1]; 213 | tpv.d = tmp.i; 214 | u ^= tpv.u[0] ^ tpv.u[1]; 215 | key ^= HASH(u, K)*97*(j+1); 216 | } 217 | id = HASH(key, K); 218 | while (h[id]) { 219 | for (R_xlen_t j = 0; j < len_x; ++j) { 220 | if (!CEQUAL(px[h[id]-1+j*len_i], px[i+j*len_i])) { 221 | goto labelmc1; 222 | } 223 | } 224 | goto labelmc2; 225 | labelmc1:; 226 | id++; id %= M; 227 | } 228 | h[id] = (int) i + 1; 229 | count++; 230 | labelmc2:; 231 | } 232 | } break; 233 | case STRSXP : { 234 | const SEXP *restrict px = STRING_PTR_RO(x); 235 | for (R_xlen_t i = 0; i < len_i; ++i) { 236 | R_xlen_t key = 0; 237 | for (R_xlen_t j = 0; j < len_x; ++j) { 238 | key ^= HASH(((intptr_t) px[i+j*len_i] & 0xffffffff),K)*97*(j+1); 239 | } 240 | id = HASH(key, K); 241 | while (h[id]) { 242 | for (R_xlen_t j = 0; j < len_x; ++j) { 243 | if (px[h[id]-1+j*len_i] != px[i+j*len_i]) { 244 | goto labelms1; // # nocov 245 | } 246 | } 247 | goto labelms2; 248 | labelms1:; // # nocov 249 | id++; id %= M; // # nocov 250 | } 251 | h[id] = (int) i + 1; 252 | count++; 253 | labelms2:; 254 | } 255 | } break; 256 | default: { 257 | R_Free(h); 258 | error("Matrix of type %s are not supported.", type2char(UTYPEOF(x))); 259 | } 260 | } 261 | R_Free(h); 262 | return ScalarInteger(count); 263 | } 264 | 265 | /* 266 | * Vector 267 | */ 268 | 269 | SEXP dupLenVecR(SEXP x) { 270 | if (isFactor(x)) { 271 | const int len = LENGTH(PROTECT(getAttrib(x, R_LevelsSymbol))); 272 | UNPROTECT(1); 273 | bool *restrict count = (bool*)R_Calloc(len+1,bool); 274 | const int *restrict px = INTEGER(x); 275 | const int xlen = LENGTH(x); 276 | int j = 0; 277 | for (int i = 0; i < xlen; ++i) { 278 | if (!count[px[i]]) { 279 | j++; 280 | if (j == len) 281 | break; 282 | count[px[i]] = true; 283 | } 284 | } 285 | R_Free(count); 286 | return ScalarInteger(j); 287 | } 288 | if (isLogical(x)) { 289 | bool *restrict count = (bool*)R_Calloc(3,bool); 290 | const int *restrict px = LOGICAL(x); 291 | const int xlen = LENGTH(x); 292 | int j = 0; 293 | for (int i = 0; i < xlen; ++i) { 294 | const int cs = px[i] == NA_LOGICAL ? 2 : px[i]; 295 | if (!count[cs]) { 296 | j++; 297 | if (j == 3) 298 | break; 299 | count[cs] = true; 300 | } 301 | } 302 | R_Free(count); 303 | return ScalarInteger(j); 304 | } 305 | const R_xlen_t n = xlength(x); 306 | const SEXPTYPE tx = UTYPEOF(x); 307 | int K; 308 | size_t M; 309 | if (tx == INTSXP || tx == STRSXP || tx == REALSXP || tx == CPLXSXP ) { 310 | if(n >= 1073741824) { 311 | error("Length of 'x' is too large. (Long vector not supported yet)"); // # nocov 312 | } 313 | const size_t n2 = 2U * (size_t) n; 314 | M = 256; 315 | K = 8; 316 | while (M < n2) { 317 | M *= 2; 318 | K++; 319 | } 320 | } else { 321 | error("Type %s is not supported.", type2char(tx)); // # nocov 322 | } 323 | R_xlen_t count = 0; 324 | int *restrict h = (int*)R_Calloc(M, int); 325 | switch (tx) { 326 | case INTSXP: { 327 | const int *restrict px = INTEGER(x); 328 | size_t id = 0; 329 | for (int i = 0; i < n; ++i) { 330 | id = (px[i] == NA_INTEGER) ? 0 : HASH(px[i], K); 331 | while (h[id]) { 332 | if (px[h[id]-1]==px[i]) { 333 | goto ibl; 334 | } 335 | id++; id %= M; // # nocov 336 | } 337 | h[id] = (int) i + 1; 338 | count++; 339 | ibl:; 340 | } 341 | } break; 342 | case REALSXP: { 343 | const double *restrict px = REAL(x); 344 | size_t id = 0; 345 | union uno tpv; 346 | for (int i = 0; i < n; ++i) { 347 | tpv.d = R_IsNA(px[i]) ? NA_REAL : (R_IsNaN(px[i]) ? R_NaN :px[i]); 348 | id = HASH(tpv.u[0] + tpv.u[1], K); 349 | while (h[id]) { 350 | if (REQUAL(px[h[id]-1], px[i])) { 351 | goto rbl; 352 | } 353 | id++; id %= M; 354 | } 355 | h[id] = (int) i + 1; 356 | count++; 357 | rbl:; 358 | } 359 | } break; 360 | case CPLXSXP: { 361 | const Rcomplex *restrict px = COMPLEX(x); 362 | size_t id = 0; 363 | unsigned int u; 364 | union uno tpv; 365 | Rcomplex tmp; 366 | for (int i = 0; i < n; ++i) { 367 | tmp.r = (px[i].r == 0.0) ? 0.0 : px[i].r; 368 | tmp.i = (px[i].i == 0.0) ? 0.0 : px[i].i; 369 | if (C_IsNA(tmp)) { 370 | tmp.r = tmp.i = NA_REAL; 371 | } else if (C_IsNaN(tmp)) { 372 | tmp.r = tmp.i = R_NaN; 373 | } 374 | tpv.d = tmp.r; 375 | u = tpv.u[0] ^ tpv.u[1]; 376 | tpv.d = tmp.i; 377 | u ^= tpv.u[0] ^ tpv.u[1]; 378 | id = HASH(u, K); 379 | while (h[id]) { 380 | if (CEQUAL(px[h[id] - 1],px[i])) { 381 | goto cbl; 382 | } 383 | id++; id %= M; 384 | } 385 | h[id] = (int) i + 1; 386 | count++; 387 | cbl:; 388 | } 389 | } break; 390 | case STRSXP: { 391 | const SEXP *restrict px = STRING_PTR_RO(x); 392 | size_t id = 0; 393 | for (int i = 0; i < n; ++i) { 394 | id = HASH(((intptr_t) px[i] & 0xffffffff), K); 395 | while (h[id]) { 396 | if (px[h[id] - 1]==px[i]) { 397 | goto sbl; 398 | } 399 | id++; id %= M; 400 | } 401 | h[id] = (int) i + 1; 402 | count++; 403 | sbl:; 404 | } 405 | } break; 406 | } 407 | R_Free(h); 408 | return ScalarInteger(count); 409 | } 410 | -------------------------------------------------------------------------------- /src/fpos.c: -------------------------------------------------------------------------------- 1 | /* 2 | * kit : Useful R Functions Implemented in C 3 | * Copyright (C) 2020-2025 Morgan Jacob 4 | * 5 | * This program is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU General Public License 16 | * along with this program. If not, see . 17 | */ 18 | 19 | #include "kit.h" 20 | 21 | SEXP fposR(SEXP ndle, SEXP hsk, SEXP all, SEXP overlap) { 22 | SEXP ans = R_NilValue; 23 | int nprotect = 0; 24 | if (isS4(hsk) || isS4(ndle)) { 25 | error("S4 class objects are not supported."); 26 | } 27 | if (isDataFrame(ndle) || isDataFrame(hsk)) { 28 | error("Please note that data.frame(s) are not supported."); 29 | } 30 | if (!R_compute_identical(PROTECT(GetArrayDimnames(ndle)), R_NilValue, 0)) { 31 | error("Arrays are not supported for argument 'needle'."); 32 | } 33 | UNPROTECT(1); 34 | if (!R_compute_identical(PROTECT(GetArrayDimnames(hsk)), R_NilValue, 0)) { 35 | error("Arrays are not supported for argument 'haystack'."); 36 | } 37 | UNPROTECT(1); 38 | if (isMatrix(ndle) || isMatrix(hsk)) { 39 | ans = PROTECT(fposMatR(ndle, hsk, all, overlap)); 40 | nprotect++; 41 | } else if (isVector(ndle) && isVector(hsk)) { 42 | ans = PROTECT(fposVectR(ndle, hsk, all, overlap)); 43 | nprotect++; 44 | } 45 | UNPROTECT(nprotect); 46 | return ans; 47 | } 48 | 49 | SEXP fposMatR(SEXP ndle, SEXP hsk, SEXP all, SEXP overlap) { 50 | if (!IS_BOOL(all)) { 51 | error("Argument 'all' must be TRUE or FALSE and length 1."); 52 | } 53 | if (!IS_BOOL(overlap)) { 54 | error("Argument 'overlap' must be TRUE or FALSE and length 1."); 55 | } 56 | SEXPTYPE thsk = UTYPEOF(hsk); 57 | SEXPTYPE tndle = UTYPEOF(ndle); 58 | if (thsk != INTSXP && thsk != REALSXP && thsk != LGLSXP && 59 | thsk != CPLXSXP && thsk != STRSXP) { 60 | error("Type %s for 'haystack' is not supported.", type2char(thsk)); 61 | } 62 | if (tndle != INTSXP && tndle != REALSXP && tndle != LGLSXP && 63 | tndle != CPLXSXP && tndle != STRSXP) { 64 | error("Type %s for 'needle' is not supported.", type2char(tndle)); 65 | } 66 | const int n = nrows(hsk); 67 | const int m = ncols(hsk); 68 | const int k = nrows(ndle); 69 | const int l = ncols(ndle); 70 | if (k > n || l > m) { 71 | error("One of the dimension of the small matrix is greater than the large matrix."); 72 | } 73 | int nprotect = 0; 74 | if (thsk != tndle) { 75 | if (tndle == INTSXP && thsk == REALSXP) { 76 | ndle = PROTECT(coerceVector(ndle, thsk)); nprotect++; 77 | tndle = thsk; 78 | } else if (tndle == REALSXP && thsk == INTSXP) { 79 | hsk = PROTECT(coerceVector(hsk, tndle)); nprotect++; 80 | thsk = tndle; 81 | } else { 82 | error("Haystack type (%s) and needle type (%s) are different." 83 | " Please make sure that they have the same type.", 84 | type2char(thsk), type2char(tndle)); 85 | } 86 | } 87 | const int lim_x = n - k + 1; 88 | const int lim_y = m - l + 1; 89 | const int sz = lim_x * lim_y; 90 | int i, j, p, q, id, tj = 0, ti = 0; 91 | int pos_h = 0, pos_n = 0, x = 0; 92 | SEXP col = PROTECT(allocVector(INTSXP, sz)); nprotect++; 93 | SEXP row = PROTECT(allocVector(INTSXP, sz)); nprotect++; 94 | int *restrict pcol = INTEGER(col); 95 | int *restrict prow = INTEGER(row); 96 | const int pall = !asLogical(all); 97 | const int poverlap = !asLogical(overlap); 98 | switch(thsk) { 99 | case LGLSXP: { 100 | const int *restrict int_h = LOGICAL(hsk); 101 | const int *restrict int_n = LOGICAL(ndle); 102 | for (i = 0; i < lim_y; ++i) { 103 | for (j = 0; j < lim_x; ++j) { 104 | id = 1; 105 | if (i < ti && j < tj) { 106 | continue; 107 | } 108 | for (p = 0; p < l; ++p) { 109 | pos_h = (i+p) * n + j; 110 | pos_n = p * k; 111 | for (q = 0; q < k; ++q) { 112 | if (int_h[pos_h + q] != int_n[pos_n + q]) { 113 | id = 0; 114 | break; 115 | } 116 | } 117 | if (!id) { 118 | break; 119 | } 120 | } 121 | if (id) { 122 | prow[x] = j + 1; 123 | pcol[x++] = i + 1; 124 | if (pall) { 125 | goto label; 126 | } 127 | if (poverlap) { 128 | ti = i + l; 129 | tj = j + k; 130 | } 131 | } 132 | } 133 | } 134 | } break; 135 | case INTSXP: { 136 | const int *restrict int_h = INTEGER(hsk); 137 | const int *restrict int_n = INTEGER(ndle); 138 | for (i = 0; i < lim_y; ++i) { 139 | for (j = 0; j < lim_x; ++j) { 140 | id = 1; 141 | if (i < ti && j < tj) { 142 | continue; 143 | } 144 | for (p = 0; p < l; ++p) { 145 | pos_h = (i+p) * n + j; 146 | pos_n = p * k; 147 | for (q = 0; q < k; ++q) { 148 | if (int_h[pos_h + q] != int_n[pos_n + q]) { 149 | id = 0; 150 | break; 151 | } 152 | } 153 | if (!id) { 154 | break; 155 | } 156 | } 157 | if (id) { 158 | prow[x] = j + 1; 159 | pcol[x++] = i + 1; 160 | if (pall) { 161 | goto label; 162 | } 163 | if (poverlap) { 164 | ti = i + l; 165 | tj = j + k; 166 | } 167 | } 168 | } 169 | } 170 | } break; 171 | case REALSXP: { 172 | const double *restrict dbl_h = REAL(hsk); 173 | const double *restrict dbl_n = REAL(ndle); 174 | for (i = 0; i < lim_y; ++i) { 175 | for (j = 0; j < lim_x; ++j) { 176 | id = 1; 177 | if (i < ti && j < tj) { 178 | continue; 179 | } 180 | for (p = 0; p < l; ++p) 181 | { 182 | pos_h = (i+p) * n + j; 183 | pos_n = p * k; 184 | for (q = 0; q < k; ++q) { 185 | if (!REQUAL(dbl_h[pos_h + q], dbl_n[pos_n + q])) { 186 | id = 0; 187 | break; 188 | } 189 | } 190 | if (!id) { 191 | break; 192 | } 193 | } 194 | if (id) { 195 | prow[x] = j + 1; 196 | pcol[x++] = i + 1; 197 | if (pall) { 198 | goto label; 199 | } 200 | if (poverlap) { 201 | ti = i + l; 202 | tj = j + k; 203 | } 204 | } 205 | } 206 | } 207 | } break; 208 | case CPLXSXP: { 209 | const Rcomplex *restrict cpl_h = COMPLEX(hsk); 210 | const Rcomplex *restrict cpl_n = COMPLEX(ndle); 211 | for (i = 0; i < lim_y; ++i) { 212 | for (j = 0; j < lim_x; ++j) { 213 | id = 1; 214 | if (i < ti && j < tj) { 215 | continue; 216 | } 217 | for (p = 0; p < l; ++p) { 218 | pos_h = (i+p) * n + j; 219 | pos_n = p * k; 220 | for (q = 0; q < k; ++q) { 221 | if (!CEQUAL(cpl_h[pos_h + q], cpl_n[pos_n + q])) { 222 | id = 0; 223 | break; 224 | } 225 | } 226 | if (!id) { 227 | break; 228 | } 229 | } 230 | if (id) { 231 | prow[x] = j + 1; 232 | pcol[x++] = i + 1; 233 | if (pall) { 234 | goto label; 235 | } 236 | if (poverlap) { 237 | ti = i + l; 238 | tj = j + k; 239 | } 240 | } 241 | } 242 | } 243 | } break; 244 | case STRSXP: { 245 | for (i = 0; i < lim_y; ++i) { 246 | for (j = 0; j < lim_x; ++j) { 247 | id = 1; 248 | if (i < ti && j < tj) { 249 | continue; 250 | } 251 | for (p = 0; p < l; ++p) { 252 | pos_h = (i+p) * n + j; 253 | pos_n = p * k; 254 | for (q = 0; q < k; ++q) { 255 | if (RCHAR(hsk, pos_h + q) != RCHAR(ndle, pos_n + q)) { 256 | id = 0; 257 | break; 258 | } 259 | } 260 | if (!id) { 261 | break; 262 | } 263 | } 264 | if (id) { 265 | prow[x] = j + 1; 266 | pcol[x++] = i + 1; 267 | if (pall) { 268 | goto label; 269 | } 270 | if (poverlap) { 271 | ti = i + l; 272 | tj = j + k; 273 | } 274 | } 275 | } 276 | } 277 | } break; 278 | } 279 | label:; 280 | if (x == 0) { 281 | UNPROTECT(nprotect); 282 | return R_NilValue; 283 | } 284 | SEXP ans = PROTECT(allocMatrix(INTSXP, x, 2)); nprotect++; 285 | memcpy(INTEGER(ans), prow, (unsigned)x*sizeof(int)); 286 | memcpy(INTEGER(ans)+x, pcol, (unsigned)x*sizeof(int)); 287 | UNPROTECT(nprotect); 288 | return ans; 289 | } 290 | 291 | SEXP fposVectR(SEXP ndle, SEXP hsk, SEXP all, SEXP overlap) { 292 | if (!IS_BOOL(all)) { 293 | error("Argument 'all' must be TRUE or FALSE and length 1."); 294 | } 295 | if (!IS_BOOL(overlap)) { 296 | error("Argument 'overlap' must be TRUE or FALSE and length 1."); 297 | } 298 | SEXPTYPE thsk = UTYPEOF(hsk); 299 | SEXPTYPE tndle = UTYPEOF(ndle); 300 | if (thsk != INTSXP && thsk != REALSXP && thsk != LGLSXP && 301 | thsk != CPLXSXP && thsk != STRSXP) { 302 | error("Type %s for 'haystack' is not supported.", type2char(thsk)); 303 | } 304 | if (tndle != INTSXP && tndle != REALSXP && tndle != LGLSXP && 305 | tndle != CPLXSXP && tndle != STRSXP) { 306 | error("Type %s for 'needle' is not supported.", type2char(tndle)); 307 | } 308 | const R_xlen_t n = xlength(hsk); 309 | const R_xlen_t k = xlength(ndle); 310 | if (k > n) { 311 | error("The 'needle' vector length is greater than the 'haystack' vector length."); 312 | } 313 | int nprotect = 0; 314 | if (thsk != tndle) { 315 | if (tndle == INTSXP && thsk == REALSXP) { 316 | ndle = PROTECT(coerceVector(ndle, thsk)); nprotect++; 317 | tndle = thsk; 318 | } else if (tndle == REALSXP && thsk == INTSXP) { 319 | hsk = PROTECT(coerceVector(hsk, tndle)); nprotect++; 320 | thsk = tndle; 321 | } else { 322 | error("Haystack type (%s) and needle type (%s) are different." 323 | " Please make sure that they have the same type.", 324 | type2char(thsk), type2char(tndle)); 325 | } 326 | } 327 | const R_xlen_t lim_x = n - k + 1; 328 | R_xlen_t ti = 0, x = 0; 329 | SEXP row = PROTECT(allocVector(INTSXP, lim_x)); nprotect++; 330 | int *restrict prow = INTEGER(row); 331 | const int pall = !asLogical(all); 332 | const int poverlap = !asLogical(overlap); 333 | switch(thsk) { 334 | case LGLSXP: { 335 | const int *restrict int_h = LOGICAL(hsk); 336 | const int *restrict int_n = LOGICAL(ndle); 337 | for (int i = 0; i < lim_x; ++i) { 338 | if (i < ti) { 339 | continue; 340 | } 341 | for (int j = 0; j < k; ++j) { 342 | if (int_h[i+j] != int_n[j]) { 343 | goto lbl; 344 | } 345 | } 346 | prow[x++] = i + 1; 347 | if (pall) { 348 | break; 349 | } 350 | if (poverlap) { 351 | ti = i + k; 352 | } 353 | lbl:; 354 | } 355 | } break; 356 | case INTSXP: { 357 | const int *restrict int_h = INTEGER(hsk); 358 | const int *restrict int_n = INTEGER(ndle); 359 | for (int i = 0; i < lim_x; ++i) { 360 | if (i < ti) { 361 | continue; 362 | } 363 | for (int j = 0; j < k; ++j) { 364 | if (int_h[i+j] != int_n[j]) { 365 | goto lbi; 366 | } 367 | } 368 | prow[x++] = i + 1; 369 | if (pall) { 370 | break; 371 | } 372 | if (poverlap) { 373 | ti = i + k; 374 | } 375 | lbi:; 376 | } 377 | } break; 378 | case REALSXP: { 379 | const double *restrict dbl_h = REAL(hsk); 380 | const double *restrict dbl_n = REAL(ndle); 381 | for (int i = 0; i < lim_x; ++i) { 382 | if (i < ti) { 383 | continue; 384 | } 385 | for (int j = 0; j < k; ++j) { 386 | if (!REQUAL(dbl_h[i+j], dbl_n[j])) { 387 | goto lbr; 388 | } 389 | } 390 | prow[x++] = i + 1; 391 | if (pall) { 392 | break; 393 | } 394 | if (poverlap) { 395 | ti = i + k; 396 | } 397 | lbr:; 398 | } 399 | } break; 400 | case CPLXSXP: { 401 | const Rcomplex *restrict cpl_h = COMPLEX(hsk); 402 | const Rcomplex *restrict cpl_n = COMPLEX(ndle); 403 | for (int i = 0; i < lim_x; ++i) { 404 | if (i < ti) { 405 | continue; 406 | } 407 | for (int j = 0; j < k; ++j) { 408 | if (!CEQUAL(cpl_h[i+j], cpl_n[j])) { 409 | goto lbc; 410 | } 411 | } 412 | prow[x++] = i + 1; 413 | if (pall) { 414 | break; 415 | } 416 | if (poverlap) { 417 | ti = i + k; 418 | } 419 | lbc:; 420 | } 421 | } break; 422 | case STRSXP: { 423 | for (int i = 0; i < lim_x; ++i) { 424 | if (i < ti) { 425 | continue; 426 | } 427 | for (int j = 0; j < k; ++j) { 428 | if (RCHAR(hsk, i+j) != RCHAR(ndle, j)) { 429 | goto lbs; 430 | } 431 | } 432 | prow[x++] = i + 1; 433 | if (pall) { 434 | break; 435 | } 436 | if (poverlap) { 437 | ti = i + k; 438 | } 439 | lbs:; 440 | } 441 | } break; 442 | } 443 | if (x == 0) { 444 | UNPROTECT(nprotect); 445 | return R_NilValue; 446 | } 447 | SEXP ans = PROTECT(allocVector(INTSXP, x)); nprotect++; 448 | memcpy(INTEGER(ans), prow, (unsigned)x*sizeof(int)); 449 | UNPROTECT(nprotect); 450 | return ans; 451 | } 452 | -------------------------------------------------------------------------------- /src/iif.c: -------------------------------------------------------------------------------- 1 | /* 2 | * kit : Useful R Functions Implemented in C 3 | * Copyright (C) 2020-2025 Morgan Jacob 4 | * 5 | * This program is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU General Public License 16 | * along with this program. If not, see . 17 | */ 18 | 19 | #include "kit.h" 20 | 21 | #define IIF_LOOP(a, b, n) OMP_PARALLEL_FOR(nth) \ 22 | for (ssize_t i=0; i1) { \ 27 | if(len_b>1) { \ 28 | if(len_na>1) { \ 29 | IIF_LOOP(pa[i], pb[i], pna[i]) \ 30 | } else { \ 31 | IIF_LOOP(pa[i], pb[i], pna[0]) \ 32 | } \ 33 | } else { \ 34 | if(len_na>1) { \ 35 | IIF_LOOP(pa[i], pb[0], pna[i]) \ 36 | } else { \ 37 | IIF_LOOP(pa[i], pb[0], pna[0]) \ 38 | } \ 39 | } \ 40 | } else { \ 41 | if(len_b>1) { \ 42 | if(len_na>1) { \ 43 | IIF_LOOP(pa[0], pb[i], pna[i]) \ 44 | } else { \ 45 | IIF_LOOP(pa[0], pb[i], pna[0]) \ 46 | } \ 47 | } else { \ 48 | if(len_na>1) { \ 49 | IIF_LOOP(pa[0], pb[0], pna[i]) \ 50 | } else { \ 51 | IIF_LOOP(pa[0], pb[0], pna[0]) \ 52 | } \ 53 | } \ 54 | } \ 55 | } else { \ 56 | if(len_a>1) { \ 57 | if(len_b>1) { \ 58 | IIF_LOOP(pa[i], pb[i], x) \ 59 | } else { \ 60 | IIF_LOOP(pa[i], pb[0], x) \ 61 | } \ 62 | } else { \ 63 | if(len_b>1) { \ 64 | IIF_LOOP(pa[0], pb[i], x) \ 65 | } else { \ 66 | IIF_LOOP(pa[0], pb[0], x) \ 67 | } \ 68 | } \ 69 | 70 | SEXP iifR(SEXP l, SEXP a, SEXP b, SEXP na, SEXP tprom, SEXP nthreads) { 71 | if (!isLogical(l)) { 72 | error("Argument 'test' must be logical."); 73 | } 74 | if (isS4(a) || isS4(b)) { 75 | error("S4 class objects are not supported."); 76 | } 77 | if (!IS_BOOL(tprom)) { 78 | error("Argument 'tprom' must be either FALSE or TRUE and length 1."); 79 | } 80 | int nth = asInteger(nthreads); 81 | nth = nth > max_thread ? max_thread : (nth < min_thread ? min_thread : nth); //revisit this 82 | const R_xlen_t len_l = xlength(l); 83 | const R_xlen_t len_a = xlength(a); 84 | const R_xlen_t len_b = xlength(b); 85 | const R_xlen_t len_na = xlength(na); 86 | SEXPTYPE ta = UTYPEOF(a); 87 | SEXPTYPE tb = UTYPEOF(b); 88 | SEXPTYPE tn = UTYPEOF(na); 89 | const bool na_non_null = !isNull(na); 90 | const bool tp = LOGICAL(tprom)[0] == 0; 91 | int nprotect = 0; 92 | if (ta != tb) { 93 | if (tp) { 94 | error("'yes' is of type %s but 'no' is of type %s. Please make sure that both arguments have the same type.", type2char(ta), type2char(tb)); 95 | } else { 96 | if(IS_VALID_TYPE(ta) && IS_VALID_TYPE(tb)) { 97 | if(ta < tb) { 98 | SEXP tmp = PROTECT(coerceVector(a, tb)); nprotect++; 99 | a = tmp; 100 | ta = tb; 101 | copyMostAttrib(b, a); 102 | } else { 103 | SEXP tmp = PROTECT(coerceVector(b, ta)); nprotect++; 104 | b = tmp; 105 | tb = ta; 106 | copyMostAttrib(a, b); 107 | } 108 | } else { 109 | if (!IS_VALID_TYPE(ta)) { 110 | error("Type %s (argument 'yes') is not supported.", type2char(ta)); 111 | } else { 112 | error("Type %s (argument 'no') is not supported.", type2char(tb)); 113 | } 114 | } 115 | } 116 | } 117 | if (len_a!=1 && len_a!=len_l) { 118 | error("Length of 'yes' is %zu but must be 1 or length of 'test' (%zu).", len_a, len_l); 119 | } 120 | if (len_b!=1 && len_b!=len_l) { 121 | error("Length of 'no' is %zu but must be 1 or length of 'test' (%zu).", len_b, len_l); 122 | } 123 | SEXP class_a = PROTECT(getAttrib(a, R_ClassSymbol)); nprotect++; 124 | SEXP class_b = PROTECT(getAttrib(b, R_ClassSymbol)); nprotect++; 125 | const bool same_class = !R_compute_identical(class_a, class_b, 0); 126 | if (tp) { 127 | if (same_class) { 128 | error("'yes' has different class than 'no'. Please make sure that both arguments have the same class."); 129 | } 130 | } else { 131 | if (!same_class) { 132 | copyMostAttrib(a, b); 133 | } 134 | } 135 | if (na_non_null) { 136 | if (len_na!=1 && len_na!=len_l) { 137 | error("Length of 'na' is %zu but must be 1 or length of 'test' (%zu).", len_na, len_l); 138 | } 139 | if (tn != ta) { 140 | if (tp) { 141 | error("'yes' is of type %s but 'na' is of type %s. Please make sure that both arguments have the same type.", type2char(ta), type2char(tn)); 142 | } else if (tn > ta) { 143 | error("Type of 'na' (%s) is higher than %s (highest type of 'yes' and 'no'). Please make sure that it is at lower or the same.", type2char(tn), type2char(ta)); 144 | } else { 145 | SEXP tmp = PROTECT(coerceVector(na, ta)); nprotect++; 146 | na = tmp; 147 | tn = ta; 148 | copyMostAttrib(a, na); 149 | } 150 | } 151 | SEXP class_na = PROTECT(getAttrib(na, R_ClassSymbol)); nprotect++; 152 | if (tp) { 153 | if (!R_compute_identical(class_a, class_na, 0)) { 154 | error("'yes' has different class than 'na'. Please make sure that both arguments have the same class."); 155 | } 156 | } else { 157 | if (!R_compute_identical(class_a, class_na, 0)) { 158 | copyMostAttrib(a, na); 159 | } 160 | } 161 | } 162 | if (isFactor(a)) { 163 | SEXP level_a = PROTECT(getAttrib(a, R_LevelsSymbol)); nprotect++; 164 | SEXP level_b = PROTECT(getAttrib(b, R_LevelsSymbol)); nprotect++; 165 | if (!R_compute_identical(level_a, level_b, 0)) { 166 | error("'yes' and 'no' are both type factor but their levels are different."); 167 | } 168 | if (na_non_null) { 169 | SEXP level_na = PROTECT(getAttrib(na, R_LevelsSymbol)); nprotect++; 170 | if (!R_compute_identical(level_a, level_na, 0)) { 171 | error("'yes' and 'na' are both type factor but their levels are different."); 172 | } 173 | } 174 | } 175 | const int *restrict pl = LOGICAL(l); 176 | SEXP ans = PROTECT(allocVector(ta, len_l)); nprotect++; 177 | copyMostAttrib(a, ans); 178 | switch(ta) { 179 | case LGLSXP: { 180 | int *restrict pans = LOGICAL(ans); 181 | const int *restrict pa = LOGICAL(a); 182 | const int *restrict pb = LOGICAL(b); 183 | if(na_non_null) { 184 | const int *restrict pna = LOGICAL(na); 185 | IIF_LOGIC(NA_LOGICAL) 186 | } 187 | } break; 188 | case INTSXP: { 189 | int *restrict pans = INTEGER(ans); 190 | const int *restrict pa = INTEGER(a); 191 | const int *restrict pb = INTEGER(b); 192 | if(na_non_null) { 193 | const int *restrict pna = INTEGER(na); 194 | IIF_LOGIC(NA_INTEGER) 195 | } 196 | } break; 197 | case REALSXP: { 198 | double *restrict pans = REAL(ans); 199 | const double *restrict pa = REAL(a); 200 | const double *restrict pb = REAL(b); 201 | if(na_non_null) { 202 | const double *restrict pna = REAL(na); 203 | IIF_LOGIC(NA_REAL) 204 | } 205 | } break; 206 | case CPLXSXP : { 207 | Rcomplex *restrict pans = COMPLEX(ans); 208 | const Rcomplex *restrict pa = COMPLEX(a); 209 | const Rcomplex *restrict pb = COMPLEX(b); 210 | Rcomplex NA_CPLX; NA_CPLX.r = NA_REAL; NA_CPLX.i = NA_REAL; // deal with that across all functions 211 | if(na_non_null) { 212 | const Rcomplex *restrict pna = COMPLEX(na); 213 | IIF_LOGIC(NA_CPLX) 214 | } 215 | } break; 216 | case STRSXP : { 217 | const ssize_t amask = len_a>1 ? SSIZE_MAX : 0; 218 | const ssize_t bmask = len_b>1 ? SSIZE_MAX : 0; 219 | const ssize_t namask = len_na>1 ? SSIZE_MAX : 0; 220 | const SEXP *restrict pa = STRING_PTR_RO(a); 221 | const SEXP *restrict pb = STRING_PTR_RO(b); 222 | const SEXP *restrict pna = na_non_null ? STRING_PTR_RO(na) : NULL; 223 | for (ssize_t i=0; i1 ? SSIZE_MAX : 0; 229 | const ssize_t bmask = len_b>1 ? SSIZE_MAX : 0; 230 | const ssize_t namask = len_na>1 ? SSIZE_MAX : 0; 231 | const SEXP *restrict pa = SEXPPTR_RO(a); 232 | const SEXP *restrict pb = SEXPPTR_RO(b); 233 | const SEXP *restrict pna = na_non_null ? SEXPPTR_RO(na) : NULL; 234 | for (ssize_t i=0; i1 ? SSIZE_MAX : 0; 347 | switch(type1) { 348 | case LGLSXP: { 349 | const int *restrict pouts = LOGICAL(outs); 350 | int *restrict pans = LOGICAL(ans); 351 | if (imask) { 352 | for (ssize_t j=0; j1 ? SSIZE_MAX : 0; 370 | const int *restrict pna = nonna ? LOGICAL(na) : NULL; 371 | for (ssize_t j=0; j1 ? SSIZE_MAX : 0; 398 | const int *restrict pna = nonna ? INTEGER(na) : NULL; 399 | for (ssize_t j=0; j1 ? SSIZE_MAX : 0; 426 | const double *restrict pna = nonna ? REAL(na) : NULL; 427 | for (ssize_t j=0; j1 ? SSIZE_MAX : 0; 454 | Rcomplex NA_CPLX; NA_CPLX.r = NA_REAL; NA_CPLX.i = NA_REAL; // deal with that across all functions 455 | const Rcomplex *restrict pna = nonna ? COMPLEX(na) : NULL; 456 | for (ssize_t j=0; j1 ? SSIZE_MAX : 0; 482 | const SEXP *restrict pna = nonna ? STRING_PTR_RO(na) : NULL; 483 | for (ssize_t j=0; j1 ? SSIZE_MAX : 0; 509 | const SEXP *restrict pna = nonna ? SEXPPTR_RO(na) : NULL; 510 | for (ssize_t j=0; j1 ? SSIZE_MAX : 0; 621 | switch(type1) { 622 | case LGLSXP: { 623 | const int *restrict pouts = LOGICAL(outs); 624 | int *restrict pans = LOGICAL(ans); 625 | if (imask) { 626 | for (ssize_t j=0; j1 ? SSIZE_MAX : 0; 644 | const int *restrict pna = nonna ? LOGICAL(na) : NULL; 645 | for (ssize_t j=0; j1 ? SSIZE_MAX : 0; 672 | const int *restrict pna = nonna ? INTEGER(na) : NULL; 673 | for (ssize_t j=0; j1 ? SSIZE_MAX : 0; 700 | const double *restrict pna = nonna ? REAL(na) : NULL; 701 | for (ssize_t j=0; j1 ? SSIZE_MAX : 0; 728 | Rcomplex NA_CPLX; NA_CPLX.r = NA_REAL; NA_CPLX.i = NA_REAL; // deal with that across all functions 729 | const Rcomplex *restrict pna = nonna ? COMPLEX(na) : NULL; 730 | for (ssize_t j=0; j1 ? SSIZE_MAX : 0; 756 | const SEXP *restrict pna = nonna ? STRING_PTR_RO(na) : NULL; 757 | for (ssize_t j=0; j1 ? SSIZE_MAX : 0; 783 | const SEXP *restrict pna = nonna ? SEXPPTR_RO(na) : NULL; 784 | for (ssize_t j=0; j 3 | #define omp_enabled true 4 | #define max_thread omp_get_num_procs() 5 | #define min_thread 1 6 | #define OMP_PARALLEL_FOR(nth) _Pragma("omp parallel for num_threads(nth)") 7 | #else 8 | #define omp_enabled false 9 | #define max_thread 1 10 | #define min_thread 1 11 | #define omp_get_thread_num() 0 12 | #define OMP_PARALLEL_FOR(n) 13 | #endif 14 | 15 | #include 16 | #include 17 | #include 18 | 19 | #if !defined(R_VERSION) || R_VERSION < R_Version(3, 5, 0) 20 | #define USE_RINTERNALS 21 | #define DATAPTR_RO(x) ((const void *)DATAPTR(x)) 22 | #endif 23 | 24 | #include 25 | #include 26 | #include 27 | #include 28 | 29 | #if R_VERSION < R_Version(4, 5, 0) 30 | # define isDataFrame(x) Rf_isFrame(x) 31 | #endif 32 | 33 | #if !defined SSIZE_MAX 34 | #define SSIZE_MAX LLONG_MAX 35 | #endif 36 | 37 | #ifdef WIN32 38 | #include 39 | #else 40 | #include 41 | #include 42 | #include 43 | #include 44 | #include 45 | #include 46 | #endif 47 | 48 | #define UTYPEOF(x) ((unsigned)TYPEOF(x)) 49 | #define IS_BOOL(x) (LENGTH(x)==1 && TYPEOF(x)==LGLSXP && LOGICAL(x)[0]!=NA_LOGICAL) 50 | #define IS_VALID_TYPE(x) ((x) == LGLSXP || (x)==INTSXP || (x)==REALSXP || (x)==CPLXSXP || (x)==STRSXP || (x)==VECSXP) 51 | #define PTR_ETL(x, y) (((const SEXP *)DATAPTR_RO(x))[y]) 52 | #define SEXPPTR_RO(x) ((const SEXP *)DATAPTR_RO(x)) 53 | #define ISNA_COMPLEX(x) (ISNA(x.r) || ISNA(x.i)) 54 | #define ISNAN_COMPLEX(x) (ISNAN(x.r) || ISNAN(x.i)) 55 | #define EQUAL_CPLX(x, y) (((x.r) == (y.r)) && ((x.i) == (y.i))) 56 | #define RCHAR(x, y) CHAR(STRING_ELT(x, y)) 57 | #define SEXP_F ScalarLogical(FALSE) 58 | #define SEXP_T ScalarLogical(TRUE) 59 | #define HASH(key, K) (3141592653U * (unsigned int)(key) >> (32 - (K))) 60 | #define N_ISNAN(x, y) (!ISNAN(x) && !ISNAN(y)) 61 | #define B_IsNA(x, y) (R_IsNA(x) && R_IsNA(y)) 62 | #define B_IsNaN(x, y) (R_IsNaN(x) && R_IsNaN(y)) 63 | #define B_ISNAN(x, y) (ISNAN(x) && ISNAN(y)) 64 | #define C_IsNA(x) (R_IsNA(x.r) || R_IsNA(x.i)) 65 | #define C_IsNaN(x) (R_IsNaN(x.r) || R_IsNaN(x.i)) 66 | #define C_ISNAN(x, y) (B_ISNAN(x, y) || (N_ISNAN(x, y) && x == y)) 67 | #define REQUAL(x, y) (N_ISNAN(x, y) ? (x == y) : (B_IsNA(x, y) || B_IsNaN(x, y))) 68 | #define CEQUAL(x, y) ((N_ISNAN(x.r, x.i) && N_ISNAN(y.r, y.i)) ? (x.r == y.r && x.i == y.i) : (C_IsNA(x) ? C_IsNA(y) : (C_IsNA(y) ? 0 : (C_ISNAN(x.r, y.r) && C_ISNAN(x.i, y.i))))) 69 | #define STR_DF mkString("data.frame") 70 | #define MAX(a,b) (((a)>(b))?(a):(b)) 71 | #define IS_LOGICAL(x) (isLogical(x) && LENGTH(x)==1) 72 | 73 | extern SEXP addColToDataFrame(SEXP df, SEXP mcol, SEXP coln); 74 | extern SEXP callToOrder (SEXP x, const char* method, bool desc, Rboolean na, SEXP env); 75 | extern SEXP charToFactR(SEXP x, SEXP decreasingArg, SEXP nthread, SEXP nalast, SEXP env, SEXP addNA); 76 | extern SEXP countR(SEXP x, SEXP y); 77 | extern SEXP countNAR(SEXP x); 78 | extern SEXP countOccurR(SEXP x); 79 | extern SEXP countOccurDataFrameR(SEXP x); 80 | extern SEXP cpsortR(SEXP x, SEXP decreasing, SEXP nthread, SEXP nalast, SEXP env, SEXP index, SEXP clocale); 81 | extern SEXP dfToMatrix(SEXP df); 82 | extern SEXP dupR(SEXP x, SEXP uniq, SEXP fromLast); 83 | extern SEXP dupVecR(SEXP x, SEXP uniq, SEXP fromLast); 84 | extern SEXP dupVecIndexOnlyR(SEXP x); 85 | extern SEXP dupDataFrameR(SEXP x, SEXP uniq, SEXP fromLast); 86 | extern SEXP dupMatrixR(SEXP x, SEXP uniq, Rboolean idx, SEXP fromLast); 87 | extern SEXP dupLenR(SEXP x); 88 | extern SEXP dupLenDataFrameR(SEXP x); 89 | extern SEXP dupLenMatrixR(SEXP x); 90 | extern SEXP dupLenVecR(SEXP x); 91 | extern SEXP fposR(SEXP needle, SEXP haystack, SEXP all, SEXP overlap); 92 | extern SEXP fposMatR(SEXP needle, SEXP haystack, SEXP all, SEXP overlap); 93 | extern SEXP fposVectR(SEXP ndle, SEXP hsk, SEXP all, SEXP overlap); 94 | extern SEXP iifR(SEXP l, SEXP a, SEXP b, SEXP na, SEXP tprom, SEXP nthreads); 95 | extern SEXP nifR(SEXP na, SEXP rho, SEXP args); 96 | extern SEXP nifInternalR(SEXP na, SEXP args); 97 | extern SEXP nswitchR(SEXP x, SEXP na, SEXP nthreads, SEXP chkenc, SEXP args); 98 | extern SEXP ompEnabledR(void); 99 | extern SEXP pallR(SEXP na, SEXP args); 100 | extern SEXP panyR(SEXP na, SEXP args); 101 | extern SEXP pcountR(SEXP x, SEXP args); 102 | extern SEXP pcountNAR(SEXP args); 103 | extern SEXP pfirstR(SEXP last, SEXP args); 104 | extern SEXP pmeanR(SEXP na, SEXP args); 105 | extern SEXP pprodR(SEXP na, SEXP args); 106 | extern SEXP psumR(SEXP na, SEXP args); 107 | extern SEXP setlevelsR(SEXP x, SEXP old_lvl, SEXP new_lvl, SEXP skip_absent); 108 | extern SEXP subSetColDataFrame(SEXP df, SEXP str); 109 | extern SEXP subSetColMatrix(SEXP x, R_xlen_t idx); 110 | extern SEXP subSetRowDataFrame(SEXP df, SEXP rws); 111 | extern SEXP subSetRowMatrix(SEXP mat, SEXP rws); 112 | extern SEXP topnR(SEXP vec, SEXP n, SEXP dec, SEXP hasna, SEXP env); 113 | extern SEXP vswitchR(SEXP x, SEXP values, SEXP outputs, SEXP na, SEXP nthreads, SEXP chkenc); 114 | 115 | extern SEXP createMappingObjectR(SEXP MapName, SEXP MapLength, SEXP DataObject, SEXP verboseArg); 116 | extern SEXP getMappingObjectR(SEXP MapName, SEXP MapLength, SEXP verboseArg); 117 | extern SEXP clearMappingObjectR(SEXP ext, SEXP verboseArg); 118 | 119 | union uno { double d; unsigned int u[2]; }; 120 | bool isMixEnc(SEXP x); 121 | SEXP enc2UTF8(SEXP x); 122 | -------------------------------------------------------------------------------- /src/nswitch.c: -------------------------------------------------------------------------------- 1 | /* 2 | * kit : Useful R Functions Implemented in C 3 | * Copyright (C) 2020-2025 Morgan Jacob 4 | * 5 | * This program is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU General Public License 16 | * along with this program. If not, see . 17 | */ 18 | 19 | #include "kit.h" 20 | 21 | SEXP nswitchR(SEXP x, SEXP na, SEXP nthreads, SEXP chkenc, SEXP args) { 22 | if (!IS_BOOL(chkenc)) { 23 | error("Argument 'checkEnc' must be TRUE or FALSE and length 1."); 24 | } 25 | if (isS4(x)) { 26 | error("S4 class objects for argument 'x' are not supported."); 27 | } 28 | if (isS4(na)) { 29 | error("S4 class objects for argument 'na' are not supported."); 30 | } 31 | int nth = asInteger(nthreads); 32 | nth = nth > max_thread ? max_thread : (nth < min_thread ? min_thread : nth); //revisit this 33 | R_len_t n = length(args); 34 | if (n % 2) { 35 | error("Received %d inputs; please supply an even number of arguments in ... " 36 | "consisting of target value, resulting output pairs (in that order). " 37 | "Note that argument 'default' must be named explicitly (e.g.: default=0)", n); 38 | } 39 | n = n / 2; 40 | const bool pchkenc = asLogical(chkenc); 41 | const bool nonna = !isNull(na); 42 | const bool ifact = isFactor(PTR_ETL(args, 1)); 43 | const R_xlen_t len_na = xlength(na); 44 | const R_xlen_t len_x = xlength(x); 45 | const SEXPTYPE type0 = UTYPEOF(PTR_ETL(args, 1)); 46 | const SEXPTYPE type1 = UTYPEOF(PTR_ETL(args, 0)); 47 | const SEXPTYPE type_x = UTYPEOF(x); 48 | if (type_x != type1) { 49 | error("Type of 'x' and 'values' are different. Please make sure they are the same."); 50 | } 51 | SEXP out0c = PROTECT(getAttrib(PTR_ETL(args, 1), R_ClassSymbol)); 52 | SEXP out0l = PROTECT(getAttrib(PTR_ETL(args, 1), R_LevelsSymbol)); 53 | if (nonna) { 54 | if (len_na != 1 && len_na != len_x) { 55 | error("Length of 'default' must either be 1 or length of 'x'."); // maybe improve message 56 | } 57 | SEXPTYPE tn = UTYPEOF(na); 58 | if (tn != type0) { 59 | error("Resulting value is of type %s but 'default' is of type %s. " 60 | "Please make sure that both arguments have the same type.", 61 | type2char(type0), type2char(tn)); 62 | } 63 | if (!R_compute_identical(out0c, PROTECT(getAttrib(na, R_ClassSymbol)), 0)) { 64 | error("Resulting value has different class than 'default'. " 65 | "Please make sure that both arguments have the same class."); 66 | } 67 | UNPROTECT(1); 68 | if (ifact) { 69 | if (!R_compute_identical(out0l, PROTECT(getAttrib(na, R_LevelsSymbol)), 0)) { 70 | error("Resulting value and 'default' are both type factor but their levels are different."); 71 | } 72 | UNPROTECT(1); 73 | } 74 | } 75 | ssize_t amask[n]; 76 | for (ssize_t i=0; i1 ? SSIZE_MAX : 0; 106 | } 107 | SEXP xans = R_NilValue, vans = R_NilValue; 108 | int nprotect = 0; 109 | bool utfcon = false; 110 | if (pchkenc && type_x == STRSXP) { 111 | if (!isMixEnc(x)) { 112 | const cetype_t cx = getCharCE(STRING_PTR_RO(x)[0]); 113 | for (ssize_t i = 0; i < n; ++i) { 114 | if(cx != getCharCE(STRING_PTR_RO(PTR_ETL(args,2*i))[0])) { 115 | utfcon = true; 116 | break; 117 | } 118 | } 119 | } else { 120 | utfcon = true; 121 | } 122 | if (utfcon) { 123 | xans = PROTECT(enc2UTF8(x)); 124 | vans = PROTECT(allocVector(STRSXP, n)); 125 | nprotect = 2; 126 | for (ssize_t i = 0; i < n; ++i) { 127 | SET_STRING_ELT(vans, i, STRING_PTR_RO(enc2UTF8(PTR_ETL(args,2*i)))[0]); 128 | } 129 | } 130 | } 131 | SEXP ans = PROTECT(allocVector(type0, len_x)); 132 | copyMostAttrib(PTR_ETL(args, 1), ans); 133 | switch(type0) { 134 | /* 135 | * This part is for LOGICAL 136 | */ 137 | case LGLSXP:{ 138 | int *restrict pans = LOGICAL(ans); 139 | const int *restrict pna = nonna ? LOGICAL(na) : NULL; 140 | ssize_t namask = len_na>1 ? SSIZE_MAX : 0; 141 | OMP_PARALLEL_FOR(nth) 142 | for (ssize_t j=0; j1 ? SSIZE_MAX : 0; 234 | OMP_PARALLEL_FOR(nth) 235 | for (ssize_t j=0; j1 ? SSIZE_MAX : 0; 327 | OMP_PARALLEL_FOR(nth) 328 | for (ssize_t j=0; j1 ? SSIZE_MAX : 0; 420 | Rcomplex NA_CPLX; NA_CPLX.r = NA_REAL; NA_CPLX.i = NA_REAL; // deal with that across all functions 421 | OMP_PARALLEL_FOR(nth) 422 | for (ssize_t j=0; j1 ? SSIZE_MAX : 0; 513 | for (ssize_t j=0; j1 ? SSIZE_MAX : 0; 599 | for (ssize_t j=0; j. 17 | */ 18 | 19 | #include "kit.h" 20 | 21 | #define HASHSTR(x) (HASH(((intptr_t) (x) & 0xffffffff), K)) 22 | #define LOOKUP_VAL(x) (lookupTable[getIndex((x), pvalSorted, lookupTable)]-1) 23 | 24 | #define STR_SORT install("sort") 25 | #define STR_ORDER install("order") 26 | #define STR_METHOD install("method") 27 | #define STR_NALAST install("na.last") 28 | #define STR_DECREA install("decreasing") 29 | #define STR_FACTOR mkChar("factor") 30 | 31 | static int K = 8; 32 | static size_t M = 256; 33 | 34 | static void recursiveRadix(const SEXP *restrict pans, const size_t k, size_t *restrict pos, 35 | size_t *restrict incr, uint8_t *restrict test, SEXP tmp, 36 | const SEXP *restrict ptmp, size_t start, size_t *restrict newpos) { 37 | for (uint16_t i = 1; i < 257; ++i){ 38 | if (pos[i] == 1) { 39 | start++; continue; 40 | } 41 | if (pos[i] > 1) { 42 | const size_t ct = pos[i]; 43 | SEXP *npans = (SEXP*)pans + start; 44 | memset(incr, 0, 257*sizeof(size_t)); 45 | for(size_t j = 0; j < ct; ++j) { 46 | test[j] = (uint8_t)(CHAR(npans[j])[k]); 47 | incr[++test[j]]++; 48 | } 49 | if (incr[1] == ct) { 50 | start += ct; 51 | continue; 52 | } 53 | newpos[0] = incr[0]; 54 | for (uint16_t j = 1; j < 257; ++j){ 55 | newpos[j] = incr[j] + newpos[j-1]; 56 | } 57 | for (size_t j = 0; j < ct; ++j){ 58 | SET_STRING_ELT(tmp, --newpos[test[j]], npans[j]); 59 | } 60 | memcpy(npans, ptmp, ct*sizeof(SEXP)); 61 | size_t maxLen=0; 62 | for (uint16_t j = 2; j < 257; ++j){ 63 | if(incr[j] > maxLen) { 64 | maxLen = incr[j]; 65 | } 66 | } 67 | if (maxLen > 1) { 68 | size_t *restrict secpos = malloc(257*sizeof(size_t)); 69 | recursiveRadix(pans, k+1, incr, newpos, test, tmp, ptmp, start,secpos); 70 | free(secpos); 71 | } 72 | start += ct; 73 | } 74 | } 75 | } 76 | 77 | static void recursiveRadixrev(const SEXP *restrict pans, const size_t k, size_t *restrict pos, 78 | size_t *restrict incr, uint8_t *restrict test, SEXP tmp, 79 | const SEXP *restrict ptmp, size_t start, size_t *restrict newpos) { 80 | for (uint16_t i = 256; i > 0; --i){ 81 | if (pos[i] == 1) { 82 | start++; continue; 83 | } 84 | if (pos[i] > 1) { 85 | const size_t ct = pos[i]; 86 | SEXP *npans = (SEXP*)pans + start; 87 | memset(incr, 0, 257*sizeof(size_t)); 88 | for(size_t j = 0; j < ct; ++j) { 89 | test[j] = (uint8_t)(CHAR(npans[j])[k]); 90 | incr[++test[j]]++; 91 | } 92 | if (incr[1] == ct) { 93 | start += ct; 94 | continue; 95 | } 96 | newpos[256] = incr[256]; 97 | for (uint16_t j = 255; j >= 1; --j){ 98 | newpos[j] = incr[j] + newpos[j+1]; 99 | } 100 | for (size_t j = 0; j < ct; ++j){ 101 | SET_STRING_ELT(tmp, --newpos[test[j]], npans[j]); 102 | } 103 | memcpy(npans, ptmp, ct*sizeof(SEXP)); 104 | size_t maxLen=0; 105 | for (uint16_t j = 2; j < 257; ++j){ 106 | if(incr[j] > maxLen) { 107 | maxLen = incr[j]; 108 | } 109 | } 110 | if (maxLen > 1) { 111 | size_t *restrict secpos = malloc(257*sizeof(size_t)); 112 | recursiveRadixrev(pans, k+1, incr, newpos, test, tmp, ptmp, start,secpos); 113 | free(secpos); 114 | } 115 | start += ct; 116 | } 117 | } 118 | } 119 | 120 | static SEXP rsort (SEXP x) { 121 | const size_t len = LENGTH(x); 122 | const SEXP *restrict px = STRING_PTR_RO(x); 123 | SEXP ans = PROTECT(allocVector(STRSXP, len)); 124 | uint8_t nprotect = 1; 125 | 126 | uint8_t *restrict test = malloc(len * sizeof(uint8_t)); 127 | size_t *restrict pos = calloc(257, sizeof(size_t)); 128 | size_t *restrict incr = malloc(257*sizeof(size_t)); 129 | 130 | for (size_t i = 0; i < len; ++i){ 131 | test[i] = (uint8_t)(CHAR(px[i])[0]); 132 | pos[++test[i]]++; 133 | } 134 | 135 | size_t maxLen=0; 136 | for (uint16_t i = 2; i < 257; ++i){ // start at 2, is that correct? 137 | if(pos[i] > maxLen) { 138 | maxLen = pos[i]; 139 | } 140 | } 141 | 142 | incr[0] = pos[0]; 143 | for (uint16_t i = 1; i < 257 ; ++i){ 144 | incr[i] = pos[i] + incr[i-1]; 145 | } 146 | 147 | for (size_t i = 0; i < len; ++i){ 148 | SET_STRING_ELT(ans, --incr[test[i]], px[i]); 149 | } 150 | 151 | if (maxLen > 1) { 152 | SEXP tmp = PROTECT(allocVector(STRSXP, maxLen)); 153 | nprotect++; 154 | size_t start = 0; 155 | size_t *restrict newpos = malloc(257*sizeof(size_t)); 156 | recursiveRadix(STRING_PTR_RO(ans), 1, pos, incr, test, tmp, STRING_PTR_RO(tmp), start,newpos); 157 | free(newpos); 158 | } 159 | 160 | free(pos); 161 | free(test); 162 | free(incr); 163 | 164 | UNPROTECT(nprotect); 165 | return ans; 166 | } 167 | 168 | static SEXP rsortrev (SEXP x) { 169 | const size_t len = LENGTH(x); 170 | const SEXP *restrict px = STRING_PTR_RO(x); 171 | SEXP ans = PROTECT(allocVector(STRSXP, len)); 172 | uint8_t nprotect = 1; 173 | 174 | uint8_t *restrict test = malloc(len * sizeof(uint8_t)); 175 | size_t *restrict pos = calloc(257, sizeof(size_t)); 176 | size_t *restrict incr = malloc(257*sizeof(size_t)); 177 | 178 | for (size_t i = 0; i < len; ++i){ 179 | test[i] = (uint8_t)(CHAR(px[i])[0]); 180 | pos[++test[i]]++; 181 | } 182 | 183 | size_t maxLen=0; 184 | for (uint16_t i = 2; i < 257; ++i){ // start at 2, is that correct? 185 | if(pos[i] > maxLen) { 186 | maxLen = pos[i]; 187 | } 188 | } 189 | 190 | incr[256] = pos[256]; 191 | for (uint16_t i = 255; i >= 1 ; --i){ 192 | incr[i] = pos[i] + incr[i+1]; 193 | } 194 | 195 | for (size_t i = 0; i < len; ++i){ 196 | SET_STRING_ELT(ans, --incr[test[i]], px[i]); 197 | } 198 | 199 | if (maxLen > 1) { 200 | SEXP tmp = PROTECT(allocVector(STRSXP, maxLen)); 201 | nprotect++; 202 | size_t start = 0; 203 | size_t *restrict newpos = malloc(257*sizeof(size_t)); 204 | recursiveRadixrev(STRING_PTR_RO(ans), 1, pos, incr, test, tmp, STRING_PTR_RO(tmp), start,newpos); 205 | free(newpos); 206 | } 207 | 208 | free(pos); 209 | free(test); 210 | free(incr); 211 | 212 | UNPROTECT(nprotect); 213 | return ans; 214 | } 215 | 216 | static SEXP dupVecSort(SEXP x) { 217 | const R_xlen_t n = xlength(x); 218 | int K =8; 219 | size_t M = 256; 220 | const size_t n2 = 2U * (size_t) n; 221 | while (M < n2) { 222 | M *= 2; 223 | K++; 224 | } 225 | R_xlen_t count = 0; 226 | size_t id = 0; 227 | int *restrict h = (int*)calloc(M, sizeof(int)); 228 | int *restrict pans = (int*)calloc(n, sizeof(int)); 229 | const SEXP *restrict px = STRING_PTR_RO(x); 230 | for (int i = 0; i < n; ++i) { 231 | id = HASHSTR(px[i]); 232 | while (h[id]) { 233 | if (px[h[id] - 1]==px[i]) { 234 | goto sbl; 235 | } 236 | id++; 237 | id %= M; 238 | } 239 | h[id] = (int) i + 1; 240 | pans[i]++; 241 | count++; 242 | sbl:; 243 | } 244 | free(h); 245 | SEXP indx = PROTECT(allocVector(STRSXP, count)); 246 | R_xlen_t ct = 0; 247 | for (int i = 0; ct < count; ++i) { 248 | if (pans[i]) { 249 | SET_STRING_ELT(indx, ct++, px[i]); 250 | } 251 | } 252 | free(pans); 253 | UNPROTECT(1); 254 | return indx; 255 | } 256 | 257 | static int *buildTable (SEXP x) { 258 | const R_xlen_t n = xlength(x); 259 | K = 8; 260 | M = 256; 261 | const size_t n2 = 2U * (size_t) n; 262 | while (M < n2) { M *= 2; K++;} 263 | size_t id = 0; 264 | int *h = (int*)calloc(M, sizeof(int)); 265 | const SEXP *restrict px = STRING_PTR_RO(x); 266 | for (int i = 0; i < n; ++i) { 267 | id = HASHSTR(px[i]); 268 | while (h[id]) { 269 | if (px[h[id] - 1]==px[i]) { 270 | goto bl; // # nocov 271 | } 272 | id++; 273 | id %= M; 274 | } 275 | h[id] = (int) i + 1; 276 | bl:; 277 | } 278 | return h; 279 | } 280 | 281 | static inline int getIndex(SEXP ptr, const SEXP *restrict cmp, int *lkpTbl) { 282 | size_t id = HASHSTR(ptr); 283 | while(true) { 284 | if (cmp[lkpTbl[id] - 1]==ptr) { 285 | return id; 286 | } 287 | id++; 288 | id %= M; 289 | } 290 | } 291 | 292 | static SEXP callToSort (SEXP x, const char* method, SEXP env) { 293 | SEXP call = PROTECT(allocVector(LANGSXP, 4)); 294 | SETCAR(call, STR_SORT); 295 | 296 | SEXP s = CDR(call); 297 | SETCAR(s, x); 298 | SET_TAG(s, install("x")); 299 | 300 | s = CDR(s); 301 | SETCAR(s, PROTECT(mkString(method))); 302 | SET_TAG(s, STR_METHOD); 303 | 304 | s = CDR(s); 305 | SETCAR(s, ScalarLogical(0)); 306 | SET_TAG(s, STR_NALAST); 307 | 308 | SEXP out = PROTECT(eval(call, env)); 309 | UNPROTECT(3); 310 | return out; 311 | } 312 | 313 | static SEXP callToSort2 (SEXP x, const char* method, const int desc, const int na, SEXP env) { 314 | SEXP call = PROTECT(allocVector(LANGSXP, 5)); 315 | SETCAR(call, STR_SORT); 316 | 317 | SEXP s = CDR(call); 318 | SETCAR(s, x); 319 | SET_TAG(s, install("x")); 320 | 321 | s = CDR(s); 322 | SETCAR(s, PROTECT(mkString(method))); 323 | SET_TAG(s, STR_METHOD); 324 | 325 | s = CDR(s); 326 | SETCAR(s, ScalarLogical(na)); 327 | SET_TAG(s, STR_NALAST); 328 | 329 | s = CDR(s); 330 | SETCAR(s, ScalarLogical(desc)); 331 | SET_TAG(s, STR_DECREA); 332 | 333 | SEXP out = PROTECT(eval(call, env)); 334 | UNPROTECT(3); 335 | return out; 336 | } 337 | 338 | SEXP callToOrder (SEXP x, const char* method, bool desc, Rboolean na, SEXP env) { 339 | SEXP call = PROTECT(allocVector(LANGSXP, 5)); 340 | SETCAR(call, STR_ORDER); 341 | 342 | SEXP s = CDR(call); 343 | SETCAR(s, x); 344 | SET_TAG(s, install("...")); 345 | 346 | s = CDR(s); 347 | SETCAR(s, PROTECT(mkString(method))); 348 | SET_TAG(s, STR_METHOD); 349 | 350 | s = CDR(s); 351 | SETCAR(s, ScalarLogical(na)); 352 | SET_TAG(s, STR_NALAST); 353 | 354 | s = CDR(s); 355 | SETCAR(s, ScalarLogical(desc)); 356 | SET_TAG(s, STR_DECREA); 357 | 358 | SEXP out = PROTECT(eval(call, env)); 359 | UNPROTECT(3); 360 | return out; 361 | } 362 | 363 | /* 364 | * Character sorting 365 | */ 366 | 367 | SEXP cpsortR (SEXP x, SEXP decreasing, SEXP nthread, SEXP nalast, SEXP env, SEXP index, SEXP clocale) { 368 | 369 | if (!IS_BOOL(decreasing)) { 370 | error("Argument 'decreasing' must be TRUE or FALSE."); 371 | } 372 | /*if (!IS_BOOL(index)) { 373 | error("Argument 'index.return' must be TRUE or FALSE."); 374 | }*/ 375 | if (!IS_LOGICAL(nalast)) { 376 | error("Argument 'na.last' must be TRUE, FALSE or NA."); 377 | } 378 | if (TYPEOF(nthread) != INTSXP) { 379 | error("Argument 'nThread' (%s) must be of type integer.",type2char(TYPEOF(nthread))); 380 | } 381 | if (!IS_BOOL(clocale)) { 382 | error("Argument 'c.locale' must be TRUE or FALSE."); 383 | } 384 | 385 | const int na_pos = asLogical(nalast); 386 | const int cindex = asLogical(index); 387 | const int dcr = asLogical(decreasing); 388 | const int cl = asLogical(clocale); 389 | const int xlen = LENGTH(x); 390 | 391 | SEXP uVals = PROTECT(dupVecSort(x)); 392 | const int n = LENGTH(uVals); 393 | 394 | const int early = xlen == n; 395 | SEXP valSorted = early ? ( 396 | cindex ? PROTECT(callToOrder(uVals, "shell", dcr, na_pos, env)) : 397 | (cl ? (dcr ? PROTECT(rsortrev(uVals)) : PROTECT(rsort(uVals))) : 398 | PROTECT(callToSort2(uVals, "quick", dcr, na_pos, env))) 399 | ) : ( cl ? PROTECT(rsort(uVals)) : 400 | PROTECT(callToSort(uVals, "quick", env))); 401 | 402 | /*if (early && cindex) { 403 | UNPROTECT(2); 404 | return valSorted; 405 | }*/ 406 | 407 | SEXP *restrict pvalSorted = (SEXP*)STRING_PTR_RO(valSorted); 408 | const int nlen = LENGTH(valSorted); 409 | 410 | int NAidx = -1; 411 | for (int i = 0; i < nlen; ++i) { 412 | if (pvalSorted[i] == NA_STRING) { 413 | NAidx = i; 414 | break; 415 | } 416 | } 417 | if (cl) { 418 | if ( ((na_pos != 0 && !dcr) || (na_pos == 0 && dcr && !early) || (na_pos != 0 && dcr && early)) && NAidx != nlen-1 ) { 419 | if (NAidx >= 0) { 420 | memmove(pvalSorted+NAidx, pvalSorted+NAidx+1, (nlen - (NAidx + 1))*sizeof(SEXP)); 421 | pvalSorted[nlen-1] = NA_STRING; 422 | } 423 | } else if ( (na_pos == 0 && !dcr) || (na_pos != 0 && dcr) || (na_pos == 0 && dcr && early)){ 424 | if (NAidx > 0 ) { 425 | memmove(pvalSorted+1, pvalSorted, NAidx*sizeof(SEXP)); 426 | pvalSorted[0] = NA_STRING; 427 | } 428 | } 429 | } else { 430 | if ( ((na_pos != 0 && !dcr) || (na_pos == 0 && dcr)) && NAidx != nlen-1) { 431 | if (NAidx >= 0) { 432 | memmove(pvalSorted+NAidx, pvalSorted+NAidx+1, (nlen - (NAidx + 1))*sizeof(SEXP)); 433 | pvalSorted[nlen-1] = NA_STRING; 434 | } 435 | } else if ( (na_pos == 0 && !dcr) || (na_pos != 0 && dcr) ){ 436 | if (NAidx > 0 ) { 437 | memmove(pvalSorted+1, pvalSorted, NAidx*sizeof(SEXP)); 438 | pvalSorted[0] = NA_STRING; 439 | } 440 | } 441 | } 442 | 443 | if (early) { 444 | if (na_pos == NA_LOGICAL && cl) { 445 | const SEXP *restrict pa = STRING_PTR_RO(valSorted); // already used pvalSorted 446 | int ct = 0; 447 | for (int i = nlen-1; i >= 0; --i) { 448 | if(pa[i] == NA_STRING) { 449 | ct++; 450 | } else { 451 | break; 452 | } 453 | } 454 | if (ct > 0) { 455 | valSorted = Rf_xlengthgets(valSorted, nlen-ct); 456 | } 457 | } 458 | UNPROTECT(2); 459 | return valSorted; 460 | } 461 | 462 | int *restrict lookupTable = buildTable(valSorted); 463 | const SEXP *restrict px = STRING_PTR_RO(x); 464 | int nth = asInteger(nthread); 465 | nth = nth > max_thread ? max_thread : (nth < min_thread ? min_thread : nth); //revisit this 466 | 467 | SEXP ans = PROTECT(allocVector(cindex ? INTSXP : STRSXP, xlen)); 468 | if (!cindex) { 469 | copyMostAttrib(x, ans); 470 | } 471 | 472 | int *restrict pos = (int*)R_alloc(n, sizeof(int)); 473 | memset(pos, 0, n*sizeof(int)); 474 | int *restrict lv = (int*)R_alloc(xlen, sizeof(int)); 475 | 476 | OMP_PARALLEL_FOR(nth) 477 | for (int j = 0; j < xlen; ++j) { 478 | lv[j] = LOOKUP_VAL(px[j]); 479 | } 480 | free(lookupTable); 481 | 482 | for (int j=0; j=0; --i) { 489 | temp = pos[i]; 490 | pos[i] = cumul; 491 | cumul += temp; 492 | } 493 | } else { 494 | for (int i=0; i= 0; --i) { 509 | if( px[pans[i]-1] == NA_STRING) { 510 | ct++; 511 | } else { 512 | break; 513 | } 514 | } 515 | if (ct > 0) { 516 | ans = Rf_xlengthgets(ans, xlen-ct); 517 | } 518 | } 519 | } else {*/ 520 | for (int j = 0; j < xlen; ++j) { 521 | SET_STRING_ELT(ans, pos[lv[j]]++ , px[j]); 522 | } 523 | if (na_pos == NA_LOGICAL) { 524 | const SEXP *restrict pa = STRING_PTR_RO(ans); 525 | int ct = 0; 526 | for (int i = xlen-1; i >= 0; --i) { 527 | if(pa[i] == NA_STRING) { 528 | ct++; 529 | } else { 530 | break; 531 | } 532 | } 533 | if (ct > 0) { 534 | ans = Rf_xlengthgets(ans, xlen-ct); 535 | if (!cindex) { 536 | copyMostAttrib(x, ans); 537 | } 538 | } 539 | } 540 | //} 541 | UNPROTECT(3); 542 | return ans; 543 | } 544 | 545 | /* 546 | * Character to factor conversion 547 | */ 548 | 549 | SEXP charToFactR (SEXP x, SEXP decreasing, SEXP nthread, SEXP nalast, SEXP env, SEXP addNA) { 550 | 551 | if (!IS_BOOL(decreasing)) { 552 | error("Argument 'decreasing' must be TRUE or FALSE."); 553 | } 554 | if (!IS_BOOL(addNA)) { 555 | error("Argument 'addNA' must be TRUE or FALSE."); 556 | } 557 | /*if (!IS_LOGICAL(nalast)) { 558 | error("Argument 'na.last' must be TRUE, FALSE or NA."); 559 | }*/ 560 | if (TYPEOF(x) != STRSXP) { 561 | error("Argument 'x' must be of type character."); 562 | } 563 | if (TYPEOF(nthread) != INTSXP) { 564 | error("Argument 'nThread' (%s) must be of type integer.",type2char(TYPEOF(nthread))); 565 | } 566 | const int na_pos = asLogical(nalast); 567 | const int dcr = asLogical(decreasing); 568 | const int addNAv = asLogical(addNA); 569 | const int xlen = LENGTH(x); 570 | SEXP uVals = PROTECT(dupVecSort(x)); 571 | const int n = LENGTH(uVals); 572 | SEXP valSorted = PROTECT(callToSort2(uVals, "quick", dcr, 1, env)); 573 | SEXP *restrict pvalSorted = (SEXP*)STRING_PTR_RO(valSorted); 574 | 575 | int NAidx = -1; 576 | for (int i = 0; i < n; ++i) { 577 | if (pvalSorted[i] == NA_STRING) { 578 | NAidx = i; break; 579 | } 580 | } 581 | 582 | if ( ((na_pos != 0 && !dcr) || (na_pos == 0 && dcr)) && NAidx != n-1) { 583 | if (NAidx >= 0) { 584 | memmove(pvalSorted+NAidx, pvalSorted+NAidx+1, (n - ((NAidx + 1)))*sizeof(SEXP)); 585 | pvalSorted[n-1] = NA_STRING; 586 | } 587 | } else if ( (na_pos == 0 && !dcr) || (na_pos != 0 && dcr) ){ 588 | if (NAidx > 0 ) { 589 | memmove(pvalSorted+1, pvalSorted, NAidx*sizeof(SEXP)); 590 | pvalSorted[0] = NA_STRING; 591 | } 592 | } 593 | 594 | int *restrict lookupTable = buildTable(valSorted); 595 | const SEXP *restrict px = STRING_PTR_RO(x); 596 | int nth = asInteger(nthread); 597 | nth = nth > max_thread ? max_thread : (nth < min_thread ? min_thread : nth); //revisit this 598 | 599 | SEXP ans = PROTECT(allocVector(INTSXP, xlen)); 600 | int *restrict pans = INTEGER(ans); 601 | 602 | if (addNAv == 0) { 603 | OMP_PARALLEL_FOR(nth) 604 | for (int j=0; j. 17 | */ 18 | 19 | #include "kit.h" 20 | 21 | /* 22 | * Structure to hold Length and Address 23 | * of data to be shared in memory segment 24 | */ 25 | 26 | struct OBJECT { 27 | #ifdef WIN32 28 | HANDLE hMapFile; 29 | HANDLE hMapLength; 30 | LPCTSTR lpMapAddress; 31 | LPCTSTR lpMapLength; 32 | #else 33 | int fd_addr; 34 | int fd_length; 35 | size_t STORAGE_SIZE; 36 | void *addr; 37 | void *length; 38 | const char *STORAGE_ID; 39 | const char *LENGTH_ID; 40 | #endif 41 | }; 42 | 43 | /* 44 | * Function to finalize memory map pointer 45 | */ 46 | 47 | static bool verbose_finalizer = false; 48 | 49 | static void map_finalizer (SEXP ext) { 50 | if (verbose_finalizer) Rprintf("* Finalize...\n"); 51 | if (NULL == R_ExternalPtrAddr(ext)) { 52 | return; 53 | } 54 | if (verbose_finalizer) Rprintf("* Clear external pointer...\n"); 55 | struct OBJECT *ptr = (struct OBJECT*) R_ExternalPtrAddr(ext); 56 | #ifdef WIN32 57 | UnmapViewOfFile(ptr->lpMapAddress); 58 | CloseHandle(ptr->hMapFile); 59 | UnmapViewOfFile(ptr->lpMapLength); 60 | CloseHandle(ptr->hMapLength); 61 | #else 62 | munmap(ptr->addr, ptr->STORAGE_SIZE); 63 | shm_unlink(ptr->STORAGE_ID); 64 | munmap(ptr->length, 256); 65 | shm_unlink(ptr->LENGTH_ID); 66 | #endif 67 | R_Free(ptr); 68 | R_ClearExternalPtr(ext); 69 | if (verbose_finalizer) Rprintf("* Clear external pointer...OK\n"); 70 | } 71 | 72 | /* 73 | * Function to create data 74 | */ 75 | 76 | SEXP createMappingObjectR (SEXP MapObjectName, SEXP MapLengthName, SEXP DataObject, SEXP verboseArg) { 77 | if (TYPEOF(MapObjectName) != STRSXP || LENGTH(MapObjectName) != 1) { 78 | error("Argument 'MapObjectName' must be of type character and length 1."); 79 | } 80 | if (!IS_BOOL(verboseArg)) { 81 | error("Argument 'verbose' must be TRUE or FALSE."); 82 | } 83 | const bool verbose = asLogical(verboseArg); 84 | verbose_finalizer = verbose; 85 | const size_t len = LENGTH(DataObject); 86 | const size_t BUF_SIZE = len*sizeof(Rbyte); 87 | if (verbose) Rprintf("* Data object size: %zu\n",len*sizeof(Rbyte)); 88 | if (verbose) Rprintf("* Start mapping object...OK\n"); 89 | struct OBJECT *foo = R_Calloc(1, struct OBJECT); 90 | SEXP ext = PROTECT(R_MakeExternalPtr(foo, R_NilValue, R_NilValue)); 91 | R_RegisterCFinalizerEx(ext, map_finalizer, TRUE); 92 | if (verbose) Rprintf("* Register finalizer...OK\n"); 93 | #ifdef WIN32 94 | LPSTR pMN = (LPSTR) CHAR(STRING_PTR_RO(MapObjectName)[0]); 95 | LPSTR pML = (LPSTR) CHAR(STRING_PTR_RO(MapLengthName)[0]); 96 | foo->hMapFile = CreateFileMapping(INVALID_HANDLE_VALUE, NULL, PAGE_READWRITE, 0, BUF_SIZE, pMN); 97 | foo->hMapLength = CreateFileMapping(INVALID_HANDLE_VALUE, NULL, PAGE_READWRITE, 0, 256, pML); 98 | if (foo->hMapFile == INVALID_HANDLE_VALUE || foo->hMapLength == INVALID_HANDLE_VALUE) { 99 | #else 100 | const char *pMN = CHAR(STRING_PTR_RO(MapObjectName)[0]); 101 | const char *pML = CHAR(STRING_PTR_RO(MapLengthName)[0]); 102 | foo->STORAGE_ID = pMN; 103 | foo->LENGTH_ID = pML; 104 | foo->STORAGE_SIZE = BUF_SIZE; 105 | foo->fd_addr = shm_open(foo->STORAGE_ID, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR); 106 | foo->fd_length = shm_open(foo->LENGTH_ID, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR); 107 | if (foo->fd_addr == -1 || foo->fd_length == -1) { 108 | Rprintf("shm_open error, errno(%d): %s\n", errno, strerror(errno)); 109 | #endif 110 | error("* Creating file mapping...ERROR"); 111 | } 112 | if (verbose) Rprintf("* Creating file maping...OK\n"); 113 | #ifdef WIN32 114 | #else 115 | struct stat mapstat; 116 | if (-1 != fstat(foo->fd_addr, &mapstat) && mapstat.st_size == 0) { 117 | if(ftruncate(foo->fd_addr, BUF_SIZE) == -1) { 118 | error("* Extend shared memory object (1)...ERROR"); 119 | } 120 | } 121 | if (-1 != fstat(foo->fd_length, &mapstat) && mapstat.st_size == 0) { 122 | if(ftruncate(foo->fd_length, 256) == -1) { 123 | error("* Extend shared memory object (2)...ERROR"); 124 | } 125 | } 126 | if (verbose) Rprintf("* Extend shared memory object...OK\n"); 127 | #endif 128 | 129 | #ifdef WIN32 130 | foo->lpMapAddress = (LPCTSTR) MapViewOfFile (foo->hMapFile, FILE_MAP_ALL_ACCESS, 0, 0, BUF_SIZE); 131 | foo->lpMapLength = (LPCTSTR) MapViewOfFile (foo->hMapLength, FILE_MAP_ALL_ACCESS, 0, 0, 256); 132 | if (foo->lpMapAddress == NULL || foo->lpMapLength == NULL) { 133 | #else 134 | foo->addr = mmap(NULL, BUF_SIZE, PROT_WRITE, MAP_SHARED, foo->fd_addr, 0); 135 | foo->length = mmap(NULL, 256, PROT_WRITE, MAP_SHARED, foo->fd_length, 0); 136 | if (foo->addr == MAP_FAILED || foo->length == MAP_FAILED) { 137 | #endif 138 | error("* Map view file...ERROR"); 139 | } 140 | if (verbose) Rprintf("* Map view file...OK\n"); 141 | #ifdef WIN32 142 | CopyMemory((LPVOID)foo->lpMapAddress, RAW(DataObject), BUF_SIZE); 143 | CopyMemory((LPVOID)foo->lpMapLength, &len, sizeof(size_t)); 144 | #else 145 | memcpy(foo->addr, RAW(DataObject), BUF_SIZE); 146 | memcpy(foo->length, &len, sizeof(size_t)); 147 | #endif 148 | if (verbose) Rprintf("* Copy memory...OK\n"); 149 | UNPROTECT(1); 150 | return ext; 151 | } 152 | 153 | /* 154 | * Function to retrieve data 155 | */ 156 | 157 | SEXP getMappingObjectR (SEXP MapObjectName, SEXP MapLengthName, SEXP verboseArg) { 158 | if (TYPEOF(MapObjectName) != STRSXP || LENGTH(MapObjectName) != 1) { 159 | error("Argument 'MapObjectName' must be of type character and length 1."); 160 | } 161 | if (!IS_BOOL(verboseArg)) { 162 | error("Argument 'verbose' must be TRUE or FALSE."); 163 | } 164 | const bool verbose = asLogical(verboseArg); 165 | #ifdef WIN32 166 | LPSTR pMN = (LPSTR) CHAR(STRING_PTR_RO(MapObjectName)[0]); 167 | LPSTR pML = (LPSTR) CHAR(STRING_PTR_RO(MapLengthName)[0]); 168 | HANDLE hMapFile = OpenFileMapping(FILE_MAP_ALL_ACCESS, FALSE, pMN); 169 | HANDLE hMapLength = OpenFileMapping(FILE_MAP_ALL_ACCESS, FALSE, pML); 170 | if (hMapFile == INVALID_HANDLE_VALUE || hMapLength == INVALID_HANDLE_VALUE) { 171 | #else 172 | const char *pMN = CHAR(STRING_PTR_RO(MapObjectName)[0]); 173 | const char *pML = CHAR(STRING_PTR_RO(MapLengthName)[0]); 174 | int fd_addr = shm_open(pMN, O_RDONLY, S_IRUSR | S_IWUSR); 175 | int fd_length = shm_open(pML, O_RDONLY, S_IRUSR | S_IWUSR); 176 | if (fd_addr == -1 || fd_length == -1) { 177 | #endif 178 | error("* Creating file mapping...ERROR"); 179 | } 180 | if (verbose) Rprintf("* Creating file maping...OK\n"); 181 | #ifdef WIN32 182 | LPCTSTR lpMapLength = (LPCTSTR) MapViewOfFile (hMapLength, FILE_MAP_ALL_ACCESS, 0, 0, 256); 183 | if (lpMapLength == NULL) { 184 | CloseHandle(hMapLength); 185 | #else 186 | void *length = mmap(NULL, 256, PROT_READ, MAP_SHARED, fd_length, 0); 187 | if (length == MAP_FAILED) { 188 | shm_unlink(pML); 189 | #endif 190 | error("* Map view file (length)...ERROR"); 191 | } 192 | if (verbose) Rprintf("* Map view file (length)...OK\n"); 193 | #ifdef WIN32 194 | size_t len = *(size_t*)lpMapLength; 195 | LPCTSTR lpMapAddress = (LPCTSTR) MapViewOfFile (hMapFile, FILE_MAP_ALL_ACCESS, 0, 0, len*sizeof(Rbyte)); 196 | if (lpMapAddress == NULL) { 197 | CloseHandle(hMapFile); 198 | #else 199 | size_t len = *(size_t*)length; 200 | void *addr = mmap(NULL, len*sizeof(Rbyte), PROT_READ, MAP_SHARED, fd_addr, 0); 201 | if (addr == MAP_FAILED) { 202 | shm_unlink(pMN); 203 | #endif 204 | error("* Map view file (address)...ERROR"); 205 | } 206 | if (verbose) Rprintf("* Map view file (address)...OK\n"); 207 | SEXP ans = PROTECT(allocVector(RAWSXP, len)); 208 | if (verbose) Rprintf("* Create RAW Vector...OK\n"); 209 | #ifdef WIN32 210 | CopyMemory(RAW(ans), (Rbyte*)lpMapAddress, len*sizeof(Rbyte)); 211 | #else 212 | memcpy(RAW(ans), (Rbyte*)addr, len*sizeof(Rbyte)); // maybe need +1 213 | #endif 214 | if (verbose) Rprintf("* Copy map memory...OK\n"); 215 | 216 | #ifdef WIN32 217 | if (!UnmapViewOfFile(lpMapLength)) { 218 | #else 219 | if (munmap(length, 256) == -1) { 220 | #endif 221 | error("* Closing mapping file (length)...ERROR"); 222 | } 223 | if (verbose) Rprintf("* Closing mapping file (length)...OK\n"); 224 | #ifdef WIN32 225 | if (!CloseHandle(hMapLength)) { 226 | #else 227 | if (shm_unlink(pML) == -1) { 228 | #endif 229 | error("* Closing mapping handle (length)...ERROR"); 230 | } 231 | if (verbose) Rprintf("* Closing mapping handle (length)...OK\n"); 232 | 233 | #ifdef WIN32 234 | if (!UnmapViewOfFile(lpMapAddress)) { 235 | #else 236 | if (munmap(length, len*sizeof(Rbyte)) == -1) { 237 | #endif 238 | error("* Closing mapping file (address)...ERROR"); 239 | } 240 | if (verbose) Rprintf("* Closing mapping file (address)...OK\n"); 241 | #ifdef WIN32 242 | if (!CloseHandle(hMapFile)) { 243 | #else 244 | if (shm_unlink(pMN) == -1) { 245 | #endif 246 | error("* Closing mapping handle (address)...ERROR"); 247 | } 248 | if (verbose) Rprintf("* Closing mapping handle (address)...OK\n"); 249 | UNPROTECT(1); 250 | return ans; 251 | } 252 | 253 | /* 254 | * Function to clear mapping object 255 | */ 256 | 257 | SEXP clearMappingObjectR (SEXP ext, SEXP verboseArg) { 258 | if (!IS_BOOL(verboseArg)) { 259 | error("Argument 'verbose' must be TRUE or FALSE."); 260 | } 261 | verbose_finalizer = asLogical(verboseArg); 262 | if (NULL == R_ExternalPtrAddr(ext)) { 263 | return ScalarLogical(FALSE); 264 | } 265 | map_finalizer(ext); 266 | return ScalarLogical(TRUE); 267 | } 268 | -------------------------------------------------------------------------------- /src/topn.c: -------------------------------------------------------------------------------- 1 | /* 2 | * kit : Useful R Functions Implemented in C 3 | * Copyright (C) 2020-2025 Morgan Jacob 4 | * 5 | * This program is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU General Public License 16 | * along with this program. If not, see . 17 | */ 18 | 19 | #include "kit.h" 20 | 21 | SEXP topnR(SEXP vec, SEXP n, SEXP dec, SEXP hasna, SEXP env) { 22 | int len0 = asInteger(n); 23 | const R_xlen_t len1 = xlength(vec); 24 | if (isS4(vec)) { 25 | error("S4 class objects are not supported."); 26 | } 27 | if (len0 > len1) { 28 | warning("'n' is larger than length of 'vec'. 'n' will be set to length of 'vec'."); 29 | len0 = (int)len1; 30 | } 31 | if (len0 < 1) { 32 | error("Please enter a positive integer larger or equal to 1."); 33 | } 34 | if (!IS_BOOL(dec)) { 35 | error("Argument 'decreasing' must be TRUE or FALSE and length 1."); 36 | } 37 | if (!IS_BOOL(hasna)) { 38 | error("Argument 'hasna' must be TRUE or FALSE and length 1."); 39 | } 40 | const Rboolean dcr = asLogical(dec); 41 | const SEXPTYPE tvec = UTYPEOF(vec); 42 | const Rboolean vhasna = asLogical(hasna); 43 | if ( ((len0 > 2000 && vhasna == FALSE) || (len0 > 1500 && vhasna == TRUE)) && (tvec == INTSXP || tvec == REALSXP)) { 44 | SEXP prem = PROTECT(callToOrder(vec, "radix", dcr, TRUE, env)); 45 | SEXP ans = PROTECT(allocVector(UTYPEOF(prem), len0)); 46 | switch(UTYPEOF(prem)) { 47 | case INTSXP: { 48 | memcpy(INTEGER(ans), INTEGER(prem), len0 *sizeof(int)); 49 | } break; 50 | case REALSXP: { 51 | memcpy(REAL(ans), REAL(prem), len0 *sizeof(double)); 52 | } break; 53 | } 54 | UNPROTECT(2); 55 | return ans; 56 | } 57 | SEXP ans = PROTECT(allocVector(INTSXP, len0)); 58 | int *restrict pans = INTEGER(ans); 59 | int tmp; 60 | if (dcr) { 61 | switch(tvec) { 62 | case INTSXP: { 63 | int i, j, idx = 0; 64 | const int *restrict pvec = INTEGER(vec); 65 | int min_value = pvec[0]; 66 | if (vhasna) { 67 | for (i = 0; i < len0; ++i) { 68 | pans[i] = i; 69 | if (pvec[i] <= min_value || pvec[i] == NA_INTEGER) { 70 | min_value = pvec[i]; 71 | idx = i; 72 | } 73 | } 74 | for (i = len0; i < len1; ++i) { 75 | if (pvec[i] == NA_INTEGER) { 76 | continue; 77 | } 78 | if (pvec[i] > min_value) { 79 | min_value = pvec[i]; 80 | pans[idx] = i; 81 | for (j = 0; j pvec[pans[j]] || (min_value == pvec[pans[j]] && pans[idx] < pans[j])) || pvec[pans[j]] == NA_INTEGER) { 83 | min_value = pvec[pans[j]]; 84 | idx = j; 85 | } 86 | } 87 | } 88 | } 89 | for (i = 0; i < len0; ++i) { 90 | tmp = pans[i]; 91 | for (j = i; j > 0 && (pvec[tmp] > pvec[pans[j-1]] || (pvec[tmp] == pvec[pans[j-1]] && tmp < pans[j-1])); --j) { 92 | pans[j] = pans[j-1]; 93 | } 94 | pans[j] = tmp; 95 | } 96 | for (i =0; i < len0; ++i) { 97 | pans[i]++; 98 | } 99 | } else { 100 | for (i = 0; i < len0; ++i) { 101 | pans[i] = i; 102 | if (pvec[i] <= min_value) { 103 | min_value = pvec[i]; 104 | idx = i; 105 | } 106 | } 107 | for (i = len0; i < len1; ++i) { 108 | if (pvec[i] > min_value) { 109 | min_value = pvec[i]; 110 | pans[idx] = i; 111 | for (j = 0; j pvec[pans[j]] || (min_value == pvec[pans[j]] && pans[idx] < pans[j])) { 113 | min_value = pvec[pans[j]]; 114 | idx = j; 115 | } 116 | } 117 | } 118 | } 119 | for (i = 0; i < len0; ++i) { 120 | tmp = pans[i]; 121 | for (j = i; j > 0 && (pvec[tmp] > pvec[pans[j-1]] || (pvec[tmp] == pvec[pans[j-1]] && tmp < pans[j-1])); --j) { 122 | pans[j] = pans[j-1]; 123 | } 124 | pans[j] = tmp; 125 | } 126 | for (i =0; i < len0; ++i) { 127 | pans[i]++; 128 | } 129 | } 130 | } break; 131 | case REALSXP: { 132 | int i, j, idx = 0; 133 | const double *restrict pvec = REAL(vec); 134 | double min_value = pvec[0]; 135 | if (vhasna) { 136 | for (i = 0; i < len0; ++i) { 137 | pans[i] = i; 138 | if (pvec[i] <= min_value || ISNAN(pvec[i])) { 139 | min_value = pvec[i]; 140 | idx = i; 141 | } 142 | } 143 | for (i = len0; i < len1; ++i) { 144 | if (ISNAN(pvec[i])) { 145 | continue; 146 | } 147 | if (pvec[i] > min_value || ISNAN(min_value)) { 148 | min_value = pvec[i]; 149 | pans[idx] = i; 150 | for (j = 0; j pvec[pans[j]] || (min_value == pvec[pans[j]] && pans[idx] < pans[j])) || ISNAN(pvec[pans[j]])) { 152 | min_value = pvec[pans[j]]; 153 | idx = j; 154 | } 155 | } 156 | } 157 | } 158 | for (i = 0; i < len0; ++i) { 159 | tmp = pans[i]; 160 | for (j = i; j > 0 && (pvec[tmp] > pvec[pans[j-1]] || (pvec[tmp] == pvec[pans[j-1]] && tmp < pans[j-1]) || (!ISNAN(pvec[tmp]) && ISNAN(pvec[pans[j-1]]))); --j) { 161 | pans[j] = pans[j-1]; 162 | } 163 | pans[j] = tmp; 164 | } 165 | for (i =0; i < len0; ++i) { 166 | pans[i]++; 167 | } 168 | } else { 169 | for (i = 0; i < len0; ++i) { 170 | pans[i] = i; 171 | if (pvec[i] <= min_value) { 172 | min_value = pvec[i]; 173 | idx = i; 174 | } 175 | } 176 | for (i = len0; i < len1; ++i) { 177 | if (pvec[i] > min_value) { 178 | min_value = pvec[i]; 179 | pans[idx] = i; 180 | for (j = 0; j pvec[pans[j]] || (min_value == pvec[pans[j]] && pans[idx] < pans[j])) { 182 | min_value = pvec[pans[j]]; 183 | idx = j; 184 | } 185 | } 186 | } 187 | } 188 | for (i = 0; i < len0; ++i) { 189 | tmp = pans[i]; 190 | for (j = i; j > 0 && (pvec[tmp] > pvec[pans[j-1]] || (pvec[tmp] == pvec[pans[j-1]] && tmp < pans[j-1])); --j) { 191 | pans[j] = pans[j-1]; 192 | } 193 | pans[j] = tmp; 194 | } 195 | for (i =0; i < len0; ++i) { 196 | pans[i]++; 197 | } 198 | } 199 | } break; 200 | default: 201 | error("Type %s is not supported.", type2char(tvec)); 202 | } 203 | } else { 204 | switch(tvec) { 205 | case INTSXP: { 206 | int i, j, idx = 0; 207 | const int *restrict pvec = INTEGER(vec); 208 | int min_value = pvec[0]; 209 | if (vhasna) { 210 | for (i = 0; i < len0; ++i) { 211 | pans[i] = i; 212 | if ((pvec[i] >= min_value && min_value != NA_INTEGER) || pvec[i] == NA_INTEGER) { 213 | min_value = pvec[i]; 214 | idx = i; 215 | } 216 | } 217 | for (i = len0; i < len1; ++i) { 218 | if (pvec[i] == NA_INTEGER) { 219 | continue; 220 | } 221 | if (pvec[i] < min_value || min_value == NA_INTEGER) { 222 | min_value = pvec[i]; 223 | pans[idx] = i; 224 | for (j = 0; j 0 && (pvec[tmp] < pvec[pans[j-1]] || (pvec[tmp] == pvec[pans[j-1]] && tmp < pans[j-1]) || pvec[pans[j-1]] == NA_INTEGER); --j) { 238 | pans[j] = pans[j-1]; 239 | } 240 | pans[j] = tmp; 241 | } 242 | for (i =0; i < len0; ++i) { 243 | pans[i]++; 244 | } 245 | } else { 246 | for (i = 0; i < len0; ++i) { 247 | pans[i] = i; 248 | if (pvec[i] >= min_value) { 249 | min_value = pvec[i]; 250 | idx = i; 251 | } 252 | } 253 | for (i = len0; i < len1; ++i) { 254 | if (pvec[i] < min_value) { 255 | min_value = pvec[i]; 256 | pans[idx] = i; 257 | for (j = 0; j 0 && (pvec[tmp] < pvec[pans[j-1]] || (pvec[tmp] == pvec[pans[j-1]] && tmp < pans[j-1])); --j) { 268 | pans[j] = pans[j-1]; 269 | } 270 | pans[j] = tmp; 271 | } 272 | for (i =0; i < len0; ++i) { 273 | pans[i]++; 274 | } 275 | } 276 | } break; 277 | case REALSXP: { 278 | int i, j, idx = 0; 279 | const double *restrict pvec = REAL(vec); 280 | double min_value = pvec[0]; 281 | if (vhasna) { 282 | for (i = 0; i < len0; ++i) { 283 | pans[i] = i; 284 | if (pvec[i] >= min_value || ISNAN(pvec[i])) { 285 | min_value = pvec[i]; 286 | idx = i; 287 | } 288 | } 289 | for (i = len0; i < len1; ++i) { 290 | if (ISNAN(pvec[i])) { 291 | continue; 292 | } 293 | if (pvec[i] < min_value || ISNAN(min_value)) { 294 | min_value = pvec[i]; 295 | pans[idx] = i; 296 | for (j = 0; j 0 && (pvec[tmp] < pvec[pans[j-1]] || (pvec[tmp] == pvec[pans[j-1]] && tmp < pans[j-1]) || (!ISNAN(pvec[tmp]) && ISNAN(pvec[pans[j-1]]))); --j) { 307 | pans[j] = pans[j-1]; 308 | } 309 | pans[j] = tmp; 310 | } 311 | for (i =0; i < len0; ++i) { 312 | pans[i]++; 313 | } 314 | } else { 315 | for (i = 0; i < len0; ++i) { 316 | pans[i] = i; 317 | if (pvec[i] >= min_value) { 318 | min_value = pvec[i]; 319 | idx = i; 320 | } 321 | } 322 | for (i = len0; i < len1; ++i) { 323 | if (pvec[i] < min_value) { 324 | min_value = pvec[i]; 325 | pans[idx] = i; 326 | for (j = 0; j 0 && (pvec[tmp] < pvec[pans[j-1]] || (pvec[tmp] == pvec[pans[j-1]] && tmp < pans[j-1])); --j) { 337 | pans[j] = pans[j-1]; 338 | } 339 | pans[j] = tmp; 340 | } 341 | for (i =0; i < len0; ++i) { 342 | pans[i]++; 343 | } 344 | } 345 | } break; 346 | default: 347 | error("Type %s is not supported.", type2char(tvec)); 348 | } 349 | } 350 | UNPROTECT(1); 351 | return ans; 352 | } 353 | -------------------------------------------------------------------------------- /src/utils.c: -------------------------------------------------------------------------------- 1 | /* 2 | * kit : Useful R Functions Implemented in C 3 | * Copyright (C) 2020-2025 Morgan Jacob 4 | * 5 | * This program is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU General Public License 16 | * along with this program. If not, see . 17 | */ 18 | 19 | #include "kit.h" 20 | 21 | SEXP ompEnabledR(void) { 22 | return omp_enabled ? ScalarLogical(TRUE) : ScalarLogical(FALSE); 23 | } 24 | 25 | SEXP setlevelsR(SEXP x, SEXP old_lvl, SEXP new_lvl, SEXP skip_absent) { 26 | if (!IS_BOOL(skip_absent)) { 27 | error("Argument 'skip_absent' must be TRUE or FALSE and length 1."); 28 | } 29 | if (any_duplicated(old_lvl, FALSE)) { 30 | error("'old' has duplicated value. Please make sure no duplicated values are introduced."); 31 | } 32 | if (any_duplicated(new_lvl, FALSE)) { 33 | error("'new' has duplicated value. Please make sure no duplicated values are introduced."); 34 | } 35 | if (!isFactor(x)) { 36 | error("'setlevels' must be passed a factor."); 37 | } 38 | if (UTYPEOF(old_lvl) != STRSXP) { 39 | error("Type of 'old' must be character."); 40 | } 41 | if (UTYPEOF(new_lvl) != STRSXP) { 42 | error("Type of 'new' must be character."); 43 | } 44 | const R_xlen_t nlvl = xlength(old_lvl); 45 | if (nlvl != xlength(new_lvl)) { 46 | error("'old' and 'new' are not the same length."); 47 | } 48 | const bool absent = !LOGICAL(skip_absent)[0]; 49 | SEXP xchar = PROTECT(getAttrib(x, R_LevelsSymbol)); 50 | const R_xlen_t nx = xlength(xchar); 51 | for (ssize_t j=0; j INT_MAX ? ScalarReal((double)cnt) : ScalarInteger((int)cnt); 140 | } 141 | 142 | SEXP countNAR(SEXP x) { 143 | const R_xlen_t len_x = xlength(x); 144 | SEXPTYPE tx = UTYPEOF(x); 145 | R_xlen_t cnt = 0; 146 | switch(tx) { 147 | case NILSXP: break; 148 | case LGLSXP: { 149 | const int *restrict px = LOGICAL(x); 150 | for (ssize_t i=0; i INT_MAX ? ScalarReal((double)cnt) : ScalarInteger((int)cnt); 201 | } 202 | 203 | SEXP subSetRowDataFrame(SEXP df, SEXP rws) { 204 | const SEXP *restrict pdf = SEXPPTR_RO(df); 205 | const int *restrict prws = INTEGER(rws); 206 | const R_xlen_t len_df = xlength(df); 207 | const R_xlen_t len_rws = xlength(rws); 208 | SEXP dfo = PROTECT(allocVector(VECSXP, len_df)); 209 | DUPLICATE_ATTRIB(dfo, df); 210 | setAttrib(dfo, R_NamesSymbol, PROTECT(getAttrib(df, R_NamesSymbol))); 211 | SEXP rownam = PROTECT(allocVector(INTSXP, 2)); 212 | INTEGER(rownam)[0] = NA_INTEGER; 213 | INTEGER(rownam)[1] = -(int)len_rws; 214 | setAttrib(dfo, R_RowNamesSymbol, rownam); 215 | for (R_xlen_t i = 0; i < len_df; ++i) { 216 | switch(UTYPEOF(pdf[i])) { 217 | case LGLSXP : { 218 | const int *restrict ptmp = LOGICAL(pdf[i]); 219 | SEXP TYPECOL = PROTECT(allocVector(LGLSXP, len_rws)); 220 | int *restrict pc = LOGICAL(TYPECOL); 221 | for (R_xlen_t j = 0; j < len_rws; ++j) { 222 | pc[j] = ptmp[prws[j]]; 223 | } 224 | copyMostAttrib(pdf[i], TYPECOL); 225 | SET_VECTOR_ELT(dfo, i, TYPECOL); 226 | UNPROTECT(1); 227 | } break; 228 | case INTSXP : { 229 | const int *restrict ptmp = INTEGER(pdf[i]); 230 | SEXP TYPECOL = PROTECT(allocVector(INTSXP, len_rws)); 231 | int *restrict pc = INTEGER(TYPECOL); 232 | for (R_xlen_t j = 0; j < len_rws; ++j) { 233 | pc[j] = ptmp[prws[j]]; 234 | } 235 | copyMostAttrib(pdf[i], TYPECOL); 236 | SET_VECTOR_ELT(dfo, i, TYPECOL); 237 | UNPROTECT(1); 238 | } break; 239 | case REALSXP : { 240 | const double *restrict ptmp = REAL(pdf[i]); 241 | SEXP TYPECOL = PROTECT(allocVector(REALSXP, len_rws)); 242 | double *restrict pc = REAL(TYPECOL); 243 | for (R_xlen_t j = 0; j < len_rws; ++j) { 244 | pc[j] = ptmp[prws[j]]; 245 | } 246 | copyMostAttrib(pdf[i], TYPECOL); 247 | SET_VECTOR_ELT(dfo, i, TYPECOL); 248 | UNPROTECT(1); 249 | } break; 250 | case CPLXSXP : { 251 | const Rcomplex *restrict ptmp = COMPLEX(pdf[i]); 252 | SEXP TYPECOL = PROTECT(allocVector(CPLXSXP, len_rws)); 253 | Rcomplex *restrict pc = COMPLEX(TYPECOL); 254 | for (R_xlen_t j = 0; j < len_rws; ++j) { 255 | pc[j] = ptmp[prws[j]]; 256 | } 257 | copyMostAttrib(pdf[i], TYPECOL); 258 | SET_VECTOR_ELT(dfo, i, TYPECOL); 259 | UNPROTECT(1); 260 | } break; 261 | case STRSXP : { 262 | const SEXP *restrict ptmp = STRING_PTR_RO(pdf[i]); 263 | SEXP TYPECOL = PROTECT(allocVector(STRSXP, len_rws)); 264 | for (R_xlen_t j = 0; j < len_rws; ++j) { 265 | SET_STRING_ELT(TYPECOL,j, ptmp[prws[j]]); 266 | } 267 | copyMostAttrib(pdf[i], TYPECOL); 268 | SET_VECTOR_ELT(dfo, i, TYPECOL); 269 | UNPROTECT(1); 270 | } break; 271 | default: // # nocov 272 | error("Type %s is not supported.", type2char(UTYPEOF(pdf[i]))); // # nocov 273 | } 274 | } 275 | UNPROTECT(3); 276 | return dfo; 277 | } 278 | 279 | SEXP subSetRowMatrix(SEXP mat, SEXP rws) { 280 | const int *restrict prws = INTEGER(rws); 281 | const int col_mat = ncols(mat); 282 | const int row_mat = nrows(mat); 283 | const int len_rws = length(rws); 284 | SEXP mato; 285 | switch(UTYPEOF(mat)) { 286 | case LGLSXP : { 287 | mato = PROTECT(allocMatrix(LGLSXP, len_rws, col_mat)); 288 | const int *restrict pmat = LOGICAL(mat); 289 | int *restrict pmato = LOGICAL(mato); 290 | for (int i = 0; i < col_mat; ++i) { 291 | for (int j = 0; j < len_rws; ++j) { 292 | pmato[j+len_rws*i] = pmat[prws[j]+row_mat*i]; 293 | } 294 | } 295 | } break; 296 | case INTSXP : { 297 | mato = PROTECT(allocMatrix(INTSXP, len_rws, col_mat)); 298 | const int *restrict pmat = INTEGER(mat); 299 | int *restrict pmato = INTEGER(mato); 300 | for (int i = 0; i < col_mat; ++i) { 301 | for (int j = 0; j < len_rws; ++j) { 302 | pmato[j+len_rws*i] = pmat[prws[j]+row_mat*i]; 303 | } 304 | } 305 | } break; 306 | case REALSXP : { 307 | mato = PROTECT(allocMatrix(REALSXP, len_rws, col_mat)); 308 | const double *restrict pmat = REAL(mat); 309 | double *restrict pmato = REAL(mato); 310 | for (int i = 0; i < col_mat; ++i) { 311 | for (int j = 0; j < len_rws; ++j) { 312 | pmato[j+len_rws*i] = pmat[prws[j]+row_mat*i]; 313 | } 314 | } 315 | } break; 316 | case CPLXSXP : { 317 | mato = PROTECT(allocMatrix(CPLXSXP, len_rws, col_mat)); 318 | const Rcomplex *restrict pmat = COMPLEX(mat); 319 | Rcomplex *restrict pmato = COMPLEX(mato); 320 | for (int i = 0; i < col_mat; ++i) { 321 | for (int j = 0; j < len_rws; ++j) { 322 | pmato[j+len_rws*i] = pmat[prws[j]+row_mat*i]; 323 | } 324 | } 325 | } break; 326 | case STRSXP : { 327 | mato = PROTECT(allocMatrix(STRSXP, len_rws, col_mat)); 328 | const SEXP *restrict pmat = STRING_PTR_RO(mat); 329 | for (int i = 0; i < col_mat; ++i) { 330 | for (int j = 0; j < len_rws; ++j) { 331 | SET_STRING_ELT(mato, j+len_rws*i, pmat[prws[j]+row_mat*i]); 332 | } 333 | } 334 | } break; 335 | default: // # nocov 336 | error("Type %s is not supported.", type2char(UTYPEOF(mat))); // # nocov 337 | } 338 | UNPROTECT(1); 339 | return mato; 340 | } 341 | 342 | // No checks in this functions (not used for now) 343 | SEXP subSetColDataFrame(SEXP df, SEXP str) { // # nocov start 344 | SEXP nm = PROTECT(getAttrib(df, R_NamesSymbol)); 345 | const int len = length(str); 346 | const int cnm = length(nm); 347 | if (len == 1) { 348 | int i = 0; 349 | const SEXP pstr = STRING_ELT(str, 0); 350 | for (; i < cnm; ++i) { 351 | if (STRING_ELT(nm, i) == pstr) { 352 | break; 353 | } 354 | } 355 | if (i == cnm) { 356 | error("Column '%s' is not in data.frame.", RCHAR(str, 0)); 357 | } 358 | UNPROTECT(1); 359 | return VECTOR_ELT(df, i); 360 | } 361 | SEXP dfo = PROTECT(allocVector(VECSXP, len)); 362 | int ct = 0; 363 | for (int i = 0; ct < len; ++i) { 364 | if (i == cnm) { 365 | error("Column '%s' is not in data.frame.", RCHAR(str, ct)); 366 | } 367 | if (STRING_ELT(nm, i) == STRING_ELT(str, ct)) { 368 | SET_VECTOR_ELT(dfo, ct++, VECTOR_ELT(df, i)); 369 | i = -1; 370 | } 371 | } 372 | DUPLICATE_ATTRIB(dfo, df); 373 | namesgets(dfo, str); 374 | SEXP rownam = PROTECT(allocVector(INTSXP, 2)); 375 | INTEGER(rownam)[0] = NA_INTEGER; 376 | INTEGER(rownam)[1] = -(int)length(VECTOR_ELT(df, 0)); 377 | setAttrib(dfo, R_RowNamesSymbol, rownam); 378 | UNPROTECT(3); 379 | return dfo; 380 | } // # nocov end 381 | 382 | // No checks in this functions (subset just one column) (not used for now) 383 | SEXP subSetColMatrix(SEXP x, R_xlen_t idx) { // # nocov start 384 | const R_xlen_t len_i = nrows(x); 385 | SEXPTYPE xt = UTYPEOF(x); 386 | SEXP ans = PROTECT(allocVector(xt, len_i)); 387 | const R_xlen_t pidx = idx * len_i; 388 | switch(xt) { 389 | case LGLSXP : { 390 | memcpy(LOGICAL(ans), LOGICAL(x)+pidx, (unsigned)len_i*sizeof(*LOGICAL(ans))); 391 | } break; 392 | case INTSXP : { 393 | memcpy(INTEGER(ans), INTEGER(x)+pidx, (unsigned)len_i*sizeof(int)); 394 | } break; 395 | case REALSXP : { 396 | memcpy(REAL(ans), REAL(x)+pidx, (unsigned)len_i*sizeof(double)); 397 | } break; 398 | case CPLXSXP : { 399 | memcpy(COMPLEX(ans), COMPLEX(x)+pidx, (unsigned)len_i*sizeof(Rcomplex)); 400 | } break; 401 | case STRSXP : { 402 | const SEXP *restrict px = STRING_PTR_RO(x); 403 | for (R_xlen_t i = 0; i < len_i; ++i) { 404 | SET_STRING_ELT(ans, i, px[i + pidx]); 405 | } 406 | } break; 407 | default: 408 | error("Matrix of type %s are not supported.", type2char(xt)); 409 | } 410 | UNPROTECT(1); 411 | return ans; 412 | } // # nocov end 413 | 414 | // This function does not do any check 415 | SEXP addColToDataFrame(SEXP df, SEXP mcol, SEXP coln) { 416 | const R_xlen_t len_df = xlength(df); 417 | const R_xlen_t len_col = xlength(mcol); 418 | SEXP dfo = R_NilValue; 419 | if (UTYPEOF(mcol) != VECSXP) { 420 | dfo = PROTECT(allocVector(VECSXP, len_df + 1)); 421 | for (int i = 0; i < len_df; ++i) { 422 | SET_VECTOR_ELT(dfo, i, VECTOR_ELT(df, i)); 423 | } 424 | SET_VECTOR_ELT(dfo, len_df, mcol); 425 | classgets(dfo, STR_DF); 426 | SEXP nam = PROTECT(allocVector(STRSXP, len_df + 1)); 427 | SEXP oldnam = PROTECT(getAttrib(df, R_NamesSymbol)); 428 | for (int i = 0; i < len_df; ++i) { 429 | SET_STRING_ELT(nam, i, STRING_ELT(oldnam, i)); 430 | } 431 | SET_STRING_ELT(nam, len_df, STRING_ELT(coln, 0)); 432 | namesgets(dfo, nam); 433 | SEXP rownam = PROTECT(allocVector(INTSXP, 2)); 434 | INTEGER(rownam)[0] = NA_INTEGER; 435 | INTEGER(rownam)[1] = -(int)len_col; 436 | setAttrib(dfo, R_RowNamesSymbol, rownam); 437 | } else { 438 | const R_xlen_t len_row = xlength(VECTOR_ELT(df, 0));// # nocov start 439 | dfo = PROTECT(allocVector(VECSXP, len_df + len_col)); 440 | for (int i = 0; i < len_df; ++i) { 441 | SET_VECTOR_ELT(dfo, i, VECTOR_ELT(df, i)); 442 | } 443 | for (int i = 0; i < len_col; ++i) { 444 | SET_VECTOR_ELT(dfo, len_df + i, VECTOR_ELT(mcol, i)); 445 | } 446 | classgets(dfo, STR_DF); 447 | SEXP nam = PROTECT(allocVector(STRSXP, len_df + len_col)); 448 | SEXP oldnam = PROTECT(getAttrib(df, R_NamesSymbol)); 449 | for (int i = 0; i < len_df; ++i) { 450 | SET_STRING_ELT(nam, i, STRING_ELT(oldnam, i)); 451 | } 452 | for (int i = 0; i < len_col; ++i) { 453 | SET_STRING_ELT(nam, len_df + i, STRING_ELT(coln, i)); 454 | } 455 | namesgets(dfo, nam); 456 | SEXP rownam = PROTECT(allocVector(INTSXP, 2)); 457 | INTEGER(rownam)[0] = NA_INTEGER; 458 | INTEGER(rownam)[1] = -(int)len_row; 459 | setAttrib(dfo, R_RowNamesSymbol, rownam);// # nocov end 460 | } 461 | UNPROTECT(4); 462 | return dfo; 463 | } 464 | 465 | // Try to improve this by removing element in the loop or initilising at 0 466 | SEXP countOccurR(SEXP x) { // can be improved for factors 467 | if (isDataFrame(x)) { 468 | SEXP ans = PROTECT(countOccurDataFrameR(x)); 469 | UNPROTECT(1); 470 | return ans; 471 | } 472 | if (isArray(x)) { 473 | error("Array are not yet supported."); 474 | } 475 | const R_xlen_t n = xlength(x); 476 | const SEXPTYPE tx = UTYPEOF(x); 477 | int K; 478 | size_t M; 479 | if (tx == INTSXP || tx == STRSXP || tx == REALSXP || tx == CPLXSXP ) { 480 | if(n >= 1073741824) { 481 | error("Length of 'x' is too large. (Long vector not supported yet)"); // # nocov 482 | } 483 | const size_t n2 = 2U * (size_t) n; 484 | M = 256; 485 | K = 8; 486 | while (M < n2) { 487 | M *= 2; 488 | K++; 489 | } 490 | } else if (tx == LGLSXP) { 491 | M = 4; 492 | K = 2; 493 | } else { 494 | error("Type %s is not supported.", type2char(tx)); 495 | } 496 | R_xlen_t count = 0; 497 | int *restrict h = (int*)calloc(M, sizeof(int)); 498 | SEXP ans_ct = PROTECT(allocVector(INTSXP, n)); 499 | int *restrict pans_l = (int*)calloc(n, sizeof(int)); 500 | int *restrict pans_ct = INTEGER(ans_ct); 501 | SEXP ans_f = PROTECT(allocVector(VECSXP, 2)); 502 | switch (tx) { 503 | case LGLSXP: { 504 | const int *restrict px = LOGICAL(x); 505 | size_t id = 0; 506 | for (int i = 0; i < n; ++i) { 507 | id = (px[i] == NA_LOGICAL) ? 2U : (size_t) px[i]; 508 | while (h[id]) { 509 | if (px[h[id]-1]==px[i]) { 510 | pans_ct[h[id]-1]++; 511 | goto lbl; 512 | } 513 | id++; id %= M; // # nocov 514 | } 515 | h[id] = (int) i + 1; 516 | pans_l[i]++; 517 | pans_ct[i] = 1; 518 | count++; 519 | lbl:; 520 | } 521 | SET_VECTOR_ELT(ans_f, 0, PROTECT(allocVector(tx, count))); 522 | SET_VECTOR_ELT(ans_f, 1, PROTECT(allocVector(INTSXP, count))); 523 | R_xlen_t ct = 0; 524 | int *restrict py = LOGICAL(PTR_ETL(ans_f, 0)); 525 | int *restrict pw = INTEGER(PTR_ETL(ans_f, 1)); 526 | for (int i = 0; ct < count; ++i) { 527 | if (pans_l[i]) { 528 | pw[ct] = pans_ct[i]; 529 | py[ct++] = px[i]; 530 | } 531 | } 532 | } break; 533 | case INTSXP: { // think about factor and levels number 534 | const int *restrict px = INTEGER(x); 535 | size_t id = 0; 536 | for (int i = 0; i < n; ++i) { 537 | id = (px[i] == NA_INTEGER) ? 0 : HASH(px[i], K); 538 | while (h[id]) { 539 | if (px[h[id]-1]==px[i]) { 540 | pans_ct[h[id]-1]++; 541 | goto ibl; 542 | } 543 | id++; id %= M; // # nocov 544 | } 545 | h[id] = (int) i + 1; 546 | pans_l[i]++; 547 | pans_ct[i] = 1; 548 | count++; 549 | ibl:; 550 | } 551 | SET_VECTOR_ELT(ans_f, 0, PROTECT(allocVector(tx, count))); 552 | SET_VECTOR_ELT(ans_f, 1, PROTECT(allocVector(INTSXP, count))); 553 | R_xlen_t ct = 0; 554 | int *restrict py = INTEGER(PTR_ETL(ans_f, 0)); 555 | int *restrict pw = INTEGER(PTR_ETL(ans_f, 1)); 556 | for (int i = 0; ct < count; ++i) { 557 | if (pans_l[i]) { 558 | pw[ct] = pans_ct[i]; 559 | py[ct++] = px[i]; 560 | } 561 | } 562 | } break; 563 | case REALSXP: { 564 | const double *restrict px = REAL(x); 565 | size_t id = 0; 566 | union uno tpv; 567 | for (int i = 0; i < n; ++i) { 568 | tpv.d = R_IsNA(px[i]) ? NA_REAL : (R_IsNaN(px[i]) ? R_NaN :px[i]); 569 | id = HASH(tpv.u[0] + tpv.u[1], K); 570 | while (h[id]) { 571 | if (REQUAL(px[h[id] - 1], px[i])) { 572 | pans_ct[h[id]-1]++; 573 | goto rbl; 574 | } 575 | id++; id %= M; // # nocov 576 | } 577 | h[id] = (int) i + 1; 578 | pans_l[i]++; 579 | pans_ct[i] = 1; 580 | count++; 581 | rbl:; 582 | } 583 | SET_VECTOR_ELT(ans_f, 0, PROTECT(allocVector(tx, count))); 584 | SET_VECTOR_ELT(ans_f, 1, PROTECT(allocVector(INTSXP, count))); 585 | R_xlen_t ct = 0; 586 | double *restrict py = REAL(PTR_ETL(ans_f, 0)); 587 | int *restrict pw = INTEGER(PTR_ETL(ans_f, 1)); 588 | for (int i = 0; ct < count; ++i) { 589 | if (pans_l[i]) { 590 | pw[ct] = pans_ct[i]; 591 | py[ct++] = px[i]; 592 | } 593 | } 594 | } break; 595 | case CPLXSXP: { 596 | const Rcomplex *restrict px = COMPLEX(x); 597 | size_t id = 0; 598 | unsigned int u; 599 | union uno tpv; 600 | Rcomplex tmp; 601 | for (int i = 0; i < n; ++i) { 602 | tmp.r = (px[i].r == 0.0) ? 0.0 : px[i].r; 603 | tmp.i = (px[i].i == 0.0) ? 0.0 : px[i].i; 604 | if (C_IsNA(tmp)) { 605 | tmp.r = tmp.i = NA_REAL; 606 | } else if (C_IsNaN(tmp)) { 607 | tmp.r = tmp.i = R_NaN; 608 | } 609 | tpv.d = tmp.r; 610 | u = tpv.u[0] ^ tpv.u[1]; 611 | tpv.d = tmp.i; 612 | u ^= tpv.u[0] ^ tpv.u[1]; 613 | id = HASH(u, K); 614 | while (h[id]) { 615 | if (CEQUAL(px[h[id]-1],px[i])) { 616 | pans_ct[h[id]-1]++; 617 | goto cbl; 618 | } 619 | id++; id %= M; 620 | } 621 | h[id] = (int) i + 1; 622 | pans_l[i]++; 623 | pans_ct[i] = 1; 624 | count++; 625 | cbl:; 626 | } 627 | SET_VECTOR_ELT(ans_f, 0, PROTECT(allocVector(tx, count))); 628 | SET_VECTOR_ELT(ans_f, 1, PROTECT(allocVector(INTSXP, count))); 629 | R_xlen_t ct = 0; 630 | Rcomplex *restrict py = COMPLEX(PTR_ETL(ans_f, 0)); 631 | int *restrict pw = INTEGER(PTR_ETL(ans_f, 1)); 632 | for (int i = 0; ct < count; ++i) { 633 | if (pans_l[i]) { 634 | pw[ct] = pans_ct[i]; 635 | py[ct++] = px[i]; 636 | } 637 | } 638 | } break; 639 | case STRSXP: { 640 | const SEXP *restrict px = STRING_PTR_RO(x); 641 | size_t id = 0; 642 | for (int i = 0; i < n; ++i) { 643 | id = HASH(((intptr_t) px[i] & 0xffffffff), K); 644 | while (h[id]) { 645 | if (px[h[id]-1]==px[i]) { 646 | pans_ct[h[id]-1]++; 647 | goto sbl; 648 | } 649 | id++; id %= M; // # nocov 650 | } 651 | h[id] = (int) i + 1; 652 | pans_l[i]++; 653 | pans_ct[i] = 1; 654 | count++; 655 | sbl:; 656 | } 657 | SET_VECTOR_ELT(ans_f, 0, PROTECT(allocVector(tx, count))); 658 | SET_VECTOR_ELT(ans_f, 1, PROTECT(allocVector(INTSXP, count))); 659 | R_xlen_t ct = 0; 660 | int *restrict pw = INTEGER(PTR_ETL(ans_f, 1)); 661 | SEXP p0 = PTR_ETL(ans_f, 0); 662 | for (int i = 0; ct < count; ++i) { 663 | if (pans_l[i]) { 664 | pw[ct] = pans_ct[i]; 665 | SET_STRING_ELT(p0, ct++, px[i]); 666 | } 667 | } 668 | } break; 669 | } 670 | free(pans_l); 671 | free(h); 672 | copyMostAttrib(x, PTR_ETL(ans_f, 0)); 673 | classgets(ans_f, STR_DF); 674 | SEXP nam = PROTECT(allocVector(STRSXP, 2)); 675 | SET_STRING_ELT(nam, 0, mkChar("Variable")); 676 | SET_STRING_ELT(nam, 1, mkChar("Count")); 677 | namesgets(ans_f, nam); 678 | SEXP rownam = PROTECT(allocVector(INTSXP, 2)); 679 | INTEGER(rownam)[0] = NA_INTEGER; 680 | INTEGER(rownam)[1] = -(int)count; 681 | setAttrib(ans_f, R_RowNamesSymbol, rownam); 682 | UNPROTECT(6); 683 | return ans_f; 684 | } 685 | 686 | SEXP countOccurDataFrameR(SEXP x) { // move to matrix if possible (change hash algo) 687 | const SEXP *restrict px = SEXPPTR_RO(x); 688 | const R_xlen_t len_x = xlength(x); 689 | const R_xlen_t len_i = xlength(px[0]); 690 | SEXP mlv = PROTECT(allocMatrix(INTSXP, (int)len_i, (int)len_x)); 691 | for (R_xlen_t i = 0; i < len_x; ++i) { 692 | memcpy(INTEGER(mlv)+i*len_i, INTEGER(PROTECT(dupVecIndexOnlyR(px[i]))), (unsigned)len_i*sizeof(int)); 693 | } 694 | UNPROTECT((int)len_x); 695 | const size_t n2 = 2U * (size_t) len_i; 696 | size_t M = 256; 697 | int K = 8; 698 | while (M < n2) { 699 | M *= 2; 700 | K++; 701 | } 702 | R_xlen_t count = 0; 703 | int *restrict h = (int*)calloc(M, sizeof(int)); 704 | SEXP ans_ct = PROTECT(allocVector(INTSXP, len_i)); 705 | int *restrict pans_l = (int*)calloc(len_i, sizeof(int)); 706 | int *restrict pans_ct = INTEGER(ans_ct); 707 | const int *restrict v = INTEGER(mlv); 708 | size_t id = 0; 709 | for (R_xlen_t i = 0; i < len_i; ++i) { 710 | R_xlen_t key = 0; 711 | for (R_xlen_t j = 0; j < len_x; ++j) { 712 | key ^= HASH(v[i+j*len_i],K)*97*(j+1); 713 | } 714 | id = HASH(key, K); 715 | while (h[id]) { 716 | for (R_xlen_t j = 0; j < len_x; ++j) { 717 | if (v[h[id]-1+j*len_i] != v[i+j*len_i]) { 718 | goto label1; 719 | } 720 | } 721 | pans_ct[h[id]-1]++; 722 | goto label2; 723 | label1:; 724 | id++; id %= M; 725 | } 726 | h[id] = (int) i + 1; 727 | pans_l[i]++; 728 | pans_ct[i] = 1; 729 | count++; 730 | label2:; 731 | } 732 | free(h); 733 | SEXP indx = PROTECT(allocVector(INTSXP, count)); 734 | SEXP cntr = PROTECT(allocVector(INTSXP, count)); 735 | R_xlen_t ct = 0; 736 | int *restrict py = INTEGER(indx); 737 | int *restrict pw = INTEGER(cntr); 738 | for (int i = 0; ct < count; ++i) { 739 | if (pans_l[i]) { 740 | pw[ct] = pans_ct[i]; 741 | py[ct++] = i; 742 | } 743 | } 744 | free(pans_l); 745 | SEXP output = PROTECT(addColToDataFrame(PROTECT(subSetRowDataFrame(x, indx)),cntr, PROTECT(mkString("Count")))); 746 | UNPROTECT(7); 747 | return output; 748 | } 749 | 750 | // All columns must be of the same type 751 | SEXP dfToMatrix(SEXP df) { 752 | const SEXP *restrict px = SEXPPTR_RO(df); 753 | const R_xlen_t len_x = xlength(df); 754 | const R_xlen_t len_i = xlength(px[0]); 755 | const SEXPTYPE tx = UTYPEOF(px[0]); 756 | SEXP mlv = PROTECT(allocMatrix(tx, (int)len_i, (int)len_x)); 757 | switch(tx) { 758 | case LGLSXP :{ 759 | int *restrict pmlv = LOGICAL(mlv); 760 | for (int i = 0; i < len_x; ++i) { 761 | const int *restrict ppx = LOGICAL(px[i]); 762 | const int ct = i*len_i; 763 | for (int j = 0; j < len_i; ++j) { 764 | pmlv[j+ct] = ppx[j]; 765 | } 766 | } 767 | } break; 768 | case INTSXP :{ 769 | int *restrict pmlv = INTEGER(mlv); 770 | for (int i = 0; i < len_x; ++i) { 771 | const int *restrict ppx = INTEGER(px[i]); 772 | const int ct = i*len_i; 773 | for (int j = 0; j < len_i; ++j) { 774 | pmlv[j+ct] = ppx[j]; 775 | } 776 | } 777 | } break; 778 | case REALSXP :{ 779 | double *restrict pmlv = REAL(mlv); 780 | for (int i = 0; i < len_x; ++i) { 781 | const double *restrict ppx = REAL(px[i]); 782 | const int ct = i*len_i; 783 | for (int j = 0; j < len_i; ++j) { 784 | pmlv[j+ct] = ppx[j]; 785 | } 786 | } 787 | } break; 788 | case CPLXSXP :{ 789 | Rcomplex *restrict pmlv = COMPLEX(mlv); 790 | for (int i = 0; i < len_x; ++i) { 791 | const Rcomplex *restrict ppx = COMPLEX(px[i]); 792 | const int ct = i*len_i; 793 | for (int j = 0; j < len_i; ++j) { 794 | pmlv[j+ct] = ppx[j]; 795 | } 796 | } 797 | } break; 798 | case STRSXP :{ 799 | for (int i = 0; i < len_x; ++i) { 800 | const SEXP *restrict ppx = STRING_PTR_RO(px[i]); 801 | const int ct = i*len_i; 802 | for (int j = 0; j < len_i; ++j) { 803 | SET_STRING_ELT(mlv, j+ct, ppx[j]); 804 | } 805 | } 806 | } break; 807 | } 808 | UNPROTECT(1); 809 | return mlv; 810 | } 811 | 812 | bool isMixEnc(SEXP x) { 813 | const R_xlen_t len = xlength(x); 814 | const SEXP *px = STRING_PTR_RO(x); 815 | const cetype_t ces = getCharCE(px[0]); 816 | for (R_xlen_t i = 1; i < len; ++i) 817 | if(getCharCE(px[i]) != ces) 818 | return true; 819 | return false; 820 | } 821 | 822 | SEXP enc2UTF8(SEXP x) { 823 | const SEXP *px = STRING_PTR_RO(x); 824 | const R_xlen_t len = xlength(x); 825 | if (getCharCE(px[0]) != CE_UTF8) { 826 | SEXP ans = PROTECT(allocVector(STRSXP, len)); 827 | for (R_xlen_t i = 0; i < len; ++i) { 828 | SET_STRING_ELT(ans, i, mkCharCE(translateCharUTF8(px[i]), CE_UTF8)); 829 | } 830 | UNPROTECT(1); 831 | return ans; 832 | } 833 | return x; 834 | } 835 | --------------------------------------------------------------------------------