├── .Rbuildignore ├── .codecov.yml ├── .github └── workflows │ └── ci.yaml ├── .gitignore ├── ChangeLog ├── DESCRIPTION ├── NAMESPACE ├── R ├── RcppExports.R └── dtts.R ├── README.md ├── cleanup ├── dtts.utils.Rproj ├── inst ├── NEWS.Rd ├── images │ ├── align_closest.svg │ ├── align_count.svg │ └── ops.svg └── tinytest │ └── test_dtts.R ├── man ├── align.Rd ├── align_idx.Rd ├── frequency-data.table-method.Rd ├── grid_align.Rd └── ops.Rd ├── src ├── RcppExports.cpp └── align.cpp └── tests └── tinytest.R /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^.*\.Rproj$ 2 | ^\.Rproj\.user$ 3 | ^\.travis.yml$ 4 | ^tests/.*\.Rout.save 5 | ^\.codecov\.yml$ 6 | ^local 7 | ^LICENSE$ 8 | .*\.tar\.gz 9 | ^\.github 10 | -------------------------------------------------------------------------------- /.codecov.yml: -------------------------------------------------------------------------------- 1 | comment: false 2 | coverage: 3 | status: 4 | project: 5 | default: 6 | target: 70% # the (on purpose low) required coverage value 7 | threshold: 10% # the permitted delta in hitting the target 8 | patch: 9 | default: 10 | target: 0% # the (on purpose low) required coverage value 11 | -------------------------------------------------------------------------------- /.github/workflows/ci.yaml: -------------------------------------------------------------------------------- 1 | # Run CI for R using https://eddelbuettel.github.io/r-ci/ 2 | 3 | name: ci 4 | 5 | on: 6 | push: 7 | pull_request: 8 | 9 | env: 10 | USE_BSPM: "true" 11 | _R_CHECK_FORCE_SUGGESTS_: "false" 12 | 13 | jobs: 14 | ci: 15 | strategy: 16 | matrix: 17 | include: 18 | #- {os: macOS-latest} 19 | - {os: ubuntu-latest} 20 | 21 | runs-on: ${{ matrix.os }} 22 | 23 | steps: 24 | - name: Checkout 25 | uses: actions/checkout@v4 26 | 27 | - name: Setup 28 | uses: eddelbuettel/github-actions/r-ci@master 29 | 30 | - name: Dependencies 31 | run: ./run.sh install_deps 32 | 33 | - name: Test 34 | run: ./run.sh run_tests 35 | 36 | - name: Coverage 37 | if: ${{ matrix.os == 'ubuntu-latest' }} 38 | run: ./run.sh coverage 39 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .Rproj.user 2 | .Rhistory 3 | .RData 4 | .Ruserdata 5 | src/*.o 6 | src/*.so 7 | src/*.dll 8 | *~ 9 | -------------------------------------------------------------------------------- /ChangeLog: -------------------------------------------------------------------------------- 1 | 2024-12-31 Dirk Eddelbuettel 2 | 3 | * .github/workflows/ci.yaml: Simplify to r-ci with included bootstrap 4 | 5 | 2024-08-23 Dirk Eddelbuettel 6 | 7 | * DESCRIPTION (Authors@R): Added 8 | 9 | 2024-07-18 Dirk Eddelbuettel 10 | 11 | * DESCRIPTION (Version, Date): Release 0.1.3 12 | 13 | 2024-07-16 Dirk Eddelbuettel 14 | 15 | * DESCRIPTION (Version, Date): Roll minor version and date 16 | 17 | 2024-07-16 Michael Chirico 18 | 19 | * inst/tinytest/test_dtts.R: Correct tests with 1-second errors 20 | exposed by upstream improvements to all.equal.data.table() 21 | 22 | 2024-05-19 Dirk Eddelbuettel 23 | 24 | * README.md: Use tinyverse.netlify.app for dependency badge 25 | 26 | 2024-02-02 Dirk Eddelbuettel 27 | 28 | * DESCRIPTION (Version, Date): Roll minor version and date 29 | 30 | 2024-02-02 Tomas Kalibera 31 | 32 | * src/align.cpp: Ensure localtime_s is used on Windows with LLVM 33 | 34 | 2024-01-31 Dirk Eddelbuettel 35 | 36 | * DESCRIPTION (Version, Date): Release 0.1.2 37 | 38 | * src/align.cpp (align_func_period): Update signature of C-level 39 | exported data.table function to DT_subsetDT 40 | * DESCRIPTION (Imports): Set to data.table (>= 1.5.0) 41 | 42 | * .github/workflows/ci.yaml (jobs): Upate to r-ci setup action 43 | 44 | 2023-09-20 Dirk Eddelbuettel 45 | 46 | * .github/workflows/ci.yaml (jobs): Update to actions/checkout@v4 47 | 48 | 2023-08-08 Dirk Eddelbuettel 49 | 50 | * DESCRIPTION (Version, Date): Release 0.1.1 51 | 52 | 2023-05-15 Leonardo Silvestri 53 | 54 | * DESCRIPTION (Version, Date): Roll minor version and date 55 | 56 | * R/dtts.R: non-dotted names, allow arbitrary index column 57 | * man/align_idx.Rd: replaces align.idx.Rd 58 | * man/grid_align.Rd: replace grid.align.Rd 59 | * README.md: non-dotted names 60 | * inst/tinytest/test_dtts.R: non-dotted names 61 | 62 | 2023-05-08 Leonardo Silvestri 63 | 64 | * DESCRIPTION (Version, Date): Roll minor version and date 65 | 66 | * NAMESPACE: Added function 'ops' 67 | * R/dtts.R: Ditto 68 | * inst/tinytest/test_dtts.R: Ditto 69 | * man/ops.Rd: Ditto 70 | * src/align.cpp: Ditto 71 | * man/align.Rd: Trivial change on regeneration 72 | * man/align.idx.Rd: Ditto 73 | * man/frequency-data.table-method.Rd: Ditto 74 | * man/grid.align.Rd: Ditto 75 | * README.md: Added example for 'ops' 76 | 77 | 2023-04-26 Leonardo Silvestri 78 | 79 | * DESCRIPTION (Version, Date): Roll minor version and date 80 | 81 | * src/align.cpp: Fix unitialized memory read 82 | 83 | 2023-03-22 Dirk Eddelbuettel 84 | 85 | * DESCRIPTION (Versio, Date): Roll minor version and date 86 | 87 | * src/Makevars: Removed as we do not set a compilation standard 88 | 89 | 2022-08-21 Dirk Eddelbuettel 90 | 91 | * src/align.cpp (align_{,idx}_{duration,period}): Small simplification 92 | 93 | 2022-03-10 Dirk Eddelbuettel 94 | 95 | * README.md: Add badges, polish one example (thanks, @jangorecki) 96 | 97 | 2022-03-06 Dirk Eddelbuettel 98 | 99 | * DESCRIPTION (Version): Initial release 0.1.0 100 | 101 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: dtts 2 | Type: Package 3 | Title: 'data.table' Time-Series 4 | Version: 0.1.3 5 | Date: 2024-07-18 6 | Authors@R: c(person("Dirk", "Eddelbuettel", role = c("aut", "cre"), email = "edd@debian.org", 7 | comment = c(ORCID = "0000-0001-6419-907X")), 8 | person("Leonardo", "Silvestri", role="aut")) 9 | Description: High-frequency time-series support via 'nanotime' and 'data.table'. 10 | License: GPL (>= 2) 11 | Imports: nanotime, data.table (>= 1.5.0), methods, bit64, Rcpp (>= 0.11.5), RcppCCTZ (>= 0.2.0) 12 | Suggests: tinytest 13 | LinkingTo: Rcpp, RcppCCTZ, RcppDate, nanotime 14 | BugReports: https://github.com/eddelbuettel/dtts/issues 15 | RoxygenNote: 7.2.2 16 | Encoding: UTF-8 17 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | useDynLib(dtts, .registration=TRUE) 2 | import("methods") 3 | import("Rcpp") 4 | import("RcppCCTZ") 5 | import("bit64") 6 | import("nanotime") 7 | import("data.table") 8 | importFrom("utils", "tail") 9 | exportMethods("align_idx") 10 | exportMethods("align") 11 | exportMethods("grid_align") 12 | exportMethods("ops") 13 | -------------------------------------------------------------------------------- /R/RcppExports.R: -------------------------------------------------------------------------------- 1 | # Generated by using Rcpp::compileAttributes() -> do not edit by hand 2 | # Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 3 | 4 | .align_duration_cpp <- function(x, y, xdata, start, end, sopen, eopen, func) { 5 | .Call(`_dtts_align_duration`, x, y, xdata, start, end, sopen, eopen, func) 6 | } 7 | 8 | .align_period_cpp <- function(x, y, xdata, start, end, sopen, eopen, func, tz) { 9 | .Call(`_dtts_align_period`, x, y, xdata, start, end, sopen, eopen, func, tz) 10 | } 11 | 12 | .align_idx_duration_cpp <- function(x, y, start, end, sopen, eopen) { 13 | .Call(`_dtts_align_idx_duration`, x, y, start, end, sopen, eopen) 14 | } 15 | 16 | .align_idx_period_cpp <- function(x, y, start, end, sopen, eopen, tz) { 17 | .Call(`_dtts_align_idx_period`, x, y, start, end, sopen, eopen, tz) 18 | } 19 | 20 | .ops <- function(xdata, ydata, op_string) { 21 | .Call(`_dtts_ops`, xdata, ydata, op_string) 22 | } 23 | 24 | -------------------------------------------------------------------------------- /R/dtts.R: -------------------------------------------------------------------------------- 1 | 2 | align_idx_duration <- function(x, # nanotime vector 3 | y, # nanotime vector 4 | start, 5 | end, 6 | sopen = FALSE, 7 | eopen = TRUE, 8 | bypass_x_check = FALSE, 9 | bypass_y_check = FALSE) 10 | { 11 | if (missing(start) && missing(end) && missing(sopen) && missing(eopen)) { 12 | eopen = FALSE # otherwise no interval is 13 | # defined and the result is 14 | # all NA, which is likely not 15 | # what the user intended 16 | } 17 | else { 18 | if (!is.logical(sopen)) { 19 | stop("'sopen' must be a 'logical'") 20 | } 21 | if (!is.logical(eopen)) { 22 | stop("'eopen' must be a 'logical'") 23 | } 24 | } 25 | if (missing(start)) { 26 | start <- as.nanoduration(0) 27 | } 28 | if (missing(end)) { 29 | end <- as.nanoduration(0) 30 | } 31 | if (!bypass_y_check & is.unsorted(y)) { 32 | stop("'y' must be sorted in ascending order") 33 | } 34 | if (!bypass_x_check & is.unsorted(x)) { 35 | stop("'x' must be sorted in ascending order") 36 | } 37 | 38 | .align_idx_duration_cpp(x, y, start, end, sopen, eopen) 39 | } 40 | 41 | ##' Get the index of the alignment of one vector onto another 42 | ##' 43 | ##' \code{align_idx} returns the index of the alignment of \code{x} on \code{y} 44 | ##' 45 | ##' In order to perform the alignment, intervals are created around 46 | ##' each elements in \code{y} using \code{start} and \code{end}. For 47 | ##' each such interval, the closest element in \code{x} is chosen. If 48 | ##' no element in \code{x} falls in the interval, then NaN is 49 | ##' returned. 50 | ##' 51 | ##' @param x the \code{nanotime} vector to align from 52 | ##' @param y the \code{nanotime} vector to align to 53 | ##' @param start scalar or vector of same length as \code{y} of type 54 | ##' \code{nanoduration} or \code{nanoperiod}; \code{start} is 55 | ##' added to each element in \code{y} and it then defines the 56 | ##' starting point of the interval under consideration for the 57 | ##' alignment on that element of \code{y} 58 | ##' @param end scalar or vector of same length as \code{y} of type 59 | ##' \code{nanoduration} or \code{nanoperiod}; \code{start} is 60 | ##' added to each element in \code{y} and it then defines the 61 | ##' ending point of the interval under consideration for the 62 | ##' alignment on that element of \code{y} 63 | ##' @param sopen boolean scalar or vector of same lengths as \code{y} 64 | ##' that indicates if the start of the interval is open or 65 | ##' closed. Defaults to FALSE. 66 | ##' @param eopen boolean scalar or vector of same lengths as \code{y} 67 | ##' that indicates if the end of the interval is open or 68 | ##' closed. Defaults to TRUE. 69 | ##' @param tz scalar or vector of same length as \code{y} of type 70 | ##' character. Only used when the type of \code{start} and 71 | ##' \code{end} is \code{nanoperiod}. It defines the time zone for 72 | ##' the definition of the interval. 73 | ##' @param bypass_x_check logical indicating if the sorting of 74 | ##' \code{x} should be bypassed. This can provide a marginal 75 | ##' speedup, but should be used carefully. 76 | ##' @param bypass_y_check logical indicating if the sorting of 77 | ##' \code{y} should be bypassed. This can provide a marginal 78 | ##' speedup, but should be used carefully. 79 | ##' @param ... further arguments passed to or from methods. 80 | ##' @return a vector of indices of the same length as \code{y}; this 81 | ##' vector indexes into \code{x} and represent the closest point 82 | ##' of \code{x} that is in the interval defined around each point 83 | ##' in \code{y} 84 | ##' 85 | ##' @details When only \code{x} and \code{y} are specified, the 86 | ##' default is to close the intervals so that the alignment simply 87 | ##' picks up equal points. Note that it is possible to specify 88 | ##' meaningless intervals, for instance with a \code{start} that 89 | ##' is beyond \code{end}. In this case, the alignment will simply 90 | ##' return NA for each element in \code{y}. In principle, the 91 | ##' \code{start} and \code{end} are chosen to define an interval 92 | ##' is the past, or around the points in \code{y}, but if they are 93 | ##' both positive, they can define intervals in the future. 94 | ##' 95 | ##' @rdname align_idx 96 | ##' 97 | ##' @examples 98 | ##' \dontrun{ 99 | ##' align_idx(nanotime(c(10:14, 17:19)), nanotime(11:20)) 100 | ##' ## [1] 2 3 4 5 NA NA 6 7 8 NA 101 | ##' } 102 | setGeneric("align_idx", function(x, y, start, end, ...) standardGeneric("align_idx")) 103 | 104 | ##' @rdname align_idx 105 | setMethod("align_idx", signature("nanotime", "nanotime", "nanoduration", "nanoduration"), align_idx_duration) 106 | 107 | ##' @rdname align_idx 108 | setMethod("align_idx", signature("nanotime", "nanotime", "missing", "missing"), align_idx_duration) 109 | 110 | ##' @rdname align_idx 111 | setMethod("align_idx", signature("nanotime", "nanotime", "missing", "nanoduration"), align_idx_duration) 112 | 113 | ##' @rdname align_idx 114 | setMethod("align_idx", signature("nanotime", "nanotime", "nanoduration", "missing"), align_idx_duration) 115 | 116 | 117 | align_idx_period <- function(x, # time-series 118 | y, # nanotime vector 119 | start=as.nanoperiod(0), 120 | end=as.nanoperiod(0), 121 | sopen = FALSE, 122 | eopen = TRUE, 123 | tz, 124 | bypass_x_check = FALSE, 125 | bypass_y_check = FALSE) 126 | { 127 | if (missing(start)) { 128 | start <- as.nanoperiod(0) 129 | } 130 | if (missing(end)) { 131 | end <- as.nanoperiod(0) 132 | } 133 | if (!is.logical(sopen)) { 134 | stop("'sopen' must be a 'logical'") 135 | } 136 | if (!is.logical(eopen)) { 137 | stop("'eopen' must be a 'logical'") 138 | } 139 | if (!is.character(tz)) { 140 | stop ("'tz' must be a 'character'") 141 | } 142 | if (!bypass_y_check & is.unsorted(y)) { 143 | stop("'y' must be sorted in ascending order") 144 | } 145 | if (!bypass_x_check & is.unsorted(x)) { 146 | stop("'x' must be sorted in ascending order") 147 | } 148 | 149 | .align_idx_period_cpp(sort(x), sort(y), start, end, sopen, eopen, tz) 150 | } 151 | 152 | 153 | ##' @rdname align_idx 154 | setMethod("align_idx", signature("nanotime", "nanotime", "nanoperiod", "nanoperiod"), align_idx_period) 155 | 156 | ##' @rdname align_idx 157 | setMethod("align_idx", signature("nanotime", "nanotime", "missing", "nanoperiod"), align_idx_period) 158 | 159 | ##' @rdname align_idx 160 | setMethod("align_idx", signature("nanotime", "nanotime", "nanoperiod", "missing"), align_idx_period) 161 | 162 | 163 | 164 | ##' @rdname align 165 | setGeneric("align", function(x, y, start, end, ...) standardGeneric("align")) 166 | 167 | 168 | align_duration <- function(x, # data.table time-series 169 | y, # nanotime vector 170 | start=as.nanoduration(0), 171 | end=as.nanoduration(0), 172 | sopen = FALSE, 173 | eopen = TRUE, 174 | func = NULL) 175 | { 176 | if (missing(start) && missing(end) && missing(sopen) && missing(eopen)) { 177 | eopen = FALSE # otherwise no interval is 178 | # defined and the result is 179 | # all NA, which is likely not 180 | # what the user intended 181 | } 182 | if (missing(start)) { 183 | start <- as.nanoduration(0) 184 | } 185 | if (missing(end)) { 186 | end <- as.nanoduration(0) 187 | } 188 | if (!inherits(x[[1]], "nanotime")) { 189 | stop("first column of 'data.table' must be of type 'nanotime'") 190 | } 191 | if (is.null(key(x)) || names(x)[1] != key(x)[1]) { 192 | stop("first column of 'data.table' must be the first key") 193 | } 194 | if (!is.logical(sopen)) { 195 | stop("'sopen' must be a 'logical'") 196 | } 197 | if (!is.logical(eopen)) { 198 | stop("'eopen' must be a 'logical'") 199 | } 200 | if (!is.null(func)) { 201 | if (!is.function(func)) { 202 | stop ("'func' must be a function") 203 | } 204 | res <- data.table(index=y, 205 | do.call(rbind, 206 | .align_duration_cpp(x[[1]], # the index of the data.table 207 | sort(y), # nanotime vector to align on 208 | x, # data.table data 209 | start, 210 | end, 211 | sopen, 212 | eopen, 213 | func))) 214 | names(res)[1] <- names(x)[1] # keep the original name of the index 215 | setkeyv(res, key(x)) 216 | res 217 | } 218 | else { 219 | ## if no function is supplied, make closest alignment: 220 | sorted_y <- sort(y) 221 | res <- x[align_idx_duration(x[[1]], sorted_y, start, end, sopen, eopen, bypass_x_check=TRUE, bypass_y_check=TRUE)] 222 | res[[1]] <- sorted_y 223 | res 224 | } 225 | } 226 | 227 | 228 | ##' Align a \code{data.table} onto a \code{nanotime} vector 229 | ##' 230 | ##' \code{align} returns the subset of \code{data.table} \code{x} that 231 | ##' aligns on the temporal vector \code{y} 232 | ##' 233 | ##' For each element in \code{y}, intervals are created around this 234 | ##' element with \code{start} and \code{end}. All the elements of 235 | ##' \code{x} that fall within this interval are given as argument to 236 | ##' the function \code{func}. The function \code{func} show reduce 237 | ##' this \code{data.frame} to one unique row that will be associated 238 | ##' with the \code{nanotime} value in \code{y}. 239 | ##' 240 | ##' @param x the \code{data.table} time-series to align from 241 | ##' @param y the \code{nanotime} vector to align to 242 | ##' @param start scalar or vector of same length as \code{y} of type 243 | ##' \code{integer64}; \code{start} is added to each element in 244 | ##' \code{y} and it then defines the starting point of the 245 | ##' interval under consideration for the alignment on that 246 | ##' element of \code{y} 247 | ##' @param end scalar or vector of same length as \code{y} of type 248 | ##' \code{integer64}; \code{start} is added to each element in 249 | ##' \code{y} and it then defines the ending point of the interval 250 | ##' under consideration for the alignment on that element of 251 | ##' \code{y} 252 | ##' @param sopen boolean scalar or vector of same lengths as \code{y} 253 | ##' that indicates if the start of the interval is open or 254 | ##' closed. Defaults to FALSE. 255 | ##' @param eopen boolean scalar or vector of same lengths as \code{y} 256 | ##' that indicates if the end of the interval is open or 257 | ##' closed. Defaults to TRUE. 258 | ##' @param tz scalar or vector of same length as \code{y} of type 259 | ##' character. Only used when the type of \code{start} and 260 | ##' \code{end} is \code{nanoperiod}. It defines the time zone for 261 | ##' the definition of the interval. 262 | ##' @param func a function taking one argument and which provides an 263 | ##' arbitrary aggregation of its argument; if \code{NULL} then a 264 | ##' function which takes the closest observation is used. 265 | ##' @param ... further arguments passed to or from methods. 266 | ##' @return a \code{data.table} time-series of the same length as 267 | ##' \code{y}; this is a subset of \code{x} with the 268 | ##' \code{nanotime} index of \code{y} 269 | ##' 270 | ##' @rdname align 271 | ##' 272 | ##' @examples 273 | ##' \dontrun{ 274 | ##' y <- nanotime((1:10)*1e9) 275 | ##' x <- data.table(index=nanotime((1:10)*1e9), data=1:10) 276 | ##' align(x, y, as.nanoduration(-1e9), as.nanoduration(1e9), colMeans) 277 | ##' } 278 | ##' 279 | ##' 280 | setMethod("align", signature("data.table", "nanotime", "nanoduration", "nanoduration"), align_duration) 281 | ##' @rdname align 282 | setMethod("align", signature("data.table", "nanotime", "missing", "missing"), align_duration) 283 | ##' @rdname align 284 | setMethod("align", signature("data.table", "nanotime", "nanoduration", "missing"), align_duration) 285 | ##' @rdname align 286 | setMethod("align", signature("data.table", "nanotime", "missing", "nanoduration"), align_duration) 287 | 288 | 289 | align_period <- function(x, # data.table time-series 290 | y, # nanotime vector 291 | start=as.nanoperiod(0), 292 | end=as.nanoperiod(0), 293 | sopen = FALSE, 294 | eopen = TRUE, 295 | tz, 296 | func=NULL) 297 | { 298 | 299 | if (missing(start)) { 300 | start <- as.nanoperiod(0) 301 | } 302 | if (missing(end)) { 303 | end <- as.nanoperiod(0) 304 | } 305 | if (!inherits(x[[1]], "nanotime")) { 306 | stop("first column of 'data.table' must be of type 'nanotime'") 307 | } 308 | if (!is.logical(sopen)) { 309 | stop("'sopen' must be a 'logical'") 310 | } 311 | if (!is.logical(eopen)) { 312 | stop("'eopen' must be a 'logical'") 313 | } 314 | if (!is.character(tz)) { 315 | stop ("'tz' must be a 'character'") 316 | } 317 | if (is.null(key(x)) || names(x)[1] != key(x)[1]) { 318 | stop("first column of 'data.table' must be the first key") 319 | } 320 | if (!is.null(func)) { 321 | if (!is.function(func)) { 322 | stop ("'func' must be a function") 323 | } 324 | res <- data.table(index=y, 325 | do.call(rbind, 326 | .align_period_cpp(x[[1]], # the index of the data.table 327 | sort(y), # nanotime vector to align on 328 | x, # data.table data 329 | start, 330 | end, 331 | sopen, 332 | eopen, 333 | func, 334 | tz))) 335 | names(res)[1] <- names(x)[1] # keep the original name of the index 336 | setkeyv(res, key(x)) 337 | res 338 | } 339 | else { 340 | ## if no function is supplied, make closest alignment: 341 | sorted_y <- sort(y) 342 | res <- x[align_idx_period(x[[1]], sorted_y, start, end, sopen, eopen, tz, bypass_x_check=TRUE, bypass_y_check=TRUE)] 343 | res[[1]] <- sorted_y 344 | res 345 | } 346 | } 347 | 348 | 349 | ##' @rdname align 350 | setMethod("align", signature("data.table", "nanotime", "nanoperiod", "nanoperiod"), align_period) 351 | ##' @rdname align 352 | setMethod("align", signature("data.table", "nanotime", "nanoperiod", "missing"), align_period) 353 | ##' @rdname align 354 | setMethod("align", signature("data.table", "nanotime", "missing", "nanoperiod"), align_period) 355 | 356 | 357 | 358 | ##' @rdname grid_align 359 | setGeneric("grid_align", function(x, by, ...) standardGeneric("grid_align")) 360 | 361 | grid_align_duration <- function(x, # time-series 362 | by, # the grid size 363 | func=NULL, # function to apply on the subgroups 364 | grid_start=x[[1]][1] + by, # start of the grid 365 | grid_end=tail(x[[1]], 1), # end of the grid 366 | ival_start=-by, # the interval start 367 | ival_end=as.nanoduration(0), # the interval end 368 | ival_sopen=FALSE, # the interval start open 369 | ival_eopen=TRUE) # the interval end open 370 | { 371 | if (!inherits(x[[1]], "nanotime")) { 372 | stop("first column of 'data.table' must be of type 'nanotime'") 373 | } 374 | grid <- seq(grid_start, grid_end, by=by) 375 | if (tail(grid,1) < grid_end) { 376 | grid <- c(grid, tail(grid,1) + by) 377 | } 378 | 379 | if (missing(ival_sopen) & missing(ival_eopen) & is.null(func)) { 380 | ## the intention is a closest align, so make sure the interval 381 | ## is closed at the end, as equality is considered the 382 | ## closest: 383 | ival_eopen <- FALSE 384 | } 385 | 386 | align(x, grid, ival_start, ival_end, ival_sopen, ival_eopen, func) 387 | } 388 | 389 | 390 | grid_align_period <- function(x, # time-series 391 | by, # the grid size 392 | func=NULL, # function to apply on the subgroups 393 | grid_start=plus(x[[1]][1], by, tz), # start of the grid 394 | grid_end=tail(x[[1]], 1), # end of the grid 395 | ival_start=-by, # the interval start 396 | ival_end=as.nanoperiod(0), # the interval end 397 | ival_sopen=FALSE, # the interval start open 398 | ival_eopen=TRUE, # the interval end open 399 | tz) # time zone when using 'period' 400 | { 401 | if (!inherits(x[[1]], "nanotime")) { 402 | stop("first column of 'data.table' must be of type 'nanotime'") 403 | } 404 | grid <- seq(grid_start, grid_end, by=by, tz=tz) 405 | if (tail(grid,1) < grid_end) { 406 | grid <- c(grid, plus(tail(grid,1), by, tz)) 407 | } 408 | 409 | if (missing(ival_sopen) & missing(ival_eopen) & is.null(func)) { 410 | ## the intention is a closest align, so make sure the interval 411 | ## is closed at the end, as equality is considered the 412 | ## closest: 413 | ival_eopen <- FALSE 414 | } 415 | 416 | align(x, grid, ival_start, ival_end, ival_sopen, ival_eopen, tz, func) 417 | } 418 | 419 | 420 | ##' Align a \code{data.table} onto a \code{nanotime} vector grid 421 | ##' 422 | ##' \code{grid_align} returns the subset of \code{data.table} \code{x} 423 | ##' that aligns on the grid defined by \code{by}, \code{start} and 424 | ##' \code{end} 425 | ##' 426 | ##' A grid defined by the parameter \code{by}, \code{start} and 427 | ##' \code{end} is created. The function then does a standard alignment 428 | ##' of \code{x} onto this grid (see the \code{align} function) 429 | ##' 430 | ##' @param x the \code{data.table} time-series to align from 431 | ##' @param by interval specified as a \code{nanoduration} or 432 | ##' \code{nanoperiod}. 433 | ##' @param grid_start scalar \code{nanotime} defining the start of the 434 | ##' grid; by default the first element of \code{x} is taken. 435 | ##' @param grid_end scalar \code{nanotime} defining the end of the grid; by 436 | ##' default the last element of \code{x} is taken. 437 | ##' @param ival_start scalar of type \code{nanoduration} or 438 | ##' \code{nanoperiod}; \code{ival_start} is added to each element 439 | ##' of the grid and it then defines the starting point of the 440 | ##' interval under consideration for the alignment onto that 441 | ##' element. 442 | ##' @param ival_end scalar of type \code{nanoduration} or 443 | ##' \code{nanoperiod}; \code{ival_end} is added to each element of 444 | ##' the grid and it then defines the ending point of the interval 445 | ##' under consideration for the alignment onto that element. 446 | ##' @param ival_sopen boolean scalar that indicates if the start of 447 | ##' the interval is open or closed. Defaults to FALSE. 448 | ##' @param ival_eopen boolean scalar that indicates if the end of the 449 | ##' interval is open or closed. Defaults to TRUE. 450 | ##' @param tz scalar of type character. Only used when the type of 451 | ##' \code{by} and \code{end} is \code{nanoperiod}. It defines the 452 | ##' time zone for the definition of the interval. 453 | ##' @param func a function taking one argument and which provides an 454 | ##' arbitrary aggregation of its argument; if \code{NULL} then a 455 | ##' function which takes the closest observation is used. 456 | ##' @param ... further arguments passed to or from methods. 457 | ##' @return a \code{data.table} time-series of the same length as 458 | ##' \code{y} with the aggregations computed by \code{func} 459 | ##' 460 | ##' @rdname grid_align 461 | ##' 462 | ##' @examples 463 | ##' \dontrun{ 464 | ##' one_second <- 1e9 465 | ##' x <- data.table(index=nanotime(cumsum(sin(seq(0.001, pi, 0.001)) * one_second))) 466 | ##' x <- x[, V2 := 1:nrow(x)] 467 | ##' setkey(x, index) 468 | ##' grid_align(x, as.nanoduration("00:01:00"), sum) 469 | ##' } 470 | setMethod("grid_align", signature("data.table", "nanoduration"), grid_align_duration) 471 | ##' @rdname grid_align 472 | setMethod("grid_align", signature("data.table", "nanoperiod"), grid_align_period) 473 | 474 | 475 | ##' Return the number of observations per interval 476 | ##' 477 | ##' \code{frequency} returns the number of observations in 478 | ##' \code{data.table} \code{x} for each interval specified by 479 | ##' \code{by}. 480 | ##' 481 | ##' @param x the \code{data.table} time-series for which to calculate 482 | ##' the frequency 483 | ##' @param by interval specified as a \code{nanoduration} or 484 | ##' \code{nanoperiod}. 485 | ##' @param grid_start scalar \code{nanotime} defining the start of the 486 | ##' grid; by default the first element of \code{x} is taken. 487 | ##' @param grid_end scalar \code{nanotime} defining the end of the 488 | ##' grid; by default the last element of \code{x} is taken. 489 | ##' @param tz scalar of type character. Only used when the type of 490 | ##' \code{by} and \code{end} is \code{nanoperiod}. It defines the 491 | ##' time zone for the definition of the interval. 492 | ##' @param ival_start scalar of type \code{nanoduration} or 493 | ##' \code{nanoperiod}; \code{ival_start} is added to each element 494 | ##' of the grid and it then defines the starting point of the 495 | ##' interval under consideration for the alignment onto that 496 | ##' element. This defaults to -\code{by} and most likely does not 497 | ##' need to be overriden. 498 | ##' @param ival_end scalar of type \code{nanoduration} or 499 | ##' \code{nanoperiod}; \code{ival_end} is added to each element of 500 | ##' the grid and it then defines the ending point of the interval 501 | ##' under consideration for the alignment onto that element. This 502 | ##' defaults to 0 and most likely does not need to be overriden. 503 | ##' @param ival_sopen boolean scalar that indicates if the start of 504 | ##' the interval is open or closed. Defaults to FALSE. 505 | ##' @param ival_eopen boolean scalar that indicates if the end of the 506 | ##' interval is open or closed. Defaults to TRUE. 507 | ##' @return a \code{data.table} time-series with the number of 508 | ##' observations in \code{x} that fall withing the intervals 509 | ##' defined by the grid interval defined by \code{by}. 510 | ##' 511 | ##' @examples 512 | ##' \dontrun{ 513 | ##' one_second <- as.nanoduration("00:00:01") 514 | ##' one_minute <- 60 * one_second 515 | ##' x <- data.table(index=nanotime((1:100) * one_second), 1) 516 | ##' setkey(x, index) 517 | ##' frequency(x, one_minute) 518 | ##' } 519 | setMethod("frequency", 520 | signature("data.table"), 521 | function(x, by, grid_start, grid_end, tz, ival_start=-by, ival_end, ival_sopen=FALSE, ival_eopen=TRUE) 522 | { 523 | if (missing(grid_end)) { 524 | grid_end = tail(x[[1]], 1) 525 | } 526 | if (inherits(by, "nanoduration")) { 527 | if (missing(grid_start)) { 528 | grid_start = x[[1]][1] + by 529 | } 530 | if (missing(ival_end)) { 531 | ival_end = as.nanoduration(0) 532 | } 533 | grid_align(x, by, nrow, grid_start, grid_end, ival_start, ival_end, ival_sopen, ival_eopen) 534 | } 535 | else if (inherits(by, "nanoperiod")) { 536 | if (missing(grid_start)) { 537 | grid_start = plus(x[[1]][1], by, tz) 538 | } 539 | if (missing(ival_end)) { 540 | ival_end = nanoperiod(0) 541 | } 542 | grid_align(x, by, nrow, grid_start, grid_end, ival_start, ival_end, ival_sopen, ival_eopen, tz) 543 | } 544 | else { 545 | stop("argument 'by' must be either 'nanoduration' or 'nanotime'") 546 | } 547 | }) 548 | 549 | 550 | ##' @rdname ops 551 | setGeneric("ops", function(x, y, op_string) standardGeneric("ops")) 552 | 553 | 554 | ##' Arithmetic operations on two \code{data.table} time-series 555 | ##' 556 | ##' \code{ops} returns the \code{y} time-series on which the \code{x} 557 | ##' time-series values are applied using the specified operator 558 | ##' \code{op}. 559 | ##' 560 | ##' @section Details: 561 | ##' 562 | ##' The n elements of the \code{x} time-series operand define a set of 563 | ##' n-1 intervals, and the value associated with each interval is 564 | ##' applied to all the observations in the \code{y} time-series 565 | ##' operand that fall in the interval. Note that the interval is 566 | ##' closed at the beginning and open at the end. The supported values 567 | ##' for \code{op} are "*", "/", "+", "-". 568 | ##' 569 | ##' There has to be one numeric column in \code{x} and \code{y}; there 570 | ##' has to be either a one to one correspondance between the number of 571 | ##' numeric columns in \code{x} and \code{y}, or there must be only 572 | ##' one numeric column in \code{x} that will be applied to all numeric 573 | ##' columns in \code{y}. Non-numeric columns must not appear in 574 | ##' \code{x}, whereas they will be skipped of they appear in \code{y}. 575 | ##' 576 | ##' @param x the \code{data.table} time-series that determines the 577 | ##' left operand 578 | ##' @param y the \code{data.table} time-series that determines the 579 | ##' right operand \code{nanoperiod}. 580 | ##' @param op_string string defining the operation to apply; the 581 | ##' supported values for \code{op} are "*", "/", "+", "-". 582 | ##' 583 | ##' @rdname ops 584 | ##' 585 | ##' @examples 586 | ##' \dontrun{ 587 | ##' one_second_duration <- as.nanoduration("00:00:01") 588 | ##' t1 <- nanotime(1:2 * one_second_duration * 3) 589 | ##' t2 <- nanotime(1:4 * one_second_duration) 590 | ##' dt1 <- data.table(index=t1, data1 = 1:length(t1)) 591 | ##' setkey(dt1, index) 592 | ##' dt2 <- data.table(index=t2, data1 = 1:length(t2)) 593 | ##' setkey(dt2, index) 594 | ##' ops(dt1, dt2, "+") 595 | ##' } 596 | setMethod("ops", 597 | signature("data.table", "data.table", "character"), 598 | function(x, y, op_string) 599 | { 600 | if (!inherits(x[[1]], "nanotime")) { 601 | stop("first column of 'x' must be of type 'nanotime'") 602 | } 603 | if (!inherits(y[[1]], "nanotime")) { 604 | stop("first column of 'y' must be of type 'nanotime'") 605 | } 606 | if (is.null(key(x)) || names(x)[1] != key(x)[1]) { 607 | stop("first column of 'x' must be the first key") 608 | } 609 | if (is.null(key(y)) || names(y)[1] != key(y)[1]) { 610 | stop("first column of 'y' must be the first key") 611 | } 612 | 613 | 614 | .ops(x, y, op_string) 615 | }) 616 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## dtts: Time-series functionality based on `nanotime` and `data.table`. 2 | 3 | [![CI](https://github.com/eddelbuettel/dtts/workflows/ci/badge.svg)](https://github.com/eddelbuettel/dtts/actions?query=workflow%3Aci) 4 | [![License](https://eddelbuettel.github.io/badges/GPL2+.svg)](https://www.gnu.org/licenses/gpl-2.0.html) 5 | [![CRAN](https://www.r-pkg.org/badges/version/dtts)](https://cran.r-project.org/package=dtts) 6 | [![r-universe](https://eddelbuettel.r-universe.dev/badges/dtts)](https://eddelbuettel.r-universe.dev/dtts) 7 | [![Dependencies](https://tinyverse.netlify.app/badge/dtts)](https://cran.r-project.org/package=dtts) 8 | [![Downloads](https://cranlogs.r-pkg.org/badges/dtts?color=brightgreen)](https://www.r-pkg.org/pkg/dtts) 9 | [![Code Coverage](https://codecov.io/gh/eddelbuettel/dtts/graph/badge.svg)](https://app.codecov.io/gh/eddelbuettel/dtts) 10 | [![Last Commit](https://img.shields.io/github/last-commit/eddelbuettel/dtts)](https://github.com/eddelbuettel/dtts) 11 | 12 | ## Motivation 13 | 14 | Combining package [`nanotime`](https://CRAN.R-project.org/package=nanotime) for 15 | operating with nanosecond time-resolution with package 16 | [`data.table`](https://CRAN.R-project.org/package=data.table) leverages 17 | the conciseness, high performance, and memory efficiency of the 18 | latter to provide high-resolution, high-performance time series operations. 19 | 20 | Our time-series representation is simply a `data.table` with a first column 21 | of type `nanotime` and a key on it. This means all the standard `data.table` 22 | functions can be used, and this package consolidates this functionality. 23 | 24 | Specifically, `dtts` proposes alignment functions that are particularly 25 | versatile, and allow to work across time-zones. 26 | 27 | ## Usage 28 | 29 | ### Creating a `data.table`-based time-series with a `nanotime` index 30 | 31 | Three operations are necessary to create a `data.table`-based 32 | time-series for use with the functions defined in this package: 33 | 1. Create the time index, i.e. a vector of `nanotime` 34 | 2. Create a `data.table` with the first column being the time index 35 | and specifying it as a key 36 | 37 | For instance, this code creates a time-series of 10 rows spaced every 38 | hour with a data column `V1` containing random data: 39 | 40 | ~~~ R 41 | library(data.table) 42 | library(nanotime) 43 | t1 <- seq(as.nanotime(Sys.time()), by=as.nanoduration("01:00:00"), length.out=10) 44 | dt1 <- data.table(index=t1, V1=runif(10), key="index") 45 | ~~~ 46 | 47 | produces: 48 | 49 | ~~~ 50 | index V1 51 | 1: 2021-11-21T06:23:12.404650+00:00 0.7206800 52 | 2: 2021-11-21T07:23:12.404650+00:00 0.9677868 53 | 3: 2021-11-21T08:23:12.404650+00:00 0.6211587 54 | 4: 2021-11-21T09:23:12.404650+00:00 0.7669201 55 | 5: 2021-11-21T10:23:12.404650+00:00 0.6426368 56 | 6: 2021-11-21T11:23:12.404650+00:00 0.4026811 57 | 7: 2021-11-21T12:23:12.404650+00:00 0.2512213 58 | 8: 2021-11-21T13:23:12.404650+00:00 0.3476128 59 | 9: 2021-11-21T14:23:12.404650+00:00 0.9663271 60 | 10: 2021-11-21T15:23:12.404650+00:00 0.4744729 61 | ~~~ 62 | 63 | (Note that we can also write this in a single `data.table` statement as 64 | 65 | ~~~ R 66 | dt1 <- data.table(index = seq(as.nanotime(Sys.time()), by=as.nanoduration("01:00:00"), length.out=10), 67 | V1 = runif(10), 68 | key = "index") 69 | ~~~ 70 | 71 | ### Alignment functions 72 | 73 | Alignment is the process of matching the time of the observations of 74 | one time series to another. All alignment functions in this package 75 | work in a similar way. For each point in the vector `y` onto which `x` 76 | is aligned, a pair or arguments named `start` and `end` define an 77 | interval around this point. As an example let us take `start` equal to 78 | -1 hour and `end` equal to 0 hour. This means that a `y` of 2021-11-20 79 | 11:00:00 defines an interval from 2021-11-20 10:00:00 to 2021-11-20 80 | 11:00:00. The alignment process will then use that interval to pick 81 | points in order to compute one or more statistics on that interval for 82 | the corresponding point in `y`. 83 | 84 | In addition to the arguments `start` and `end`, two other arguments, 85 | booleans named `sopen` and `eopen`, define if the start and end, 86 | respectively, of the interval are open or not. 87 | 88 | Finally, note that when the interval is specified with a `nanoperiod` 89 | type, the argument `tz` is necessary in order to give meaning to the 90 | interval. With `nanoperiod`, alignments are time-zone aware and 91 | correct across daylight saving time. 92 | 93 | This figure shows an alignment using the "closest" 94 | point as data: 95 | 96 | 97 | 98 | 99 | This figure shows an alignment using a statistic 100 | (here simply counting the number of elements in the intervals): 101 | 102 | 103 | 104 | 105 | #### `align_idx` 106 | 107 | This function takes two vectors of type `nanotime`. It aligns the 108 | first one onto the second one and returns the indices of the first 109 | vector that align with the second vector. There is no choice of 110 | aggregation function here as this function works uniquely on 111 | `nanotime` vectors. The algorithm selects the point in `x` that falls 112 | in the interval that is closest to the point of alignment in `y`. The 113 | index of the point that falls in that interval is returned at the 114 | position of the vector `y`. If no point exists in that interval `NaN` 115 | is returned. 116 | 117 | 118 | ~~~ R 119 | library(dtts) 120 | 121 | t1 <- seq(as.nanotime("1970-01-01T00:00:00+00:00"), by=as.nanoduration("00:00:01"), length.out=100) 122 | t2 <- seq(as.nanotime("1970-01-01T00:00:10+00:00"), by=as.nanoduration("00:00:10"), length.out=10) 123 | 124 | align_idx(t1, t2, start=as.nanoduration("-00:00:10")) 125 | ~~~ 126 | 127 | Which produces: 128 | 129 | ~~~ 130 | [1] 10 20 30 40 50 60 70 80 90 100 131 | ~~~ 132 | 133 | #### `align` 134 | 135 | This function takes a `data.table` and aligns it onto `y`, a vector of 136 | `nanotime`. Like `align_idx`, it uses the arguments `start`, `end`, 137 | `sopen` and `eopen` to define the intervals around the points in `y`. 138 | 139 | Instead of the result being an index, it is a new `data.table` 140 | time-series with the first `nanotime` column being the vector `y`, and 141 | the rows of this time-series are taken from the `data.table` `x`. If 142 | no function is specified (i.e. `func` is `NULL`), the function returns 143 | the row of the point in `x` that is in the interval and that is 144 | closest to the point in `y` on which the alignment is made. If `func` 145 | is defined, it receives for each point in `y` all the rows in `x` that 146 | are in the defined interval. So `func` must be a statistic that 147 | returns one row, but it may return one or more columns. Common examples 148 | are means (e.g. using `colMeans`), counts, etc. 149 | 150 | 151 | In the following example a time-series `dt1` is created with a data 152 | column `V1` which has the integer index as value and it is aligned 153 | onto a `nanotime` vector `t2` 154 | 155 | ~~~ R 156 | library(dtts) 157 | 158 | t1 <- seq(as.nanotime("1970-01-01T00:00:00+00:00"), by=as.nanoduration("00:00:01"), length.out=100) 159 | dt1 <- data.table(index=t1, V1=0:99) 160 | setkey(dt1, index) 161 | 162 | t2 <- seq(as.nanotime("1970-01-01T00:00:10+00:00"), by=as.nanoduration("00:00:10"), length.out=10) 163 | 164 | align(dt1, t2, start=as.nanoduration("-00:00:10"), func=colMeans) 165 | ~~~ 166 | 167 | Which produces: 168 | 169 | ~~~ 170 | index V1 171 | 1: 1970-01-01T00:00:10+00:00 4.5 172 | 2: 1970-01-01T00:00:20+00:00 14.5 173 | 3: 1970-01-01T00:00:30+00:00 24.5 174 | 4: 1970-01-01T00:00:40+00:00 34.5 175 | 5: 1970-01-01T00:00:50+00:00 44.5 176 | 6: 1970-01-01T00:01:00+00:00 54.5 177 | 7: 1970-01-01T00:01:10+00:00 64.5 178 | 8: 1970-01-01T00:01:20+00:00 74.5 179 | 9: 1970-01-01T00:01:30+00:00 84.5 180 | 10: 1970-01-01T00:01:40+00:00 94.5 181 | ~~~ 182 | 183 | #### `grid_align` 184 | 185 | This function adds one more dimension to the function `align`. Instead 186 | of taking a vector `y`, it constructs a grid that has as interval the 187 | value supplied in the argument `by`. The interval is controllable 188 | (with arguments `ival_start`, `ival_end`, `ival_sopen`, `ival_eopen`) 189 | but it is likely that in most cases the default will be used which is 190 | the grid interval. As in the case of `align`, the caller can specify 191 | `func`. Finally, note that `by` can be either a `nanoduration` or a 192 | `nanoperiod`. In the latter case, as for the other functions, the 193 | argument `tz` must be supplied so that the `nanoperiod` interval can 194 | be anchored to a specific timezone. 195 | 196 | The following example is the same as for the `align` function, but 197 | shows that the vector `t2` does not need to be supplied as it is 198 | instead constructed by `grid_align`: 199 | 200 | ~~~ R 201 | library(dtts) 202 | 203 | t1 <- seq(as.nanotime("1970-01-01T00:00:00+00:00"), by=as.nanoduration("00:00:01"), length.out=100) 204 | dt1 <- data.table(index=t1, V1=0:99) 205 | setkey(dt1, index) 206 | 207 | grid_align(dt1, as.nanoduration("00:00:10"), func=colMeans) 208 | ~~~ 209 | 210 | Which produces: 211 | 212 | ~~~ 213 | index V1 214 | 1: 1970-01-01T00:00:10+00:00 4.5 215 | 2: 1970-01-01T00:00:20+00:00 14.5 216 | 3: 1970-01-01T00:00:30+00:00 24.5 217 | 4: 1970-01-01T00:00:40+00:00 34.5 218 | 5: 1970-01-01T00:00:50+00:00 44.5 219 | 6: 1970-01-01T00:01:00+00:00 54.5 220 | 7: 1970-01-01T00:01:10+00:00 64.5 221 | 8: 1970-01-01T00:01:20+00:00 74.5 222 | 9: 1970-01-01T00:01:30+00:00 84.5 223 | 10: 1970-01-01T00:01:40+00:00 94.5 224 | ~~~ 225 | 226 | #### Frequency 227 | 228 | Using `grid_align` and `nrow` it is possible to get the frequency of a 229 | time-series, i.e. to count the number of elements in each interval of 230 | a grid. 231 | 232 | Taking the same example as above, we see that the result is the count 233 | of elements of `dt1` that are in each interval: 234 | 235 | ~~~ R 236 | library(dtts) 237 | 238 | t1 <- seq(as.nanotime("1970-01-01T00:00:00+00:00"), by=as.nanoduration("00:00:01"), length.out=100) 239 | dt1 <- data.table(index=t1, V1=0:99) 240 | setkey(dt1, index) 241 | 242 | grid_align(dt1, as.nanoduration("00:00:10"), func=nrow) 243 | ~~~ 244 | 245 | Which produces: 246 | 247 | ~~~ 248 | index V1 249 | 1: 1970-01-01T00:00:10+00:00 10 250 | 2: 1970-01-01T00:00:20+00:00 10 251 | 3: 1970-01-01T00:00:30+00:00 10 252 | 4: 1970-01-01T00:00:40+00:00 10 253 | 5: 1970-01-01T00:00:50+00:00 10 254 | 6: 1970-01-01T00:01:00+00:00 10 255 | 7: 1970-01-01T00:01:10+00:00 10 256 | 8: 1970-01-01T00:01:20+00:00 10 257 | 9: 1970-01-01T00:01:30+00:00 10 258 | 10: 1970-01-01T00:01:40+00:00 10 259 | ~~~ 260 | 261 | #### ops 262 | 263 | `ops` performs arithmetic operations between two time-series and has 264 | the following signature, where `x` and `y` are time-series and `op` is 265 | a string denoting an arithmetic operator. 266 | 267 | ~~~ R 268 | ops(x, y, op_string) 269 | ~~~ 270 | 271 | Each entry in the left time-series operand defines an interval from 272 | the previous entry, and the value associated with this interval will 273 | be applied to all the observations in the right time-series operand 274 | that fall in the interval. Note that the interval is closed at the 275 | beginning and open and the end. The available values for op are "*", 276 | "/", "+", "-". 277 | 278 | This function is particulary useful to apply a multiplier or to add a 279 | constant that changes over time; one example would be the adjustment 280 | of stock prices for splits. 281 | 282 | Here is a visualization of `ops`: 283 | 284 | 285 | 286 | 287 | Here is an example: 288 | 289 | ~~~ R 290 | one_second_duration <- as.nanoduration("00:00:01") 291 | t1 <- nanotime(1:2 * one_second_duration * 3) 292 | t2 <- nanotime(1:4 * one_second_duration) 293 | dt1 <- data.table(index=t1, data1 = 1:length(t1)) 294 | setkey(dt1, index) 295 | dt2 <- data.table(index=t2, data1 = 1:length(t2)) 296 | setkey(dt2, index) 297 | ops(dt1, dt2, "+") 298 | ~~~ 299 | 300 | Which produces: 301 | ``` 302 | index data1 303 | 1: 1970-01-01T00:00:01+00:00 2 304 | 2: 1970-01-01T00:00:02+00:00 3 305 | 3: 1970-01-01T00:00:03+00:00 3 306 | 4: 1970-01-01T00:00:04+00:00 4 307 | ``` 308 | 309 | ### Time-series subsetting 310 | 311 | Using `nanoival`, it is possible to do complex subsetting of a time-series: 312 | 313 | ~~~ R 314 | one_second <- 1e9 315 | index <- seq(nanotime("2022-12-12 12:12:10+00:00"), length.out=10, by=one_second) 316 | dts <- data.table(index=index, data=1:length(index), key="index") 317 | ival <- as.nanoival(c("-2022-12-12 12:12:10+00:00 -> 2022-12-12 12:12:14+00:00-"), 318 | ("+2022-12-12 12:12:18+00:00 -> 2022-12-12 12:12:20+00:00+")) 319 | dts[index %in% ival] 320 | 321 | ~~~ 322 | 323 | ## Status 324 | 325 | `dtts` currently proposes only a set of alignment functions, but it is 326 | likely that other time-series functions will be impletemented so that 327 | `nanotime`-based time-series have reasonably complete time-series 328 | functionality. 329 | 330 | See the [issue tickets](https://github.com/eddelbuettel/dtts/issues) 331 | for an up to date list of potentially desirable, possibly planned, or 332 | at least discussed items. 333 | 334 | ## Installation 335 | 336 | The package is on [CRAN](https://cran.r-project.org) and can be installed via a standard 337 | 338 | 339 | ```r 340 | install.packages("dtts") 341 | ``` 342 | 343 | and development versions can be installed via 344 | 345 | ```r 346 | remotes::install_github("eddelbuettel/dtts") 347 | ``` 348 | 349 | ## Author 350 | 351 | Dirk Eddelbuettel, Leonardo Silvestri 352 | 353 | ## License 354 | 355 | GPL (>= 2) 356 | -------------------------------------------------------------------------------- /cleanup: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | rm -fr src/*.o src/*.so src/*.dll src/*.dylib *~ */*~ vignettes/auto/ 4 | -------------------------------------------------------------------------------- /dtts.utils.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: Default 4 | SaveWorkspace: Default 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | 15 | BuildType: Package 16 | PackageUseDevtools: Yes 17 | PackageInstallArgs: --no-multiarch --with-keep.source 18 | -------------------------------------------------------------------------------- /inst/NEWS.Rd: -------------------------------------------------------------------------------- 1 | \name{NEWS} 2 | \title{News for Package \pkg{dtts}} 3 | \newcommand{\ghpr}{\href{https://github.com/eddelbuettel/dtts/pull/#1}{##1}} 4 | \newcommand{\ghit}{\href{https://github.com/eddelbuettel/dtts/issues/#1}{##1}} 5 | 6 | \section{Changes in version 0.1.3 (2024-07-18)}{ 7 | \itemize{ 8 | \item Windows builds use \code{localtime_s} with LLVM (Tomas Kalibera in 9 | \ghpr{16}) 10 | \item Tests code has been adjusted for an upstream change in 11 | \pkg{data.table} tests for \code{all.equal} (Michael Chirico in \ghpr{18} 12 | addressing \ghit{17}) 13 | } 14 | } 15 | 16 | \section{Changes in version 0.1.2 (2024-01-31)}{ 17 | \itemize{ 18 | \item Update the one exported C-level identifier from \pkg{data.table} 19 | following its 1.5.0 release and a renaming 20 | \item Routine continuous integration updates 21 | } 22 | } 23 | 24 | \section{Changes in version 0.1.1 (2023-08-08)}{ 25 | \itemize{ 26 | \item A simplifcation was applied to the C++ interface glue code 27 | (\ghpr{9} fixing \ghit{8}) 28 | \item The package no longer enforces the C++11 compilation standard 29 | (\ghpr{10}) 30 | \item An uninitialized memory read has been correct (\ghpr{11}) 31 | \item A new function \code{ops} has been added (\ghpr{12}) 32 | \item Function names no longer start with a dot (\ghpr{13}) 33 | \item Arbitrary index columns are now supported (\ghpr{13}) 34 | } 35 | } 36 | 37 | \section{Changes in version 0.1.0 (2022-03-06)}{ 38 | \itemize{ 39 | \item Initial CRAN upload. 40 | \item Package is functional and provides examples. 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /inst/images/align_closest.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 18 | 20 | 27 | 33 | 34 | 35 | 53 | 60 | 61 | 63 | 64 | 66 | image/svg+xml 67 | 69 | 70 | 71 | 72 | 73 | 78 | 85 | 92 | 99 | 106 | 113 | 118 | 123 | x 134 | y 145 | 150 | 155 | 160 | 165 | 170 | 175 | 180 | 185 | 190 | 195 | 200 | 205 | 210 | 215 | 220 | 225 | 230 | 235 | 240 | 245 | 250 | 255 | 260 | 265 | 270 | 276 | 282 | 288 | 294 | 300 | 306 | 311 | 316 | -1 hour 327 | +0 338 | NaN 349 | 350 | 351 | -------------------------------------------------------------------------------- /inst/images/align_count.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 19 | 21 | 24 | 28 | 32 | 33 | 36 | 40 | 44 | 45 | 52 | 58 | 59 | 68 | 77 | 84 | 90 | 91 | 98 | 104 | 105 | 112 | 118 | 119 | 126 | 132 | 133 | 140 | 146 | 147 | 154 | 160 | 161 | 168 | 174 | 175 | 182 | 188 | 189 | 190 | 210 | 217 | 218 | 220 | 221 | 223 | image/svg+xml 224 | 226 | 227 | 228 | 229 | 230 | 235 | 242 | 249 | 256 | 263 | 270 | 275 | 280 | x 291 | y 302 | 307 | 312 | 317 | 322 | 327 | 332 | 337 | 342 | 347 | 352 | 357 | 362 | 367 | 372 | 377 | 382 | 387 | 392 | 397 | 402 | 407 | 412 | 417 | 422 | 427 | 433 | 439 | 445 | 450 | 455 | -1 hour 466 | +0 477 |   488 |     513 | 519 | 525 | 531 | 537 | 543 | 549 | 555 | 561 | 567 | 3 578 | 0 589 | 4 600 | 5 611 | 2 622 | 623 | 624 | -------------------------------------------------------------------------------- /inst/images/ops.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 18 | 20 | 27 | 33 | 34 | 35 | 53 | 60 | 61 | 63 | 64 | 66 | image/svg+xml 67 | 69 | 70 | 71 | 72 | 73 | 78 | 85 | 92 | 99 | 106 | 111 | 116 | t2 128 | t1 140 | 145 | 150 | 155 | 160 | 165 | 170 | 175 | 180 | 185 | 190 | 195 | 200 | 205 | 210 | 215 | 220 | 225 | 230 | 235 | 240 | 245 | 250 | 255 | 261 | 267 | 5 279 | 3 291 | 7 303 | 2 315 | op = "*" 337 | 343 | * 5 355 | 361 | * 3 373 | 379 | 385 | *2 397 | * 7 409 | 410 | 411 | -------------------------------------------------------------------------------- /inst/tinytest/test_dtts.R: -------------------------------------------------------------------------------- 1 | 2 | suppressMessages({ 3 | library(dtts) # in case test script gets run stand-alone 4 | library(nanotime) 5 | library(data.table) 6 | }) 7 | 8 | savedFormat <- NULL 9 | one_second_duration <- as.nanoduration("00:00:01") 10 | one_second_period <- as.nanoperiod(one_second_duration) 11 | 12 | savedFormat <- options()$nanotimeFormat 13 | options(nanotimeFormat="%Y-%m-%d %H:%M:%S") 14 | 15 | ## align_idx 16 | ## --------- 17 | #test_align_idx_equal_duration <- function() { 18 | ## do the alignment with no interval, so require equality for alignment: 19 | t1 <- nanotime(1:100 * one_second_duration) 20 | t2 <- nanotime(1:10 * one_second_duration * 10) 21 | expect_equal(align_idx(t1, t2, sopen=FALSE, eopen=FALSE), 1:10 * 10) 22 | #} 23 | 24 | #test_align_idx_before_duration <- function() { 25 | ## do the alignment with an interval before of 1 nanosecond 26 | t1 <- nanotime(1:100 * one_second_duration * 2 + one_second_duration) 27 | t2 <- nanotime(1:10 * one_second_duration * 10) 28 | expect_equal(align_idx(t1, t2, start=-one_second_duration), seq(4, 49, 5)) 29 | #} 30 | 31 | #test_align_idx_after_duration <- function() { 32 | ## do the alignment with an interval after of 1 nanosecond 33 | t1 <- nanotime(1:100 * one_second_duration * 2 + one_second_duration) 34 | t2 <- nanotime(1:10 * one_second_duration * 10) 35 | expect_equal(align_idx(t1, t2, end=one_second_duration, eopen=FALSE), seq(5, 50, 5)) 36 | #} 37 | ## test align_idx duration with non-equal times, with NA at the end: 38 | t1 <- nanotime(1:10 * one_second_duration * 2 + one_second_duration - 1) 39 | t2 <- nanotime(1:10 * one_second_duration * 10) 40 | expect_equal(align_idx(t1, t2, start=-2*one_second_duration, eopen=FALSE), c(4, 9, rep(NA, 8))) 41 | 42 | ## test align_idx duration with non-equal times, with NA at the end: 43 | t1 <- nanotime(1:10 * one_second_duration * 2 + one_second_duration - 1) 44 | t2 <- nanotime(1:10 * one_second_duration * 10) 45 | expect_equal(align_idx(t1, t2, start=-2*one_second_duration), c(4, 9, rep(NA, 8))) 46 | 47 | ## test align_idx duration with non-equal times, with NA at the beginning: 48 | t1 <- nanotime(1:10 * one_second_duration * 2 + 10 * one_second_duration - 1) 49 | t2 <- nanotime(1:10 * one_second_duration * 10) 50 | expect_equal(align_idx(t1, t2, sopen=TRUE, start=-2*one_second_duration), c(NA, 5, 10, rep(NA, 7))) 51 | 52 | #test_align_idx_before_period <- function() { 53 | ## do the alignment with an interval before of a 1 nanosecond period 54 | t1 <- nanotime(1:100 * one_second_duration * 2 + one_second_duration) 55 | t2 <- nanotime(1:10 * one_second_duration * 10) 56 | expect_equal(align_idx(t1, t2, start=-one_second_period, tz="America/New_York"), seq(4, 49, 5)) 57 | #} 58 | 59 | ## test align_idx period with non-equal times, with NA at the end: 60 | t1 <- nanotime(1:10 * one_second_duration * 2 + one_second_duration - 1) 61 | t2 <- nanotime(1:10 * one_second_duration * 10) 62 | expect_equal(align_idx(t1, t2, start=-as.nanoperiod("00:00:02"), eopen=FALSE, tz="America/New_York"), c(4, 9, rep(NA, 8))) 63 | 64 | ## test align_idx period with non-equal times, with NA at the end: 65 | t1 <- nanotime(1:10 * one_second_duration * 2 + one_second_duration - 1) 66 | t2 <- nanotime(1:10 * one_second_duration * 10) 67 | expect_equal(align_idx(t1, t2, start=-as.nanoperiod("00:00:02"), tz="America/New_York"), c(4, 9, rep(NA, 8))) 68 | 69 | #test_align_idx_after_period <- function() { 70 | ## do the alignment with an interval after of a 1 nanosecond period 71 | t1 <- nanotime(1:100 * one_second_duration * 2 + one_second_duration) 72 | t2 <- nanotime(1:10 * one_second_duration * 10) 73 | expect_equal(align_idx(t1, t2, end=one_second_period, eopen=FALSE, tz="America/New_York"), seq(5, 50, 5)) 74 | #} 75 | ## all default arguments: 76 | t1 <- nanotime(1:30 * one_second_duration) 77 | t2 <- nanotime(1:3 * one_second_duration * 10) 78 | expect_equal(align_idx(t1, t2), c(10,20,30)) 79 | 80 | ## incorrect parameter types (nanoduration): 81 | expect_error(align_idx(t1, t2, sopen="open"), "must be a 'logical'") 82 | expect_error(align_idx(t1, t2, eopen="open"), "must be a 'logical'") 83 | ## incorrect parameter types (nanoperiod): 84 | expect_error(align_idx(t1, t2, end=one_second_period, sopen="open"), "must be a 'logical'") 85 | expect_error(align_idx(t1, t2, end=one_second_period, eopen="open"), "must be a 'logical'") 86 | expect_error(align_idx(t1, t2, end=one_second_period, tz=3), "'tz' must be a 'character'") 87 | 88 | 89 | ## align 90 | ## ----- 91 | 92 | ## with all default params: 93 | rows <- 10 94 | t1 <- seq(as.nanotime(0), by=one_second_duration, length.out=rows) 95 | dt1 <- data.table(idx=t1, v1=1:rows, key="idx") 96 | t2 <- seq(as.nanotime(0), by=2*one_second_duration, length.out=rows/2) 97 | expect_equal(align(dt1, t2), dt1[seq(1, rows, by=2)]) 98 | 99 | ## do the alignment with no interval, so require equality for alignment: 100 | cols <- 3 101 | rows <- 100 102 | t1 <- nanotime(1:rows * one_second_duration) 103 | dt1 <- data.table(index=t1, matrix(1:(rows*cols), rows, cols)) 104 | setkey(dt1, index) 105 | t2 <- nanotime(1:10 * one_second_duration * 10) 106 | expect_equal(align(dt1, t2, eopen=FALSE), dt1[1:10 * 10]) 107 | #} 108 | 109 | #test_align_duration.before <- function() { 110 | ## do the alignment with an interval before of 1 nanosecond 111 | cols <- 3 112 | rows <- 100 113 | t1 <- nanotime(1:100 * one_second_duration * 2 + one_second_duration) 114 | dt1 <- data.table(index=t1, matrix(1:(rows*cols), rows, cols)) 115 | setkey(dt1, index) 116 | t2 <- nanotime(1:10 * one_second_duration * 10) 117 | expected <- dt1[seq(4, 49, 5)] 118 | expected[, index := index + one_second_duration] 119 | setkey(expected, index) 120 | expect_equal(align(dt1, t2, start=-one_second_duration), expected) 121 | #} 122 | 123 | #test_align_duration.after <- function() { 124 | ## do the alignment with an interval after of 1 nanosecond 125 | cols <- 3 126 | rows <- 100 127 | t1 <- nanotime(1:100 * one_second_duration * 2 + one_second_duration) 128 | dt1 <- data.table(index=t1, matrix(1:(rows*cols), rows, cols)) 129 | setkey(dt1, index) 130 | t2 <- nanotime(1:10 * one_second_duration * 10) 131 | expected <- dt1[seq(5, 50, 5)] 132 | expected[, index := index - one_second_duration] 133 | setkey(expected, index) 134 | expect_equal(align(dt1, t2, start=as.nanoduration(0), end=one_second_duration, eopen=FALSE), expected) 135 | #} 136 | 137 | #test_align_period.before <- function() { 138 | ## do the alignment with an interval before of 1 nanosecond 139 | cols <- 3 140 | rows <- 100 141 | t1 <- nanotime(1:100 * one_second_duration * 2 + one_second_duration) 142 | dt1 <- data.table(index=t1, matrix(1:(rows*cols), rows, cols)) 143 | setkey(dt1, index) 144 | t2 <- nanotime(1:10 * one_second_duration * 10) 145 | expected <- dt1[seq(4, 49, 5)] 146 | expected[, index := index + one_second_duration] 147 | setkey(expected, index) 148 | expect_equal(align(dt1, t2, start=-one_second_period, tz="America/New_York"), expected) 149 | #} 150 | 151 | # test align period after; do the alignment with an interval after of 1 nanosecond 152 | cols <- 3 153 | rows <- 100 154 | t1 <- nanotime(1:100 * one_second_duration * 2 + one_second_duration) 155 | dt1 <- data.table(index=t1, matrix(1:(rows*cols), rows, cols)) 156 | setkey(dt1, index) 157 | t2 <- nanotime(1:10 * one_second_duration * 10) 158 | expected <- dt1[seq(5, 50, 5)] 159 | expected[, index := index - one_second_duration] 160 | setkey(expected, index) 161 | expect_equal(align(dt1, t2, end=one_second_period, eopen=FALSE, tz="America/New_York"), expected) 162 | 163 | 164 | ## for period alignment, check it more carefully on a timezone boundary: 165 | #test_align_period.before <- function() { 166 | cols <- 3 167 | t1 <- seq(from=as.nanotime("2021-11-06T00:00:00 America/New_York"), 168 | to=as.nanotime("2021-11-09T00:00:00 America/New_York"), 169 | by=as.nanoduration("01:00:00")) 170 | rows <- length(t1) 171 | dt1 <- data.table(index=t1, matrix(1:(rows*cols), rows, cols)) 172 | setkey(dt1, index) 173 | t2 <- seq(from=as.nanotime("2021-11-06T00:00:00 America/New_York"), 174 | to=as.nanotime("2021-11-09T00:00:00 America/New_York"), 175 | by=as.nanoperiod("1d"), tz="America/New_York") 176 | ## notice below the transition over the day where we go to winter 177 | ## time that has 25 hours, hence the index that goes from 25 to 178 | ## 50: 179 | expect_equal(align(dt1, t2, start=-as.nanoperiod("1d"), sopen=TRUE, eopen=FALSE, tz="America/New_York"), dt1[c(1, 25, 50, 74)]) 180 | #} 181 | 182 | ## incorrect parameter types (nanoduration): 183 | expect_error(align(dt1, t2, sopen="open"), "must be a 'logical'") 184 | expect_error(align(dt1, t2, eopen="open"), "must be a 'logical'") 185 | expect_error(align(dt1, t2, func="a string instead of a function"), "must be a function") 186 | ## incorrect parameter types (nanoperiod): 187 | expect_error(align(dt1, t2, start=-as.nanoperiod("1d"), sopen="open"), "must be a 'logical'") 188 | expect_error(align(dt1, t2, start=-as.nanoperiod("1d"), eopen="open"), "must be a 'logical'") 189 | expect_error(align(dt1, t2, start=-as.nanoperiod("1d"), func="a string instead of a function", tz="America/New_York"), "must be a function") 190 | expect_error(align(dt1, t2, start=-as.nanoperiod("1d"), tz=complex(1)), "'tz' must be a 'character'") 191 | expect_error(align(dt1[,2], t2, start=-as.nanoperiod("1d")), "first column of 'data.table' must be of type 'nanotime'") 192 | 193 | ## missing key (nanoduration): 194 | t1 <- as.nanotime("2021-11-06T00:00:00 America/New_York") 195 | dt1 <- data.table(index=t1, v1=0) 196 | expect_error(align(dt1, t1), "first column of 'data.table' must be the first key") 197 | 198 | ## missing key (nanoperiod): 199 | t1 <- as.nanotime("2021-11-06T00:00:00 America/New_York") 200 | dt1 <- data.table(index=t1, v1=0) 201 | expect_error(align(dt1, t1, start=as.nanoperiod(0), tz="America/New_York"), "first column of 'data.table' must be the first key") 202 | 203 | 204 | ## align func 205 | ## ---------- 206 | #test_align.func.equal <- function() { 207 | ## do the alignment with only one 'x' row for each 'y' nanotime: 208 | cols <- 3 209 | rows <- 100 210 | t1 <- nanotime(1:rows * one_second_duration) 211 | dt1 <- data.table(index=t1, matrix(1:(rows*cols), rows, cols)) 212 | setkey(dt1, index) 213 | t2 <- nanotime(1:10 * one_second_duration * 10) 214 | square_col1 <- function(x) if (is.null(x)) c(NaN, NaN, NaN) else { x[1,1] <- x[1,1] ^ 2; x } 215 | exp <- dt1[1:10 * 10] 216 | exp[,2] <- exp[,2] ^ 2 217 | expect_equal(align(dt1, t2, end=as.nanoduration(1), func=square_col1), exp) 218 | #} 219 | 220 | #test_align.func.multiple <- function() { 221 | ### LLL 222 | ## test multiple 'x' rows for each 'y' nanotime: 223 | #} 224 | 225 | #test_align.func.variable_start <- function() { 226 | ## test using a non-scalar 'start' parameter 227 | cols <- 3 228 | rows <- 100 229 | t1 <- nanotime(1:rows * one_second_duration) 230 | dt1 <- data.table(index=t1, matrix(1:(rows*cols), rows, cols)) 231 | setkey(dt1, index) 232 | t2 <- nanotime(1:10 * one_second_duration * 10) 233 | square_col1 <- function(x) { x[1,1] <- x[1,1] ^ 2; x } 234 | exp <- dt1[1:10 * 10] 235 | exp[,2] <- exp[,2] ^ 2 236 | expect_equal(align(dt1, t2, end=as.nanoduration(1), func=square_col1), exp) 237 | #} 238 | 239 | #test_align.func.variable_start_end <- function() { 240 | ## test using a non-scalar 'start' and 'end' parameters 241 | cols <- 3 242 | rows <- 100 243 | t1 <- nanotime(1:rows * one_second_duration) 244 | dt1 <- data.table(index=t1, matrix(1:(rows*cols), rows, cols)) 245 | setkey(dt1, index) 246 | t2 <- nanotime(1:10 * one_second_duration * 10) 247 | square_col1 <- function(x) { x[1,1] <- x[1,1] ^ 2; x } 248 | exp <- dt1[1:10 * 10] 249 | exp[,2] <- exp[,2] ^ 2 250 | expect_equal(align(dt1, t2, end=as.nanoduration(1), func=square_col1), exp) 251 | #} 252 | 253 | #test_align.func.variable_start_end_overlapping <- function() { 254 | ## test using a non-scalar 'start' and 'end' parameters 255 | cols <- 3 256 | rows <- 100 257 | t1 <- nanotime(1:rows * one_second_duration) 258 | dt1 <- data.table(an_index=t1, matrix(0:(rows*cols-1), rows, cols), key="an_index") 259 | t2 <- nanotime(1:9 * one_second_duration * 10) 260 | exp <- data.table(an_index=t2, V1=seq(9.5, 89.5, by=10), key="an_index") 261 | exp[, V2 := V1 + 100] 262 | exp[, V3 := V2 + 100] 263 | ## useful for testing: 264 | ## newColMeans <- function(x) { 265 | ## print("this is x") 266 | ## print(x) 267 | ## colMeans(x) 268 | ## } 269 | expect_equal(align(dt1, t2, start=-10*one_second_duration, end=10*one_second_duration, sopen=TRUE, eopen=TRUE, func=colMeans), exp) 270 | #} 271 | 272 | #test_align.func_missing <- function() { 273 | ## test where some groups have no rows (0 row data.table) 274 | cols <- 3 275 | rows <- 100 276 | t1 <- nanotime(1:rows * one_second_duration) 277 | dt1 <- data.table(index=t1, matrix(1:(rows*cols), rows, cols)) 278 | setkey(dt1, index) 279 | t2 <- nanotime(1:11 * one_second_duration * 10) 280 | square_col1 <- function(x) { 281 | if (nrow(x)==0) data.table(NaN, NaN, NaN) 282 | else { x[1,1] <- x[1,1] ^ 2; x } 283 | } 284 | exp <- rbind(dt1[1:10 * 10], data.table(index=t2[11], V1=NaN, V2=NaN, V3=NaN)) 285 | exp[,2] <- exp[,2] ^ 2 286 | setkey(exp, index) 287 | expect_equal(align(dt1, t2, end=as.nanoduration(1), eopen=FALSE, func=square_col1), exp) 288 | #} 289 | 290 | #test_align.func_error_dim <- function() { 291 | ## test when 'func' returns an incorrect number of columns 292 | cols <- 3 293 | rows <- 100 294 | t1 <- nanotime(1:rows * one_second_duration) 295 | dt1 <- data.table(index=t1, matrix(1:(rows*cols), rows, cols)) 296 | setkey(dt1, index) 297 | t2 <- nanotime(1:11 * one_second_duration * 10) 298 | square_col1 <- function(x) if (nrow(x)==0) 1 else { x[1,1] <- x[1,1] ^ 2; x } 299 | expect_error(align(dt1, t2, end=as.nanoduration(1), func=square_col1)) 300 | #} 301 | 302 | #test_align.func_error_incorrect_function <- function() { 303 | ## test when 'func' cannot be called 304 | cols <- 3 305 | rows <- 100 306 | t1 <- nanotime(1:rows * one_second_duration) 307 | dt1 <- data.table(index=t1, matrix(1:(rows*cols), rows, cols)) 308 | setkey(dt1, index) 309 | t2 <- nanotime(1:11 * one_second_duration * 10) 310 | f <- function(x) lkdsjfdsfsdfsdfds(x) # will not eval 311 | expect_error(align(dt1, t2, end=as.nanoduration(1), func=f)) 312 | #} 313 | 314 | 315 | ## for period alignment, check it more carefully on a timezone boundary: 316 | #test_align_func_period_before <- function() { 317 | ## what we are specifically looking for here is that the alignment 318 | ## will take the calendar day over the timezone boundary, so using 319 | ## `nrow` as `func` will yield the transition day to winter time 320 | ## as having 25 hours" 321 | cols <- 3 322 | t1 <- seq(from=as.nanotime("2021-11-06T00:00:00 America/New_York"), 323 | to=as.nanotime("2021-11-10T00:00:00 America/New_York"), 324 | by=as.nanoduration("01:00:00")) 325 | rows <- length(t1) 326 | dt1 <- data.table(index=t1, matrix(1:(rows*cols), rows, cols)) 327 | setkey(dt1, index) 328 | t2 <- seq(from=as.nanotime("2021-11-07T00:00:00 America/New_York"), 329 | to=as.nanotime("2021-11-09T00:00:00 America/New_York"), 330 | by=as.nanoperiod("1d"), tz="America/New_York") 331 | expected <- data.table(index=t2, V1=c(24, 25, 24)) 332 | setkey(expected, index) 333 | expect_equal(align(dt1, t2, start=-as.nanoperiod("1d"), tz="America/New_York", func=nrow), expected) 334 | #} 335 | 336 | #test_align_func_period_after <- function() { 337 | ## what we are specifically looking for here is that the alignment 338 | ## will take the calendar day over the timezone boundary, so using 339 | ## `nrow` as `func` will yield the transition day to winter time 340 | ## as having 25 hours" 341 | cols <- 3 342 | t1 <- seq(from=as.nanotime("2021-11-06T00:00:00 America/New_York"), 343 | to=as.nanotime("2021-11-10T00:00:00 America/New_York"), 344 | by=as.nanoduration("01:00:00")) 345 | rows <- length(t1) 346 | dt1 <- data.table(index=t1, matrix(1:(rows*cols), rows, cols)) 347 | setkey(dt1, index) 348 | t2 <- seq(from=as.nanotime("2021-11-07T00:00:00 America/New_York"), 349 | to=as.nanotime("2021-11-09T00:00:00 America/New_York"), 350 | by=as.nanoperiod("1d"), tz="America/New_York") 351 | expected <- data.table(index=t2, V1=c(25, 24, 24)) 352 | setkey(expected, index) 353 | expect_equal(align(dt1, t2, end=as.nanoperiod("1d"), tz="America/New_York", func=nrow), expected) 354 | #} 355 | 356 | #test_align.func.equal_incorrect_dt <- function() { 357 | ## do the alignment with only one 'x' row for each 'y' nanotime: 358 | cols <- 3 359 | rows <- 100 360 | dt1 <- data.table(index="a", matrix(1:(rows*cols), rows, cols)) 361 | setkey(dt1, index) 362 | t2 <- nanotime(1:10 * one_second_duration * 10) 363 | square_col1 <- function(x) if (is.null(x)) c(NaN, NaN, NaN) else { x[1,1] <- x[1,1] ^ 2; x } 364 | exp <- dt1[1:10 * 10] 365 | exp[,2] <- exp[,2] ^ 2 366 | expect_error(align(dt1, t2, end=as.nanoduration(1), func=square_col1)) 367 | #} 368 | 369 | 370 | # test align period before; make sure we have trailing NA: 371 | cols <- 3 372 | rows <- 40 373 | t1 <- nanotime(1:rows * one_second_duration * 2 + one_second_duration) 374 | dt1 <- data.table(index=t1, matrix(1:(rows*cols), rows, cols)) 375 | setkey(dt1, index) 376 | t2 <- nanotime(1:10 * one_second_duration * 10) 377 | exp <- data.table(index=t2, V1=c(rep(1, 8), rep(0, 2))) 378 | setkey(exp, index) 379 | expect_equal(align(dt1, t2, start=-one_second_period, eopen=FALSE, func=nrow, tz="America/New_York"), exp) 380 | 381 | # test align period before; make sure we have trailing NA; same as above with eopen==TRUE: 382 | cols <- 3 383 | rows <- 40 384 | t1 <- nanotime(1:rows * one_second_duration * 2 + one_second_duration) 385 | dt1 <- data.table(index=t1, matrix(1:(rows*cols), rows, cols)) 386 | setkey(dt1, index) 387 | t2 <- nanotime(1:10 * one_second_duration * 10) 388 | exp <- data.table(index=t2, V1=c(rep(1, 8), rep(0, 2))) 389 | setkey(exp, index) 390 | expect_equal(align(dt1, t2, start=-one_second_period, func=nrow, tz="America/New_York"), exp) 391 | 392 | # test align period before; make sure we have trailing NA; same as above with sopen==TRUE: 393 | cols <- 3 394 | rows <- 40 395 | t1 <- nanotime(1:rows * one_second_duration * 2 + one_second_duration) 396 | dt1 <- data.table(index=t1, matrix(1:(rows*cols), rows, cols)) 397 | setkey(dt1, index) 398 | t2 <- nanotime(1:10 * one_second_duration * 10) 399 | exp <- data.table(index=t2, V1=0) 400 | setkey(exp, index) 401 | expect_equal(align(dt1, t2, start=-one_second_period, sopen=TRUE, eopen=FALSE, func=nrow, tz="America/New_York"), exp) 402 | 403 | 404 | ## grid_align: 405 | ## ---------- 406 | 407 | ## nanoduration 408 | t1 <- seq(from=as.nanotime("2021-11-06T00:00:00 America/New_York"), 409 | to=as.nanotime("2021-11-09T00:00:00 America/New_York"), 410 | by=as.nanoduration("01:00:00")) 411 | dt1 <- data.table(index=t1, V1=0:(length(t1)-1)) 412 | setkey(dt1, index) 413 | exp <- data.table(index=as.nanotime(c("2021-11-07T00:00:00-04:00", "2021-11-07T23:00:00-05:00", 414 | "2021-11-08T23:00:00-05:00", "2021-11-09T23:00:00-05:00")), 415 | V1=c(24, 48, 72, 73)) 416 | setkey(exp, index) 417 | expect_equal(grid_align(dt1, by=as.nanoduration("24:00:00")), exp) 418 | 419 | ## nanoduration error with dt 420 | dt1 <- data.table(index=1:10, V1=1:10) 421 | setkey(dt1, index) 422 | expect_error(grid_align(dt1, by=as.nanoduration("24:00:00")), "first column of 'data.table' must be of type 'nanotime'") 423 | 424 | ## nanoperiod 425 | t1 <- seq(from=as.nanotime("2021-11-06T00:00:00 America/New_York"), 426 | to=as.nanotime("2021-11-09T00:00:00 America/New_York"), 427 | by=as.nanoduration("01:00:00")) 428 | dt1 <- data.table(index=t1, V1=0:(length(t1)-1)) 429 | setkey(dt1, index) 430 | # format(grid_align(dt1, by=as.nanoperiod("1d"), tz="America/New_York")$index, tz="America/New_York") 431 | exp <- data.table(index=as.nanotime(c("2021-11-07T00:00:00-04:00", "2021-11-08T00:00:00-05:00", 432 | "2021-11-09T00:00:00-05:00")), 433 | V1=c(24, 49, 73)) 434 | setkey(exp, index) 435 | expect_equal(grid_align(dt1, by=as.nanoperiod("1d"), tz="America/New_York"), exp) 436 | 437 | ## nanoduration error with dt 438 | dt1 <- data.table(index=1:10, V1=1:10) 439 | setkey(dt1, index) 440 | expect_error(grid_align(dt1, by=as.nanoperiod("24:00:00")), "first column of 'data.table' must be of type 'nanotime'") 441 | 442 | ## this test to make the grid longer than t1: 443 | t1 <- seq(from=as.nanotime("2021-11-06T00:00:00 America/New_York"), 444 | to=as.nanotime("2021-11-08T23:00:00 America/New_York"), 445 | by=as.nanoduration("01:00:00")) 446 | dt1 <- data.table(index=t1, V1=0:(length(t1)-1)) 447 | setkey(dt1, index) 448 | exp <- data.table(index=as.nanotime(c("2021-11-07T00:00:00-04:00", "2021-11-08T00:00:00-05:00", 449 | "2021-11-09T00:00:00-05:00")), 450 | V1=c(24, 49, 72)) 451 | setkey(exp, index) 452 | expect_equal(grid_align(dt1, by=as.nanoperiod("1d"), tz="America/New_York"), exp) 453 | 454 | 455 | ## frequency (aka grid_align with func = 'nrow': 456 | ## --------- 457 | #test_frequency_duration <- function() { 458 | cols <- 3 459 | rows <- 100 460 | t1 <- nanotime(1:rows * one_second_duration) 461 | dt1 <- data.table(index=t1, matrix(1:(rows*cols), rows, cols)) 462 | setkey(dt1, index) 463 | res <- dtts:::frequency(dt1, by=as.nanoduration("00:00:30")) 464 | exp <- tail(data.table(index=seq(dt1$index[1], by=30*one_second_duration, length.out=4), V1=30), -1) 465 | exp <- rbind(exp, data.table(index=tail(exp$index,1)+30*one_second_duration, V1=10)) 466 | setkey(exp, index) 467 | expect_equal(res, exp) 468 | #} 469 | 470 | ## #test_frequency_start_end_duration <- function() { 471 | cols <- 3 472 | rows <- 100 473 | t1 <- nanotime(0:(rows-1) * one_second_duration) 474 | dt1 <- data.table(index=t1, matrix(0:(rows*cols-1), rows, cols)) 475 | setkey(dt1, index) 476 | res <- dtts:::frequency(dt1, by=as.nanoduration("00:00:30"), grid_start=nanotime(0), grid_end=nanotime(0) + 2*30*one_second_duration) 477 | exp <- data.table(index=seq(dt1$index[1], by=30*one_second_duration, length.out=3), V1=c(0, 30, 30)) 478 | setkey(exp, index) 479 | expect_equal(res, exp) 480 | ## #} 481 | 482 | 483 | #test_frequency_period <- function() { 484 | t1 <- seq(nanotime("2021-02-01 00:00:00 America/New_York"), nanotime("2021-04-01 00:00:00 America/New_York"), 485 | by=as.nanoperiod("01:00:00"), tz="America/New_York") 486 | dt1 <- data.table(index=t1, V1=1:length(t1)) 487 | setkey(dt1, index) 488 | res <- dtts:::frequency(dt1, by=as.nanoperiod("1d"), tz="America/New_York") 489 | 490 | t2 <- seq(nanotime("2021-02-02 00:00:00 America/New_York"), nanotime("2021-04-01 00:00:00 America/New_York"), 491 | by=as.nanoperiod("1d"), tz="America/New_York") 492 | exp <- data.table(index=t2, V1=24) 493 | exp[index=="2021-03-15T04:00:00+00:00", V1 := 23] # the dailight transition day 494 | setkey(exp, index) 495 | expect_equal(res, exp) 496 | #} 497 | 498 | ## frequency wrong 'by' type: 499 | expect_error(dtts:::frequency(dt1, by=3), "argument 'by' must be either 'nanoduration' or 'nanotime'") 500 | 501 | ## tests for when there are duplicate times in the vector to align onto: 502 | ## -------------------------------------------------------------------- 503 | 504 | ## align_idx duplicates in t2: 505 | t1 <- nanotime(1:100 * one_second_duration) 506 | time_vec <- c(1, 2, 3, 3, 4, 5, 6, 7, 8, 8) 507 | t2 <- nanotime(c(1, 2, 3, 3, 4, 5, 6, 7, 8, 8) * one_second_duration * 10) 508 | expect_equal(align_idx(t1, t2, sopen=FALSE, eopen=FALSE), time_vec * 10) 509 | 510 | ## check the same but with duplicate in t1: 511 | t1 <- nanotime(rep(1:10, each=2) * one_second_duration) 512 | t2 <- nanotime(c(1, 2, 3, 3, 4, 5, 6, 7, 8, 8) * one_second_duration) 513 | # we don't expect the indices to be duplicates, because the indices represent a number for each element of t2: 514 | expect_equal(align_idx(t1, t2, sopen=FALSE, eopen=FALSE), c(1, 3, 5, 5, 7, 9, 11, 13, 15, 15)) 515 | 516 | ## check the same but with duplicate on open start boundary with no interval: 517 | t1 <- nanotime(rep(1:10, each=2) * one_second_duration) 518 | t2 <- nanotime(c(1, 2, 3, 3, 4, 5, 6, 7, 8, 8) * one_second_duration) 519 | # we don't expect the indices to be duplicates, because the indices represent a number for each element of t2: 520 | expect_equal(align_idx(t1, t2, sopen=TRUE, eopen=FALSE), rep(NA_real_, length(t2))) 521 | 522 | ## idx.align, duplicates in t2, with interval: 523 | t1 <- nanotime(1:100 * one_second_duration) 524 | time_vec <- c(1, 2, 3, 3, 4, 5, 6, 7, 8, 8) 525 | t2 <- nanotime(c(1, 2, 3, 3, 4, 5, 6, 7, 8, 8) * one_second_duration * 10) 526 | expect_equal(align_idx(t1, t2, start=-one_second_duration, sopen=FALSE, eopen=FALSE), time_vec * 10) 527 | 528 | ## idx.align, duplicates in t2, with interval, sopen=TRUE: 529 | t1 <- nanotime(1:100 * one_second_duration) 530 | time_vec <- c(1, 2, 3, 3, 4, 5, 6, 7, 8, 8) 531 | t2 <- nanotime(c(1, 2, 3, 3, 4, 5, 6, 7, 8, 8) * one_second_duration * 10) 532 | expect_equal(align_idx(t1, t2, start=-one_second_duration, sopen=TRUE, eopen=FALSE), time_vec * 10) 533 | 534 | ## idx.align, duplicates in t2, with interval, sopen=FALSE, eopen=TRUE: 535 | t1 <- nanotime(1:100 * one_second_duration) 536 | time_vec <- c(1, 2, 3, 3, 4, 5, 6, 7, 8, 8) 537 | t2 <- nanotime(c(1, 2, 3, 3, 4, 5, 6, 7, 8, 8) * one_second_duration * 10) 538 | ## since we have eopen TRUE, we have to fetch one index before, hence the '-1': 539 | expect_equal(align_idx(t1, t2, start=-one_second_duration, sopen=FALSE, eopen=TRUE), time_vec * 10 - 1) 540 | 541 | ## align, duplicates in t2 542 | t1 <- nanotime(1:100 * one_second_duration) 543 | dt1 <- data.table(index=t1, 1) 544 | setkey(dt1, index) 545 | time_vec <- c(1, 2, 3, 3, 4, 5, 6, 7, 8, 8) 546 | t2 <- nanotime(c(1, 2, 3, 3, 4, 5, 6, 7, 8, 8) * one_second_duration * 10) 547 | dt2 <- data.table(index=t2, V1=2) 548 | setkey(dt2, index) 549 | ## since we have eopen TRUE, we have to fetch one index before, hence the '-1': 550 | expect_equal(align(dt1, t2, start=-2*one_second_duration, func=sum), dt2) 551 | 552 | ## align, duplicates in t1/t2 553 | t1 <- nanotime(rep(1:50, each=2) * one_second_duration) 554 | dt1 <- data.table(index=t1, 1) 555 | setkey(dt1, index) 556 | time_vec <- c(1, 2, 3, 3, 4, 5, 6, 7, 8, 8) 557 | t2 <- nanotime(c(1, 2, 3, 3, 4, 5, 6, 7, 8, 8) * one_second_duration * 10) 558 | dt2 <- data.table(index=t2, V1=c(rep(4, 6), rep(0, 4))) 559 | setkey(dt2, index) 560 | ## since we have eopen TRUE, we have to fetch one index before, hence the '-1': 561 | expect_equal(align(dt1, t2, start=-2*one_second_duration, func=sum), dt2) 562 | 563 | ## align, duplicates in t1/t2, sopen/eopen FALSE 564 | t1 <- nanotime(rep(1:50, each=2) * one_second_duration) 565 | dt1 <- data.table(index=t1, 1) 566 | setkey(dt1, index) 567 | time_vec <- c(1, 2, 3, 3, 4, 5, 6, 7, 8, 8) 568 | t2 <- nanotime(c(1, 2, 3, 3, 4, 5, 6, 7, 8, 8) * one_second_duration * 10) 569 | dt2 <- data.table(index=t2, V1=c(rep(6, 6), rep(0, 4))) 570 | setkey(dt2, index) 571 | ## since we have eopen TRUE, we have to fetch one index before, hence the '-1': 572 | expect_equal(align(dt1, t2, start=-2*one_second_duration, sopen=FALSE, eopen=FALSE, func=sum), dt2) 573 | 574 | ## align, duplicates in t2: 575 | t1 <- nanotime(rep(1:100, each=1) * one_second_duration) 576 | dt1 <- data.table(index=t1, 1) 577 | setkey(dt1, index) 578 | time_vec <- c(1, 2, 3, 3, 4, 5, 6, 7, 8, 8) 579 | t2 <- nanotime(c(1, 2, 3, 3, 4, 5, 6, 7, 8, 8) * one_second_duration * 10) 580 | dt2 <- data.table(index=t2, V1=0) 581 | setkey(dt2, index) 582 | expect_equal(align(dt1, t2, start=-one_second_duration, sopen=TRUE, eopen=TRUE, func=sum), dt2) 583 | dt2[, V1 := 1] 584 | expect_equal(align(dt1, t2, start=-one_second_duration, sopen=TRUE, eopen=FALSE, func=sum), dt2) 585 | expect_equal(align(dt1, t2, start=-one_second_duration, sopen=FALSE, eopen=TRUE, func=sum), dt2) 586 | dt2[, V1 := 2] 587 | expect_equal(align(dt1, t2, start=-one_second_duration, sopen=FALSE, eopen=FALSE, func=sum), dt2) 588 | 589 | ## align, duplicates in t1/t2: 590 | t1 <- nanotime(rep(1:50, each=2) * one_second_duration) 591 | dt1 <- data.table(index=t1, 1) 592 | setkey(dt1, index) 593 | time_vec <- c(1, 2, 3, 3, 4, 5, 6, 7, 8, 8) 594 | t2 <- nanotime(c(1, 2, 3, 3, 4, 5, 6, 7, 8, 8) * one_second_duration * 10) 595 | dt2 <- data.table(index=t2, V1=c(rep(0, 6), rep(0, 4))) 596 | setkey(dt2, index) 597 | expect_equal(align(dt1, t2, start=-one_second_duration, sopen=TRUE, eopen=TRUE, func=sum), dt2) 598 | dt2[, V1 := c(rep(2, 6), rep(0, 4))] 599 | expect_equal(align(dt1, t2, start=-one_second_duration, sopen=TRUE, eopen=FALSE, func=sum), dt2) 600 | expect_equal(align(dt1, t2, start=-one_second_duration, sopen=FALSE, eopen=TRUE, func=sum), dt2) 601 | dt2[, V1 := c(rep(4, 6), rep(0, 4))] 602 | expect_equal(align(dt1, t2, start=-one_second_duration, sopen=FALSE, eopen=FALSE, func=sum), dt2) 603 | 604 | ## align, duplicates in t1/t2, period: 605 | t1 <- nanotime(rep(1:50, each=2) * one_second_duration) 606 | dt1 <- data.table(index=t1, 1) 607 | setkey(dt1, index) 608 | time_vec <- c(1, 2, 3, 3, 4, 5, 6, 7, 8, 8) 609 | t2 <- nanotime(c(1, 2, 3, 3, 4, 5, 6, 7, 8, 8) * one_second_duration * 10) 610 | dt2 <- data.table(index=t2, V1=c(rep(0, 6), rep(0, 4))) 611 | setkey(dt2, index) 612 | expect_equal(align(dt1, t2, start=-one_second_period, sopen=TRUE, eopen=TRUE, func=sum, tz="UTC"), dt2) 613 | dt2[, V1 := c(rep(2, 6), rep(0, 4))] 614 | expect_equal(align(dt1, t2, start=-one_second_period, sopen=TRUE, eopen=FALSE, func=sum, tz="UTC"), dt2) 615 | expect_equal(align(dt1, t2, start=-one_second_period, sopen=FALSE, eopen=TRUE, func=sum, tz="UTC"), dt2) 616 | dt2[, V1 := c(rep(4, 6), rep(0, 4))] 617 | expect_equal(align(dt1, t2, start=-one_second_period, sopen=FALSE, eopen=FALSE, func=sum, tz="UTC"), dt2) 618 | 619 | 620 | ## tests for unsorted calls to align_idx: 621 | ## ------------------------------------- 622 | ## both unsorted: 623 | x <- as.nanotime(10:1) 624 | y <- as.nanotime(4:2) 625 | expect_error(align_idx(x, y), "'y' must be sorted in ascending order") 626 | ## x only unsorted: 627 | x <- as.nanotime(10:1) 628 | y <- as.nanotime(2:4) 629 | expect_error(align_idx(x, y), "'x' must be sorted in ascending order") 630 | ## y only unsorted: 631 | x <- as.nanotime(1:10) 632 | y <- as.nanotime(4:2) 633 | expect_error(align_idx(x, y), "'y' must be sorted in ascending order") 634 | ## bypass x sorted: 635 | x <- as.nanotime(1:10) 636 | y <- as.nanotime(2:4) 637 | expect_equal(align_idx(x, y, bypass_x_check=TRUE), c(2:4)) 638 | ## bypass x sorted, x descending: 639 | x <- as.nanotime(10:1) 640 | y <- as.nanotime(2:4) 641 | expect_equal(align_idx(x, y, bypass_x_check=TRUE), rep(NA_real_, 3)) # incorrect align as x is not sorted 642 | ## bypass y sorted, y descending: 643 | x <- as.nanotime(1:10) 644 | y <- as.nanotime(4:1) 645 | expect_equal(align_idx(x, y, bypass_y_check=TRUE), c(4, rep(NA_real_, 3))) # incorrect align as x is not sorted 646 | ## x only unsorted, period: 647 | x <- as.nanotime(10:1) 648 | y <- as.nanotime(2:4) 649 | expect_error(align_idx(x, y, start=-as.nanoperiod("00:00:01"), tz="UTC"), "'x' must be sorted in ascending order") 650 | ## y only unsorted, period: 651 | x <- as.nanotime(1:10) 652 | y <- as.nanotime(4:2) 653 | expect_error(align_idx(x, y, start=-as.nanoperiod("00:00:01"), tz="UTC"), "'y' must be sorted in ascending order") 654 | 655 | 656 | ## tests for 'ops' function: 657 | ## ------------------------ 658 | ## 1 'x' col, 1 'y' col: 659 | t1 <- nanotime(1:2 * one_second_duration * 2) 660 | t2 <- nanotime(1:4 * one_second_duration) 661 | dt1 <- data.table(index=t1, data1 = 1:2) 662 | setkey(dt1, index) 663 | dt2 <- data.table(index=t2, data1 = 1) 664 | setkey(dt2, index) 665 | expected_dt = copy(dt2) 666 | expected_dt[, data1 := c(2, 3, 3, 1)] 667 | expect_equal(expected_dt, ops(dt1, dt2, "+")) 668 | ## 1 'x' col, 1 'y' col, ops=='-': 669 | t1 <- nanotime(1:2 * one_second_duration * 2) 670 | t2 <- nanotime(1:4 * one_second_duration) 671 | dt1 <- data.table(index=t1, data1 = 1:2) 672 | setkey(dt1, index) 673 | dt2 <- data.table(index=t2, data1 = 1) 674 | setkey(dt2, index) 675 | expected_dt = copy(dt2) 676 | expected_dt[, data1 := c(0, 1, 1, 1)] 677 | expect_equal(expected_dt, ops(dt1, dt2, "-")) 678 | ## 1 'x' col, 1 'y' col, ops=='*': 679 | t1 <- nanotime(1:2 * one_second_duration * 2) 680 | t2 <- nanotime(1:4 * one_second_duration) 681 | dt1 <- data.table(index=t1, data1 = 1:2) 682 | setkey(dt1, index) 683 | dt2 <- data.table(index=t2, data1 = 1) 684 | setkey(dt2, index) 685 | expected_dt = copy(dt2) 686 | expected_dt[, data1 := c(1, 2, 2, 1)] 687 | expect_equal(expected_dt, ops(dt1, dt2, "*")) 688 | ## 1 'x' col, 1 'y' col, ops=='/': 689 | t1 <- nanotime(1:2 * one_second_duration * 2) 690 | t2 <- nanotime(1:4 * one_second_duration) 691 | dt1 <- data.table(index=t1, data1 = 1:2) 692 | setkey(dt1, index) 693 | dt2 <- data.table(index=t2, data1 = 1) 694 | setkey(dt2, index) 695 | expected_dt = copy(dt2) 696 | expected_dt[, data1 := c(1, 2, 2, 1)] 697 | expect_equal(expected_dt, ops(dt1, dt2, "/")) 698 | ## 1 'x' col, 3 'y' cols: 699 | t1 <- nanotime(1:2 * one_second_duration * 2) 700 | t2 <- nanotime(1:4 * one_second_duration) 701 | dt1 <- data.table(index=t1, data1 = 1:2) 702 | setkey(dt1, index) 703 | dt2 <- data.table(index=t2, data1=1, data2=2, data3=3) 704 | setkey(dt2, index) 705 | ops(dt1, dt2, "+") 706 | expected_dt = copy(dt2) 707 | expected_dt[, data1 := c(2, 3, 3, 1)] 708 | expected_dt[, data2 := data1 + 1] 709 | expected_dt[, data3 := data2 + 1] 710 | expect_equal(expected_dt, ops(dt1, dt2, "+")) 711 | ## 3 'x' col, 3 'y' cols: 712 | t1 <- nanotime(1:2 * one_second_duration * 2) 713 | t2 <- nanotime(1:4 * one_second_duration) 714 | dt1 <- data.table(index=t1, c1=1:2, c2=3:4, c3=5:6) 715 | setkey(dt1, index) 716 | dt2 <- data.table(index=t2, data1=1, data2=2, data3=3) 717 | setkey(dt2, index) 718 | expected_dt = copy(dt2) 719 | expected_dt[, data1 := c(2, 3, 3, 1)] 720 | expected_dt[, data2 := c(5, 6, 6, 2)] 721 | expected_dt[, data3 := c(8, 9, 9, 3)] 722 | expect_equal(expected_dt, ops(dt1, dt2, "+")) 723 | ## no overlap -> no change 724 | t1 <- nanotime(1:2 * one_second_duration * 10) 725 | t2 <- nanotime(1:4 * one_second_duration) 726 | dt1 <- data.table(index=t1, data1 = 1:2) 727 | setkey(dt1, index) 728 | dt2 <- data.table(index=t2, data1 = 1) 729 | setkey(dt2, index) 730 | expect_equal(dt2, ops(dt1, dt2, "+")) 731 | ## 3 'x' col, 3 'y' cols:, skip extra string cols in 'y': 732 | t1 <- nanotime(1:2 * one_second_duration * 2) 733 | t2 <- nanotime(1:4 * one_second_duration) 734 | dt1 <- data.table(index=t1, c1=1:2, c2=3:4, c3=5:6) 735 | setkey(dt1, index) 736 | dt2 <- data.table(index=t2, d1=1, c1="a", d2=2, c2="b", d3=3, c3="c") 737 | setkey(dt2, index) 738 | expected_dt = copy(dt2) 739 | expected_dt[, d1 := c(2, 3, 3, 1)] 740 | expected_dt[, d2 := c(5, 6, 6, 2)] 741 | expected_dt[, d3 := c(8, 9, 9, 3)] 742 | expect_equal(expected_dt, ops(dt1, dt2, "+")) 743 | ## same, but mix of int and double: 744 | t1 <- nanotime(1:2 * one_second_duration * 2) 745 | t2 <- nanotime(1:4 * one_second_duration) 746 | dt1 <- data.table(index=t1, d1=1:2, d2=3:4, d3=5:6) 747 | setkey(dt1, index) 748 | dt2 <- data.table(index=t2, d1=as.integer(1), c1="a", d2=2, c2="b", d3=3, c3="c") 749 | setkey(dt2, index) 750 | expected_dt = copy(dt2) 751 | expected_dt[, d1 := c(2, 3, 3, 1)] 752 | expected_dt[, d2 := c(5, 6, 6, 2)] 753 | expected_dt[, d3 := c(8, 9, 9, 3)] 754 | expect_equal(expected_dt, ops(dt1, dt2, "+")) 755 | ## error, non-numeric column in x: 756 | t1 <- nanotime(1:2 * one_second_duration * 2) 757 | t2 <- nanotime(1:4 * one_second_duration) 758 | dt1 <- data.table(index=t1, d1=1:2, c1="a", d2=3:4, d3=5:6) 759 | setkey(dt1, index) 760 | dt2 <- data.table(index=t2, d1=as.integer(1), c1="a", d2=2, c2="b", d3=3, c3="c") 761 | setkey(dt2, index) 762 | expect_error(ops(dt1, dt2, "+"), "all data columns of 'x' must be numeric") 763 | ## error, 2 cols in 'x', 3 cols in 'y' 764 | t1 <- nanotime(1:2 * one_second_duration * 2) 765 | t2 <- nanotime(1:4 * one_second_duration) 766 | dt1 <- data.table(index=t1, c1=1:2, c2=3:4) 767 | setkey(dt1, index) 768 | dt2 <- data.table(index=t2, d1=1, c1="a", d2=2, c2="b", d3=3, c3="c") 769 | setkey(dt2, index) 770 | expect_error(ops(dt1, dt2, "+"), "'x' must have one numeric column or the same number as 'y'") 771 | ## error, no numerical columns in x: 772 | t1 <- nanotime(1:2 * one_second_duration * 2) 773 | t2 <- nanotime(1:4 * one_second_duration) 774 | dt1 <- data.table(index=t1, c1="a", c2="b") 775 | setkey(dt1, index) 776 | dt2 <- data.table(index=t2, d1=1, c1="a", d2=2, c2="b", d3=3, c3="c") 777 | setkey(dt2, index) 778 | expect_error(ops(dt1, dt2, "+"), "'x' must have at least one numeric column") 779 | ## error, 3 cols in 'x', 2 cols in 'y' 780 | t1 <- nanotime(1:2 * one_second_duration * 2) 781 | t2 <- nanotime(1:4 * one_second_duration) 782 | dt1 <- data.table(index=t1, c1=1:2, c2=3:4, c3=5:6) 783 | setkey(dt1, index) 784 | dt2 <- data.table(index=t2, d1=1, c1="a", d2=2, c2="b", c3="c") 785 | setkey(dt2, index) 786 | expect_error(ops(dt1, dt2, "+"), "'x' must have one numeric column or the same number as 'y'") 787 | ## error key check dt1: 788 | t1 <- nanotime(1:2 * one_second_duration * 2) 789 | t2 <- nanotime(1:4 * one_second_duration) 790 | dt1 <- data.table(index=t1, c1=1:2, c2=3:4, c3=5:6) 791 | dt2 <- data.table(index=t2, d1=1, c1="a", d2=2, c2="b", d3=3, c3="c") 792 | setkey(dt2, index) 793 | expect_error(ops(dt1, dt2, "+"), "first column of 'x' must be the first key") 794 | ## error key check dt2: 795 | t1 <- nanotime(1:2 * one_second_duration * 2) 796 | t2 <- nanotime(1:4 * one_second_duration) 797 | dt1 <- data.table(index=t1, c1=1:2, c2=3:4, c3=5:6) 798 | setkey(dt1, index) 799 | dt2 <- data.table(index=t2, d1=1, c1="a", d2=2, c2="b", d3=3, c3="c") 800 | expect_error(ops(dt1, dt2, "+"), "first column of 'y' must be the first key") 801 | ## error key check first col of 'x' not nanotime: 802 | t1 <- 1:2 803 | t2 <- nanotime(1:4 * one_second_duration) 804 | dt1 <- data.table(index=t1, c1=1:2, c2=3:4, c3=5:6) 805 | setkey(dt1, index) 806 | dt2 <- data.table(index=t2, d1=1, c1="a", d2=2, c2="b", d3=3, c3="c") 807 | setkey(dt2, index) 808 | expect_error(ops(dt1, dt2, "+"), "first column of 'x' must be of type 'nanotime'") 809 | ## error key check first col of 'y' not nanotime: 810 | t1 <- nanotime(1:2 * one_second_duration * 2) 811 | t2 <- 1:4 812 | dt1 <- data.table(index=t1, c1=1:2, c2=3:4, c3=5:6) 813 | setkey(dt1, index) 814 | dt2 <- data.table(index=t2, d1=1, c1="a", d2=2, c2="b", d3=3, c3="c") 815 | setkey(dt2, index) 816 | expect_error(ops(dt1, dt2, "+"), "first column of 'y' must be of type 'nanotime'") 817 | ## check unknown 'op': 818 | t1 <- nanotime(1:2 * one_second_duration * 2) 819 | t2 <- nanotime(1:4 * one_second_duration) 820 | dt1 <- data.table(index=t1, c1=1:2, c2=3:4, c3=5:6) 821 | setkey(dt1, index) 822 | dt2 <- data.table(index=t2, data1=1, data2=2, data3=3) 823 | setkey(dt2, index) 824 | expect_error(ops(dt1, dt2, "sdf"), "unsupported operator 'sdf'") 825 | 826 | 827 | if (FALSE) { 828 | ## don't do this; must appear in vignette! 829 | 830 | t2 <- seq(nanotime("2021-02-02 00:00:00 America/New_York"), nanotime("2021-04-01 00:00:00 America/New_York"), 831 | by=as.nanoperiod("1d"), tz="America/New_York") 832 | exp <- data.table(index=t2, V1=24) 833 | exp[index=="2021-03-15T04:00:00+00:00"] <- 1e9 834 | } 835 | 836 | options(nanotimeFormat=savedFormat) 837 | -------------------------------------------------------------------------------- /man/align.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/dtts.R 3 | \name{align} 4 | \alias{align} 5 | \alias{align,data.table,nanotime,nanoduration,nanoduration-method} 6 | \alias{align,data.table,nanotime,missing,missing-method} 7 | \alias{align,data.table,nanotime,nanoduration,missing-method} 8 | \alias{align,data.table,nanotime,missing,nanoduration-method} 9 | \alias{align,data.table,nanotime,nanoperiod,nanoperiod-method} 10 | \alias{align,data.table,nanotime,nanoperiod,missing-method} 11 | \alias{align,data.table,nanotime,missing,nanoperiod-method} 12 | \title{Align a \code{data.table} onto a \code{nanotime} vector} 13 | \usage{ 14 | align(x, y, start, end, ...) 15 | 16 | \S4method{align}{data.table,nanotime,nanoduration,nanoduration}( 17 | x, 18 | y, 19 | start = as.nanoduration(0), 20 | end = as.nanoduration(0), 21 | sopen = FALSE, 22 | eopen = TRUE, 23 | func = NULL 24 | ) 25 | 26 | \S4method{align}{data.table,nanotime,missing,missing}( 27 | x, 28 | y, 29 | start = as.nanoduration(0), 30 | end = as.nanoduration(0), 31 | sopen = FALSE, 32 | eopen = TRUE, 33 | func = NULL 34 | ) 35 | 36 | \S4method{align}{data.table,nanotime,nanoduration,missing}( 37 | x, 38 | y, 39 | start = as.nanoduration(0), 40 | end = as.nanoduration(0), 41 | sopen = FALSE, 42 | eopen = TRUE, 43 | func = NULL 44 | ) 45 | 46 | \S4method{align}{data.table,nanotime,missing,nanoduration}( 47 | x, 48 | y, 49 | start = as.nanoduration(0), 50 | end = as.nanoduration(0), 51 | sopen = FALSE, 52 | eopen = TRUE, 53 | func = NULL 54 | ) 55 | 56 | \S4method{align}{data.table,nanotime,nanoperiod,nanoperiod}( 57 | x, 58 | y, 59 | start = as.nanoperiod(0), 60 | end = as.nanoperiod(0), 61 | sopen = FALSE, 62 | eopen = TRUE, 63 | tz, 64 | func = NULL 65 | ) 66 | 67 | \S4method{align}{data.table,nanotime,nanoperiod,missing}( 68 | x, 69 | y, 70 | start = as.nanoperiod(0), 71 | end = as.nanoperiod(0), 72 | sopen = FALSE, 73 | eopen = TRUE, 74 | tz, 75 | func = NULL 76 | ) 77 | 78 | \S4method{align}{data.table,nanotime,missing,nanoperiod}( 79 | x, 80 | y, 81 | start = as.nanoperiod(0), 82 | end = as.nanoperiod(0), 83 | sopen = FALSE, 84 | eopen = TRUE, 85 | tz, 86 | func = NULL 87 | ) 88 | } 89 | \arguments{ 90 | \item{x}{the \code{data.table} time-series to align from} 91 | 92 | \item{y}{the \code{nanotime} vector to align to} 93 | 94 | \item{start}{scalar or vector of same length as \code{y} of type 95 | \code{integer64}; \code{start} is added to each element in 96 | \code{y} and it then defines the starting point of the 97 | interval under consideration for the alignment on that 98 | element of \code{y}} 99 | 100 | \item{end}{scalar or vector of same length as \code{y} of type 101 | \code{integer64}; \code{start} is added to each element in 102 | \code{y} and it then defines the ending point of the interval 103 | under consideration for the alignment on that element of 104 | \code{y}} 105 | 106 | \item{...}{further arguments passed to or from methods.} 107 | 108 | \item{sopen}{boolean scalar or vector of same lengths as \code{y} 109 | that indicates if the start of the interval is open or 110 | closed. Defaults to FALSE.} 111 | 112 | \item{eopen}{boolean scalar or vector of same lengths as \code{y} 113 | that indicates if the end of the interval is open or 114 | closed. Defaults to TRUE.} 115 | 116 | \item{func}{a function taking one argument and which provides an 117 | arbitrary aggregation of its argument; if \code{NULL} then a 118 | function which takes the closest observation is used.} 119 | 120 | \item{tz}{scalar or vector of same length as \code{y} of type 121 | character. Only used when the type of \code{start} and 122 | \code{end} is \code{nanoperiod}. It defines the time zone for 123 | the definition of the interval.} 124 | } 125 | \value{ 126 | a \code{data.table} time-series of the same length as 127 | \code{y}; this is a subset of \code{x} with the 128 | \code{nanotime} index of \code{y} 129 | } 130 | \description{ 131 | \code{align} returns the subset of \code{data.table} \code{x} that 132 | aligns on the temporal vector \code{y} 133 | } 134 | \details{ 135 | For each element in \code{y}, intervals are created around this 136 | element with \code{start} and \code{end}. All the elements of 137 | \code{x} that fall within this interval are given as argument to 138 | the function \code{func}. The function \code{func} show reduce 139 | this \code{data.frame} to one unique row that will be associated 140 | with the \code{nanotime} value in \code{y}. 141 | } 142 | \examples{ 143 | \dontrun{ 144 | y <- nanotime((1:10)*1e9) 145 | x <- data.table(index=nanotime((1:10)*1e9), data=1:10) 146 | align(x, y, as.nanoduration(-1e9), as.nanoduration(1e9), colMeans) 147 | } 148 | 149 | 150 | } 151 | -------------------------------------------------------------------------------- /man/align_idx.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/dtts.R 3 | \name{align_idx} 4 | \alias{align_idx} 5 | \alias{align_idx,nanotime,nanotime,nanoduration,nanoduration-method} 6 | \alias{align_idx,nanotime,nanotime,missing,missing-method} 7 | \alias{align_idx,nanotime,nanotime,missing,nanoduration-method} 8 | \alias{align_idx,nanotime,nanotime,nanoduration,missing-method} 9 | \alias{align_idx,nanotime,nanotime,nanoperiod,nanoperiod-method} 10 | \alias{align_idx,nanotime,nanotime,missing,nanoperiod-method} 11 | \alias{align_idx,nanotime,nanotime,nanoperiod,missing-method} 12 | \title{Get the index of the alignment of one vector onto another} 13 | \usage{ 14 | align_idx(x, y, start, end, ...) 15 | 16 | \S4method{align_idx}{nanotime,nanotime,nanoduration,nanoduration}( 17 | x, 18 | y, 19 | start, 20 | end, 21 | sopen = FALSE, 22 | eopen = TRUE, 23 | bypass_x_check = FALSE, 24 | bypass_y_check = FALSE 25 | ) 26 | 27 | \S4method{align_idx}{nanotime,nanotime,missing,missing}( 28 | x, 29 | y, 30 | start, 31 | end, 32 | sopen = FALSE, 33 | eopen = TRUE, 34 | bypass_x_check = FALSE, 35 | bypass_y_check = FALSE 36 | ) 37 | 38 | \S4method{align_idx}{nanotime,nanotime,missing,nanoduration}( 39 | x, 40 | y, 41 | start, 42 | end, 43 | sopen = FALSE, 44 | eopen = TRUE, 45 | bypass_x_check = FALSE, 46 | bypass_y_check = FALSE 47 | ) 48 | 49 | \S4method{align_idx}{nanotime,nanotime,nanoduration,missing}( 50 | x, 51 | y, 52 | start, 53 | end, 54 | sopen = FALSE, 55 | eopen = TRUE, 56 | bypass_x_check = FALSE, 57 | bypass_y_check = FALSE 58 | ) 59 | 60 | \S4method{align_idx}{nanotime,nanotime,nanoperiod,nanoperiod}( 61 | x, 62 | y, 63 | start = as.nanoperiod(0), 64 | end = as.nanoperiod(0), 65 | sopen = FALSE, 66 | eopen = TRUE, 67 | tz, 68 | bypass_x_check = FALSE, 69 | bypass_y_check = FALSE 70 | ) 71 | 72 | \S4method{align_idx}{nanotime,nanotime,missing,nanoperiod}( 73 | x, 74 | y, 75 | start = as.nanoperiod(0), 76 | end = as.nanoperiod(0), 77 | sopen = FALSE, 78 | eopen = TRUE, 79 | tz, 80 | bypass_x_check = FALSE, 81 | bypass_y_check = FALSE 82 | ) 83 | 84 | \S4method{align_idx}{nanotime,nanotime,nanoperiod,missing}( 85 | x, 86 | y, 87 | start = as.nanoperiod(0), 88 | end = as.nanoperiod(0), 89 | sopen = FALSE, 90 | eopen = TRUE, 91 | tz, 92 | bypass_x_check = FALSE, 93 | bypass_y_check = FALSE 94 | ) 95 | } 96 | \arguments{ 97 | \item{x}{the \code{nanotime} vector to align from} 98 | 99 | \item{y}{the \code{nanotime} vector to align to} 100 | 101 | \item{start}{scalar or vector of same length as \code{y} of type 102 | \code{nanoduration} or \code{nanoperiod}; \code{start} is 103 | added to each element in \code{y} and it then defines the 104 | starting point of the interval under consideration for the 105 | alignment on that element of \code{y}} 106 | 107 | \item{end}{scalar or vector of same length as \code{y} of type 108 | \code{nanoduration} or \code{nanoperiod}; \code{start} is 109 | added to each element in \code{y} and it then defines the 110 | ending point of the interval under consideration for the 111 | alignment on that element of \code{y}} 112 | 113 | \item{...}{further arguments passed to or from methods.} 114 | 115 | \item{sopen}{boolean scalar or vector of same lengths as \code{y} 116 | that indicates if the start of the interval is open or 117 | closed. Defaults to FALSE.} 118 | 119 | \item{eopen}{boolean scalar or vector of same lengths as \code{y} 120 | that indicates if the end of the interval is open or 121 | closed. Defaults to TRUE.} 122 | 123 | \item{bypass_x_check}{logical indicating if the sorting of 124 | \code{x} should be bypassed. This can provide a marginal 125 | speedup, but should be used carefully.} 126 | 127 | \item{bypass_y_check}{logical indicating if the sorting of 128 | \code{y} should be bypassed. This can provide a marginal 129 | speedup, but should be used carefully.} 130 | 131 | \item{tz}{scalar or vector of same length as \code{y} of type 132 | character. Only used when the type of \code{start} and 133 | \code{end} is \code{nanoperiod}. It defines the time zone for 134 | the definition of the interval.} 135 | } 136 | \value{ 137 | a vector of indices of the same length as \code{y}; this 138 | vector indexes into \code{x} and represent the closest point 139 | of \code{x} that is in the interval defined around each point 140 | in \code{y} 141 | } 142 | \description{ 143 | \code{align_idx} returns the index of the alignment of \code{x} on \code{y} 144 | } 145 | \details{ 146 | In order to perform the alignment, intervals are created around 147 | each elements in \code{y} using \code{start} and \code{end}. For 148 | each such interval, the closest element in \code{x} is chosen. If 149 | no element in \code{x} falls in the interval, then NaN is 150 | returned. 151 | 152 | When only \code{x} and \code{y} are specified, the 153 | default is to close the intervals so that the alignment simply 154 | picks up equal points. Note that it is possible to specify 155 | meaningless intervals, for instance with a \code{start} that 156 | is beyond \code{end}. In this case, the alignment will simply 157 | return NA for each element in \code{y}. In principle, the 158 | \code{start} and \code{end} are chosen to define an interval 159 | is the past, or around the points in \code{y}, but if they are 160 | both positive, they can define intervals in the future. 161 | } 162 | \examples{ 163 | \dontrun{ 164 | align_idx(nanotime(c(10:14, 17:19)), nanotime(11:20)) 165 | ## [1] 2 3 4 5 NA NA 6 7 8 NA 166 | } 167 | } 168 | -------------------------------------------------------------------------------- /man/frequency-data.table-method.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/dtts.R 3 | \name{frequency,data.table-method} 4 | \alias{frequency,data.table-method} 5 | \title{Return the number of observations per interval} 6 | \usage{ 7 | \S4method{frequency}{data.table}( 8 | x, 9 | by, 10 | grid_start, 11 | grid_end, 12 | tz, 13 | ival_start = -by, 14 | ival_end, 15 | ival_sopen = FALSE, 16 | ival_eopen = TRUE 17 | ) 18 | } 19 | \arguments{ 20 | \item{x}{the \code{data.table} time-series for which to calculate 21 | the frequency} 22 | 23 | \item{by}{interval specified as a \code{nanoduration} or 24 | \code{nanoperiod}.} 25 | 26 | \item{grid_start}{scalar \code{nanotime} defining the start of the 27 | grid; by default the first element of \code{x} is taken.} 28 | 29 | \item{grid_end}{scalar \code{nanotime} defining the end of the 30 | grid; by default the last element of \code{x} is taken.} 31 | 32 | \item{tz}{scalar of type character. Only used when the type of 33 | \code{by} and \code{end} is \code{nanoperiod}. It defines the 34 | time zone for the definition of the interval.} 35 | 36 | \item{ival_start}{scalar of type \code{nanoduration} or 37 | \code{nanoperiod}; \code{ival_start} is added to each element 38 | of the grid and it then defines the starting point of the 39 | interval under consideration for the alignment onto that 40 | element. This defaults to -\code{by} and most likely does not 41 | need to be overriden.} 42 | 43 | \item{ival_end}{scalar of type \code{nanoduration} or 44 | \code{nanoperiod}; \code{ival_end} is added to each element of 45 | the grid and it then defines the ending point of the interval 46 | under consideration for the alignment onto that element. This 47 | defaults to 0 and most likely does not need to be overriden.} 48 | 49 | \item{ival_sopen}{boolean scalar that indicates if the start of 50 | the interval is open or closed. Defaults to FALSE.} 51 | 52 | \item{ival_eopen}{boolean scalar that indicates if the end of the 53 | interval is open or closed. Defaults to TRUE.} 54 | } 55 | \value{ 56 | a \code{data.table} time-series with the number of 57 | observations in \code{x} that fall withing the intervals 58 | defined by the grid interval defined by \code{by}. 59 | } 60 | \description{ 61 | \code{frequency} returns the number of observations in 62 | \code{data.table} \code{x} for each interval specified by 63 | \code{by}. 64 | } 65 | \examples{ 66 | \dontrun{ 67 | one_second <- as.nanoduration("00:00:01") 68 | one_minute <- 60 * one_second 69 | x <- data.table(index=nanotime((1:100) * one_second), 1) 70 | setkey(x, index) 71 | frequency(x, one_minute) 72 | } 73 | } 74 | -------------------------------------------------------------------------------- /man/grid_align.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/dtts.R 3 | \name{grid_align} 4 | \alias{grid_align} 5 | \alias{grid_align,data.table,nanoduration-method} 6 | \alias{grid_align,data.table,nanoperiod-method} 7 | \title{Align a \code{data.table} onto a \code{nanotime} vector grid} 8 | \usage{ 9 | grid_align(x, by, ...) 10 | 11 | \S4method{grid_align}{data.table,nanoduration}( 12 | x, 13 | by, 14 | func = NULL, 15 | grid_start = x[[1]][1] + by, 16 | grid_end = tail(x[[1]], 1), 17 | ival_start = -by, 18 | ival_end = as.nanoduration(0), 19 | ival_sopen = FALSE, 20 | ival_eopen = TRUE 21 | ) 22 | 23 | \S4method{grid_align}{data.table,nanoperiod}( 24 | x, 25 | by, 26 | func = NULL, 27 | grid_start = plus(x[[1]][1], by, tz), 28 | grid_end = tail(x[[1]], 1), 29 | ival_start = -by, 30 | ival_end = as.nanoperiod(0), 31 | ival_sopen = FALSE, 32 | ival_eopen = TRUE, 33 | tz 34 | ) 35 | } 36 | \arguments{ 37 | \item{x}{the \code{data.table} time-series to align from} 38 | 39 | \item{by}{interval specified as a \code{nanoduration} or 40 | \code{nanoperiod}.} 41 | 42 | \item{...}{further arguments passed to or from methods.} 43 | 44 | \item{func}{a function taking one argument and which provides an 45 | arbitrary aggregation of its argument; if \code{NULL} then a 46 | function which takes the closest observation is used.} 47 | 48 | \item{grid_start}{scalar \code{nanotime} defining the start of the 49 | grid; by default the first element of \code{x} is taken.} 50 | 51 | \item{grid_end}{scalar \code{nanotime} defining the end of the grid; by 52 | default the last element of \code{x} is taken.} 53 | 54 | \item{ival_start}{scalar of type \code{nanoduration} or 55 | \code{nanoperiod}; \code{ival_start} is added to each element 56 | of the grid and it then defines the starting point of the 57 | interval under consideration for the alignment onto that 58 | element.} 59 | 60 | \item{ival_end}{scalar of type \code{nanoduration} or 61 | \code{nanoperiod}; \code{ival_end} is added to each element of 62 | the grid and it then defines the ending point of the interval 63 | under consideration for the alignment onto that element.} 64 | 65 | \item{ival_sopen}{boolean scalar that indicates if the start of 66 | the interval is open or closed. Defaults to FALSE.} 67 | 68 | \item{ival_eopen}{boolean scalar that indicates if the end of the 69 | interval is open or closed. Defaults to TRUE.} 70 | 71 | \item{tz}{scalar of type character. Only used when the type of 72 | \code{by} and \code{end} is \code{nanoperiod}. It defines the 73 | time zone for the definition of the interval.} 74 | } 75 | \value{ 76 | a \code{data.table} time-series of the same length as 77 | \code{y} with the aggregations computed by \code{func} 78 | } 79 | \description{ 80 | \code{grid_align} returns the subset of \code{data.table} \code{x} 81 | that aligns on the grid defined by \code{by}, \code{start} and 82 | \code{end} 83 | } 84 | \details{ 85 | A grid defined by the parameter \code{by}, \code{start} and 86 | \code{end} is created. The function then does a standard alignment 87 | of \code{x} onto this grid (see the \code{align} function) 88 | } 89 | \examples{ 90 | \dontrun{ 91 | one_second <- 1e9 92 | x <- data.table(index=nanotime(cumsum(sin(seq(0.001, pi, 0.001)) * one_second))) 93 | x <- x[, V2 := 1:nrow(x)] 94 | setkey(x, index) 95 | grid_align(x, as.nanoduration("00:01:00"), sum) 96 | } 97 | } 98 | -------------------------------------------------------------------------------- /man/ops.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/dtts.R 3 | \name{ops} 4 | \alias{ops} 5 | \alias{ops,data.table,data.table,character-method} 6 | \title{Arithmetic operations on two \code{data.table} time-series} 7 | \usage{ 8 | ops(x, y, op_string) 9 | 10 | \S4method{ops}{data.table,data.table,character}(x, y, op_string) 11 | } 12 | \arguments{ 13 | \item{x}{the \code{data.table} time-series that determines the 14 | left operand} 15 | 16 | \item{y}{the \code{data.table} time-series that determines the 17 | right operand \code{nanoperiod}.} 18 | 19 | \item{op_string}{string defining the operation to apply; the 20 | supported values for \code{op} are "*", "/", "+", "-".} 21 | } 22 | \description{ 23 | \code{ops} returns the \code{y} time-series on which the \code{x} 24 | time-series values are applied using the specified operator 25 | \code{op}. 26 | } 27 | \section{Details}{ 28 | 29 | 30 | The n elements of the \code{x} time-series operand define a set of 31 | n-1 intervals, and the value associated with each interval is 32 | applied to all the observations in the \code{y} time-series 33 | operand that fall in the interval. Note that the interval is 34 | closed at the beginning and open at the end. The supported values 35 | for \code{op} are "*", "/", "+", "-". 36 | 37 | There has to be one numeric column in \code{x} and \code{y}; there 38 | has to be either a one to one correspondance between the number of 39 | numeric columns in \code{x} and \code{y}, or there must be only 40 | one numeric column in \code{x} that will be applied to all numeric 41 | columns in \code{y}. Non-numeric columns must not appear in 42 | \code{x}, whereas they will be skipped of they appear in \code{y}. 43 | } 44 | 45 | \examples{ 46 | \dontrun{ 47 | one_second_duration <- as.nanoduration("00:00:01") 48 | t1 <- nanotime(1:2 * one_second_duration * 3) 49 | t2 <- nanotime(1:4 * one_second_duration) 50 | dt1 <- data.table(index=t1, data1 = 1:length(t1)) 51 | setkey(dt1, index) 52 | dt2 <- data.table(index=t2, data1 = 1:length(t2)) 53 | setkey(dt2, index) 54 | ops(dt1, dt2, "+") 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /src/RcppExports.cpp: -------------------------------------------------------------------------------- 1 | // Generated by using Rcpp::compileAttributes() -> do not edit by hand 2 | // Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 3 | 4 | #include 5 | 6 | using namespace Rcpp; 7 | 8 | #ifdef RCPP_USE_GLOBAL_ROSTREAM 9 | Rcpp::Rostream& Rcpp::Rcout = Rcpp::Rcpp_cout_get(); 10 | Rcpp::Rostream& Rcpp::Rcerr = Rcpp::Rcpp_cerr_get(); 11 | #endif 12 | 13 | // align_duration 14 | Rcpp::List align_duration(const Rcpp::NumericVector& x, const Rcpp::NumericVector& y, const Rcpp::List xdata, const Rcpp::NumericVector& start, const Rcpp::NumericVector& end, const Rcpp::LogicalVector& sopen, const Rcpp::LogicalVector& eopen, const Rcpp::Function func); 15 | RcppExport SEXP _dtts_align_duration(SEXP xSEXP, SEXP ySEXP, SEXP xdataSEXP, SEXP startSEXP, SEXP endSEXP, SEXP sopenSEXP, SEXP eopenSEXP, SEXP funcSEXP) { 16 | BEGIN_RCPP 17 | Rcpp::RObject rcpp_result_gen; 18 | Rcpp::RNGScope rcpp_rngScope_gen; 19 | Rcpp::traits::input_parameter< const Rcpp::NumericVector& >::type x(xSEXP); 20 | Rcpp::traits::input_parameter< const Rcpp::NumericVector& >::type y(ySEXP); 21 | Rcpp::traits::input_parameter< const Rcpp::List >::type xdata(xdataSEXP); 22 | Rcpp::traits::input_parameter< const Rcpp::NumericVector& >::type start(startSEXP); 23 | Rcpp::traits::input_parameter< const Rcpp::NumericVector& >::type end(endSEXP); 24 | Rcpp::traits::input_parameter< const Rcpp::LogicalVector& >::type sopen(sopenSEXP); 25 | Rcpp::traits::input_parameter< const Rcpp::LogicalVector& >::type eopen(eopenSEXP); 26 | Rcpp::traits::input_parameter< const Rcpp::Function >::type func(funcSEXP); 27 | rcpp_result_gen = Rcpp::wrap(align_duration(x, y, xdata, start, end, sopen, eopen, func)); 28 | return rcpp_result_gen; 29 | END_RCPP 30 | } 31 | // align_period 32 | Rcpp::List align_period(const Rcpp::NumericVector& x, const Rcpp::NumericVector& y, const Rcpp::List xdata, const Rcpp::ComplexVector& start, const Rcpp::ComplexVector& end, const Rcpp::LogicalVector& sopen, const Rcpp::LogicalVector& eopen, const Rcpp::Function func, const Rcpp::CharacterVector tz); 33 | RcppExport SEXP _dtts_align_period(SEXP xSEXP, SEXP ySEXP, SEXP xdataSEXP, SEXP startSEXP, SEXP endSEXP, SEXP sopenSEXP, SEXP eopenSEXP, SEXP funcSEXP, SEXP tzSEXP) { 34 | BEGIN_RCPP 35 | Rcpp::RObject rcpp_result_gen; 36 | Rcpp::RNGScope rcpp_rngScope_gen; 37 | Rcpp::traits::input_parameter< const Rcpp::NumericVector& >::type x(xSEXP); 38 | Rcpp::traits::input_parameter< const Rcpp::NumericVector& >::type y(ySEXP); 39 | Rcpp::traits::input_parameter< const Rcpp::List >::type xdata(xdataSEXP); 40 | Rcpp::traits::input_parameter< const Rcpp::ComplexVector& >::type start(startSEXP); 41 | Rcpp::traits::input_parameter< const Rcpp::ComplexVector& >::type end(endSEXP); 42 | Rcpp::traits::input_parameter< const Rcpp::LogicalVector& >::type sopen(sopenSEXP); 43 | Rcpp::traits::input_parameter< const Rcpp::LogicalVector& >::type eopen(eopenSEXP); 44 | Rcpp::traits::input_parameter< const Rcpp::Function >::type func(funcSEXP); 45 | Rcpp::traits::input_parameter< const Rcpp::CharacterVector >::type tz(tzSEXP); 46 | rcpp_result_gen = Rcpp::wrap(align_period(x, y, xdata, start, end, sopen, eopen, func, tz)); 47 | return rcpp_result_gen; 48 | END_RCPP 49 | } 50 | // align_idx_duration 51 | Rcpp::NumericVector align_idx_duration(const Rcpp::NumericVector& x, const Rcpp::NumericVector& y, const Rcpp::NumericVector& start, const Rcpp::NumericVector& end, const Rcpp::LogicalVector& sopen, const Rcpp::LogicalVector& eopen); 52 | RcppExport SEXP _dtts_align_idx_duration(SEXP xSEXP, SEXP ySEXP, SEXP startSEXP, SEXP endSEXP, SEXP sopenSEXP, SEXP eopenSEXP) { 53 | BEGIN_RCPP 54 | Rcpp::RObject rcpp_result_gen; 55 | Rcpp::RNGScope rcpp_rngScope_gen; 56 | Rcpp::traits::input_parameter< const Rcpp::NumericVector& >::type x(xSEXP); 57 | Rcpp::traits::input_parameter< const Rcpp::NumericVector& >::type y(ySEXP); 58 | Rcpp::traits::input_parameter< const Rcpp::NumericVector& >::type start(startSEXP); 59 | Rcpp::traits::input_parameter< const Rcpp::NumericVector& >::type end(endSEXP); 60 | Rcpp::traits::input_parameter< const Rcpp::LogicalVector& >::type sopen(sopenSEXP); 61 | Rcpp::traits::input_parameter< const Rcpp::LogicalVector& >::type eopen(eopenSEXP); 62 | rcpp_result_gen = Rcpp::wrap(align_idx_duration(x, y, start, end, sopen, eopen)); 63 | return rcpp_result_gen; 64 | END_RCPP 65 | } 66 | // align_idx_period 67 | Rcpp::NumericVector align_idx_period(const Rcpp::NumericVector& x, const Rcpp::NumericVector& y, const Rcpp::ComplexVector& start, const Rcpp::ComplexVector& end, const Rcpp::LogicalVector& sopen, const Rcpp::LogicalVector& eopen, const Rcpp::CharacterVector& tz); 68 | RcppExport SEXP _dtts_align_idx_period(SEXP xSEXP, SEXP ySEXP, SEXP startSEXP, SEXP endSEXP, SEXP sopenSEXP, SEXP eopenSEXP, SEXP tzSEXP) { 69 | BEGIN_RCPP 70 | Rcpp::RObject rcpp_result_gen; 71 | Rcpp::RNGScope rcpp_rngScope_gen; 72 | Rcpp::traits::input_parameter< const Rcpp::NumericVector& >::type x(xSEXP); 73 | Rcpp::traits::input_parameter< const Rcpp::NumericVector& >::type y(ySEXP); 74 | Rcpp::traits::input_parameter< const Rcpp::ComplexVector& >::type start(startSEXP); 75 | Rcpp::traits::input_parameter< const Rcpp::ComplexVector& >::type end(endSEXP); 76 | Rcpp::traits::input_parameter< const Rcpp::LogicalVector& >::type sopen(sopenSEXP); 77 | Rcpp::traits::input_parameter< const Rcpp::LogicalVector& >::type eopen(eopenSEXP); 78 | Rcpp::traits::input_parameter< const Rcpp::CharacterVector& >::type tz(tzSEXP); 79 | rcpp_result_gen = Rcpp::wrap(align_idx_period(x, y, start, end, sopen, eopen, tz)); 80 | return rcpp_result_gen; 81 | END_RCPP 82 | } 83 | // ops 84 | Rcpp::List ops(Rcpp::List& xdata, Rcpp::List& ydata, Rcpp::String& op_string); 85 | RcppExport SEXP _dtts_ops(SEXP xdataSEXP, SEXP ydataSEXP, SEXP op_stringSEXP) { 86 | BEGIN_RCPP 87 | Rcpp::RObject rcpp_result_gen; 88 | Rcpp::RNGScope rcpp_rngScope_gen; 89 | Rcpp::traits::input_parameter< Rcpp::List& >::type xdata(xdataSEXP); 90 | Rcpp::traits::input_parameter< Rcpp::List& >::type ydata(ydataSEXP); 91 | Rcpp::traits::input_parameter< Rcpp::String& >::type op_string(op_stringSEXP); 92 | rcpp_result_gen = Rcpp::wrap(ops(xdata, ydata, op_string)); 93 | return rcpp_result_gen; 94 | END_RCPP 95 | } 96 | 97 | static const R_CallMethodDef CallEntries[] = { 98 | {"_dtts_align_duration", (DL_FUNC) &_dtts_align_duration, 8}, 99 | {"_dtts_align_period", (DL_FUNC) &_dtts_align_period, 9}, 100 | {"_dtts_align_idx_duration", (DL_FUNC) &_dtts_align_idx_duration, 6}, 101 | {"_dtts_align_idx_period", (DL_FUNC) &_dtts_align_idx_period, 7}, 102 | {"_dtts_ops", (DL_FUNC) &_dtts_ops, 3}, 103 | {NULL, NULL, 0} 104 | }; 105 | 106 | RcppExport void R_init_dtts(DllInfo *dll) { 107 | R_registerRoutines(dll, NULL, CallEntries, NULL, NULL); 108 | R_useDynamicSymbols(dll, FALSE); 109 | } 110 | -------------------------------------------------------------------------------- /src/align.cpp: -------------------------------------------------------------------------------- 1 | // define align functions 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include "nanotime/globals.hpp" 11 | #include "nanotime/pseudovector.hpp" 12 | #include "nanotime/period.hpp" 13 | 14 | 15 | typedef nanotime::ConstPseudoVector ConstPseudoVectorDuration; 16 | typedef nanotime::ConstPseudoVector ConstPseudoVectorPrd; 17 | typedef nanotime::ConstPseudoVector ConstPseudoVectorLgl; 18 | typedef nanotime::ConstPseudoVector ConstPseudoVectorChar; 19 | 20 | 21 | // for debug reasons... 22 | // the following code from: https://stackoverflow.com/a/16692519 23 | template 24 | std::ostream &operator<<(std::ostream &stream, 25 | const std::chrono::time_point &time_point) { 26 | const time_t time = Clock::to_time_t(time_point); 27 | #if (__GNUC__ > 4 || defined(__WIN32) || ((__GNUC__ == 4) && __GNUC_MINOR__ > 8 && __GNUC_REVISION__ > 1)) 28 | // Maybe the put_time will be implemented later? 29 | struct tm tm; 30 | // thanks to https://stackoverflow.com/a/38034148/143305 for the next test 31 | #if defined(__unix__) 32 | localtime_r(&time, &tm); 33 | #elif defined(_MSC_VER) || defined(__WIN32) 34 | localtime_s(&tm, &time); 35 | #endif 36 | return stream << std::put_time(&tm, "%c"); // Print standard date&time 37 | #else 38 | char buffer[26]; 39 | ctime_r(&time, buffer); 40 | buffer[24] = '\0'; // Removes the newline that is added 41 | return stream << buffer; 42 | #endif 43 | } 44 | 45 | 46 | nanotime::duration abs_duration(nanotime::duration d) 47 | { 48 | if (d >= d.zero()) 49 | return d; 50 | return -d; 51 | } 52 | 53 | 54 | static Rcpp::NumericVector align_idx_helper_duration(const nanotime::dtime* x, 55 | size_t xlen, 56 | const nanotime::dtime* y, 57 | size_t ylen, 58 | const ConstPseudoVectorDuration& start, 59 | const ConstPseudoVectorDuration& end, 60 | const ConstPseudoVectorLgl& sopen, 61 | const ConstPseudoVectorLgl& eopen) 62 | { 63 | Rcpp::NumericVector res(ylen); 64 | size_t ix = 0, iy = 0; 65 | 66 | // for each point in y, we try to find a matching point or set of 67 | // points in x: 68 | for (iy=0; iy= xlen || x[ix] >= yend) { 81 | res[iy] = NA_REAL; 82 | continue; 83 | } 84 | } else { 85 | if (ix >= xlen || x[ix] > yend) { 86 | res[iy] = NA_REAL; 87 | continue; 88 | } 89 | } 90 | 91 | // find the closest point in the interval: 92 | if (eopen[iy]) { 93 | while (ix+1 < xlen && x[ix+1] < yend && abs_duration(x[ix] - y[iy]) > abs_duration(x[ix+1] - y[iy])) ++ix; 94 | } else { 95 | while (ix+1 < xlen && x[ix+1] <= yend && abs_duration(x[ix] - y[iy]) > abs_duration(x[ix+1] - y[iy])) ++ix; 96 | } 97 | res[iy] = ix + 1; // +1 because of R numbering start convention 98 | } 99 | 100 | return res; 101 | } 102 | 103 | 104 | 105 | static Rcpp::NumericVector align_idx_helper_period(const nanotime::dtime* x, 106 | size_t xlen, 107 | const nanotime::dtime* y, 108 | size_t ylen, 109 | const ConstPseudoVectorPrd& start, 110 | const ConstPseudoVectorPrd& end, 111 | const ConstPseudoVectorLgl& sopen, 112 | const ConstPseudoVectorLgl& eopen, 113 | const ConstPseudoVectorChar& tz) 114 | { 115 | Rcpp::NumericVector res(ylen); 116 | size_t ix = 0, iy = 0; 117 | 118 | // for each point in y, we try to find a matching point or set of 119 | // points in x: 120 | for (iy=0; iy(&start[iy]), sizeof(nanotime::period)); 122 | nanotime::period prd_end; memcpy(&prd_end, reinterpret_cast(&end[iy]), sizeof(nanotime::period)); 123 | auto ystart = nanotime::plus(y[iy], prd_start, std::string(tz[iy])); 124 | auto yend = nanotime::plus(y[iy], prd_end, std::string(tz[iy])); 125 | 126 | // advance until we have a point in x that is in the interval 127 | // defined around yi: 128 | if (sopen[iy]) { 129 | while (ix < xlen && x[ix] <= ystart) ++ix; 130 | } else { 131 | while (ix < xlen && x[ix] < ystart) ++ix; 132 | } 133 | if (eopen[iy]) { 134 | if (ix >= xlen || x[ix] >= yend) { 135 | res[iy] = NA_REAL; 136 | continue; 137 | } 138 | } else { 139 | if (ix >= xlen || x[ix] > yend) { 140 | res[iy] = NA_REAL; 141 | continue; 142 | } 143 | } 144 | 145 | // find the closest point in the interval: 146 | if (eopen[iy]) { 147 | while (ix+1 < xlen && x[ix+1] < yend && abs_duration(x[ix] - y[iy]) > abs_duration(x[ix+1] - y[iy])) ++ix; 148 | } else { 149 | while (ix+1 < xlen && x[ix+1] <= yend && abs_duration(x[ix] - y[iy]) > abs_duration(x[ix+1] - y[iy])) ++ix; 150 | } 151 | res[iy] = ix + 1; // +1 because of R numbering start convention 152 | } 153 | 154 | return res; 155 | } 156 | 157 | 158 | static Rcpp::IntegerVector makeIndex(size_t start, size_t end) { 159 | Rcpp::IntegerVector res(end - start); 160 | size_t off = 0; 161 | for (size_t i=start; i 169 | ForwardIt lower_bound_sopen(ForwardIt first, ForwardIt last, const T& value) 170 | { 171 | ForwardIt it; 172 | typename std::iterator_traits::difference_type count, step; 173 | count = std::distance(first, last); 174 | 175 | while (count > 0) { 176 | it = first; 177 | step = count / 2; 178 | std::advance(it, step); 179 | if (*it <= value) { // '<=' rather than '<' as in the STL! 180 | first = ++it; 181 | count -= step + 1; 182 | } 183 | else 184 | count = step; 185 | } 186 | return first; 187 | } 188 | 189 | 190 | Rcpp::List align_func_duration(const nanotime::dtime* x, 191 | size_t xlen, 192 | const nanotime::dtime* y, 193 | size_t ylen, 194 | Rcpp::List xdata, 195 | const ConstPseudoVectorDuration& start, 196 | const ConstPseudoVectorDuration& end, 197 | const ConstPseudoVectorLgl& sopen, 198 | const ConstPseudoVectorLgl& eopen, 199 | const Rcpp::Function& func) 200 | { 201 | auto res = Rcpp::List::create(); 202 | auto cols = makeIndex(2, XLENGTH(xdata)+1); 203 | typedef SEXP SUBSET_DT_FUN(SEXP,SEXP,SEXP); 204 | SUBSET_DT_FUN *subsetDT = (SUBSET_DT_FUN *) R_GetCCallable("data.table", "DT_subsetDT" ); 205 | 206 | size_t ix = 0, iy = 0; 207 | 208 | // for each point in y, we try to find a matching point or set of 209 | // points in x: 210 | for (iy=0; iy= xlen || x[ix] >= yend) { 226 | const SEXP rows = Rcpp::IntegerVector::create(0); 227 | res.push_back(func(subsetDT(xdata, rows, cols))); // empty interval 228 | continue; 229 | } 230 | } else { 231 | if (ix >= xlen || x[ix] > yend) { 232 | const SEXP rows = Rcpp::IntegerVector::create(0); 233 | res.push_back(func(subsetDT(xdata, rows, cols))); // empty interval 234 | continue; 235 | } 236 | } 237 | auto first_ix = ix; 238 | 239 | // find the last point in the interval: 240 | if (sopen[iy]) { 241 | auto iter = lower_bound_sopen(x + ix, x+xlen, yend); 242 | ix = iter - x; 243 | } else { 244 | auto iter = std::lower_bound(x + ix, x+xlen, yend); 245 | ix = iter - x; 246 | } 247 | if (eopen[iy]) { 248 | while (ix < xlen && x[ix] < yend) ++ix; 249 | } else { 250 | while (ix < xlen && x[ix] <= yend) ++ix; 251 | } 252 | 253 | const SEXP rows = makeIndex(first_ix+1, ix+1); // subsetDT is 1-based indexing 254 | res.push_back(func(subsetDT(xdata, rows, cols))); 255 | 256 | // reset ix to the first ix found, because the intervals 257 | // specified could overlap: 258 | ix = first_ix; 259 | } 260 | return res; 261 | } 262 | 263 | 264 | Rcpp::List align_func_period(const nanotime::dtime* x, 265 | size_t xlen, 266 | const nanotime::dtime* y, 267 | size_t ylen, 268 | Rcpp::List xdata, 269 | const ConstPseudoVectorPrd& start, 270 | const ConstPseudoVectorPrd& end, 271 | const ConstPseudoVectorLgl& sopen, 272 | const ConstPseudoVectorLgl& eopen, 273 | const Rcpp::Function& func, 274 | const ConstPseudoVectorChar& tz) 275 | { 276 | auto res = Rcpp::List::create(); 277 | auto cols = makeIndex(2, XLENGTH(xdata)+1); 278 | typedef SEXP SUBSET_DT_FUN(SEXP,SEXP,SEXP); 279 | SUBSET_DT_FUN *subsetDT = (SUBSET_DT_FUN *) R_GetCCallable("data.table", "DT_subsetDT" ); 280 | 281 | size_t ix = 0, iy = 0; 282 | 283 | // for each point in y, we try to find a matching point or set of 284 | // points in x: 285 | for (iy=0; iy(&start[iy]), sizeof(nanotime::period)); 287 | nanotime::period prd_end; memcpy(&prd_end, reinterpret_cast(&end[iy]), sizeof(nanotime::period)); 288 | auto ystart = nanotime::plus(y[iy], prd_start, std::string(tz[iy])); 289 | auto yend = nanotime::plus(y[iy], prd_end, std::string(tz[iy])); 290 | 291 | // advance until we have a point in x that is in the interval 292 | // defined around yi: 293 | if (sopen[iy]) { 294 | auto iter = lower_bound_sopen(x + ix, x+xlen, ystart); 295 | ix = iter - x; 296 | } else { 297 | auto iter = std::lower_bound(x + ix, x+xlen, ystart); 298 | ix = iter - x; 299 | } 300 | 301 | if (eopen[iy]) { 302 | if (ix >= xlen || x[ix] >= yend) { 303 | const SEXP rows = Rcpp::IntegerVector::create(0); 304 | res.push_back(func(subsetDT(xdata, rows, cols))); // empty interval 305 | continue; 306 | } 307 | } else { 308 | if (ix >= xlen || x[ix] > yend) { 309 | const SEXP rows = Rcpp::IntegerVector::create(0); 310 | res.push_back(func(subsetDT(xdata, rows, cols))); // empty interval 311 | continue; 312 | } 313 | } 314 | auto first_ix = ix; 315 | 316 | // find the last point in the interval: 317 | if (sopen[iy]) { 318 | auto iter = lower_bound_sopen(x + ix, x+xlen, yend); 319 | ix = iter - x; 320 | } else { 321 | auto iter = std::lower_bound(x + ix, x+xlen, yend); 322 | ix = iter - x; 323 | } 324 | if (eopen[iy]) { 325 | while (ix < xlen && x[ix] < yend) ++ix; 326 | } else { 327 | while (ix < xlen && x[ix] <= yend) ++ix; 328 | } 329 | 330 | const SEXP rows = makeIndex(first_ix+1, ix+1); // subsetDT is 1-based indexing 331 | res.push_back(func(subsetDT(xdata, rows, cols))); 332 | 333 | // reset ix to the first ix found, because the intervals 334 | // specified could overlap: 335 | ix = first_ix; 336 | } 337 | return res; 338 | } 339 | 340 | 341 | // [[Rcpp::export(.align_duration_cpp)]] 342 | Rcpp::List align_duration(const Rcpp::NumericVector& x, // nanotime vector 343 | const Rcpp::NumericVector& y, // nanotime vector 344 | const Rcpp::List xdata, // DT 345 | const Rcpp::NumericVector& start, // duration 346 | const Rcpp::NumericVector& end, // duration 347 | const Rcpp::LogicalVector& sopen, // start open 348 | const Rcpp::LogicalVector& eopen, // end open 349 | const Rcpp::Function func) // function to apply (character) 350 | { 351 | return align_func_duration(reinterpret_cast(&x[0]), 352 | x.size(), 353 | reinterpret_cast(&y[0]), 354 | y.size(), 355 | xdata, 356 | ConstPseudoVectorDuration(start), 357 | ConstPseudoVectorDuration(end), 358 | ConstPseudoVectorLgl(sopen), 359 | ConstPseudoVectorLgl(eopen), 360 | Rcpp::Function(func)); 361 | } 362 | 363 | 364 | // [[Rcpp::export(.align_period_cpp)]] 365 | Rcpp::List align_period(const Rcpp::NumericVector& x, // nanotime vector 366 | const Rcpp::NumericVector& y, // nanotime vector 367 | const Rcpp::List xdata, // DT 368 | const Rcpp::ComplexVector& start, // period 369 | const Rcpp::ComplexVector& end, // period 370 | const Rcpp::LogicalVector& sopen, // start open 371 | const Rcpp::LogicalVector& eopen, // end open 372 | const Rcpp::Function func, // function to apply (character) 373 | const Rcpp::CharacterVector tz) // timezone 374 | { 375 | return align_func_period(reinterpret_cast(&x[0]), 376 | x.size(), 377 | reinterpret_cast(&y[0]), 378 | y.size(), 379 | xdata, 380 | ConstPseudoVectorPrd(start), 381 | ConstPseudoVectorPrd(end), 382 | ConstPseudoVectorLgl(sopen), 383 | ConstPseudoVectorLgl(eopen), 384 | Rcpp::Function(func), 385 | ConstPseudoVectorChar(tz)); 386 | } 387 | 388 | 389 | // [[Rcpp::export(.align_idx_duration_cpp)]] 390 | Rcpp::NumericVector align_idx_duration(const Rcpp::NumericVector& x, // nanotime vector 391 | const Rcpp::NumericVector& y, // nanotime vector 392 | const Rcpp::NumericVector& start, // duration 393 | const Rcpp::NumericVector& end, // duration 394 | const Rcpp::LogicalVector& sopen, // start open 395 | const Rcpp::LogicalVector& eopen) // end open 396 | { 397 | return align_idx_helper_duration(reinterpret_cast(&x[0]), 398 | x.size(), 399 | reinterpret_cast(&y[0]), 400 | y.size(), 401 | ConstPseudoVectorDuration(start), 402 | ConstPseudoVectorDuration(end), 403 | ConstPseudoVectorLgl(sopen), 404 | ConstPseudoVectorLgl(eopen)); 405 | } 406 | 407 | 408 | // [[Rcpp::export(.align_idx_period_cpp)]] 409 | Rcpp::NumericVector align_idx_period(const Rcpp::NumericVector& x, // nanotime vector 410 | const Rcpp::NumericVector& y, // nanotime vector 411 | const Rcpp::ComplexVector& start, // period 412 | const Rcpp::ComplexVector& end, // period 413 | const Rcpp::LogicalVector& sopen, // start open 414 | const Rcpp::LogicalVector& eopen, // end open 415 | const Rcpp::CharacterVector& tz) // timezone 416 | { 417 | return align_idx_helper_period(reinterpret_cast(&x[0]), 418 | x.size(), 419 | reinterpret_cast(&y[0]), 420 | y.size(), 421 | ConstPseudoVectorPrd(start), 422 | ConstPseudoVectorPrd(end), 423 | ConstPseudoVectorLgl(sopen), 424 | ConstPseudoVectorLgl(eopen), 425 | ConstPseudoVectorChar(tz)); 426 | } 427 | 428 | 429 | 430 | // this function takes two vectors and positional args and an op: 431 | template 432 | void applyv(U x, Rcpp::Vector& y, size_t y_s, size_t y_e, F f) { 433 | for (auto iy=y_s; iy op) 446 | { 447 | size_t ix = 0; 448 | 449 | // for each point in x, we try to find a matching point or set of 450 | // points in y: 451 | 452 | auto from_yiter = y; 453 | for (ix=0; ix < xlen; ix++) { 454 | auto to_yiter = std::lower_bound(from_yiter, y + ylen, x[ix]); 455 | if (to_yiter == y + ylen) continue; 456 | 457 | auto iy_s = from_yiter - y; 458 | auto iy_e = to_yiter - y; 459 | applyv(xdata[ix], ydata, iy_s, iy_e, op); 460 | 461 | from_yiter = to_yiter; 462 | } 463 | } 464 | 465 | 466 | 467 | static bool check_numeric(SEXP s) { 468 | return TYPEOF(s) == REALSXP || TYPEOF(s) == INTSXP; 469 | } 470 | 471 | 472 | static R_xlen_t get_nb_numeric_columns(Rcpp::List& l) { 473 | auto ncols_double = 0; 474 | for (auto i=1; i op; 490 | if (op_string == "+") { 491 | op = std::plus(); 492 | } else if (op_string == "-") { 493 | op = std::minus(); 494 | } else if (op_string == "*") { 495 | op = std::multiplies(); 496 | } else if (op_string == "/") { 497 | op = std::divides(); 498 | } else { 499 | Rcpp::stop(std::string("unsupported operator '") + std::string(op_string) + "'"); 500 | } 501 | 502 | // only work with doubles; require that except for the index, all 503 | // other columns of 'x' are numerics: 504 | auto x_ncols_numeric = get_nb_numeric_columns(xdata); 505 | if (x_ncols_numeric == 0) { 506 | Rcpp::stop("'x' must have at least one numeric column"); 507 | } else if (x_ncols_numeric != xdata.size() - 1) { 508 | Rcpp::stop("all data columns of 'x' must be numeric"); 509 | } 510 | // if one column, easy, apply it on all columns of 'ydata', but if 511 | // more than one, check we have the same number of numeric columns in 512 | // 'ydata': 513 | auto y_ncols_numeric = get_nb_numeric_columns(ydata); 514 | if (x_ncols_numeric != 1 && x_ncols_numeric != y_ncols_numeric) { 515 | Rcpp::stop("'x' must have one numeric column or the same number as 'y'"); 516 | } 517 | 518 | 519 | Rcpp::NumericVector x = xdata[0]; 520 | auto x_dt = reinterpret_cast(&x[0]); 521 | Rcpp::NumericVector y = ydata[0]; 522 | auto y_dt = reinterpret_cast(&y[0]); 523 | 524 | Rcpp::List res = Rcpp::clone(ydata); 525 | 526 | // iterate through the rest of y columns and apply the ops: 527 | auto ix = 0; 528 | for (auto iy=1; iy= xdata.size()) { 535 | break; 536 | } 537 | xdata_col = xdata[ix]; 538 | } 539 | 540 | // move to next numeric column of 'ydata': 541 | while (!check_numeric(ydata[iy]) && (iy < ydata.size())) { 542 | ++iy; 543 | } 544 | // if we got to the last columns it means we are done (not 545 | // reachable as we run out of x cols first, but in case that logic 546 | // changes, keep this check): 547 | if (iy == ydata.size()) { 548 | break; // # nocov 549 | } 550 | 551 | Rcpp::NumericVector ydata_col = res[iy]; 552 | ops_helper(x_dt, 553 | x.size(), 554 | y_dt, 555 | y.size(), 556 | xdata_col, 557 | ydata_col, 558 | op); 559 | // after cloning, strangely, it seems we get copies and not references to the 560 | // elements of the list; so we reassign 'ydata_col' back to res so res is 561 | // modified: 562 | res[iy] = ydata_col; 563 | 564 | ++iy; // increment for the next time round 565 | } 566 | 567 | return res; 568 | } 569 | -------------------------------------------------------------------------------- /tests/tinytest.R: -------------------------------------------------------------------------------- 1 | pkg <- "dtts" 2 | 3 | if (requireNamespace("tinytest", quietly=TRUE)) { 4 | tinytest::test_package(pkg) 5 | } 6 | --------------------------------------------------------------------------------