├── .Rbuildignore ├── .github ├── .gitignore └── workflows │ └── R-CMD-check.yaml ├── .gitignore ├── CRAN-RELEASE ├── DESCRIPTION ├── LICENSE ├── LICENSE.md ├── Makefile ├── NAMESPACE ├── NEWS.md ├── R ├── cache-disk.R ├── cache-layered.R ├── cache-mem.R ├── cachem-package.R ├── cachem.R ├── reexports.R └── utils.R ├── README.Rmd ├── README.md ├── _pkgdown.yml ├── cachem.Rproj ├── cran-comments.md ├── man ├── cache_disk.Rd ├── cache_layered.Rd ├── cache_mem.Rd └── reexports.Rd ├── revdep ├── .gitignore ├── README.md ├── cran.md ├── failures.md └── problems.md ├── src ├── .gitignore ├── cache.c └── init.c └── tests ├── testthat.R └── testthat ├── helper-utils.R ├── test-cache-disk.R ├── test-cache-mem.R └── test-utils.R /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^.*\.Rproj$ 2 | ^\.Rproj\.user$ 3 | ^LICENSE\.md$ 4 | ^\.github$ 5 | ^.*\.sublime-project$ 6 | ^README\.Rmd$ 7 | ^_pkgdown\.yml$ 8 | ^docs$ 9 | ^pkgdown$ 10 | ^Makefile$ 11 | ^cran-comments\.md$ 12 | ^revdep$ 13 | ^CRAN-RELEASE$ 14 | -------------------------------------------------------------------------------- /.github/.gitignore: -------------------------------------------------------------------------------- 1 | *.html 2 | -------------------------------------------------------------------------------- /.github/workflows/R-CMD-check.yaml: -------------------------------------------------------------------------------- 1 | # Workflow derived from https://github.com/rstudio/shiny-workflows 2 | # 3 | # NOTE: This Shiny team GHA workflow is overkill for most R packages. 
4 | # For most R packages it is better to use https://github.com/r-lib/actions 5 | on: 6 | push: 7 | branches: [main, rc-**] 8 | pull_request: 9 | branches: [main] 10 | schedule: 11 | - cron: '0 7 * * 1' # every monday 12 | 13 | name: Package checks 14 | 15 | jobs: 16 | website: 17 | uses: rstudio/shiny-workflows/.github/workflows/website.yaml@v1 18 | routine: 19 | uses: rstudio/shiny-workflows/.github/workflows/routine.yaml@v1 20 | R-CMD-check: 21 | uses: rstudio/shiny-workflows/.github/workflows/R-CMD-check.yaml@v1 22 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .Rproj.user 2 | docs 3 | CRAN-RELEASE 4 | -------------------------------------------------------------------------------- /CRAN-RELEASE: -------------------------------------------------------------------------------- 1 | This package was submitted to CRAN on 2021-08-19. 2 | Once it is accepted, delete this file and tag the release (commit 9e4b75d). 3 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: cachem 2 | Version: 1.1.0 3 | Title: Cache R Objects with Automatic Pruning 4 | Description: Key-value stores with automatic pruning. Caches can limit 5 | either their total size or the age of the oldest object (or both), 6 | automatically pruning objects to maintain the constraints. 
7 | Authors@R: c( 8 | person("Winston", "Chang", , "winston@posit.co", c("aut", "cre")), 9 | person(family = "Posit Software, PBC", role = c("cph", "fnd"))) 10 | License: MIT + file LICENSE 11 | Encoding: UTF-8 12 | ByteCompile: true 13 | URL: https://cachem.r-lib.org/, https://github.com/r-lib/cachem 14 | Imports: 15 | rlang, 16 | fastmap (>= 1.2.0) 17 | Suggests: 18 | testthat 19 | RoxygenNote: 7.2.3 20 | Roxygen: list(markdown = TRUE) 21 | Config/Needs/routine: 22 | lobstr 23 | Config/Needs/website: 24 | pkgdown 25 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | YEAR: 2023 2 | COPYRIGHT HOLDER: Posit Software, PBC 3 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | Copyright (c) 2023 Posit Software, PBC. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | 2 | README.md: README.Rmd 3 | # Instead of just knitr::knit, use rmarkdown::render and remove the .html 4 | # file, because the latter uses pandoc to process the .md file, and it 5 | # supports adding a table of contents. 6 | Rscript -e "rmarkdown::render('$<', output_file = '$@')" 7 | rm -f README.html 8 | 9 | clean: 10 | rm -f README.md 11 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | S3method(format,cachem) 4 | S3method(print,cachem) 5 | export(cache_disk) 6 | export(cache_layered) 7 | export(cache_mem) 8 | export(is.key_missing) 9 | export(key_missing) 10 | import(fastmap) 11 | importFrom(fastmap,is.key_missing) 12 | importFrom(fastmap,key_missing) 13 | importFrom(rlang,as_quosure) 14 | importFrom(rlang,enquo) 15 | importFrom(rlang,eval_tidy) 16 | importFrom(utils,object.size) 17 | useDynLib(cachem, .registration = TRUE) 18 | -------------------------------------------------------------------------------- /NEWS.md: -------------------------------------------------------------------------------- 1 | # cachem 1.1.0 2 | 3 | * Fixed #47: In some cases, with a `cache_mem`, removing an item could result in the spurious message "nothing to compact" being printed out. (#48) 4 | 5 | # cachem 1.0.8 6 | 7 | * Fixed #38: With a `cache_mem`, `$reset()` did not fully reset the cache, and calling `$prune()` could result in an error. 
(#39) 8 | 9 | # cachem 1.0.7 10 | 11 | * `cache_disk()` gains a `read_fn`, `write_fn` and `extension` arguments, to allow specifying custom formats for serializing object to disk. (@jimhester) 12 | 13 | # cachem 1.0.6 14 | 15 | * Fixed #14: Fix off-by-one error when checking pruning throttling counter for `cache_disk`. (#15) 16 | 17 | * Closed #13: Added documentation for the `remove()` method. 18 | 19 | 20 | # cachem 1.0.5 21 | 22 | * `cache_mem()` and `cache_disk()` now allow `-` and `_` (hyphen and underscore) characters in the keys. (#9) 23 | 24 | * `cache_disk()` previously did not correctly throttle pruning. (#11) 25 | 26 | 27 | # cachem 1.0.4 28 | 29 | * More pruning speed enhancements for `cache_mem()`. (#7) 30 | 31 | 32 | # cachem 1.0.3 33 | 34 | * Addressed issues with timing-sensitive tests. 35 | 36 | 37 | # cachem 1.0.2 38 | 39 | * Closed #4: Sped up pruning for `cache_mem`. (#5) 40 | 41 | * Fixed `cache_mem` pruning with `evict="lru"`. 42 | 43 | 44 | # cachem 1.0.1 45 | 46 | * Fixed function declaration of `C_validate_key`. 47 | 48 | 49 | # cachem 1.0.0 50 | 51 | * First CRAN release. 52 | -------------------------------------------------------------------------------- /R/cache-disk.R: -------------------------------------------------------------------------------- 1 | #' Create a disk cache object 2 | #' 3 | #' A disk cache object is a key-value store that saves the values as files in a 4 | #' directory on disk. Objects can be stored and retrieved using the `get()` and 5 | #' `set()` methods. Objects are automatically pruned from the cache according to 6 | #' the parameters `max_size`, `max_age`, `max_n`, and `evict`. 7 | #' 8 | #' 9 | #' @section Missing keys: 10 | #' 11 | #' The `missing` parameter controls what happens when `get()` is called with a 12 | #' key that is not in the cache (a cache miss). The default behavior is to 13 | #' return a [key_missing()] object. 
This is a *sentinel value* that indicates 14 | #' that the key was not present in the cache. You can test if the returned 15 | #' value represents a missing key by using the [is.key_missing()] function. 16 | #' You can also have `get()` return a different sentinel value, like `NULL`. 17 | #' If you want to throw an error on a cache miss, you can do so by providing 18 | #' an expression for `missing`, as in `missing = stop("Missing key")`. 19 | #' 20 | #' When the cache is created, you can supply a value for `missing`, which sets 21 | #' the default value to be returned for missing values. It can also be 22 | #' overridden when `get()` is called, by supplying a `missing` argument. For 23 | #' example, if you use `cache$get("mykey", missing = NULL)`, it will return 24 | #' `NULL` if the key is not in the cache. 25 | #' 26 | #' The `missing` parameter is actually an expression which is evaluated each 27 | #' time there is a cache miss. A quosure (from the rlang package) can be used. 28 | #' 29 | #' If you use this, the code that calls `get()` should be wrapped with 30 | #' [tryCatch()] to gracefully handle missing keys. 31 | #' 32 | #' 33 | #' @section Cache pruning: 34 | #' 35 | #' Cache pruning occurs when `set()` is called, or it can be invoked manually 36 | #' by calling `prune()`. 37 | #' 38 | #' The disk cache will throttle the pruning so that it does not happen on 39 | #' every call to `set()`, because the filesystem operations for checking the 40 | #' status of files can be slow. Instead, it will prune once in every 41 | #' `prune_rate` calls to `set()`, or if at least 5 seconds have elapsed since 42 | #' the last prune occurred, whichever is first. 43 | #' 44 | #' When a pruning occurs, if there are any objects that are older than 45 | #' `max_age`, they will be removed. 46 | #' 47 | #' The `max_size` and `max_n` parameters are applied to the cache as a whole, 48 | #' in contrast to `max_age`, which is applied to each object individually. 
49 | #' 50 | #' If the number of objects in the cache exceeds `max_n`, then objects will be 51 | #' removed from the cache according to the eviction policy, which is set with 52 | #' the `evict` parameter. Objects will be removed so that the number of items 53 | #' is `max_n`. 54 | #' 55 | #' If the size of the objects in the cache exceeds `max_size`, then objects 56 | #' will be removed from the cache. Objects will be removed from the cache so 57 | #' that the total size remains under `max_size`. Note that the size is 58 | #' calculated using the size of the files, not the size of disk space used by 59 | #' the files --- these two values can differ because files are stored in 60 | #' blocks on disk. For example, if the block size is 4096 bytes, then a file 61 | #' that is one byte in size will take 4096 bytes on disk. 62 | #' 63 | #' Another time that objects can be removed from the cache is when `get()` is 64 | #' called. If the target object is older than `max_age`, it will be removed 65 | #' and the cache will report it as a missing value. 66 | #' 67 | #' @section Eviction policies: 68 | #' 69 | #' If `max_n` or `max_size` are used, then objects will be removed from the 70 | #' cache according to an eviction policy. The available eviction policies are: 71 | #' 72 | #' \describe{ 73 | #' \item{`"lru"`}{ 74 | #' Least Recently Used. The least recently used objects will be removed. 75 | #' This uses the filesystem's mtime property. When "lru" is used, each 76 | #' time `get()` is called, it will update the file's mtime using 77 | #' [Sys.setFileTime()]. Note that on some platforms, the resolution of 78 | #' [Sys.setFileTime()] may be low, one or two seconds. 79 | #' } 80 | #' \item{`"fifo"`}{ 81 | #' First-in-first-out. The oldest objects will be removed. 82 | #' } 83 | #' } 84 | #' 85 | #' Both of these policies use files' mtime. Note that some filesystems (notably 86 | #' FAT) have poor mtime resolution. 
(atime is not used because support for atime 87 | #' is worse than mtime.) 88 | #' 89 | #' 90 | #' @section Sharing among multiple processes: 91 | #' 92 | #' The directory for a cache_disk can be shared among multiple R processes. To 93 | #' do this, each R process should have a cache_disk object that uses the same 94 | #' directory. Each cache_disk will do pruning independently of the others, so 95 | #' if they have different pruning parameters, then one cache_disk may remove 96 | #' cached objects before another cache_disk would do so. 97 | #' 98 | #' Even though it is possible for multiple processes to share a cache_disk 99 | #' directory, this should not be done on networked file systems, because the 100 | #' slow performance of networked file systems can cause problems. If you need 101 | #' a high-performance shared cache, you can use one built on a database like 102 | #' Redis, SQLite, MySQL, or similar. 103 | #' 104 | #' When multiple processes share a cache directory, there are some potential 105 | #' race conditions. For example, if your code calls `exists(key)` to check if 106 | #' an object is in the cache, and then calls `get(key)`, the object may be 107 | #' removed from the cache in between those two calls, and `get(key)` will 108 | #' report a cache miss. Instead of calling the two functions, it is better to 109 | #' simply call `get(key)`, and check that the returned object is not a 110 | #' `key_missing()` object, using `is.key_missing()`. This effectively tests 111 | #' for existence and gets the object in one operation. 112 | #' 113 | #' It is also possible for one process to prune objects at the same time 114 | #' that another process is trying to prune objects. If this happens, you may 115 | #' see a warning from `file.remove()` failing to remove a file that has 116 | #' already been deleted. 
117 | #' 118 | #' 119 | #' @section Methods: 120 | #' 121 | #' A disk cache object has the following methods: 122 | #' 123 | #' \describe{ 124 | #' \item{`get(key, missing)`}{ 125 | #' Returns the value associated with `key`. If the key is not in the 126 | #' cache, then it evaluates the expression specified by `missing` and 127 | #' returns the value. If `missing` is specified here, then it will 128 | #' override the default that was set when the `cache_disk` object was 129 | #' created. See section Missing Keys for more information. 130 | #' } 131 | #' \item{`set(key, value)`}{ 132 | #' Stores the `key`-`value` pair in the cache. 133 | #' } 134 | #' \item{`exists(key)`}{ 135 | #' Returns `TRUE` if the cache contains the key, otherwise 136 | #' `FALSE`. 137 | #' } 138 | #' \item{`remove(key)`}{ 139 | #' Removes `key` from the cache, if it exists in the cache. If the key is 140 | #' not in the cache, this does nothing. 141 | #' } 142 | #' \item{`size()`}{ 143 | #' Returns the number of items currently in the cache. 144 | #' } 145 | #' \item{`keys()`}{ 146 | #' Returns a character vector of all keys currently in the cache. 147 | #' } 148 | #' \item{`reset()`}{ 149 | #' Clears all objects from the cache. 150 | #' } 151 | #' \item{`destroy()`}{ 152 | #' Clears all objects in the cache, and removes the cache directory from 153 | #' disk. 154 | #' } 155 | #' \item{`prune()`}{ 156 | #' Prunes the cache, using the parameters specified by `max_size`, 157 | #' `max_age`, `max_n`, and `evict`. 158 | #' } 159 | #' } 160 | #' 161 | #' @param dir Directory to store files for the cache. If `NULL` (the default) it 162 | #' will create and use a temporary directory. 163 | #' @param read_fn The function used to read the values from disk. If `NULL` 164 | #' (the default) it will use `readRDS`. 165 | #' @param write_fn The function used to write the values to disk. If `NULL` 166 | #' (the default) it will use `saveRDS`. 
167 | #' @param extension The file extension to use for files on disk. 168 | #' @param max_age Maximum age of files in cache before they are evicted, in 169 | #' seconds. Use `Inf` for no age limit. 170 | #' @param max_size Maximum size of the cache, in bytes. If the cache exceeds 171 | #' this size, cached objects will be removed according to the value of the 172 | #' `evict`. Use `Inf` for no size limit. The default is 1 gigabyte. 173 | #' @param max_n Maximum number of objects in the cache. If the number of objects 174 | #' exceeds this value, then cached objects will be removed according to the 175 | #' value of `evict`. Use `Inf` for no limit of number of items. 176 | #' @param evict The eviction policy to use to decide which objects are removed 177 | #' when a cache pruning occurs. Currently, `"lru"` and `"fifo"` are supported. 178 | #' @param destroy_on_finalize If `TRUE`, then when the cache_disk object is 179 | #' garbage collected, the cache directory and all objects inside of it will be 180 | #' deleted from disk. If `FALSE` (the default), it will do nothing when 181 | #' finalized. 182 | #' @param missing A value to return when `get(key)` is called but the key is not 183 | #' present in the cache. The default is a [key_missing()] object. It is 184 | #' actually an expression that is evaluated each time there is a cache miss. 185 | #' See section Missing keys for more information. 186 | #' @param prune_rate How often to prune the cache. See section Cache Pruning for 187 | #' more information. 188 | #' @param warn_ref_objects Should a warning be emitted when a reference is 189 | #' stored in the cache? This can be useful because serializing and 190 | #' deserializing a reference object (such as environments and external 191 | #' pointers) can lead to unexpected behavior. 192 | #' @param logfile An optional filename or connection object to where logging 193 | #' information will be written. To log to the console, use `stderr()` or 194 | #' `stdout()`. 
195 | #' 196 | #' @return A disk caching object, with class `cache_disk`. 197 | #' @importFrom rlang enquo eval_tidy as_quosure 198 | #' @export 199 | cache_disk <- function( 200 | dir = NULL, 201 | max_size = 1024 * 1024 ^ 2, 202 | max_age = Inf, 203 | max_n = Inf, 204 | evict = c("lru", "fifo"), 205 | destroy_on_finalize = FALSE, 206 | read_fn = NULL, 207 | write_fn = NULL, 208 | extension = ".rds", 209 | missing = key_missing(), 210 | prune_rate = 20, 211 | warn_ref_objects = FALSE, 212 | logfile = NULL 213 | ) { 214 | # ============================================================================ 215 | # Constants 216 | # ============================================================================ 217 | PRUNE_THROTTLE_TIME_LIMIT <- 5 218 | 219 | # ============================================================================ 220 | # Logging 221 | # ============================================================================ 222 | # This needs to be defined first, because it's used in initialization. 223 | log_ <- function(text) { 224 | if (is.null(logfile_)) return() 225 | 226 | text <- paste0(format(Sys.time(), "[%Y-%m-%d %H:%M:%OS3] cache_disk "), text) 227 | cat(text, sep = "\n", file = logfile_, append = TRUE) 228 | } 229 | 230 | # ============================================================================ 231 | # Initialization 232 | # ============================================================================ 233 | if (is.null(dir)) { 234 | dir <- tempfile("cache_disk-") 235 | } 236 | if (!is.numeric(max_size)) stop("max_size must be a number. Use `Inf` for no limit.") 237 | if (!is.numeric(max_age)) stop("max_age must be a number. Use `Inf` for no limit.") 238 | if (!is.numeric(max_n)) stop("max_n must be a number. 
Use `Inf` for no limit.") 239 | 240 | if (!dir.exists(dir)) { 241 | # log_(paste0("initialize: Creating ", dir)) 242 | dir.create(dir, recursive = TRUE) 243 | } 244 | 245 | logfile_ <- logfile 246 | dir_ <- normalizePath(dir) 247 | extension_ <- extension 248 | extension_regex_ <- sub("\\.(.*)", "\\\\.\\1$", extension) 249 | read_fn_ <- read_fn 250 | write_fn_ <- write_fn 251 | max_size_ <- max_size 252 | max_age_ <- max_age 253 | max_n_ <- max_n 254 | evict_ <- match.arg(evict) 255 | destroy_on_finalize_ <- destroy_on_finalize 256 | missing_ <- enquo(missing) 257 | prune_rate_ <- prune_rate 258 | 259 | destroyed_ <- FALSE 260 | 261 | 262 | if (is.null(read_fn_)) { 263 | read_fn_ <- readRDS 264 | } 265 | 266 | if (is.null(write_fn_)) { 267 | write_fn_ <- local({ 268 | ref_object <- FALSE 269 | function(value, file) { 270 | on.exit({ 271 | ref_object <<- TRUE 272 | unlink(temp_file) 273 | }) 274 | temp_file <- paste0(file, "-temp-", random_hex(16)) 275 | saveRDS(value, file = temp_file, 276 | refhook = function(x) { 277 | ref_object <<- TRUE 278 | NULL 279 | } 280 | ) 281 | file.rename(temp_file, file) 282 | if (warn_ref_objects && ref_object) { 283 | log_(paste0('set: value is a reference object')) 284 | warning("A reference object was cached in a serialized format. The restored object may not work as expected.") 285 | } 286 | } 287 | }) 288 | } 289 | 290 | # Start the prune throttle counter with a random number from 0-19. This is 291 | # so that, in the case where multiple cache_disk objects that point to the 292 | # same directory are created and discarded after just a few uses each, 293 | # pruning will still occur. 
294 | prune_throttle_counter_ <- sample.int(prune_rate_, 1) - 1 295 | prune_last_time_ <- as.numeric(Sys.time()) 296 | 297 | if (destroy_on_finalize_) { 298 | reg.finalizer( 299 | environment(), 300 | function(e) { e$destroy() } 301 | ) 302 | } 303 | 304 | # ============================================================================ 305 | # Public methods 306 | # ============================================================================ 307 | get <- function(key, missing = missing_) { 308 | log_(paste0('get: key "', key, '"')) 309 | is_destroyed(throw = TRUE) 310 | validate_key(key) 311 | 312 | maybe_prune_single_(key) 313 | 314 | filename <- key_to_filename_(key) 315 | 316 | # Instead of calling exists() before fetching the value, just try to 317 | # fetch the value. This reduces the risk of a race condition when 318 | # multiple processes share a cache. 319 | read_error <- FALSE 320 | tryCatch( 321 | { 322 | value <- suppressWarnings(read_fn_(filename)) 323 | if (evict_ == "lru"){ 324 | Sys.setFileTime(filename, Sys.time()) 325 | } 326 | }, 327 | error = function(e) { 328 | read_error <<- TRUE 329 | } 330 | ) 331 | if (read_error) { 332 | log_(paste0('get: key "', key, '" is missing')) 333 | missing <- as_quosure(missing) 334 | return(eval_tidy(missing)) 335 | } 336 | 337 | log_(paste0('get: key "', key, '" found')) 338 | value 339 | } 340 | 341 | set <- function(key, value) { 342 | log_(paste0('set: key "', key, '"')) 343 | is_destroyed(throw = TRUE) 344 | validate_key(key) 345 | 346 | file <- key_to_filename_(key) 347 | 348 | save_error <- FALSE 349 | tryCatch( 350 | { 351 | write_fn_(value, file) 352 | }, 353 | error = function(e) { 354 | save_error <<- TRUE 355 | } 356 | ) 357 | if (save_error) { 358 | log_(paste0('set: key "', key, '" error')) 359 | stop('Error setting value for key "', key, '".') 360 | } 361 | 362 | prune_throttled_() 363 | invisible(TRUE) 364 | } 365 | 366 | exists <- function(key) { 367 | is_destroyed(throw = TRUE) 368 | 
validate_key(key) 369 | file.exists(key_to_filename_(key)) 370 | } 371 | 372 | # Return all keys in the cache 373 | keys <- function() { 374 | is_destroyed(throw = TRUE) 375 | files <- dir(dir_, extension_regex_) 376 | sub(extension_regex_, "", files) 377 | } 378 | 379 | remove <- function(key) { 380 | log_(paste0('remove: key "', key, '"')) 381 | is_destroyed(throw = TRUE) 382 | validate_key(key) 383 | # Remove file; use unlink() instead of file.remove() because it won't 384 | # warn if the file doesn't exist. 385 | unlink(key_to_filename_(key)) 386 | invisible(TRUE) 387 | } 388 | 389 | reset <- function() { 390 | log_(paste0('reset')) 391 | is_destroyed(throw = TRUE) 392 | file.remove(dir(dir_, extension_regex_, full.names = TRUE)) 393 | invisible(TRUE) 394 | } 395 | 396 | prune <- function() { 397 | # TODO: It would be good to add parameters `n` and `size`, so that the 398 | # cache can be pruned to `max_n - n` and `max_size - size` before adding 399 | # an object. Right now we prune after adding the object, so the cache 400 | # can temporarily grow past the limits. The reason we don't do this now 401 | # is because it is expensive to find the size of the serialized object 402 | # before adding it. 403 | 404 | log_('prune') 405 | is_destroyed(throw = TRUE) 406 | 407 | current_time <- Sys.time() 408 | 409 | filenames <- dir(dir_, extension_regex_, full.names = TRUE) 410 | info <- file.info(filenames, extra_cols = FALSE) 411 | info <- info[info$isdir == FALSE, ] 412 | info$name <- rownames(info) 413 | rownames(info) <- NULL 414 | # Files could be removed between the dir() and file.info() calls. The 415 | # entire row for such files will have NA values. Remove those rows. 416 | info <- info[!is.na(info$size), ] 417 | 418 | # 1. Remove any files where the age exceeds max age. 
419 | if (is.finite(max_age_)) { 420 | timediff <- as.numeric(current_time - info$mtime, units = "secs") 421 | rm_idx <- timediff > max_age_ 422 | if (any(rm_idx)) { 423 | log_(paste0("prune max_age: Removing ", paste(info$name[rm_idx], collapse = ", "))) 424 | rm_success <- file.remove(info$name[rm_idx]) 425 | # This maps rm_success back into the TRUEs in the rm_idx vector. 426 | # If (for example) rm_idx is c(F,T,F,T,T) and rm_success is c(T,F,T), 427 | # then this line modifies rm_idx to be c(F,T,F,F,T). 428 | rm_idx[rm_idx] <- rm_success 429 | info <- info[!rm_idx, ] 430 | } 431 | } 432 | 433 | # Sort objects by priority. The sorting is done in a function which can be 434 | # called multiple times but only does the work the first time. 435 | info_is_sorted <- FALSE 436 | ensure_info_is_sorted <- function() { 437 | if (info_is_sorted) return() 438 | 439 | info <<- info[order(info$mtime, decreasing = TRUE), ] 440 | info_is_sorted <<- TRUE 441 | } 442 | 443 | # 2. Remove files if there are too many. 444 | if (is.finite(max_n_) && nrow(info) > max_n_) { 445 | ensure_info_is_sorted() 446 | rm_idx <- seq_len(nrow(info)) > max_n_ 447 | log_(paste0("prune max_n: Removing ", paste(info$name[rm_idx], collapse = ", "))) 448 | rm_success <- file.remove(info$name[rm_idx]) 449 | rm_idx[rm_idx] <- rm_success 450 | info <- info[!rm_idx, ] 451 | } 452 | 453 | # 3. Remove files if cache is too large. 
454 | if (is.finite(max_size_) && sum(info$size) > max_size_) { 455 | ensure_info_is_sorted() 456 | cum_size <- cumsum(info$size) 457 | rm_idx <- cum_size > max_size_ 458 | log_(paste0("prune max_size: Removing ", paste(info$name[rm_idx], collapse = ", "))) 459 | rm_success <- file.remove(info$name[rm_idx]) 460 | rm_idx[rm_idx] <- rm_success 461 | info <- info[!rm_idx, ] 462 | } 463 | 464 | prune_last_time_ <<- as.numeric(current_time) 465 | 466 | invisible(TRUE) 467 | } 468 | 469 | size <- function() { 470 | is_destroyed(throw = TRUE) 471 | length(dir(dir_, extension_regex_)) 472 | } 473 | 474 | info <- function() { 475 | list( 476 | dir = dir_, 477 | max_size = max_size_, 478 | max_age = max_age_, 479 | max_n = max_n_, 480 | evict = evict_, 481 | destroy_on_finalize = destroy_on_finalize_, 482 | missing = missing_, 483 | prune_rate = prune_rate, 484 | logfile = logfile_, 485 | prune_throttle_counter = prune_throttle_counter_, 486 | prune_last_time = as.POSIXct(prune_last_time_, origin = "1970-01-01") 487 | ) 488 | } 489 | 490 | destroy <- function() { 491 | if (is_destroyed()) { 492 | return(invisible(FALSE)) 493 | } 494 | 495 | log_(paste0("destroy: Removing ", dir_)) 496 | # First create a sentinel file so that other processes sharing this 497 | # cache know that the cache is to be destroyed. This is needed because 498 | # the recursive unlink is not atomic: another process can add a file to 499 | # the directory after unlink starts removing files but before it removes 500 | # the directory, and when that happens, the directory removal will fail. 501 | file.create(file.path(dir_, "__destroyed__")) 502 | # Remove all the extension files. This will not remove the setinel file. 503 | file.remove(dir(dir_, extension_regex_, full.names = TRUE)) 504 | # Next remove dir recursively, including sentinel file. 
505 | unlink(dir_, recursive = TRUE) 506 | destroyed_ <<- TRUE 507 | invisible(TRUE) 508 | } 509 | 510 | is_destroyed <- function(throw = FALSE) { 511 | if (!dir.exists(dir_) || 512 | file.exists(file.path(dir_, "__destroyed__"))) 513 | { 514 | # It's possible for another process to destroy a shared cache directory 515 | destroyed_ <<- TRUE 516 | } 517 | 518 | if (throw) { 519 | if (destroyed_) { 520 | stop("Attempted to use cache which has been destroyed:\n ", dir_) 521 | } 522 | 523 | } else { 524 | destroyed_ 525 | } 526 | } 527 | 528 | # ============================================================================ 529 | # Private methods 530 | # ============================================================================ 531 | key_to_filename_ <- function(key) { 532 | validate_key(key) 533 | # Additional validation. This 80-char limit is arbitrary, and is 534 | # intended to avoid hitting a filename length limit on Windows. 535 | if (nchar(key) > 80) { 536 | stop("Invalid key: key must have fewer than 80 characters.") 537 | } 538 | file.path(dir_, paste0(key, extension_)) 539 | } 540 | 541 | # A wrapper for prune() that throttles it, because prune() can be expensive 542 | # due to filesystem operations. This function will prune only once every 543 | # `prune_rate` times it is called, or if it has been more than 5 seconds since 544 | # the last time the cache was actually pruned, whichever is first. In the 545 | # future, the behavior may be customizable. 546 | prune_throttled_ <- function() { 547 | # Count the number of times prune() has been called. 548 | prune_throttle_counter_ <<- prune_throttle_counter_ + 1 549 | 550 | if (prune_throttle_counter_ >= prune_rate_ || 551 | as.numeric(Sys.time()) - prune_last_time_ > PRUNE_THROTTLE_TIME_LIMIT) 552 | { 553 | prune() 554 | prune_throttle_counter_ <<- 0 555 | } 556 | } 557 | 558 | # Prunes a single object if it exceeds max_age. 
If the object does not 559 | # exceed max_age, or if the object doesn't exist, do nothing. 560 | maybe_prune_single_ <- function(key) { 561 | # obj <- cache_[[key]] 562 | # if (is.null(obj)) return() 563 | filepath <- file.path(dir_, paste0(key, extension_)) 564 | info <- file.info(filepath, extra_cols = FALSE) 565 | if (is.na(info$mtime)) return() 566 | 567 | timediff <- as.numeric(Sys.time()) - as.numeric(info$mtime) 568 | if (timediff > max_age_) { 569 | log_(paste0("pruning single object exceeding max_age: Removing ", key)) 570 | unlink(filepath) 571 | } 572 | } 573 | 574 | 575 | # ============================================================================ 576 | # Returned object 577 | # ============================================================================ 578 | structure( 579 | list( 580 | get = get, 581 | set = set, 582 | exists = exists, 583 | keys = keys, 584 | remove = remove, 585 | reset = reset, 586 | prune = prune, 587 | size = size, 588 | destroy = destroy, 589 | is_destroyed = is_destroyed, 590 | info = info 591 | ), 592 | class = c("cache_disk", "cachem") 593 | ) 594 | } 595 | -------------------------------------------------------------------------------- /R/cache-layered.R: -------------------------------------------------------------------------------- 1 | #' Compose any number of cache objects into a new, layered cache object 2 | #' 3 | #' Note that `cache_layered` is currently experimental. 4 | #' 5 | #' @param ... Cache objects to compose into a new, layered cache object. 6 | #' @inheritParams cache_disk 7 | #' 8 | #' @return A layered caching object, with class `cache_layered`. 9 | #' @examples 10 | #' 11 | #' # Make a layered cache from a small memory cache and large disk cache 12 | #' m <- cache_mem(max_size = 100 * 1024^2) 13 | #' d <- cache_disk(max_size = 2 * 1024^3) 14 | #' cl <- cache_layered(m, d) 15 | #' @export 16 | cache_layered <- function(..., logfile = NULL) { 17 | caches <- list(...) 
#' Compose any number of cache objects into a new, layered cache object
#'
#' Note that `cache_layered` is currently experimental.
#'
#' `get()` searches the layers in the order they were supplied. When a value is
#' found in a later layer, it is also stored in every earlier layer that was
#' searched and missed. `set()`, `remove()`, and `reset()` are applied to every
#' layer, and `exists()` is `TRUE` if any layer has the key.
#'
#' @param ... Cache objects to compose into a new, layered cache object.
#' @inheritParams cache_disk
#'
#' @return A layered caching object, with class `cache_layered`.
#' @examples
#'
#' # Make a layered cache from a small memory cache and large disk cache
#' m <- cache_mem(max_size = 100 * 1024^2)
#' d <- cache_disk(max_size = 2 * 1024^3)
#' cl <- cache_layered(m, d)
#' @export
cache_layered <- function(..., logfile = NULL) {
  caches <- list(...)
  logfile_ <- logfile

  # ============================================================================
  # Logging
  # ============================================================================
  # This needs to be defined first, because it's used in initialization.
  log_ <- function(text) {
    if (is.null(logfile_)) return()

    text <- paste0(format(Sys.time(), "[%Y-%m-%d %H:%M:%OS3] cache_layered "), text)
    cat(text, sep = "\n", file = logfile_, append = TRUE)
  }

  # Returns the value for `key` from the first layer that has it, or the
  # missing-key value if no layer has it.
  get <- function(key) {
    log_(paste0("Get: ", key))
    # Start from the sentinel so that a layered cache with no layers reports a
    # miss instead of returning an ambiguous NULL.
    value <- key_missing()
    # Search down the caches for the object
    for (i in seq_along(caches)) {
      value <- caches[[i]]$get(key)

      if (!is.key_missing(value)) {
        log_(paste0("Get from ", class(caches[[i]])[1], "... hit"))
        # Set the value in any caches where we searched and missed.
        for (j in seq_len(i - 1)) {
          caches[[j]]$set(key, value)
        }
        break
      } else {
        log_(paste0("Get from ", class(caches[[i]])[1], "... miss"))
      }
    }

    value
  }

  # Stores the key-value pair in every layer.
  set <- function(key, value) {
    for (cache in caches) {
      cache$set(key, value)
    }
    invisible(TRUE)
  }

  # TRUE if any layer contains the key.
  exists <- function(key) {
    for (cache in caches) {
      if (cache$exists(key)) {
        return(TRUE)
      }
    }
    FALSE
  }

  # Union of the keys of all layers. as.character() turns the NULL that
  # unlist() yields for an empty list into character(0).
  keys <- function() {
    as.character(unique(unlist(lapply(caches, function(cache) {
      cache$keys()
    }))))
  }

  # Removes the key from every layer.
  remove <- function(key) {
    for (cache in caches) {
      cache$remove(key)
    }
    invisible(TRUE)
  }

  # Resets every layer.
  reset <- function() {
    for (cache in caches) {
      cache$reset()
    }
    invisible(TRUE)
  }

  # Returns the list of layers, in search order.
  get_caches <- function() {
    caches
  }

  structure(
    list(
      get = get,
      set = set,
      exists = exists,
      keys = keys,
      remove = remove,
      reset = reset,
      get_caches = get_caches
    ),
    class = c("cache_layered", "cachem")
  )
}
#' Create a memory cache object
#'
#' A memory cache object is a key-value store that saves the values in an
#' environment. Objects can be stored and retrieved using the `get()` and
#' `set()` methods. Objects are automatically pruned from the cache according to
#' the parameters `max_size`, `max_age`, `max_n`, and `evict`.
#'
#' In a `cache_mem`, R objects are stored directly in the cache; they are
#' *not* serialized before being stored in the cache. This contrasts with other
#' cache types, like [cache_disk()], where objects are serialized, and the
#' serialized object is cached. This can result in some differences of behavior.
#' For example, as long as an object is stored in a cache_mem, it will not be
#' garbage collected.
#'
#' @section Missing keys:
#'
#' The `missing` parameter controls what happens when `get()` is called with a
#' key that is not in the cache (a cache miss). The default behavior is to
#' return a [key_missing()] object. This is a *sentinel value* that indicates
#' that the key was not present in the cache. You can test if the returned
#' value represents a missing key by using the [is.key_missing()] function.
#' You can also have `get()` return a different sentinel value, like `NULL`.
#' If you want to throw an error on a cache miss, you can do so by providing
#' an expression for `missing`, as in `missing = stop("Missing key")`.
#'
#' When the cache is created, you can supply a value for `missing`, which sets
#' the default value to be returned for missing values. It can also be
#' overridden when `get()` is called, by supplying a `missing` argument. For
#' example, if you use `cache$get("mykey", missing = NULL)`, it will return
#' `NULL` if the key is not in the cache.
#'
#' The `missing` parameter is actually an expression which is evaluated each
#' time there is a cache miss. A quosure (from the rlang package) can be used.
#'
#' If `missing` is an expression that throws an error, the code that calls
#' `get()` should be wrapped with [tryCatch()] to gracefully handle missing
#' keys.
#'
#'
#' @section Cache pruning:
#'
#' Cache pruning occurs when `set()` is called, or it can be invoked manually
#' by calling `prune()`.
#'
#' When a pruning occurs, if there are any objects that are older than
#' `max_age`, they will be removed.
#'
#' The `max_size` and `max_n` parameters are applied to the cache as a whole,
#' in contrast to `max_age`, which is applied to each object individually.
#'
#' If the number of objects in the cache exceeds `max_n`, then objects will be
#' removed from the cache according to the eviction policy, which is set with
#' the `evict` parameter. Objects will be removed so that the number of items
#' is `max_n`.
#'
#' If the size of the objects in the cache exceeds `max_size`, then objects
#' will be removed from the cache. Objects will be removed from the cache so
#' that the total size remains under `max_size`.
#'
#' Another time that objects can be removed from the cache is when `get()` is
#' called. If the target object is older than `max_age`, it will be removed
#' and the cache will report it as a missing value.
#'
#' @section Eviction policies:
#'
#' If `max_n` or `max_size` are used, then objects will be removed
#' from the cache according to an eviction policy. The available eviction
#' policies are:
#'
#'   \describe{
#'     \item{`"lru"`}{
#'       Least Recently Used. The least recently used objects will be removed.
#'     }
#'     \item{`"fifo"`}{
#'       First-in-first-out. The oldest objects will be removed.
#'     }
#'   }
#'
#' @section Methods:
#'
#'  A memory cache object has the following methods:
#'
#'   \describe{
#'     \item{`get(key, missing)`}{
#'       Returns the value associated with `key`. If the key is not in the
#'       cache, then it evaluates the expression specified by `missing` and
#'       returns the value. If `missing` is specified here, then it will
#'       override the default that was set when the `cache_mem` object was
#'       created. See section Missing Keys for more information.
#'     }
#'     \item{`set(key, value)`}{
#'       Stores the `key`-`value` pair in the cache.
#'     }
#'     \item{`exists(key)`}{
#'       Returns `TRUE` if the cache contains the key, otherwise
#'       `FALSE`.
#'     }
#'     \item{`remove(key)`}{
#'       Removes `key` from the cache, if it exists in the cache. If the key is
#'       not in the cache, this does nothing.
#'     }
#'     \item{`size()`}{
#'       Returns the number of items currently in the cache.
#'     }
#'     \item{`keys()`}{
#'       Returns a character vector of all keys currently in the cache.
#'     }
#'     \item{`reset()`}{
#'       Clears all objects from the cache.
#'     }
#'     \item{`prune()`}{
#'       Prunes the cache, using the parameters specified by `max_size`,
#'       `max_age`, `max_n`, and `evict`.
#'     }
#'     \item{`info()`}{
#'       Returns a list with the `max_size`, `max_age`, `max_n`, `evict`,
#'       `missing`, and `logfile` settings of the cache.
#'     }
#'   }
#'
#' @param max_size Maximum size of the cache, in bytes. If the cache exceeds
#'   this size, cached objects will be removed according to the value of
#'   `evict`. Use `Inf` for no size limit. The default is 512 megabytes.
#' @inheritParams cache_disk
#'
#' @return A memory caching object, with class `cache_mem`.
#' @importFrom utils object.size
#' @export
cache_mem <- function(
  max_size = 512 * 1024 ^ 2,
  max_age = Inf,
  max_n = Inf,
  evict = c("lru", "fifo"),
  missing = key_missing(),
  logfile = NULL)
{
  # ============================================================================
  # Constants
  # ============================================================================
  # When TRUE, prune(), size(), and compact_() run internal consistency checks
  # and stop on a mismatch.
  DEBUG <- TRUE
  # Initial capacity of the columnar storage vectors; also the minimum capacity
  # after compaction.
  INITIAL_SIZE <- 64L
  # When compacting, how much space should be reserved? For example, if there
  # are 75 items in the cache when it is compacted and COMPACT_MULT is 2, then
  # the data store will be compacted to have a capacity of 150 items.
  COMPACT_MULT <- 2
  # If TRUE, the data will be kept in the correct atime (for lru) or mtime (for
  # fifo) order each time get() or set() is called, though the metadata log will
  # grow by one entry each time (it will also occasionally be compacted). If
  # FALSE, the metadata entry will be kept in place (so the metadata log won't
  # grow as quickly), but the atimes/mtimes will not be kept in order; instead,
  # the metadata will be sorted by atime/mtime each time prune() is called (and
  # prune() is called by set()). The overall behavior is the same, but there are
  # somewhat different performance characteristics. The tradeoff is either
  # growing the log for every get() (and needing to occasionally compact it), or
  # having to sort it every time set() is called. Sorting data of a reasonable
  # size (up to around 1e5) is fast in R. For larger numbers of items it may be
  # better to set this to TRUE.
  MAINTAIN_TIME_SORT <- FALSE

  # ============================================================================
  # Initialization
  # ============================================================================
  if (!is.numeric(max_size)) stop("max_size must be a number. Use `Inf` for no limit.")
  if (!is.numeric(max_age))  stop("max_age must be a number. Use `Inf` for no limit.")
  if (!is.numeric(max_n))    stop("max_n must be a number. Use `Inf` for no limit.")

  max_size_ <- max_size
  max_age_  <- max_age
  max_n_    <- max_n
  evict_    <- match.arg(evict)
  # Captured unevaluated, so that the expression is evaluated on each miss.
  missing_  <- enquo(missing)
  logfile_  <- logfile

  # Each limit is only enforced when it is finite.
  PRUNE_BY_SIZE <- is.finite(max_size_)
  PRUNE_BY_AGE  <- is.finite(max_age_)
  PRUNE_BY_N    <- is.finite(max_n_)

  # ============================================================================
  # Internal state
  # ============================================================================
  # The keys, values, and metadata are stored in columnar format. The vectors
  # key_, value_, size_, mtime_, and atime_ are the columns. Separate vectors
  # are used instead of a data frame, because operations for modifying and
  # growing vectors are much faster than the same operations on data frames.
  #
  # It uses a column-first format because a row-first format is much slower for
  # doing the manipulations and computations that are needed for pruning, such
  # as sorting by atime, and calculating a cumulative sum of sizes.
  #
  # For fast get() performance, there is also key_idx_map_, which maps between
  # the key, and the "row" index in our "data frame".
  #
  # An older version of this code stored the value along with metadata (size,
  # mtime, and atime) in a fastmap object, but this had poor performance for
  # pruning operations. This is because, for pruning, it needs to fetch the
  # metadata for all objects, then sort by atime (if evict="lru"), then take a
  # cumulative sum of sizes. Fetching the metadata for all objects was slow, as
  # was converting the resulting row-first data into column-first data. The
  # current column-first approach is much, much faster.
  key_idx_map_ <- fastmap()

  # These values are set in the reset() method.
  key_   <- NULL
  value_ <- NULL
  size_  <- NULL
  mtime_ <- NULL
  atime_ <- NULL

  total_n_    <- NULL  # Total number of items
  total_size_ <- NULL  # Total number of bytes used
  last_idx_   <- NULL  # Most recent (and largest) index used


  # ============================================================================
  # Public methods
  # ============================================================================

  # Clears the key-index map and re-allocates empty storage vectors of
  # INITIAL_SIZE, zeroing the bookkeeping counters.
  reset <- function() {
    log_(paste0('reset'))
    key_idx_map_$reset()
    key_   <<- rep_len(NA_character_, INITIAL_SIZE)
    value_ <<- vector("list",         INITIAL_SIZE)
    size_  <<- rep_len(NA_real_,      INITIAL_SIZE)
    mtime_ <<- rep_len(NA_real_,      INITIAL_SIZE)
    atime_ <<- rep_len(NA_real_,      INITIAL_SIZE)

    total_n_    <<- 0L
    total_size_ <<- 0
    last_idx_   <<- 0L
    invisible(TRUE)
  }

  # Returns the value stored for `key`. On a miss (key absent, or entry older
  # than max_age), evaluates `missing` and returns the result.
  get <- function(key, missing = missing_) {
    log_(paste0('get: key "', key, '"'))
    validate_key(key)

    idx <- key_idx_map_$get(key)

    if (is.null(idx)) {
      log_(paste0('get: key "', key, '" is missing'))
      missing <- as_quosure(missing)
      return(eval_tidy(missing))
    }

    # Prunes a single object if it exceeds max_age. If the object does not
    # exceed max_age, or if the object doesn't exist, do nothing.
    if (PRUNE_BY_AGE) {
      time <- as.numeric(Sys.time())
      if (time - mtime_[idx] > max_age_) {
        log_(paste0("pruning single object exceeding max_age: Removing ", key))
        remove_(key)
        missing <- as_quosure(missing)
        return(eval_tidy(missing))
      }
    }

    log_(paste0('get: key "', key, '" found'))

    # Get the value before updating atime, because that can move items around
    # when MAINTAIN_TIME_SORT is TRUE.
    value <- value_[[idx]]
    update_atime_(key)
    value
  }

  # Stores `value` under `key`, replacing any existing entry, then prunes.
  set <- function(key, value) {
    log_(paste0('set: key "', key, '"'))
    validate_key(key)

    time <- as.numeric(Sys.time())

    if (PRUNE_BY_SIZE) {
      # Reported size is rough! See ?object.size.
      size <- as.numeric(object.size(value))
      total_size_ <<- total_size_ + size
    } else {
      # Sizes are only tracked when a size limit is in force.
      size <- NA_real_
    }

    old_idx <- key_idx_map_$get(key)

    # We'll set this to TRUE if we need to append to the data; FALSE if we can
    # modify the existing entry in place.
    append <- NULL

    if (!is.null(old_idx)) {
      # If there's an existing entry with this key, subtract its size from the
      # running total. When the time sort is being maintained and the entry is
      # not already last, also clear out its row, because we'll be appending a
      # new one later.
      if (PRUNE_BY_SIZE) {
        total_size_ <<- total_size_ - size_[old_idx]
      }

      if (MAINTAIN_TIME_SORT && old_idx != last_idx_) {
        append <- TRUE

        key_  [old_idx] <<- NA_character_
        value_[old_idx] <<- list(NULL)
        size_ [old_idx] <<- NA_real_
        mtime_[old_idx] <<- NA_real_
        atime_[old_idx] <<- NA_real_

      } else {
        append <- FALSE
      }

    } else {
      append <- TRUE
      total_n_ <<- total_n_ + 1L
    }

    if (append) {
      # If we're appending, update the last_idx_ and use it for storage. This
      # assigns past the end of the vector. As of R 3.4, this grows the vector
      # in place if possible, and is generally very fast, because vectors are
      # allocated with extra memory at the end. For older versions of R, this
      # can be very slow because a copy of the whole vector must be made each
      # time.
      last_idx_ <<- last_idx_ + 1L
      key_idx_map_$set(key, last_idx_)
      new_idx <- last_idx_

    } else {
      # Not appending; replace the old item in place.
      new_idx <- old_idx
    }

    key_  [new_idx]   <<- key
    value_[[new_idx]] <<- value
    size_ [new_idx]   <<- size
    mtime_[new_idx]   <<- time
    atime_[new_idx]   <<- time

    prune()

    invisible(TRUE)
  }

  # Returns TRUE if `key` is in the cache. When max_age is finite, an entry
  # that has exceeded it is removed and reported as absent.
  exists <- function(key) {
    validate_key(key)

    if (PRUNE_BY_AGE) {
      # Prunes a single object if it exceeds max_age. This code path looks a bit
      # complicated for what it does, but this is for performance.
      idx <- key_idx_map_$get(key)
      if (is.null(idx)) {
        return(FALSE)
      }

      time <- as.numeric(Sys.time())
      if (time - mtime_[idx] > max_age_) {
        log_(paste0("pruning single object exceeding max_age: Removing ", key))
        remove_(key)
        return(FALSE)
      }

      return(TRUE)

    } else {
      key_idx_map_$has(key)
    }
  }

  # Returns the keys currently in the key-index map, pruning first when
  # max_age is finite.
  keys <- function() {
    if (PRUNE_BY_AGE) {
      # When there's no max_age, pruning is only needed when set() is called,
      # because that's the only way for max_n or max_size to be exceeded. But
      # when there is a max_age, we might need to prune here simply because time
      # has passed. (This could be made faster by having an option for prune()
      # to only prune by age (and not by n or size). It could also avoid sorting
      # the metadata.)
      prune()
    }

    key_idx_map_$keys()
  }

  # Removes `key` from the cache; does nothing if it is not present.
  remove <- function(key) {
    log_(paste0('remove: key "', key, '"'))
    validate_key(key)
    remove_(key)
    invisible(TRUE)
  }

  # Applies the max_age, max_n, and max_size limits, in that order.
  prune <- function() {
    log_(paste0('prune'))

    # Quick check to see if we need to prune
    if ((!PRUNE_BY_SIZE || total_size_ <= max_size_) &&
        (!PRUNE_BY_N    || total_n_    <= max_n_   ) &&
        (!PRUNE_BY_AGE))
    {
      return(invisible(TRUE))
    }

    info <- get_metadata_()

    if (DEBUG) {
      # Sanity checks
      if (PRUNE_BY_SIZE && sum(info$size) != total_size_) {
        stop("Size mismatch")
      }
      if (length(info$key) != total_n_) {
        stop("Count mismatch")
      }
    }

    # 1. Remove any objects where the age exceeds max age.
    if (PRUNE_BY_AGE) {
      time <- as.numeric(Sys.time())
      timediff <- time - info$mtime
      rm_idx <- timediff > max_age_
      if (any(rm_idx)) {
        log_(paste0("prune max_age: Removing ", paste(info$key[rm_idx], collapse = ", ")))
        remove_(info$key[rm_idx])

        # Trim all the vectors (need to do each individually since we're using a
        # list of vectors instead of a data frame, for performance).
        info$key   <- info$key  [!rm_idx]
        info$size  <- info$size [!rm_idx]
        info$mtime <- info$mtime[!rm_idx]
        info$atime <- info$atime[!rm_idx]
      }
    }

    # 2. Remove objects if there are too many. The metadata comes back in
    # reversed order, so everything after the first max_n_ entries is removed.
    if (PRUNE_BY_N && length(info$key) > max_n_) {
      rm_idx <- seq_along(info$key) > max_n_
      log_(paste0("prune max_n: Removing ", paste(info$key[rm_idx], collapse = ", ")))
      remove_(info$key[rm_idx])

      info$key   <- info$key  [!rm_idx]
      info$size  <- info$size [!rm_idx]
      info$mtime <- info$mtime[!rm_idx]
      info$atime <- info$atime[!rm_idx]
    }

    # 3. Remove objects if cache is too large. Entries are removed from the
    # point where the cumulative size first exceeds max_size_.
    if (PRUNE_BY_SIZE && sum(info$size) > max_size_) {
      cum_size <- cumsum(info$size)
      rm_idx <- cum_size > max_size_
      log_(paste0("prune max_size: Removing ", paste(info$key[rm_idx], collapse = ", ")))
      remove_(info$key[rm_idx])

      # No need to trim vectors this time, since this is the last pruning step.
    }

    invisible(TRUE)
  }

  # Returns the number of items in the cache.
  size <- function() {
    if (PRUNE_BY_AGE) {
      # See note in keys() about why we prune here.
      prune()
    }
    if (DEBUG) {
      if (key_idx_map_$size() != total_n_) stop("n mismatch")
    }
    total_n_
  }

  # Returns the settings this cache was created with.
  info <- function() {
    list(
      max_size = max_size_,
      max_age = max_age_,
      max_n = max_n_,
      evict = evict_,
      missing = missing_,
      logfile = logfile_
    )
  }


  # ============================================================================
  # Private methods
  # ============================================================================

  # Called when get() with lru. If fifo, no need to update.
  update_atime_ <- function(key) {
    if (evict_ != "lru") return()

    idx <- key_idx_map_$get(key)
    time <- as.numeric(Sys.time())

    if (is.null(idx)) {
      stop("Can't update atime because entry doesn't exist")
    }

    if (MAINTAIN_TIME_SORT) {
      if (idx == last_idx_) {
        # last_idx_ entry; simply update time
        atime_[idx] <<- time
      } else {
        # "Move" this entry to the end.
        last_idx_ <<- last_idx_ + 1L
        # Add new entry to end. Fast on R 3.4 and above, slow on older versions.
        key_idx_map_$set(key, last_idx_)
        key_  [last_idx_]   <<- key
        value_[[last_idx_]] <<- value_[[idx]]
        size_ [last_idx_]   <<- size_ [idx]
        mtime_[last_idx_]   <<- mtime_[idx]
        atime_[last_idx_]   <<- time

        # Clear out old entry
        key_  [idx] <<- NA_character_
        value_[idx] <<- list(NULL)
        size_ [idx] <<- NA_real_
        mtime_[idx] <<- NA_real_
        atime_[idx] <<- NA_real_
      }

    } else {
      atime_[idx] <<- time
    }

  }


  # Removes one or more keys, then compacts the storage if it has become
  # sparse enough.
  remove_ <- function(keys) {
    if (length(keys) == 1) {
      remove_one_(keys)
    } else {
      vapply(keys, remove_one_, TRUE)
    }

    compact_()
  }

  # Removes a single key: updates the counters, blanks its row, and drops it
  # from the key-index map. Does nothing if the key is not present.
  remove_one_ <- function(key) {
    idx <- key_idx_map_$get(key)

    if (is.null(idx)) {
      return()
    }

    # Overall n and size bookkeeping
    total_n_ <<- total_n_ - 1L
    if (PRUNE_BY_SIZE) {
      total_size_ <<- total_size_ - size_[idx]
    }

    # Clear out entry
    key_  [idx] <<- NA_character_
    value_[idx] <<- list(NULL)
    size_ [idx] <<- NA_real_
    mtime_[idx] <<- NA_real_
    atime_[idx] <<- NA_real_

    key_idx_map_$remove(key)
  }

  # Rebuilds the storage vectors without gaps. Skipped while the used range is
  # within INITIAL_SIZE or within COMPACT_MULT times the number of live items.
  compact_ <- function() {
    if (last_idx_ <= INITIAL_SIZE || last_idx_ <= total_n_ * COMPACT_MULT) {
      return()
    }

    # Live rows are the ones whose key is not NA.
    from_idxs <- key_[seq_len(last_idx_)]
    from_idxs <- !is.na(from_idxs)
    from_idxs <- which(from_idxs)

    if (DEBUG) stopifnot(total_n_ == length(from_idxs))

    new_size <- max(INITIAL_SIZE, ceiling(total_n_ * COMPACT_MULT))

    # Allocate new vectors for metadata.
    new_key_   <- rep_len(NA_character_, new_size)
    new_value_ <- vector("list",         new_size)
    new_size_  <- rep_len(NA_real_,      new_size)
    new_mtime_ <- rep_len(NA_real_,      new_size)
    new_atime_ <- rep_len(NA_real_,      new_size)

    # Copy (and compact, removing gaps) from old vectors to new ones.
    to_idxs <- seq_len(total_n_)
    new_key_  [to_idxs] <- key_  [from_idxs]
    new_value_[to_idxs] <- value_[from_idxs]
    new_size_ [to_idxs] <- size_ [from_idxs]
    new_mtime_[to_idxs] <- mtime_[from_idxs]
    new_atime_[to_idxs] <- atime_[from_idxs]

    # Replace old vectors with new ones.
    key_   <<- new_key_
    value_ <<- new_value_
    size_  <<- new_size_
    mtime_ <<- new_mtime_
    atime_ <<- new_atime_

    # Update the index values in the key-index map.
    args <- to_idxs
    names(args) <- key_[to_idxs]
    key_idx_map_$mset(.list = args)

    last_idx_ <<- total_n_
  }

  # Returns a list of metadata vectors (key, size, mtime, atime) for the live
  # entries, with gaps removed.
  # When MAINTAIN_TIME_SORT is FALSE, the entries are first sorted by atime
  # (evict == "lru") or by mtime (evict == "fifo"); otherwise storage order is
  # used as-is. The order is then reversed before the vectors are built.
  get_metadata_ <- function() {
    idxs <- !is.na(mtime_[seq_len(last_idx_)])
    idxs <- which(idxs)

    if (!MAINTAIN_TIME_SORT) {
      if (evict_ == "lru") {
        idxs <- idxs[order(atime_[idxs])]
      } else {
        idxs <- idxs[order(mtime_[idxs])]
      }
    }

    idxs <- rev(idxs)

    # Return a list -- this basically same structure as a data frame, but
    # we're using a plain list to avoid data frame slowness
    list(
      key   = key_  [idxs],
      size  = size_ [idxs],
      mtime = mtime_[idxs],
      atime = atime_[idxs]
    )
  }

  # Appends a timestamped line to the log file; a no-op when no log file was
  # given.
  log_ <- function(text) {
    if (is.null(logfile_)) return()

    text <- paste0(format(Sys.time(), "[%Y-%m-%d %H:%M:%OS3] cache_mem "), text)
    cat(text, sep = "\n", file = logfile_, append = TRUE)
  }


  # Allocate the initial (empty) storage.
  reset()

  # ============================================================================
  # Returned object
  # ============================================================================
  structure(
    list(
      get = get,
      set = set,
      exists = exists,
      keys = keys,
      remove = remove,
      reset = reset,
      prune = prune,
      size = size,
      info = info
    ),
    class = c("cache_mem", "cachem")
  )
}
#' @export
format.cachem <- function(x, ...) {
  # One "<class>" tag per class, followed by one line per element of the object.
  paste0(
    paste0("<", class(x), ">", collapse= " "), "\n",
    " Methods:\n",
    paste0(
      " ", format_methods(x),
      collapse ="\n"
    )
  )
}

# Returns one string per element of `x`: "name(args)" for functions, and just
# the name for anything else.
format_methods <- function(x) {
  vapply(seq_along(x),
    function(i) {
      name <- names(x)[i]
      f <- x[[i]]
      if (is.function(f)) {
        paste0(name, "(", format_args(f), ")")
      } else {
        name
      }
    }, character(1)
  )
}

# Returns the formal arguments of function `x` as a single comma-separated
# string, e.g. "key, missing = missing_".
#
# An argument is shown without "= value" only when it really has no default.
# Defaults are deparsed, so an empty-string default is shown as `sep = ""`
# rather than being mistaken for "no default", and string defaults keep their
# quotes.
format_args <- function(x) {
  fmls <- formals(x)
  nms <- names(fmls)
  args <- vapply(seq_along(fmls), function(i) {
    # The empty symbol marks an argument with no default. It must not be bound
    # to a local variable, because evaluating that variable would signal a
    # "missing argument" error.
    if (identical(fmls[[i]], quote(expr = ))) {
      nms[i]
    } else {
      paste0(nms[i], " = ", paste(deparse(fmls[[i]]), collapse = " "))
    }
  }, character(1))
  paste(args, collapse = ", ")
}

#' @export
print.cachem <- function(x, ...) {
  # End with a newline so that the console prompt starts on its own line, and
  # return the object invisibly, as print methods are expected to.
  cat(format(x, ...), "\n", sep = "")
  invisible(x)
}
hex_digits <- c("0", "1", "2", "3", "4", "5", "6", "7", "8", "9",
                "a", "b", "c", "d", "e", "f")

# Returns a string of `digits` random lowercase hexadecimal characters.
random_hex <- function(digits = 16) {
  paste(sample(hex_digits, digits, replace = TRUE), collapse = "")
}


# Removes each directory in `path`, but only if it exists and is empty.
# Signals an error for a non-existent or non-empty directory, or if the
# removal itself fails.
dir_remove <- function(path) {
  for (p in path) {
    if (!dir.exists(p)) {
      stop("Cannot remove non-existent directory ", p, ".")
    }
    if (length(dir(p, all.files = TRUE, no.. = TRUE)) != 0) {
      stop("Cannot remove non-empty directory ", p, ".")
    }
    result <- unlink(p, recursive = TRUE)
    # unlink() returns 1 on failure.
    if (result == 1) {
      stop("Error removing directory ", p, ".")
    }
  }
}

# Returns an absolute version of `path` (a single string).
#
# normalizePath() returns its input unchanged in two different situations:
# when it cannot resolve the path, and when the path is already in normalized
# form. Only in the first situation, and only for a relative path, should the
# working directory be prepended. Without the absolute-path check, an
# already-normalized absolute path would get the working directory glued onto
# its front.
absolute_path <- function(path) {
  norm_path <- normalizePath(path, mustWork = FALSE)
  # Absolute forms: "/..." and "~..." (Unix), "C:/..." or "C:\..." (Windows
  # drive), and "\\..." (UNC).
  is_absolute <- grepl("^(/|~|[A-Za-z]:[/\\\\]|\\\\\\\\)", path)
  if (path == norm_path && !is_absolute) {
    file.path(getwd(), path)
  } else {
    norm_path
  }
}

validate_key <- function(key) {
  # This C function does the same as `grepl("[^a-z0-9_-]")`, but faster.
  .Call(C_validate_key, key)
}
35 | .Call(C_validate_key, key) 36 | } 37 | -------------------------------------------------------------------------------- /README.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | output: 3 | github_document: 4 | toc: true 5 | toc_depth: 3 6 | editor_options: 7 | chunk_output_type: console 8 | --- 9 | 10 | 11 | 12 | ```{r, include = FALSE} 13 | knitr::opts_chunk$set( 14 | collapse = TRUE, 15 | comment = "#>", 16 | fig.path = "man/figures/README-", 17 | out.width = "100%" 18 | ) 19 | ``` 20 | 21 | # cachem 22 | 23 | 24 | [![R build status](https://github.com/r-lib/cachem/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/r-lib/cachem/actions) 25 | 26 | 27 | The **cachem** R package provides objects for creating and managing caches. These cache objects are key-value stores, but unlike other basic key-value stores, they have built-in support for memory and age limits so that they won't have unbounded growth. 28 | 29 | The cache objects in **cachem** differ from some other key-value stores in the following ways: 30 | 31 | * The cache objects provide automatic pruning so that they remain within memory limits. 32 | * Fetching a non-existing object returns a sentinel value. An alternative is to simply return `NULL`. This is what R lists and environments do, but it is ambiguous whether the value really is `NULL`, or if it is not present. Another alternative is to throw an exception when fetching a non-existent object. However, this results in more complicated code, as every `get()` needs to be wrapped in a `tryCatch()`. 
33 | 34 | ## Installation 35 | 36 | To install the CRAN version: 37 | 38 | ```{r eval=FALSE} 39 | install.packages("cachem") 40 | ``` 41 | 42 | You can install the development version from GitHub with: 43 | 44 | ```{r eval=FALSE} 45 | if (!require("remotes")) install.packages("remotes") 46 | remotes::install_github("r-lib/cachem") 47 | ``` 48 | 49 | ## Usage 50 | 51 | To create a memory-based cache, call `cache_mem()`. 52 | 53 | ```{r} 54 | library(cachem) 55 | m <- cache_mem() 56 | ``` 57 | 58 | Add arbitrary R objects to the cache using `$set(key, value)`: 59 | 60 | ```{r} 61 | m$set("abc123", c("Hello", "world")) 62 | m$set("xyz", function() message("Goodbye")) 63 | ``` 64 | 65 | The `key` must be a string consisting of lowercase letters, numbers, and the underscore (`_`) and hyphen (`-`) characters. (Upper-case characters are not allowed because some storage backends do not distinguish between lowercase and uppercase letters.) The `value` can be any R object. 66 | 67 | Get the values with `$get()`: 68 | ``` r 69 | m$get("abc123") 70 | #> [1] "Hello" "world" 71 | 72 | m$get("xyz") 73 | #> function() message("Goodbye") 74 | ``` 75 | 76 | If you call `get()` on a key that doesn't exist, it will return a `key_missing()` sentinel value: 77 | 78 | ```{r} 79 | m$get("dog") 80 | ``` 81 | 82 | A common usage pattern is to call `get()`, and then check if the result is a `key_missing` object: 83 | 84 | ```{r eval=FALSE} 85 | value <- m$get(key) 86 | 87 | if (is.key_missing(value)) { 88 | # Cache miss - do something 89 | } else { 90 | # Cache hit - do another thing 91 | } 92 | ``` 93 | 94 | The reason for doing this (instead of calling `$exists(key)` and then `$get(key)`) is that for some storage backends, there is a potential race condition: the object could be removed from the cache between the `exists()` and `get()` calls. 
For example: 95 | 96 | * If multiple R processes have `cache_disk`s that share the same directory, one process could remove an object from the cache in between the `exists()` and `get()` calls in another process, resulting in an error. 97 | * If you use a `cache_mem` with a `max_age`, it's possible for an object to be present when you call `exists()`, but for its age to exceed `max_age` by the time `get()` is called. In that case, the `get()` will return a `key_missing()` object. 98 | 99 | ```{r eval=FALSE} 100 | # Avoid this pattern, due to a potential race condition! 101 | if (m$exists(key)) { 102 | value <- m$get(key) 103 | } 104 | ``` 105 | 106 | 107 | ## Cache types 108 | 109 | **cachem** comes with two kinds of cache objects: a memory cache, and a disk cache. 110 | 111 | ### `cache_mem()` 112 | 113 | The memory cache stores objects in memory, by simply keeping a reference to each object. To create a memory cache: 114 | 115 | ```{r} 116 | m <- cache_mem() 117 | ``` 118 | 119 | The default size of the cache is 512MB, but this can be customized with `max_size`: 120 | 121 | ```{r} 122 | m <- cache_mem(max_size = 10 * 1024^2) 123 | ``` 124 | 125 | It may also be useful to set a maximum age of objects. For example, if you only want objects to stay for a maximum of one hour: 126 | 127 | ```{r} 128 | m <- cache_mem(max_size = 10 * 1024^2, max_age = 3600) 129 | ``` 130 | 131 | For more about how objects are evicted from the cache, see section [Pruning](#pruning) below. 132 | 133 | An advantage that the memory cache has over the disk cache (and any other type of cache that stores the objects outside of the R process's memory), is that it does not need to serialize objects. Instead, it merely stores references to the objects. 
This means that it can store objects that other caches cannot, and with more efficient use of memory -- if two objects in the cache share some of their contents (such that they refer to the same sub-object in memory), then `cache_mem` will not create duplicate copies of the contents, as `cache_disk` would, since it serializes the objects with the `serialize()` function. 134 | 135 | Compared to the memory usage, the size _calculation_ is not as intelligent: if there are two objects that share contents, their sizes are computed separately, even if they have items that share the exact same representation in memory. This is done with the `object.size()` function, which does not account for multiple references to the same object in memory. 136 | 137 | In short, a memory cache, if anything, over-counts the amount of memory actually consumed. In practice, this means that if you set a 200MB limit to the size of cache, and the cache _thinks_ it has 200MB of contents, the actual amount of memory consumed could be less than 200MB. 138 | 139 |
140 | Demonstration of memory over-counting from `object.size()` 141 | 142 | ```{r} 143 | # Create a and b which both contain the same numeric vector. 144 | x <- list(rnorm(1e5)) 145 | a <- list(1, x) 146 | b <- list(2, x) 147 | 148 | # Add to cache 149 | m$set("a", a) 150 | m$set("b", b) 151 | 152 | # Each object is about 800kB in memory, so the cache_mem() will consider the 153 | # total memory used to be 1600kB. 154 | object.size(m$get("a")) 155 | object.size(m$get("b")) 156 | ``` 157 | 158 | For reference, lobstr::obj_size can detect shared objects, and knows that these objects share most of their memory. 159 | 160 | ```{r} 161 | lobstr::obj_size(m$get("a")) 162 | lobstr::obj_size(list(m$get("a"), m$get("b"))) 163 | ``` 164 | 165 | However, lobstr is not on CRAN, and if obj_size() were used to find the incremental memory used when an object was added to the cache, it would have to walk all objects in the cache every time a single object is added. For these reasons, cache_mem uses `object.size()` to compute the object sizes. 166 | 167 |
168 | 169 | ### `cache_disk()` 170 | 171 | Disk caches are stored in a directory on disk. A disk cache is slower than a memory cache, but can generally be larger. To create one: 172 | 173 | ```{r} 174 | d <- cache_disk() 175 | ``` 176 | 177 | By default, it creates a subdirectory of the R process's temp directory, and it will persist until the R process exits. 178 | 179 | ``` r 180 | d$info()$dir 181 | #> "/tmp/Rtmp6h5iB3/cache_disk-d1901b2b615a" 182 | ``` 183 | 184 | Like a `cache_mem`, the `max_size`, `max_n`, `max_age` can be customized. See section [Pruning](#pruning) below for more information. 185 | 186 | Each object in the cache is stored as an RDS file on disk, using the `serialize()` function. 187 | 188 | ```{r} 189 | d$set("abc", 100) 190 | d$set("x01", list(1, 2, 3)) 191 | 192 | dir(d$info()$dir) 193 | ``` 194 | 195 | Since objects in a disk cache are serialized, they are subject to the limitations of the `serialize()` function. For more information, see section [Limitations of serialized objects](#limitations-of-serialized-objects). 196 | 197 | The storage directory can be specified with `dir`; it will be created if necessary. 198 | 199 | ``` r 200 | cache_disk(dir = "cachedir") 201 | ``` 202 | 203 | #### Sharing a disk cache among processes 204 | 205 | Multiple R processes can use `cache_disk` objects that share the same cache directory. To do this, simply point each `cache_disk` to the same directory. 206 | 207 | 208 | #### `cache_disk` pruning 209 | 210 | For a `cache_disk`, pruning does not happen on every access, because finding the size of files in the cache directory can take a nontrivial amount of time. By default, pruning happens once every 20 times that `$set()` is called, or if at least five seconds have elapsed since the last pruning. The `prune_rate` controls how many times `$set()` must be called before a pruning occurs. 
It defaults to 20; smaller values result in more frequent pruning and larger values result in less frequent pruning (but keep in mind pruning always occurs if it has been at least five seconds since the last pruning). 211 | 212 | 213 | #### Cleaning up the cache directory 214 | 215 | The cache directory can be deleted by calling `$destroy()`. After it is destroyed, the cache object can no longer be used. 216 | 217 | ``` r 218 | d$destroy() 219 | d$set("a", 1) # Error 220 | ``` 221 | 222 | To create a `cache_disk` that will automatically delete its storage directory when garbage collected, use `destroy_on_finalize=TRUE`: 223 | 224 | ``` r 225 | d <- cache_disk(destroy_on_finalize = TRUE) 226 | d$set("a", 1) 227 | 228 | cachedir <- d$info()$dir 229 | dir(cachedir) 230 | #> [1] "a.rds" 231 | 232 | # Remove reference to d and trigger a garbage collection 233 | rm(d) 234 | gc() 235 | 236 | dir.exists(cachedir) 237 | ``` 238 | 239 | #### Using custom serialization functions 240 | 241 | It is possible to use custom serialization functions rather than the default of `writeRDS()` and `readRDS()` with the `write_fn`, `read_fn` and `extension` arguments respectively. This could be used to use alternative serialization formats like [qs](https://github.com/traversc/qs), or specialized object formats [fst](http://www.fstpackage.org/fst/) or parquet. 242 | 243 | ``` r 244 | library(qs) 245 | 246 | d <- cache_disk(read_fn = qs::qread, write_fn = qs::qsave, extension = ".qs") 247 | 248 | d$set("a", list(1, 2, 3)) 249 | 250 | cachedir <- d$info()$dir 251 | dir(cachedir) 252 | #> [1] "a.qs" 253 | d$get("a") 254 | #> [[1]] 255 | #> [1] 1 256 | #> 257 | #> [[2]] 258 | #> [1] 2 259 | #> 260 | #> [[3]] 261 | #> [1] 3 262 | ``` 263 | 264 | ## Cache API 265 | 266 | `cache_mem()` and `cache_disk()` support all of the methods listed below. 
If you want to create a compatible caching object, it must have at least the `get()` and `set()` methods: 267 | 268 | * `get(key, missing = missing_)`: Get the object associated with `key`. The `missing` parameter allows customized behavior if the key is not present: it actually is an expression which is evaluated when there is a cache miss, and it could return a value or throw an error. 269 | * `set(key, value)`: Set a key to a value. 270 | * `exists(key)`: Check whether a particular key exists in the cache. 271 | * `remove(key)`: Remove a key-value from the cache. 272 | 273 | Some optional methods: 274 | 275 | * `reset()`: Clear all objects from the cache. 276 | * `keys()`: Return a character vector of all keys in the cache. 277 | * `prune()`: Prune the cache. (Some types of caches may not prune on every access, and may temporarily grow past their limits, until the next pruning is triggered automatically, or manually with this function.) 278 | * `size()`: Return the number of objects in the cache. 280 | 281 | For these methods: 282 | 283 | * `key`: can be any string with lowercase letters, numbers, underscore (`_`) and hyphen (`-`). Some storage backends may not handle very long keys well. For example, with a `cache_disk()`, the key is used as a filename, and on some filesystems, very long filenames may hit limits on path lengths. 284 | * `value`: can be any R object, with some exceptions noted below. 285 | 286 | 287 | #### Limitations of serialized objects 288 | 289 | For any cache that serializes the object for storage outside of the R process -- in other words, any cache other than a `cache_mem()` -- some types of objects will not save and restore as well. Notably, reference objects may consume more memory when restored, since R may not know to deduplicate shared objects. External pointers cannot be serialized, since they point to memory in the R process. 
See `?serialize` for more information. 290 | 291 | 292 | #### Read-only caches 293 | 294 | It is possible to create a read-only cache by making the `set()`, `remove()`, `reset()`, and `prune()` methods into no-ops. This can be useful if sharing a cache with another R process which can write to the cache. For example, one (or more) processes can write to the cache, and other processes can read from it. 295 | 296 | This function will wrap a cache object in a read-only wrapper. Note, however, that code that uses such a cache must not require that `$set()` actually sets a value in the cache. This is good practice anyway, because with these cache objects, items can be pruned from them at any time. 297 | 298 | ```{r} 299 | cache_readonly_wrap <- function(cache) { 300 | structure( 301 | list( 302 | get = cache$get, 303 | set = function(key, value) NULL, 304 | exists = cache$exists, 305 | keys = cache$keys, 306 | remove = function(key) NULL, 307 | reset = function() NULL, 308 | prune = function() NULL, 309 | size = cache$size 310 | ), 311 | class = c("cache_readonly", class(cache)) 312 | ) 313 | } 314 | 315 | mr <- cache_readonly_wrap(m) 316 | ``` 317 | 318 | 319 | ## Pruning 320 | 321 | The cache objects provided by cachem have automatic pruning. (Note that pruning is not required by the API, so one could implement an API-compatible cache without pruning.) 322 | 323 | This section describes how pruning works for `cache_mem()` and `cache_disk()`. 324 | 325 | When the cache object is created, the maximum size (in bytes) is specified by `max_size`. When the size of objects in the cache exceeds `max_size`, objects will be pruned from the cache. 326 | 327 | When objects are pruned from the cache, which ones are removed is determined by the eviction policy, `evict`: 328 | 329 | * **`lru`**: The least-recently-used objects will be removed from the cache, until it fits within the limit. This is the default and is appropriate for most cases. 
330 | * **`fifo`**: The oldest objects will be removed first. 331 | 332 | It is also possible to set the maximum number of items that can be in the cache, with `max_n`. By default this is set to `Inf`, or no limit. 333 | 334 | The `max_age` parameter is somewhat different from `max_size` and `max_n`. The latter two set limits on the cache store as a whole, whereas `max_age` sets limits for each individual item; for each item, if its age exceeds `max_age`, then it will be removed from the cache. 335 | 336 | 337 | ## Layered caches 338 | 339 | Multiple caches can be composed into a single cache, using `cache_layered()`. This can be used to create a multi-level cache. (Note that `cache_layered()` is currently experimental.) For example, we can create a layered cache with a very fast 100MB memory cache and a larger but slower 2GB disk cache: 340 | 341 | 342 | ```{r} 343 | m <- cache_mem(max_size = 100 * 1024^2) 344 | d <- cache_disk(max_size = 2 * 1024^3) 345 | 346 | cl <- cache_layered(m, d) 347 | ``` 348 | 349 | The layered cache will have the same API, with `$get()`, `$set()`, and so on, so it can be used interchangeably with other caching objects. 350 | 351 | For this example, we'll recreate the `cache_layered` with logging enabled, so that it will show cache hits and misses. 352 | 353 | ``` r 354 | cl <- cache_layered(m, d, logfile = stderr()) 355 | 356 | # Each of the objects generated by rnorm() is about 40 MB 357 | cl$set("a", rnorm(5e6)) 358 | cl$set("b", rnorm(5e6)) 359 | cl$set("c", rnorm(5e6)) 360 | 361 | # View the objects in each of the component caches 362 | m$keys() 363 | #> [1] "c" "b" 364 | d$keys() 365 | #> [1] "a" "b" "c" 366 | 367 | # The layered cache reports having all keys 368 | cl$keys() 369 | #> [1] "c" "b" "a" 370 | ``` 371 | 372 | When `$get()` is called, it searches the first cache, and if it's missing there, it searches the next cache, and so on. If not found in any caches, it returns `key_missing()`. 
373 | 374 | ``` r 375 | # Get object that exists in the memory cache 376 | x <- cl$get("c") 377 | #> [2020-10-23 13:11:09.985] cache_layered Get: c 378 | #> [2020-10-23 13:11:09.985] cache_layered Get from cache_mem... hit 379 | 380 | # Get object that doesn't exist in the memory cache 381 | x <- cl$get("a") 382 | #> [2020-10-23 13:13:10.968] cache_layered Get: a 383 | #> [2020-10-23 13:13:10.969] cache_layered Get from cache_mem... miss 384 | #> [2020-10-23 13:13:11.329] cache_layered Get from cache_disk... hit 385 | 386 | # Object is not present in any component caches 387 | cl$get("d") 388 | #> [2020-10-23 13:13:40.197] cache_layered Get: d 389 | #> [2020-10-23 13:13:40.197] cache_layered Get from cache_mem... miss 390 | #> [2020-10-23 13:13:40.198] cache_layered Get from cache_disk... miss 391 | #> 392 | ``` 393 | 394 | Multiple cache objects can be layered this way. You could even add a cache which uses a remote store, such as a network file system or even AWS S3. 395 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | - [cachem](#cachem) 3 | - [Installation](#installation) 4 | - [Usage](#usage) 5 | - [Cache types](#cache-types) 6 | - [`cache_mem()`](#cache_mem) 7 | - [`cache_disk()`](#cache_disk) 8 | - [Cache API](#cache-api) 9 | - [Pruning](#pruning) 10 | - [Layered caches](#layered-caches) 11 | 12 | 13 | 14 | # cachem 15 | 16 | 17 | 18 | [![R build 19 | status](https://github.com/r-lib/cachem/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/r-lib/cachem/actions) 20 | 21 | 22 | The **cachem** R package provides objects creating and managing caches. 23 | These cache objects are key-value stores, but unlike other basic 24 | key-value stores, they have built-in support for memory and age limits 25 | so that they won’t have unbounded growth. 
26 | 27 | The cache objects in **cachem** differ from some other key-value stores 28 | in the following ways: 29 | 30 | - The cache objects provide automatic pruning so that they remain within 31 | memory limits. 32 | - Fetching a non-existing object returns a sentinel value. An 33 | alternative is to simply return `NULL`. This is what R lists and 34 | environments do, but it is ambiguous whether the value really is 35 | `NULL`, or if it is not present. Another alternative is to throw an 36 | exception when fetching a non-existent object. However, this results 37 | in more complicated code, as every `get()` needs to be wrapped in a 38 | `tryCatch()`. 39 | 40 | ## Installation 41 | 42 | To install the CRAN version: 43 | 44 | ``` r 45 | install.packages("cachem") 46 | ``` 47 | 48 | You can install the development version from GitHub with: 49 | 50 | ``` r 51 | if (!require("remotes")) install.packages("remotes") 52 | remotes::install_github("r-lib/cachem") 53 | ``` 54 | 55 | ## Usage 56 | 57 | To create a memory-based cache, call `cache_mem()`. 58 | 59 | ``` r 60 | library(cachem) 61 | m <- cache_mem() 62 | ``` 63 | 64 | Add arbitrary R objects to the cache using `$set(key, value)`: 65 | 66 | ``` r 67 | m$set("abc123", c("Hello", "world")) 68 | m$set("xyz", function() message("Goodbye")) 69 | ``` 70 | 71 | The `key` must be a string consisting of lowercase letters, numbers, and 72 | the underscore (`_`) and hyphen (`-`) characters. (Upper-case characters 73 | are not allowed because some storage backends do not distinguish between 74 | lowercase and uppercase letters.) The `value` can be any R object. 
75 | 76 | Get the values with `$get()`: 77 | 78 | ``` r 79 | m$get("abc123") 80 | #> [1] "Hello" "world" 81 | 82 | m$get("xyz") 83 | #> function() message("Goodbye") 84 | ``` 85 | 86 | If you call `get()` on a key that doesn’t exist, it will return a 87 | `key_missing()` sentinel value: 88 | 89 | ``` r 90 | m$get("dog") 91 | #> 92 | ``` 93 | 94 | A common usage pattern is to call `get()`, and then check if the result 95 | is a `key_missing` object: 96 | 97 | ``` r 98 | value <- m$get(key) 99 | 100 | if (is.key_missing(value)) { 101 | # Cache miss - do something 102 | } else { 103 | # Cache hit - do another thing 104 | } 105 | ``` 106 | 107 | The reason for doing this (instead of calling `$exists(key)` and then 108 | `$get(key)`) is that for some storage backends, there is a potential 109 | race condition: the object could be removed from the cache between the 110 | `exists()` and `get()` calls. For example: 111 | 112 | - If multiple R processes have `cache_disk`s that share the same 113 | directory, one process could remove an object from the cache in 114 | between the `exists()` and `get()` calls in another process, resulting 115 | in an error. 116 | - If you use a `cache_mem` with a `max_age`, it’s possible for an object 117 | to be present when you call `exists()`, but for its age to exceed 118 | `max_age` by the time `get()` is called. In that case, the `get()` 119 | will return a `key_missing()` object. 120 | 121 | ``` r 122 | # Avoid this pattern, due to a potential race condition! 123 | if (m$exists(key)) { 124 | value <- m$get(key) 125 | } 126 | ``` 127 | 128 | ## Cache types 129 | 130 | **cachem** comes with two kinds of cache objects: a memory cache, and a 131 | disk cache. 132 | 133 | ### `cache_mem()` 134 | 135 | The memory cache stores objects in memory, by simply keeping a 136 | reference to each object. 
To create a memory cache: 137 | 138 | ``` r 139 | m <- cache_mem() 140 | ``` 141 | 142 | The default size of the cache is 200MB, but this can be customized with 143 | `max_size`: 144 | 145 | ``` r 146 | m <- cache_mem(max_size = 10 * 1024^2) 147 | ``` 148 | 149 | It may also be useful to set a maximum age of objects. For example, if 150 | you only want objects to stay for a maximum of one hour: 151 | 152 | ``` r 153 | m <- cache_mem(max_size = 10 * 1024^2, max_age = 3600) 154 | ``` 155 | 156 | For more about how objects are evicted from the cache, see section 157 | [Pruning](#pruning) below. 158 | 159 | An advantage that the memory cache has over the disk cache (and any 160 | other type of cache that stores the objects outside of the R process’s 161 | memory), is that it does not need to serialize objects. Instead, it 162 | merely stores references to the objects. This means that it can store 163 | objects that other caches cannot, and with more efficient use of memory 164 | – if two objects in the cache share some of their contents (such that 165 | they refer to the same sub-object in memory), then `cache_mem` will not 166 | create duplicate copies of the contents, as `cache_disk` would, since it 167 | serializes the objects with the `serialize()` function. 168 | 169 | Compared to the memory usage, the size *calculation* is not as 170 | intelligent: if there are two objects that share contents, their sizes 171 | are computed separately, even if they have items that share the exact 172 | same representation in memory. This is done with the `object.size()` 173 | function, which does not account for multiple references to the same 174 | object in memory. 175 | 176 | In short, a memory cache, if anything, over-counts the amount of memory 177 | actually consumed. In practice, this means that if you set a 200MB limit 178 | to the size of cache, and the cache *thinks* it has 200MB of contents, 179 | the actual amount of memory consumed could be less than 200MB. 180 | 181 |
182 | 183 | Demonstration of memory over-counting from `object.size()` 184 | 185 | 186 | ``` r 187 | # Create a and b which both contain the same numeric vector. 188 | x <- list(rnorm(1e5)) 189 | a <- list(1, x) 190 | b <- list(2, x) 191 | 192 | # Add to cache 193 | m$set("a", a) 194 | m$set("b", b) 195 | 196 | # Each object is about 800kB in memory, so the cache_mem() will consider the 197 | # total memory used to be 1600kB. 198 | object.size(m$get("a")) 199 | #> 800224 bytes 200 | object.size(m$get("b")) 201 | #> 800224 bytes 202 | ``` 203 | 204 | For reference, lobstr::obj_size can detect shared objects, and knows 205 | that these objects share most of their memory. 206 | 207 | ``` r 208 | lobstr::obj_size(m$get("a")) 209 | #> 800.22 kB 210 | lobstr::obj_size(list(m$get("a"), m$get("b"))) 211 | #> 800.41 kB 212 | ``` 213 | 214 | However, lobstr is not on CRAN, and if obj_size() were used to find the 215 | incremental memory used when an object was added to the cache, it would 216 | have to walk all objects in the cache every time a single object is 217 | added. For these reasons, cache_mem uses `object.size()` to compute the 218 | object sizes. 219 | 220 |
221 | 222 | ### `cache_disk()` 223 | 224 | Disk caches are stored in a directory on disk. A disk cache is slower 225 | than a memory cache, but can generally be larger. To create one: 226 | 227 | ``` r 228 | d <- cache_disk() 229 | ``` 230 | 231 | By default, it creates a subdirectory of the R process’s temp directory, 232 | and it will persist until the R process exits. 233 | 234 | ``` r 235 | d$info()$dir 236 | #> "/tmp/Rtmp6h5iB3/cache_disk-d1901b2b615a" 237 | ``` 238 | 239 | Like a `cache_mem`, the `max_size`, `max_n`, `max_age` can be 240 | customized. See section [Pruning](#pruning) below for more information. 241 | 242 | Each object in the cache is stored as an RDS file on disk, using the 243 | `serialize()` function. 244 | 245 | ``` r 246 | d$set("abc", 100) 247 | d$set("x01", list(1, 2, 3)) 248 | 249 | dir(d$info()$dir) 250 | #> [1] "abc.rds" "x01.rds" 251 | ``` 252 | 253 | Since objects in a disk cache are serialized, they are subject to the 254 | limitations of the `serialize()` function. For more information, see 255 | section [Limitations of serialized 256 | objects](#limitations-of-serialized-objects). 257 | 258 | The storage directory can be specified with `dir`; it will be created if 259 | necessary. 260 | 261 | ``` r 262 | cache_disk(dir = "cachedir") 263 | ``` 264 | 265 | #### Sharing a disk cache among processes 266 | 267 | Multiple R processes can use `cache_disk` objects that share the same 268 | cache directory. To do this, simply point each `cache_disk` to the same 269 | directory. 270 | 271 | #### `cache_disk` pruning 272 | 273 | For a `cache_disk`, pruning does not happen on every access, because 274 | finding the size of files in the cache directory can take a nontrivial 275 | amount of time. By default, pruning happens once every 20 times that 276 | `$set()` is called, or if at least five seconds have elapsed since the 277 | last pruning. The `prune_rate` controls how many times `$set()` must be 278 | called before a pruning occurs. 
It defaults to 20; smaller values result 279 | in more frequent pruning and larger values result in less frequent 280 | pruning (but keep in mind pruning always occurs if it has been at least 281 | five seconds since the last pruning). 282 | 283 | #### Cleaning up the cache directory 284 | 285 | The cache directory can be deleted by calling `$destroy()`. After it is 286 | destroyed, the cache object can no longer be used. 287 | 288 | ``` r 289 | d$destroy() 290 | d$set("a", 1) # Error 291 | ``` 292 | 293 | To create a `cache_disk` that will automatically delete its storage 294 | directory when garbage collected, use `destroy_on_finalize=TRUE`: 295 | 296 | ``` r 297 | d <- cache_disk(destroy_on_finalize = TRUE) 298 | d$set("a", 1) 299 | 300 | cachedir <- d$info()$dir 301 | dir(cachedir) 302 | #> [1] "a.rds" 303 | 304 | # Remove reference to d and trigger a garbage collection 305 | rm(d) 306 | gc() 307 | 308 | dir.exists(cachedir) 309 | ``` 310 | 311 | #### Using custom serialization functions 312 | 313 | It is possible to use custom serialization functions rather than the 314 | default of `writeRDS()` and `readRDS()` with the `write_fn`, `read_fn` 315 | and `extension` arguments respectively. This could be used to use 316 | alternative serialization formats like 317 | [qs](https://github.com/traversc/qs), or specialized object formats 318 | [fst](http://www.fstpackage.org/fst/) or parquet. 319 | 320 | ``` r 321 | library(qs) 322 | 323 | d <- cache_disk(read_fn = qs::qread, write_fn = qs::qsave, extension = ".qs") 324 | 325 | d$set("a", list(1, 2, 3)) 326 | 327 | cachedir <- d$info()$dir 328 | dir(cachedir) 329 | #> [1] "a.qs" 330 | d$get("a") 331 | #> [[1]] 332 | #> [1] 1 333 | #> 334 | #> [[2]] 335 | #> [1] 2 336 | #> 337 | #> [[3]] 338 | #> [1] 3 339 | ``` 340 | 341 | ## Cache API 342 | 343 | `cache_mem()` and `cache_disk()` support all of the methods listed 344 | below. 
If you want to create a compatible caching object, it must have 345 | at least the `get()` and `set()` methods: 346 | 347 | - `get(key, missing = missing_)`: Get the object associated with `key`. 348 | The `missing` parameter allows customized behavior if the key is not 349 | present: it actually is an expression which is evaluated when there is 350 | a cache miss, and it could return a value or throw an error. 351 | - `set(key, value)`: Set a key to a value. 352 | - `exists(key)`: Check whether a particular key exists in the cache. 353 | - `remove(key)`: Remove a key-value from the cache. 354 | 355 | Some optional methods: 356 | 357 | - `reset()`: Clear all objects from the cache. 358 | - `keys()`: Return a character vector of all keys in the cache. 359 | - `prune()`: Prune the cache. (Some types of caches may not prune on 360 | every access, and may temporarily grow past their limits, until the 361 | next pruning is triggered automatically, or manually with this 362 | function.) 363 | - `size()`: Return the number of objects in the cache. 364 | 365 | For these methods: 366 | 367 | - `key`: can be any string with lowercase letters, numbers, underscore 368 | (`_`) and hyphen (`-`). Some storage backends may not handle very 369 | long keys well. For example, with a `cache_disk()`, the key is used as 370 | a filename, and on some filesystems, very long filenames may hit limits on 371 | path lengths. 372 | - `value`: can be any R object, with some exceptions noted below. 373 | 374 | #### Limitations of serialized objects 375 | 376 | For any cache that serializes the object for storage outside of the R 377 | process – in other words, any cache other than a `cache_mem()` – some 378 | types of objects will not save and restore as well. Notably, reference 379 | objects may consume more memory when restored, since R may not know to 380 | deduplicate shared objects. External pointers cannot be 381 | serialized, since they point to memory in the R process. 
See 382 | `?serialize` for more information. 383 | 384 | #### Read-only caches 385 | 386 | It is possible to create a read-only cache by making the `set()`, 387 | `remove()`, `reset()`, and `prune()` methods into no-ops. This can be 388 | useful if sharing a cache with another R process which can write to the 389 | cache. For example, one (or more) processes can write to the cache, and 390 | other processes can read from it. 391 | 392 | This function will wrap a cache object in a read-only wrapper. Note, 393 | however, that code that uses such a cache must not require that `$set()` 394 | actually sets a value in the cache. This is good practice anyway, 395 | because with these cache objects, items can be pruned from them at any 396 | time. 397 | 398 | ``` r 399 | cache_readonly_wrap <- function(cache) { 400 | structure( 401 | list( 402 | get = cache$get, 403 | set = function(key, value) NULL, 404 | exists = cache$exists, 405 | keys = cache$keys, 406 | remove = function(key) NULL, 407 | reset = function() NULL, 408 | prune = function() NULL, 409 | size = cache$size 410 | ), 411 | class = c("cache_readonly", class(cache)) 412 | ) 413 | } 414 | 415 | mr <- cache_readonly_wrap(m) 416 | ``` 417 | 418 | ## Pruning 419 | 420 | The cache objects provided by cachem have automatic pruning. (Note that 421 | pruning is not required by the API, so one could implement an 422 | API-compatible cache without pruning.) 423 | 424 | This section describes how pruning works for `cache_mem()` and 425 | `cache_disk()`. 426 | 427 | When the cache object is created, the maximum size (in bytes) is 428 | specified by `max_size`. When the size of objects in the cache exceeds 429 | `max_size`, objects will be pruned from the cache. 430 | 431 | When objects are pruned from the cache, which ones are removed is 432 | determined by the eviction policy, `evict`: 433 | 434 | - **`lru`**: The least-recently-used objects will be removed from the 435 | cache, until it fits within the limit. 
This is the default and is 436 | appropriate for most cases. 437 | - **`fifo`**: The oldest objects will be removed first. 438 | 439 | It is also possible to set the maximum number of items that can be in 440 | the cache, with `max_n`. By default this is set to `Inf`, or no limit. 441 | 442 | The `max_age` parameter is somewhat different from `max_size` and 443 | `max_n`. The latter two set limits on the cache store as a whole, 444 | whereas `max_age` sets limits for each individual item; for each item, 445 | if its age exceeds `max_age`, then it will be removed from the cache. 446 | 447 | ## Layered caches 448 | 449 | Multiple caches can be composed into a single cache, using 450 | `cache_layered()`. This can be used to create a multi-level cache. (Note 451 | thate `cache_layered()` is currently experimental.) For example, we can 452 | create a layered cache with a very fast 100MB memory cache and a larger 453 | but slower 2GB disk cache: 454 | 455 | ``` r 456 | m <- cache_mem(max_size = 100 * 1024^2) 457 | d <- cache_disk(max_size = 2 * 1024^3) 458 | 459 | cl <- cache_layered(m, d) 460 | ``` 461 | 462 | The layered cache will have the same API, with `$get()`, `$set()`, and 463 | so on, so it can be used interchangeably with other caching objects. 464 | 465 | For this example, we’ll recreate the `cache_layered` with logging 466 | enabled, so that it will show cache hits and misses. 
467 | 468 | ``` r 469 | cl <- cache_layered(m, d, logfile = stderr()) 470 | 471 | # Each of the objects generated by rnorm() is about 40 MB 472 | cl$set("a", rnorm(5e6)) 473 | cl$set("b", rnorm(5e6)) 474 | cl$set("c", rnorm(5e6)) 475 | 476 | # View the objects in each of the component caches 477 | m$keys() 478 | #> [1] "c" "b" 479 | d$keys() 480 | #> [1] "a" "b" "c" 481 | 482 | # The layered cache reports having all keys 483 | cl$keys() 484 | #> [1] "c" "b" "a" 485 | ``` 486 | 487 | When `$get()` is called, it searches the first cache, and if it’s 488 | missing there, it searches the next cache, and so on. If not found in 489 | any caches, it returns `key_missing()`. 490 | 491 | ``` r 492 | # Get object that exists in the memory cache 493 | x <- cl$get("c") 494 | #> [2020-10-23 13:11:09.985] cache_layered Get: c 495 | #> [2020-10-23 13:11:09.985] cache_layered Get from cache_mem... hit 496 | 497 | # Get object that doesn't exist in the memory cache 498 | x <- cl$get("a") 499 | #> [2020-10-23 13:13:10.968] cache_layered Get: a 500 | #> [2020-10-23 13:13:10.969] cache_layered Get from cache_mem... miss 501 | #> [2020-10-23 13:13:11.329] cache_layered Get from cache_disk... hit 502 | 503 | # Object is not present in any component caches 504 | cl$get("d") 505 | #> [2020-10-23 13:13:40.197] cache_layered Get: d 506 | #> [2020-10-23 13:13:40.197] cache_layered Get from cache_mem... miss 507 | #> [2020-10-23 13:13:40.198] cache_layered Get from cache_disk... miss 508 | #> 509 | ``` 510 | 511 | Multiple cache objects can be layered this way. You could even add a 512 | cache which uses a remote store, such as a network file system or even 513 | AWS S3. 
514 | -------------------------------------------------------------------------------- /_pkgdown.yml: -------------------------------------------------------------------------------- 1 | url: https://cachem.r-lib.org 2 | 3 | template: 4 | bootstrap: 5 5 | -------------------------------------------------------------------------------- /cachem.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: No 4 | SaveWorkspace: No 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | 15 | AutoAppendNewline: Yes 16 | StripTrailingWhitespace: Yes 17 | 18 | BuildType: Package 19 | PackageUseDevtools: Yes 20 | PackageInstallArgs: --no-multiarch --with-keep.source 21 | PackageRoxygenize: rd,collate,namespace 22 | -------------------------------------------------------------------------------- /cran-comments.md: -------------------------------------------------------------------------------- 1 | ## Comments 2 | 3 | #### 2021-8-19 4 | 5 | Bug fixes. 6 | 7 | Thank you, 8 | Winston 9 | 10 | 11 | ## Test environments 12 | 13 | * GitHub Actions - https://github.com/r-lib/cachem/pull/16/checks 14 | * macOS 15 | * devel, release 16 | * windows 17 | * release, 3.6 18 | * ubuntu20 19 | * devel, release, oldrel/1, oldrel/2, oldrel/3, oldrel/4 20 | * devtools:: 21 | * check_win_devel() 22 | * check_win_release() 23 | * check_win_oldrelease() 24 | 25 | 0 errors ✔ | 0 warnings ✔ | 0 notes ✔ 26 | 27 | 28 | ## revdepcheck results 29 | 30 | We checked 6 reverse dependencies, comparing R CMD check results across CRAN and dev versions of this package. 
31 | 32 | * We saw 0 new problems 33 | * We failed to check 0 packages 34 | -------------------------------------------------------------------------------- /man/cache_disk.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/cache-disk.R 3 | \name{cache_disk} 4 | \alias{cache_disk} 5 | \title{Create a disk cache object} 6 | \usage{ 7 | cache_disk( 8 | dir = NULL, 9 | max_size = 1024 * 1024^2, 10 | max_age = Inf, 11 | max_n = Inf, 12 | evict = c("lru", "fifo"), 13 | destroy_on_finalize = FALSE, 14 | read_fn = NULL, 15 | write_fn = NULL, 16 | extension = ".rds", 17 | missing = key_missing(), 18 | prune_rate = 20, 19 | warn_ref_objects = FALSE, 20 | logfile = NULL 21 | ) 22 | } 23 | \arguments{ 24 | \item{dir}{Directory to store files for the cache. If \code{NULL} (the default) it 25 | will create and use a temporary directory.} 26 | 27 | \item{max_size}{Maximum size of the cache, in bytes. If the cache exceeds 28 | this size, cached objects will be removed according to the value of the 29 | \code{evict}. Use \code{Inf} for no size limit. The default is 1 gigabyte.} 30 | 31 | \item{max_age}{Maximum age of files in cache before they are evicted, in 32 | seconds. Use \code{Inf} for no age limit.} 33 | 34 | \item{max_n}{Maximum number of objects in the cache. If the number of objects 35 | exceeds this value, then cached objects will be removed according to the 36 | value of \code{evict}. Use \code{Inf} for no limit of number of items.} 37 | 38 | \item{evict}{The eviction policy to use to decide which objects are removed 39 | when a cache pruning occurs. Currently, \code{"lru"} and \code{"fifo"} are supported.} 40 | 41 | \item{destroy_on_finalize}{If \code{TRUE}, then when the cache_disk object is 42 | garbage collected, the cache directory and all objects inside of it will be 43 | deleted from disk. 
If \code{FALSE} (the default), it will do nothing when 44 | finalized.} 45 | 46 | \item{read_fn}{The function used to read the values from disk. If \code{NULL} 47 | (the default) it will use \code{readRDS}.} 48 | 49 | \item{write_fn}{The function used to write the values to disk. If \code{NULL} 50 | (the default) it will use \code{saveRDS}.} 51 | 52 | \item{extension}{The file extension to use for files on disk.} 53 | 54 | \item{missing}{A value to return when \code{get(key)} is called but the key is not 55 | present in the cache. The default is a \code{\link[=key_missing]{key_missing()}} object. It is 56 | actually an expression that is evaluated each time there is a cache miss. 57 | See section Missing keys for more information.} 58 | 59 | \item{prune_rate}{How often to prune the cache. See section Cache Pruning for 60 | more information.} 61 | 62 | \item{warn_ref_objects}{Should a warning be emitted when a reference is 63 | stored in the cache? This can be useful because serializing and 64 | deserializing a reference object (such as environments and external 65 | pointers) can lead to unexpected behavior.} 66 | 67 | \item{logfile}{An optional filename or connection object to where logging 68 | information will be written. To log to the console, use \code{stderr()} or 69 | \code{stdout()}.} 70 | } 71 | \value{ 72 | A disk caching object, with class \code{cache_disk}. 73 | } 74 | \description{ 75 | A disk cache object is a key-value store that saves the values as files in a 76 | directory on disk. Objects can be stored and retrieved using the \code{get()} and 77 | \code{set()} methods. Objects are automatically pruned from the cache according to 78 | the parameters \code{max_size}, \code{max_age}, \code{max_n}, and \code{evict}. 79 | } 80 | \section{Missing keys}{ 81 | 82 | 83 | The \code{missing} parameter controls what happens when \code{get()} is called with a 84 | key that is not in the cache (a cache miss).
The default behavior is to 85 | return a \code{\link[=key_missing]{key_missing()}} object. This is a \emph{sentinel value} that indicates 86 | that the key was not present in the cache. You can test if the returned 87 | value represents a missing key by using the \code{\link[=is.key_missing]{is.key_missing()}} function. 88 | You can also have \code{get()} return a different sentinel value, like \code{NULL}. 89 | If you want to throw an error on a cache miss, you can do so by providing 90 | an expression for \code{missing}, as in \code{missing = stop("Missing key")}. 91 | 92 | When the cache is created, you can supply a value for \code{missing}, which sets 93 | the default value to be returned for missing values. It can also be 94 | overridden when \code{get()} is called, by supplying a \code{missing} argument. For 95 | example, if you use \code{cache$get("mykey", missing = NULL)}, it will return 96 | \code{NULL} if the key is not in the cache. 97 | 98 | The \code{missing} parameter is actually an expression which is evaluated each 99 | time there is a cache miss. A quosure (from the rlang package) can be used. 100 | 101 | If you use this, the code that calls \code{get()} should be wrapped with 102 | \code{\link[=tryCatch]{tryCatch()}} to gracefully handle missing keys. 103 | } 104 | 105 | \section{Cache pruning}{ 106 | 107 | 108 | Cache pruning occurs when \code{set()} is called, or it can be invoked manually 109 | by calling \code{prune()}. 110 | 111 | The disk cache will throttle the pruning so that it does not happen on 112 | every call to \code{set()}, because the filesystem operations for checking the 113 | status of files can be slow. Instead, it will prune once in every 114 | \code{prune_rate} calls to \code{set()}, or if at least 5 seconds have elapsed since 115 | the last prune occurred, whichever is first. 116 | 117 | When a pruning occurs, if there are any objects that are older than 118 | \code{max_age}, they will be removed. 
119 | 120 | The \code{max_size} and \code{max_n} parameters are applied to the cache as a whole, 121 | in contrast to \code{max_age}, which is applied to each object individually. 122 | 123 | If the number of objects in the cache exceeds \code{max_n}, then objects will be 124 | removed from the cache according to the eviction policy, which is set with 125 | the \code{evict} parameter. Objects will be removed so that the number of items 126 | is \code{max_n}. 127 | 128 | If the size of the objects in the cache exceeds \code{max_size}, then objects 129 | will be removed from the cache. Objects will be removed from the cache so 130 | that the total size remains under \code{max_size}. Note that the size is 131 | calculated using the size of the files, not the size of disk space used by 132 | the files --- these two values can differ because files are stored in 133 | blocks on disk. For example, if the block size is 4096 bytes, then a file 134 | that is one byte in size will take 4096 bytes on disk. 135 | 136 | Another time that objects can be removed from the cache is when \code{get()} is 137 | called. If the target object is older than \code{max_age}, it will be removed 138 | and the cache will report it as a missing value. 139 | } 140 | 141 | \section{Eviction policies}{ 142 | 143 | 144 | If \code{max_n} or \code{max_size} are used, then objects will be removed from the 145 | cache according to an eviction policy. The available eviction policies are: 146 | 147 | \describe{ 148 | \item{\code{"lru"}}{ 149 | Least Recently Used. The least recently used objects will be removed. 150 | This uses the filesystem's mtime property. When "lru" is used, each time 151 | \code{get()} is called, it will update the file's mtime using 152 | \code{\link[=Sys.setFileTime]{Sys.setFileTime()}}. Note that on some platforms, the resolution of 153 | \code{\link[=Sys.setFileTime]{Sys.setFileTime()}} may be low, one or two seconds. 154 | } 155 | \item{\code{"fifo"}}{ 156 | First-in-first-out.
The oldest objects will be removed. 157 | } 158 | } 159 | 160 | Both of these policies use files' mtime. Note that some filesystems (notably 161 | FAT) have poor mtime resolution. (atime is not used because support for atime 162 | is worse than mtime.) 163 | } 164 | 165 | \section{Sharing among multiple processes}{ 166 | 167 | 168 | The directory for a cache_disk can be shared among multiple R processes. To 169 | do this, each R process should have a cache_disk object that uses the same 170 | directory. Each cache_disk will do pruning independently of the others, so 171 | if they have different pruning parameters, then one cache_disk may remove 172 | cached objects before another cache_disk would do so. 173 | 174 | Even though it is possible for multiple processes to share a cache_disk 175 | directory, this should not be done on networked file systems, because the 176 | slow performance of networked file systems can cause problems. If you need 177 | a high-performance shared cache, you can use one built on a database like 178 | Redis, SQLite, mySQL, or similar. 179 | 180 | When multiple processes share a cache directory, there are some potential 181 | race conditions. For example, if your code calls \code{exists(key)} to check if 182 | an object is in the cache, and then calls \code{get(key)}, the object may be 183 | removed from the cache in between those two calls, and \code{get(key)} will 184 | throw an error. Instead of calling the two functions, it is better to 185 | simply call \code{get(key)}, and check that the returned object is not a 186 | \code{key_missing()} object, using \code{is.key_missing()}. This effectively tests 187 | for existence and gets the object in one operation. 188 | 189 | It is also possible for one process to prune objects at the same time 190 | that another process is trying to prune objects. If this happens, you may 191 | see a warning from \code{file.remove()} failing to remove a file that has 192 | already been deleted.
193 | } 194 | 195 | \section{Methods}{ 196 | 197 | 198 | A disk cache object has the following methods: 199 | 200 | \describe{ 201 | \item{\code{get(key, missing)}}{ 202 | Returns the value associated with \code{key}. If the key is not in the 203 | cache, then it evaluates the expression specified by \code{missing} and 204 | returns the value. If \code{missing} is specified here, then it will 205 | override the default that was set when the \code{cache_disk} object was 206 | created. See section Missing keys for more information. 207 | } 208 | \item{\code{set(key, value)}}{ 209 | Stores the \code{key}-\code{value} pair in the cache. 210 | } 211 | \item{\code{exists(key)}}{ 212 | Returns \code{TRUE} if the cache contains the key, otherwise 213 | \code{FALSE}. 214 | } 215 | \item{\code{remove(key)}}{ 216 | Removes \code{key} from the cache, if it exists in the cache. If the key is 217 | not in the cache, this does nothing. 218 | } 219 | \item{\code{size()}}{ 220 | Returns the number of items currently in the cache. 221 | } 222 | \item{\code{keys()}}{ 223 | Returns a character vector of all keys currently in the cache. 224 | } 225 | \item{\code{reset()}}{ 226 | Clears all objects from the cache. 227 | } 228 | \item{\code{destroy()}}{ 229 | Clears all objects in the cache, and removes the cache directory from 230 | disk. 231 | } 232 | \item{\code{prune()}}{ 233 | Prunes the cache, using the parameters specified by \code{max_size}, 234 | \code{max_age}, \code{max_n}, and \code{evict}.
235 | } 236 | } 237 | } 238 | 239 | -------------------------------------------------------------------------------- /man/cache_layered.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/cache-layered.R 3 | \name{cache_layered} 4 | \alias{cache_layered} 5 | \title{Compose any number of cache objects into a new, layered cache object} 6 | \usage{ 7 | cache_layered(..., logfile = NULL) 8 | } 9 | \arguments{ 10 | \item{...}{Cache objects to compose into a new, layered cache object.} 11 | 12 | \item{logfile}{An optional filename or connection object to where logging 13 | information will be written. To log to the console, use \code{stderr()} or 14 | \code{stdout()}.} 15 | } 16 | \value{ 17 | A layered caching object, with class \code{cache_layered}. 18 | } 19 | \description{ 20 | Note that \code{cache_layered} is currently experimental. 21 | } 22 | \examples{ 23 | 24 | # Make a layered cache from a small memory cache and large disk cache 25 | m <- cache_mem(max_size = 100 * 1024^2) 26 | d <- cache_disk(max_size = 2 * 1024^3) 27 | cl <- cache_layered(m, d) 28 | } 29 | -------------------------------------------------------------------------------- /man/cache_mem.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/cache-mem.R 3 | \name{cache_mem} 4 | \alias{cache_mem} 5 | \title{Create a memory cache object} 6 | \usage{ 7 | cache_mem( 8 | max_size = 512 * 1024^2, 9 | max_age = Inf, 10 | max_n = Inf, 11 | evict = c("lru", "fifo"), 12 | missing = key_missing(), 13 | logfile = NULL 14 | ) 15 | } 16 | \arguments{ 17 | \item{max_size}{Maximum size of the cache, in bytes. If the cache exceeds 18 | this size, cached objects will be removed according to the value of the 19 | \code{evict}. Use \code{Inf} for no size limit. 
The default is 512 megabytes.} 20 | 21 | \item{max_age}{Maximum age of files in cache before they are evicted, in 22 | seconds. Use \code{Inf} for no age limit.} 23 | 24 | \item{max_n}{Maximum number of objects in the cache. If the number of objects 25 | exceeds this value, then cached objects will be removed according to the 26 | value of \code{evict}. Use \code{Inf} for no limit of number of items.} 27 | 28 | \item{evict}{The eviction policy to use to decide which objects are removed 29 | when a cache pruning occurs. Currently, \code{"lru"} and \code{"fifo"} are supported.} 30 | 31 | \item{missing}{A value to return when \code{get(key)} is called but the key is not 32 | present in the cache. The default is a \code{\link[=key_missing]{key_missing()}} object. It is 33 | actually an expression that is evaluated each time there is a cache miss. 34 | See section Missing keys for more information.} 35 | 36 | \item{logfile}{An optional filename or connection object to where logging 37 | information will be written. To log to the console, use \code{stderr()} or 38 | \code{stdout()}.} 39 | } 40 | \value{ 41 | A memory caching object, with class \code{cache_mem}. 42 | } 43 | \description{ 44 | A memory cache object is a key-value store that saves the values in an 45 | environment. Objects can be stored and retrieved using the \code{get()} and 46 | \code{set()} methods. Objects are automatically pruned from the cache according to 47 | the parameters \code{max_size}, \code{max_age}, \code{max_n}, and \code{evict}. 48 | } 49 | \details{ 50 | In a \code{cache_mem}, R objects are stored directly in the cache; they are 51 | \emph{not} serialized before being stored in the cache. This contrasts with other 52 | cache types, like \code{\link[=cache_disk]{cache_disk()}}, where objects are serialized, and the 53 | serialized object is cached. This can result in some differences of behavior.
54 | For example, as long as an object is stored in a cache_mem, it will not be 55 | garbage collected. 56 | } 57 | \section{Missing keys}{ 58 | 59 | 60 | The \code{missing} parameter controls what happens when \code{get()} is called with a 61 | key that is not in the cache (a cache miss). The default behavior is to 62 | return a \code{\link[=key_missing]{key_missing()}} object. This is a \emph{sentinel value} that indicates 63 | that the key was not present in the cache. You can test if the returned 64 | value represents a missing key by using the \code{\link[=is.key_missing]{is.key_missing()}} function. 65 | You can also have \code{get()} return a different sentinel value, like \code{NULL}. 66 | If you want to throw an error on a cache miss, you can do so by providing 67 | an expression for \code{missing}, as in \code{missing = stop("Missing key")}. 68 | 69 | When the cache is created, you can supply a value for \code{missing}, which sets 70 | the default value to be returned for missing values. It can also be 71 | overridden when \code{get()} is called, by supplying a \code{missing} argument. For 72 | example, if you use \code{cache$get("mykey", missing = NULL)}, it will return 73 | \code{NULL} if the key is not in the cache. 74 | 75 | The \code{missing} parameter is actually an expression which is evaluated each 76 | time there is a cache miss. A quosure (from the rlang package) can be used. 77 | 78 | If you use this, the code that calls \code{get()} should be wrapped with 79 | \code{\link[=tryCatch]{tryCatch()}} to gracefully handle missing keys. 80 | } 81 | \section{Cache pruning}{ 82 | 83 | Cache pruning occurs when \code{set()} is called, or it can be invoked manually 84 | by calling \code{prune()}. 85 | 86 | When a pruning occurs, if there are any objects that are older than 87 | \code{max_age}, they will be removed.
88 | 89 | The \code{max_size} and \code{max_n} parameters are applied to the cache as a whole, 90 | in contrast to \code{max_age}, which is applied to each object individually. 91 | 92 | If the number of objects in the cache exceeds \code{max_n}, then objects will be 93 | removed from the cache according to the eviction policy, which is set with 94 | the \code{evict} parameter. Objects will be removed so that the number of items 95 | is \code{max_n}. 96 | 97 | If the size of the objects in the cache exceeds \code{max_size}, then objects 98 | will be removed from the cache. Objects will be removed from the cache so 99 | that the total size remains under \code{max_size}. 100 | 101 | Another time that objects can be removed from the cache is when \code{get()} is 102 | called. If the target object is older than \code{max_age}, it will be removed 103 | and the cache will report it as a missing value. 104 | } 105 | 106 | \section{Eviction policies}{ 107 | 108 | 109 | If \code{max_n} or \code{max_size} are used, then objects will be removed 110 | from the cache according to an eviction policy. The available eviction 111 | policies are: 112 | 113 | \describe{ 114 | \item{\code{"lru"}}{ 115 | Least Recently Used. The least recently used objects will be removed. 116 | } 117 | \item{\code{"fifo"}}{ 118 | First-in-first-out. The oldest objects will be removed. 119 | } 120 | } 121 | } 122 | 123 | \section{Methods}{ 124 | 125 | 126 | A memory cache object has the following methods: 127 | 128 | \describe{ 129 | \item{\code{get(key, missing)}}{ 130 | Returns the value associated with \code{key}. If the key is not in the 131 | cache, then it evaluates the expression specified by \code{missing} and 132 | returns the value. If \code{missing} is specified here, then it will 133 | override the default that was set when the \code{cache_mem} object was 134 | created. See section Missing Keys for more information.
135 | } 136 | \item{\code{set(key, value)}}{ 137 | Stores the \code{key}-\code{value} pair in the cache. 138 | } 139 | \item{\code{exists(key)}}{ 140 | Returns \code{TRUE} if the cache contains the key, otherwise 141 | \code{FALSE}. 142 | } 143 | \item{\code{remove(key)}}{ 144 | Removes \code{key} from the cache, if it exists in the cache. If the key is 145 | not in the cache, this does nothing. 146 | } 147 | \item{\code{size()}}{ 148 | Returns the number of items currently in the cache. 149 | } 150 | \item{\code{keys()}}{ 151 | Returns a character vector of all keys currently in the cache. 152 | } 153 | \item{\code{reset()}}{ 154 | Clears all objects from the cache. 155 | } 156 | \item{\code{destroy()}}{ 157 | Clears all objects in the cache, and removes the cache directory from 158 | disk. 159 | } 160 | \item{\code{prune()}}{ 161 | Prunes the cache, using the parameters specified by \code{max_size}, 162 | \code{max_age}, \code{max_n}, and \code{evict}. 163 | } 164 | } 165 | } 166 | 167 | -------------------------------------------------------------------------------- /man/reexports.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/reexports.R 3 | \docType{import} 4 | \name{reexports} 5 | \alias{reexports} 6 | \alias{key_missing} 7 | \alias{is.key_missing} 8 | \title{Objects exported from other packages} 9 | \keyword{internal} 10 | \description{ 11 | These objects are imported from other packages. Follow the links 12 | below to see their documentation. 
13 | 14 | \describe{ 15 | \item{fastmap}{\code{\link[fastmap:key_missing]{is.key_missing}}, \code{\link[fastmap]{key_missing}}} 16 | }} 17 | 18 | -------------------------------------------------------------------------------- /revdep/.gitignore: -------------------------------------------------------------------------------- 1 | checks 2 | library 3 | checks.noindex 4 | library.noindex 5 | cloud.noindex 6 | data.sqlite 7 | *.html 8 | -------------------------------------------------------------------------------- /revdep/README.md: -------------------------------------------------------------------------------- 1 | # Platform 2 | 3 | |field |value | 4 | |:--------|:----------------------------| 5 | |version |R version 4.0.2 (2020-06-22) | 6 | |os |macOS 10.16 | 7 | |system |x86_64, darwin17.0 | 8 | |ui |X11 | 9 | |language |(EN) | 10 | |collate |en_US.UTF-8 | 11 | |ctype |en_US.UTF-8 | 12 | |tz |America/New_York | 13 | |date |2021-08-17 | 14 | 15 | # Dependencies 16 | 17 | |package |old |new |Δ | 18 | |:-------|:-----|:-----|:--| 19 | |cachem |1.0.5 |1.0.6 |* | 20 | 21 | # Revdeps 22 | 23 | ## All (6) 24 | 25 | |package |version |error |warning |note | 26 | |:--------------------------------|:-------|:-----|:-------|:----| 27 | |aquodom |0.1.0 | | | | 28 | |ffscrapr |1.4.5 | | | | 29 | |memoise |2.0.0 | | | | 30 | |[nflreadr](problems.md#nflreadr) |1.0.0 | | |1 | 31 | |[shiny](problems.md#shiny) |1.6.0 | | |1 | 32 | |[slackr](problems.md#slackr) |3.0.0 | | |1 | 33 | 34 | -------------------------------------------------------------------------------- /revdep/cran.md: -------------------------------------------------------------------------------- 1 | ## revdepcheck results 2 | 3 | We checked 6 reverse dependencies, comparing R CMD check results across CRAN and dev versions of this package. 
4 | 5 | * We saw 0 new problems 6 | * We failed to check 0 packages 7 | 8 | -------------------------------------------------------------------------------- /revdep/failures.md: -------------------------------------------------------------------------------- 1 | *Wow, no problems at all. :)* -------------------------------------------------------------------------------- /revdep/problems.md: -------------------------------------------------------------------------------- 1 | # nflreadr 2 | 3 |
4 | 5 | * Version: 1.0.0 6 | * GitHub: https://github.com/nflverse/nflreadr 7 | * Source code: https://github.com/cran/nflreadr 8 | * Date/Publication: 2021-08-09 14:40:02 UTC 9 | * Number of recursive dependencies: 70 10 | 11 | Run `revdep_details(, "nflreadr")` for more info 12 | 13 |
14 | 15 | ## In both 16 | 17 | * checking dependencies in R code ... NOTE 18 | ``` 19 | Namespace in Imports field not imported from: ‘dplyr’ 20 | All declared Imports should be used. 21 | ``` 22 | 23 | # shiny 24 | 25 |
26 | 27 | * Version: 1.6.0 28 | * GitHub: https://github.com/rstudio/shiny 29 | * Source code: https://github.com/cran/shiny 30 | * Date/Publication: 2021-01-25 21:50:02 UTC 31 | * Number of recursive dependencies: 104 32 | 33 | Run `revdep_details(, "shiny")` for more info 34 | 35 |
36 | 37 | ## In both 38 | 39 | * checking installed package size ... NOTE 40 | ``` 41 | installed size is 12.1Mb 42 | sub-directories of 1Mb or more: 43 | R 2.0Mb 44 | www 8.8Mb 45 | ``` 46 | 47 | # slackr 48 | 49 |
50 | 51 | * Version: 3.0.0 52 | * GitHub: https://github.com/mrkaye97/slackr 53 | * Source code: https://github.com/cran/slackr 54 | * Date/Publication: 2021-08-07 19:30:02 UTC 55 | * Number of recursive dependencies: 85 56 | 57 | Run `revdep_details(, "slackr")` for more info 58 | 59 |
60 | 61 | ## In both 62 | 63 | * checking dependencies in R code ... NOTE 64 | ``` 65 | Namespaces in Imports field not imported from: 66 | ‘methods’ ‘reprex’ 67 | All declared Imports should be used. 68 | ``` 69 | 70 | -------------------------------------------------------------------------------- /src/.gitignore: -------------------------------------------------------------------------------- 1 | *.o 2 | *.so 3 | *.dll 4 | -------------------------------------------------------------------------------- /src/cache.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | SEXP C_validate_key(SEXP key_r) { 6 | if (TYPEOF(key_r) != STRSXP || Rf_length(key_r) != 1) { 7 | Rf_error("key must be a one-element character vector"); 8 | } 9 | SEXP key_c = STRING_ELT(key_r, 0); 10 | if (key_c == NA_STRING || Rf_StringBlank(key_c)) { 11 | Rf_error("key must be not be \"\" or NA"); 12 | } 13 | 14 | const char* s = R_CHAR(key_c); 15 | char cset[] = "1234567890abcdefghijklmnopqrstuvwxyz_-"; 16 | int i = strspn(s, cset); 17 | if (i != strlen(s)) { 18 | Rf_error("Invalid key: %s. 
Only lowercase letters and numbers are allowed.", s); 19 | } 20 | 21 | return Rf_ScalarLogical(TRUE); 22 | } 23 | 24 | -------------------------------------------------------------------------------- /src/init.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include // for NULL 4 | #include 5 | #include 6 | 7 | /* .Call calls */ 8 | extern SEXP C_validate_key(SEXP); 9 | 10 | static const R_CallMethodDef CallEntries[] = { 11 | {"C_validate_key", (DL_FUNC) &C_validate_key, 1}, 12 | {NULL, NULL, 0} 13 | }; 14 | 15 | attribute_visible void R_init_cachem(DllInfo *dll) 16 | { 17 | R_registerRoutines(dll, NULL, CallEntries, NULL, NULL); 18 | R_useDynamicSymbols(dll, FALSE); 19 | } 20 | -------------------------------------------------------------------------------- /tests/testthat.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(cachem) 3 | 4 | test_check("cachem") 5 | -------------------------------------------------------------------------------- /tests/testthat/helper-utils.R: -------------------------------------------------------------------------------- 1 | is_on_github_actions <- function() { 2 | nzchar(Sys.getenv("GITHUB_ACTIONS")) 3 | } 4 | -------------------------------------------------------------------------------- /tests/testthat/test-cache-disk.R: -------------------------------------------------------------------------------- 1 | 2 | cache_disk_deterministic <- function(...) { 3 | d <- cache_disk(...) 4 | 5 | # Normally the throttle counter starts with a random value, but for these 6 | # tests we need to make it deterministic. 
7 | environment(d$set)$prune_throttle_counter_ <- 0 8 | 9 | d 10 | } 11 | 12 | 13 | test_that("cache_disk: handling missing values", { 14 | d <- cache_disk() 15 | expect_true(is.key_missing(d$get("abcd"))) 16 | d$set("a", 100) 17 | expect_identical(d$get("a"), 100) 18 | expect_identical(d$get("y", missing = NULL), NULL) 19 | expect_error( 20 | d$get("y", missing = stop("Missing key")), 21 | "^Missing key$", 22 | ) 23 | 24 | d <- cache_disk(missing = NULL) 25 | expect_true(is.null(d$get("abcd"))) 26 | d$set("a", 100) 27 | expect_identical(d$get("a"), 100) 28 | expect_identical(d$get("y", missing = -1), -1) 29 | expect_error( 30 | d$get("y", missing = stop("Missing key")), 31 | "^Missing key$", 32 | ) 33 | 34 | d <- cache_disk(missing = stop("Missing key")) 35 | expect_error(d$get("abcd"), "^Missing key$") 36 | d$set("x", NULL) 37 | d$set("a", 100) 38 | expect_identical(d$get("a"), 100) 39 | expect_error(d$get("y"), "^Missing key$") 40 | expect_identical(d$get("y", missing = NULL), NULL) 41 | expect_true(is.key_missing(d$get("y", missing = key_missing()))) 42 | expect_error( 43 | d$get("y", missing = stop("Missing key 2")), 44 | "^Missing key 2$", 45 | ) 46 | 47 | # Pass in a quosure 48 | expr <- rlang::quo(stop("Missing key")) 49 | d <- cache_disk(missing = !!expr) 50 | expect_error(d$get("y"), "^Missing key$") 51 | expect_error(d$get("y"), "^Missing key$") # Make sure a second time also throws 52 | }) 53 | 54 | 55 | test_that("cache_disk: pruning respects max_n", { 56 | # Timing is apparently unreliable on CRAN, so skip tests there. It's possible 57 | # that a heavily loaded system will have issues with these tests because of 58 | # the time resolution. 59 | skip_on_cran() 60 | delay <- 0.01 61 | 62 | d <- cache_disk_deterministic(max_n = 3) 63 | # NOTE: The short delays after each item are meant to tests more reliable on 64 | # CI systems. 
65 | d$set("a", rnorm(100)); Sys.sleep(delay) 66 | d$set("b", rnorm(100)); Sys.sleep(delay) 67 | d$set("c", rnorm(100)); Sys.sleep(delay) 68 | d$set("d", rnorm(100)); Sys.sleep(delay) 69 | d$set("e", rnorm(100)); Sys.sleep(delay) 70 | d$prune() 71 | expect_identical(sort(d$keys()), c("c", "d", "e")) 72 | }) 73 | 74 | test_that("cache_disk: pruning respects max_size", { 75 | skip_on_cran() 76 | delay <- 0.01 77 | 78 | d <- cache_disk_deterministic(max_size = 200) 79 | d$set("a", rnorm(100)); Sys.sleep(delay) 80 | d$set("b", rnorm(100)); Sys.sleep(delay) 81 | d$set("c", 1); Sys.sleep(delay) 82 | d$prune() 83 | expect_identical(sort(d$keys()), c("c")) 84 | d$set("d", rnorm(100)); Sys.sleep(delay) 85 | # Objects are pruned with oldest first, so even though "c" would fit in the 86 | # cache, it is removed after adding "d" (and "d" is removed as well because it 87 | # doesn't fit). 88 | d$prune() 89 | expect_length(d$keys(), 0) 90 | d$set("e", 2); Sys.sleep(delay) 91 | d$set("f", 3); Sys.sleep(delay) 92 | d$prune() 93 | expect_identical(sort(d$keys()), c("e", "f")) 94 | }) 95 | 96 | # Issue shiny#3033 97 | test_that("cache_disk: pruning respects both max_n and max_size", { 98 | skip_on_cran() 99 | d <- cache_disk_deterministic(max_n = 3, max_size = 200) 100 | 101 | # Set some values. Use rnorm so that object size is large; a simple vector 102 | # like 1:100 will be stored very efficiently by R's ALTREP, and won't exceed 103 | # the max_size. We want each of these objects to exceed max_size so that 104 | # they'll be pruned. 105 | d$set("a", rnorm(100)) 106 | d$set("b", rnorm(100)) 107 | d$set("c", rnorm(100)) 108 | d$set("d", rnorm(100)) 109 | d$set("e", rnorm(100)) 110 | Sys.sleep(0.1) # For systems that have low mtime resolution. 111 | d$set("f", 1) # This object is small and shouldn't be pruned. 112 | d$prune() 113 | expect_identical(d$keys(), "f") 114 | }) 115 | 116 | # Return TRUE if the Sys.setFileTime() has subsecond resolution, FALSE 117 | # otherwise. 
118 | setfiletime_has_subsecond_resolution <- function() { 119 | tmp <- tempfile() 120 | file.create(tmp) 121 | Sys.setFileTime(tmp, Sys.time()) 122 | time <- as.numeric(file.info(tmp)[['mtime']]) 123 | if (time == floor(time)) { 124 | return(FALSE) 125 | } else { 126 | return(TRUE) 127 | } 128 | } 129 | 130 | test_that('cache_disk: pruning with evict="lru"', { 131 | skip_on_cran() 132 | delay <- 0.01 133 | # For lru tests, make sure there's sub-second resolution for 134 | # Sys.setFileTime(), because that's what the lru code uses to update times. 135 | skip_if_not( 136 | setfiletime_has_subsecond_resolution(), 137 | "Sys.setFileTime() does not have subsecond resolution on this platform." 138 | ) 139 | 140 | d <- cache_disk_deterministic(max_n = 2) 141 | d$set("a", 1); Sys.sleep(delay) 142 | d$set("b", 1); Sys.sleep(delay) 143 | d$set("c", 1); Sys.sleep(delay) 144 | d$prune() 145 | expect_identical(sort(d$keys()), c("b", "c")) 146 | d$get("b"); Sys.sleep(delay) 147 | d$set("d", 1); Sys.sleep(delay) 148 | d$prune() 149 | expect_identical(sort(d$keys()), c("b", "d")) 150 | d$get("b"); Sys.sleep(delay) 151 | d$set("e", 2); Sys.sleep(delay) 152 | d$get("b"); Sys.sleep(delay) 153 | d$set("f", 3); Sys.sleep(delay) 154 | d$prune() 155 | expect_identical(sort(d$keys()), c("b", "f")) 156 | }) 157 | 158 | test_that('cache_disk: pruning with evict="fifo"', { 159 | skip_on_cran() 160 | delay <- 0.01 161 | 162 | d <- cache_disk_deterministic(max_n = 2, evict = "fifo") 163 | d$set("a", 1); Sys.sleep(delay) 164 | d$set("b", 1); Sys.sleep(delay) 165 | d$set("c", 1); Sys.sleep(delay) 166 | d$prune() 167 | expect_identical(sort(d$keys()), c("b", "c")) 168 | d$get("b") 169 | d$set("d", 1); Sys.sleep(delay) 170 | d$prune() 171 | expect_identical(sort(d$keys()), c("c", "d")) 172 | d$get("b") 173 | d$set("e", 2); Sys.sleep(delay) 174 | d$get("b") 175 | d$set("f", 3); Sys.sleep(delay) 176 | d$prune() 177 | expect_identical(sort(d$keys()), c("e", "f")) 178 | }) 179 | 180 | 181 | 
test_that("cache_disk: pruning throttling", {
  skip_on_cran()
  delay <- 0.01

  # Pruning won't happen when the number of items is less than prune_rate AND
  # the set() calls happen within 5 seconds.
  d <- cache_disk_deterministic(max_n = 2, prune_rate = 20)
  for (key in c("a", "b", "c", "d")) {
    d$set(key, 1)
    Sys.sleep(delay)
  }
  expect_identical(sort(d$keys()), c("a", "b", "c", "d"))

  # Pruning will happen with a lower prune_rate value.
  d <- cache_disk_deterministic(max_n = 2, prune_rate = 3)
  for (key in c("a", "b", "c")) {
    d$set(key, 1)
    Sys.sleep(delay)
  }
  expect_identical(sort(d$keys()), c("b", "c"))

  # The next two writes are expected to accumulate; the third one after the
  # last prune brings the cache back down to max_n items.
  d$set("d", 1)
  Sys.sleep(delay)
  expect_identical(sort(d$keys()), c("b", "c", "d"))
  d$set("e", 1)
  Sys.sleep(delay)
  expect_identical(sort(d$keys()), c("b", "c", "d", "e"))
  d$set("f", 1)
  Sys.sleep(delay)
  expect_identical(sort(d$keys()), c("e", "f"))

  # Same key written again after waiting out the 5 second window.
  Sys.sleep(5)
  d$set("f", 1)
  Sys.sleep(delay)
  expect_identical(sort(d$keys()), c("e", "f"))
})

test_that("destroy_on_finalize works", {
  d <- cache_disk(destroy_on_finalize = TRUE)
  # Remember the directory before dropping the only reference to the cache.
  cache_dir <- d$info()$dir
  expect_true(dir.exists(cache_dir))

  # Drop the cache object and force a collection so its finalizer can run.
  rm(d)
  gc()
  expect_false(dir.exists(cache_dir))
})


test_that("Warnings for caching reference objects", {
  # With warn_ref_objects = TRUE, each of these values should raise a warning.
  d <- cache_disk(warn_ref_objects = TRUE)
  expect_warning(d$set("a", new.env()))
  expect_warning(d$set("a", function() NULL))
  # fastmap objects contain an external pointer
  expect_warning(d$set("a", fastmap()))

  # Default is to not warn on ref objects
  d <- cache_disk()
  expect_silent(d$set("a", new.env()))
  expect_silent(d$set("a", function() NULL))
  expect_silent(d$set("a", fastmap()))
})


test_that("Cache disk can use different formats", {
  # CSV writer that omits row names; the expected data frame below has its row
  # names reset to match.
  my_write <- function(...) write.csv(..., row.names = FALSE)

  d <- cache_disk(read_fn = read.csv, write_fn = my_write, extension = ".csv")

  mt <- mtcars
  rownames(mt) <- NULL
  d$set("mt", mt)
  expect_equal(d$get("mt"), mt)
})

--------------------------------------------------------------------------------
/tests/testthat/test-cache-mem.R:
--------------------------------------------------------------------------------
# Multiplier applied to the delays used by the tests in this file.
# Do things slower on GHA because of slow machines
time_factor <- if (is_on_github_actions()) 4 else 1


test_that("cache_mem: handling missing values", {
  # Default cache: a missing key yields a key_missing object unless the get()
  # call supplies its own `missing` value.
  d <- cache_mem()
  expect_true(is.key_missing(d$get("abcd")))
  d$set("a", 100)
  expect_identical(d$get("a"), 100)
  expect_identical(d$get("y", missing = NULL), NULL)
  expect_error(
    d$get("y", missing = stop("Missing key")),
    "^Missing key$"
  )

  # Cache-level `missing` value, overridable per get() call.
  d <- cache_mem(missing = NULL)
  expect_true(is.null(d$get("abcd")))
  d$set("a", 100)
  expect_identical(d$get("a"), 100)
  expect_identical(d$get("y", missing = -1), -1)
  expect_error(
    d$get("y", missing = stop("Missing key")),
    "^Missing key$"
  )

  # Cache-level `missing` expression that throws.
  d <- cache_mem(missing = stop("Missing key"))
  expect_error(d$get("abcd"), "^Missing key$")
  d$set("x", NULL)
  d$set("a", 100)
  expect_identical(d$get("a"), 100)
  expect_error(d$get("y"), "^Missing key$")
  expect_identical(d$get("y", missing = NULL), NULL)
  expect_true(is.key_missing(d$get("y", missing = key_missing())))
  expect_error(
    d$get("y", missing = stop("Missing key 2")),
    "^Missing key 2$"
  )

  # Pass in a quosure
  expr <- rlang::quo(stop("Missing key"))
  d <- cache_mem(missing = !!expr)
  expect_error(d$get("y"), "^Missing key$")
  expect_error(d$get("y"), "^Missing key$") # Make sure a second time also throws
})

test_that("cache_mem: reset", {
  mc <- cache_mem()
  mc$set("a", "A")
  mc$set("b", "B")
  mc$reset()
  expect_identical(mc$keys(), character())
  expect_identical(mc$size(), 0L)

  # The cache must remain usable after a reset.
  mc$set("c", "C")
  expect_identical(mc$keys(), "c")
  expect_identical(mc$size(), 1L)
  expect_false(mc$exists("a"))
  expect_true(mc$exists("c"))
})

test_that("cache_mem: pruning respects max_n", {
  delay <- 0.001 * time_factor
  d <- cache_mem(max_n = 3)
  # NOTE: The short delays after each item are meant to make tests more
  # reliable on CI systems.
  for (key in c("a", "b", "c", "d", "e")) {
    d$set(key, rnorm(100))
    Sys.sleep(delay)
  }
  expect_identical(sort(d$keys()), c("c", "d", "e"))
})

test_that("cache_mem: pruning respects max_size", {
  delay <- 0.001 * time_factor
  d <- cache_mem(max_size = object.size(123) * 3)
  for (key in c("a", "b")) {
    d$set(key, rnorm(100))
    Sys.sleep(delay)
  }
  d$set("c", 1)
  Sys.sleep(delay)
  expect_identical(sort(d$keys()), c("c"))

  # Objects are pruned with oldest first, so even though "c" would fit in the
  # cache, it is removed after adding "d" (and "d" is removed as well because it
  # doesn't fit).
  d$set("d", rnorm(100))
  Sys.sleep(delay)
  expect_length(d$keys(), 0)

  d$set("e", 2)
  Sys.sleep(delay)
  d$set("f", 3)
  Sys.sleep(delay)
  expect_identical(sort(d$keys()), c("e", "f"))
})

test_that("cache_mem: max_size=Inf", {
  mc <- cachem::cache_mem(max_size = Inf)
  mc$set("a", 123)
  expect_identical(mc$get("a"), 123)
  # An explicit prune must leave the item in place.
  mc$prune()
  expect_identical(mc$get("a"), 123)
})

test_that("cache_mem: pruning respects both max_n and max_size", {
  delay <- 0.001 * time_factor
  d <- cache_mem(max_n = 3, max_size = object.size(123) * 3)
  # Set some values. Use rnorm so that object size is large; a simple vector
  # like 1:100 will be stored very efficiently by R's ALTREP, and won't exceed
  # the max_size. We want each of these objects to exceed max_size so that
  # they'll be pruned.
  for (key in c("a", "b", "c", "d", "e")) {
    d$set(key, rnorm(100))
    Sys.sleep(delay)
  }
  for (key in c("f", "g", "h")) {
    d$set(key, 1)
    Sys.sleep(delay)
  }
  expect_identical(sort(d$keys()), c("f", "g", "h"))

  # This will cause f to be pruned (due to max_n) and g to be pruned (due to
  # max_size).
  d$set("i", c(2, 3))
  # Use the scaled delay here as well, like every other write in this file.
  Sys.sleep(delay)
  expect_identical(sort(d$keys()), c("h", "i"))
})

test_that('cache_mem: pruning with evict="lru"', {
  delay <- 0.001 * time_factor
  d <- cache_mem(max_n = 2)
  for (key in c("a", "b", "c")) {
    d$set(key, 1)
    Sys.sleep(delay)
  }
  expect_identical(sort(d$keys()), c("b", "c"))

  # Reading "b" before each later write is expected to keep it in the cache.
  d$get("b")
  d$set("d", 1)
  Sys.sleep(delay)
  expect_identical(sort(d$keys()), c("b", "d"))
  d$get("b")
  d$set("e", 2)
  Sys.sleep(delay)
  d$get("b")
  d$set("f", 3)
  Sys.sleep(delay)
  expect_identical(sort(d$keys()), c("b", "f"))

  # Overwriting "b" is expected to count as a use as well.
  d <- cache_mem(max_n = 2, evict = "lru")
  for (key in c("a", "b", "c")) {
    d$set(key, 1)
    Sys.sleep(delay)
  }
  d$set("b", 2)
  Sys.sleep(delay)
  d$set("d", 2)
  Sys.sleep(delay)
  expect_identical(sort(d$keys()), c("b", "d"))
})

test_that('cache_mem: pruning with evict="fifo"', {
  delay <- 0.001 * time_factor
  d <- cache_mem(max_n = 2, evict = "fifo")
  for (key in c("a", "b", "c")) {
    d$set(key, 1)
    Sys.sleep(delay)
  }
  expect_identical(sort(d$keys()), c("b", "c"))

  # With "fifo", reading "b" is expected not to protect it from eviction.
  d$get("b")
  d$set("d", 1)
  Sys.sleep(delay)
  expect_identical(sort(d$keys()), c("c", "d"))
  d$get("b")
  d$set("e", 2)
  Sys.sleep(delay)
  d$get("b")
  d$set("f", 3)
  Sys.sleep(delay)
  expect_identical(sort(d$keys()), c("e", "f"))

  # Writing "b" a second time is expected to keep it ahead of "c".
  d <- cache_mem(max_n = 2, evict = "fifo")
  for (key in c("a", "b", "c")) {
    d$set(key, 1)
    Sys.sleep(delay)
  }
  d$set("b", 2)
  Sys.sleep(delay)
  d$set("d", 2)
  Sys.sleep(delay)
  expect_identical(sort(d$keys()), c("b", "d"))
})

test_that("Pruning by max_age", {
  skip_on_cran()

  max_age <- 0.25 * time_factor
  # One long wait outlives max_age; one short wait does not, but two short
  # waits together do.
  long_wait <- 0.3 * time_factor
  short_wait <- 0.15 * time_factor

  # Should prune target item on get()
  d <- cache_mem(max_age = max_age)
  d$set("a", 1)
  expect_identical(d$get("a"), 1)
  Sys.sleep(long_wait)
  expect_identical(d$get("a"), key_missing())
  expect_identical(d$get("x"), key_missing())

  # Should prune all items on set()
  d <- cache_mem(max_age = max_age)
  d$set("a", 1)
  expect_identical(d$get("a"), 1)
  Sys.sleep(long_wait)
  d$set("b", 1)
  expect_identical(d$keys(), "b")

  # Should prune target item on exists()
  d <- cache_mem(max_age = max_age)
  d$set("a", 1)
  expect_identical(d$get("a"), 1)
  expect_true(d$exists("a"))
  expect_false(d$exists("b"))
  Sys.sleep(short_wait)
  d$set("b", 1)
  expect_true(d$exists("a"))
  expect_true(d$exists("b"))
  Sys.sleep(short_wait)
  expect_false(d$exists("a"))
  expect_true(d$exists("b"))

  # Should prune all items on keys()
  d <- cache_mem(max_age = max_age)
  d$set("a", 1)
  expect_identical(d$keys(), "a")
  Sys.sleep(short_wait)
  d$set("b", 1)
  Sys.sleep(short_wait)
  expect_identical(d$keys(), "b")

  # Should prune all items on size()
  d <- cache_mem(max_age = max_age)
  d$set("a", 1)
  expect_identical(d$size(), 1L)
  Sys.sleep(short_wait)
  d$set("b", 1)
  expect_identical(d$size(), 2L)
  Sys.sleep(short_wait)
  expect_identical(d$size(), 1L)
})

test_that("Removed objects can be GC'd", {
  mc <- cache_mem()
  e <- new.env()
  finalized <- FALSE
  reg.finalizer(e, function(x) finalized <<- TRUE)
  mc$set("e", e)
  rm(e)
  mc$set("x", 1)
  gc()
  # The cache still holds the environment, so its finalizer must not have run.
  expect_false(finalized)
  expect_true(is.environment(mc$get("e")))

  # After an explicit remove() nothing should reference the environment any
  # more, so a collection is expected to run the finalizer.
  mc$remove("e")
  gc()
  expect_true(finalized)
  expect_true(is.key_missing(mc$get("e")))
})

test_that("Pruned objects can be GC'd", {
  delay <- 0.001 * time_factor
  # Cache is large enough to hold one environment and one number
  mc <- cache_mem(max_size = object.size(new.env()) + object.size(1234))
  e <- new.env()
  finalized <- FALSE
  reg.finalizer(e, function(x) finalized <<- TRUE)
  mc$set("e", e)
  rm(e)
  mc$set("x", 1)
  gc()
  expect_false(finalized)
  expect_true(is.environment(mc$get("e")))

  # Get x so that the atime is updated
  Sys.sleep(delay)
  mc$get("x")
  Sys.sleep(delay)

  # e should be pruned when we add another item
  mc$set("y", 2)
  gc()
  expect_true(finalized)
  expect_true(is.key_missing(mc$get("e")))
})


# For https://github.com/r-lib/cachem/issues/47, https://github.com/r-lib/cachem/pull/48/
test_that("Cache doesn't shrink smaller than INITIAL_SIZE", {
  # This test also makes sure that the cache doesn't keep adding elements to the
  # storage vectors when there are zero items, then an item is added and
  # removed, repeatedly.
  m <- cache_mem()
  # Reach into the cache's enclosing environment to inspect internal state.
  e <- environment(m$get)
  for (i in seq_len(e$INITIAL_SIZE)) {
    m$set(as.character(i), i)
    m$remove(as.character(i))
  }
  expect_equal(e$total_n_, 0)
  expect_equal(e$last_idx_, e$INITIAL_SIZE)
  expect_length(e$key_, e$INITIAL_SIZE)
  expect_length(e$value_, e$INITIAL_SIZE)

  # Adding one more item should trigger a compact_()
  m$set("a", 1)
  m$remove("a")

  expect_equal(e$total_n_, 0)
  # last_idx_ should be reset after we pass the INITIAL_SIZE, even if there are
  # no items in the cache. Prior to the fix in #48, it could keep growing.
  expect_equal(e$last_idx_, 0)
  expect_length(e$key_, e$INITIAL_SIZE)
  expect_length(e$value_, e$INITIAL_SIZE)
})

--------------------------------------------------------------------------------
/tests/testthat/test-utils.R:
--------------------------------------------------------------------------------

test_that("validate_key", {
  # Keys that are accepted.
  expect_true(validate_key("e"))
  expect_true(validate_key("abc"))
  expect_true(validate_key("abcd123-_"))
  expect_true(validate_key("-"))
  expect_true(validate_key("_"))

  # Keys that are rejected with an error.
  expect_error(validate_key("a.b"))
  expect_error(validate_key("a,b"))
  expect_error(validate_key("é"))
  expect_error(validate_key("ABC"))
  expect_error(validate_key("_A"))
  expect_error(validate_key("!"))
  expect_error(validate_key("a b"))
  expect_error(validate_key("ab\n"))
})

--------------------------------------------------------------------------------