├── .Rbuildignore
├── .github
├── .gitignore
└── workflows
│ └── R-CMD-check.yaml
├── .gitignore
├── CRAN-RELEASE
├── DESCRIPTION
├── LICENSE
├── LICENSE.md
├── Makefile
├── NAMESPACE
├── NEWS.md
├── R
├── cache-disk.R
├── cache-layered.R
├── cache-mem.R
├── cachem-package.R
├── cachem.R
├── reexports.R
└── utils.R
├── README.Rmd
├── README.md
├── _pkgdown.yml
├── cachem.Rproj
├── cran-comments.md
├── man
├── cache_disk.Rd
├── cache_layered.Rd
├── cache_mem.Rd
└── reexports.Rd
├── revdep
├── .gitignore
├── README.md
├── cran.md
├── failures.md
└── problems.md
├── src
├── .gitignore
├── cache.c
└── init.c
└── tests
├── testthat.R
└── testthat
├── helper-utils.R
├── test-cache-disk.R
├── test-cache-mem.R
└── test-utils.R
/.Rbuildignore:
--------------------------------------------------------------------------------
1 | ^.*\.Rproj$
2 | ^\.Rproj\.user$
3 | ^LICENSE\.md$
4 | ^\.github$
5 | ^.*\.sublime-project$
6 | ^README\.Rmd$
7 | ^_pkgdown\.yml$
8 | ^docs$
9 | ^pkgdown$
10 | ^Makefile$
11 | ^cran-comments\.md$
12 | ^revdep$
13 | ^CRAN-RELEASE$
14 |
--------------------------------------------------------------------------------
/.github/.gitignore:
--------------------------------------------------------------------------------
1 | *.html
2 |
--------------------------------------------------------------------------------
/.github/workflows/R-CMD-check.yaml:
--------------------------------------------------------------------------------
1 | # Workflow derived from https://github.com/rstudio/shiny-workflows
2 | #
3 | # NOTE: This Shiny team GHA workflow is overkill for most R packages.
4 | # For most R packages it is better to use https://github.com/r-lib/actions
5 | on:
6 | push:
7 | branches: [main, rc-**]
8 | pull_request:
9 | branches: [main]
10 | schedule:
11 | - cron: '0 7 * * 1' # every monday
12 |
13 | name: Package checks
14 |
15 | jobs:
16 | website:
17 | uses: rstudio/shiny-workflows/.github/workflows/website.yaml@v1
18 | routine:
19 | uses: rstudio/shiny-workflows/.github/workflows/routine.yaml@v1
20 | R-CMD-check:
21 | uses: rstudio/shiny-workflows/.github/workflows/R-CMD-check.yaml@v1
22 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .Rproj.user
2 | docs
3 | CRAN-RELEASE
4 |
--------------------------------------------------------------------------------
/CRAN-RELEASE:
--------------------------------------------------------------------------------
1 | This package was submitted to CRAN on 2021-08-19.
2 | Once it is accepted, delete this file and tag the release (commit 9e4b75d).
3 |
--------------------------------------------------------------------------------
/DESCRIPTION:
--------------------------------------------------------------------------------
1 | Package: cachem
2 | Version: 1.1.0
3 | Title: Cache R Objects with Automatic Pruning
4 | Description: Key-value stores with automatic pruning. Caches can limit
5 | either their total size or the age of the oldest object (or both),
6 | automatically pruning objects to maintain the constraints.
7 | Authors@R: c(
8 | person("Winston", "Chang", , "winston@posit.co", c("aut", "cre")),
9 | person(family = "Posit Software, PBC", role = c("cph", "fnd")))
10 | License: MIT + file LICENSE
11 | Encoding: UTF-8
12 | ByteCompile: true
13 | URL: https://cachem.r-lib.org/, https://github.com/r-lib/cachem
14 | Imports:
15 | rlang,
16 | fastmap (>= 1.2.0)
17 | Suggests:
18 | testthat
19 | RoxygenNote: 7.2.3
20 | Roxygen: list(markdown = TRUE)
21 | Config/Needs/routine:
22 | lobstr
23 | Config/Needs/website:
24 | pkgdown
25 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | YEAR: 2023
2 | COPYRIGHT HOLDER: Posit Software, PBC
3 |
--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
1 | # MIT License
2 |
3 | Copyright (c) 2023 Posit Software, PBC.
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 |
2 | README.md: README.Rmd
3 | # Instead of just knitr::knit, use rmarkdown::render and remove the .html
4 | # file, because the latter uses pandoc to process the .md file, and it
5 | # supports adding a table of contents.
6 | Rscript -e "rmarkdown::render('$<', output_file = '$@')"
7 | rm -f README.html
8 |
9 | clean:
10 | rm -f README.md
11 |
--------------------------------------------------------------------------------
/NAMESPACE:
--------------------------------------------------------------------------------
1 | # Generated by roxygen2: do not edit by hand
2 |
3 | S3method(format,cachem)
4 | S3method(print,cachem)
5 | export(cache_disk)
6 | export(cache_layered)
7 | export(cache_mem)
8 | export(is.key_missing)
9 | export(key_missing)
10 | import(fastmap)
11 | importFrom(fastmap,is.key_missing)
12 | importFrom(fastmap,key_missing)
13 | importFrom(rlang,as_quosure)
14 | importFrom(rlang,enquo)
15 | importFrom(rlang,eval_tidy)
16 | importFrom(utils,object.size)
17 | useDynLib(cachem, .registration = TRUE)
18 |
--------------------------------------------------------------------------------
/NEWS.md:
--------------------------------------------------------------------------------
1 | # cachem 1.1.0
2 |
3 | * Fixed #47: In some cases, with a `cache_mem`, removing an item could result in the spurious message "nothing to compact" being printed out. (#48)
4 |
5 | # cachem 1.0.8
6 |
7 | * Fixed #38: With a `cache_mem`, `$reset()` did not fully reset the cache, and calling `$prune()` could result in an error. (#39)
8 |
9 | # cachem 1.0.7
10 |
11 | * `cache_disk()` gains `read_fn`, `write_fn`, and `extension` arguments, to allow specifying custom formats for serializing objects to disk. (@jimhester)
12 |
13 | # cachem 1.0.6
14 |
15 | * Fixed #14: Fix off-by-one error when checking pruning throttling counter for `cache_disk`. (#15)
16 |
17 | * Closed #13: Added documentation for the `remove()` method.
18 |
19 |
20 | # cachem 1.0.5
21 |
22 | * `cache_mem()` and `cache_disk()` now allow `-` and `_` (hyphen and underscore) characters in the keys. (#9)
23 |
24 | * `cache_disk()` previously did not correctly throttle pruning. (#11)
25 |
26 |
27 | # cachem 1.0.4
28 |
29 | * More pruning speed enhancements for `cache_mem()`. (#7)
30 |
31 |
32 | # cachem 1.0.3
33 |
34 | * Addressed issues with timing-sensitive tests.
35 |
36 |
37 | # cachem 1.0.2
38 |
39 | * Closed #4: Sped up pruning for `cache_mem`. (#5)
40 |
41 | * Fixed `cache_mem` pruning with `evict="lru"`.
42 |
43 |
44 | # cachem 1.0.1
45 |
46 | * Fixed function declaration of `C_validate_key`.
47 |
48 |
49 | # cachem 1.0.0
50 |
51 | * First CRAN release.
52 |
--------------------------------------------------------------------------------
/R/cache-disk.R:
--------------------------------------------------------------------------------
1 | #' Create a disk cache object
2 | #'
3 | #' A disk cache object is a key-value store that saves the values as files in a
4 | #' directory on disk. Objects can be stored and retrieved using the `get()` and
5 | #' `set()` methods. Objects are automatically pruned from the cache according to
6 | #' the parameters `max_size`, `max_age`, `max_n`, and `evict`.
7 | #'
8 | #'
9 | #' @section Missing keys:
10 | #'
11 | #' The `missing` parameter controls what happens when `get()` is called with a
12 | #' key that is not in the cache (a cache miss). The default behavior is to
13 | #' return a [key_missing()] object. This is a *sentinel value* that indicates
14 | #' that the key was not present in the cache. You can test if the returned
15 | #' value represents a missing key by using the [is.key_missing()] function.
16 | #' You can also have `get()` return a different sentinel value, like `NULL`.
17 | #' If you want to throw an error on a cache miss, you can do so by providing
18 | #' an expression for `missing`, as in `missing = stop("Missing key")`.
19 | #'
20 | #' When the cache is created, you can supply a value for `missing`, which sets
21 | #' the default value to be returned for missing values. It can also be
22 | #' overridden when `get()` is called, by supplying a `missing` argument. For
23 | #' example, if you use `cache$get("mykey", missing = NULL)`, it will return
24 | #' `NULL` if the key is not in the cache.
25 | #'
26 | #' The `missing` parameter is actually an expression which is evaluated each
27 | #' time there is a cache miss. A quosure (from the rlang package) can be used.
28 | #'
29 | #' If you use this, the code that calls `get()` should be wrapped with
30 | #' [tryCatch()] to gracefully handle missing keys.
31 | #'
32 | #'
33 | #' @section Cache pruning:
34 | #'
35 | #' Cache pruning occurs when `set()` is called, or it can be invoked manually
36 | #' by calling `prune()`.
37 | #'
38 | #' The disk cache will throttle the pruning so that it does not happen on
39 | #' every call to `set()`, because the filesystem operations for checking the
40 | #' status of files can be slow. Instead, it will prune once in every
41 | #' `prune_rate` calls to `set()`, or if at least 5 seconds have elapsed since
42 | #' the last prune occurred, whichever is first.
43 | #'
44 | #' When a pruning occurs, if there are any objects that are older than
45 | #' `max_age`, they will be removed.
46 | #'
47 | #' The `max_size` and `max_n` parameters are applied to the cache as a whole,
48 | #' in contrast to `max_age`, which is applied to each object individually.
49 | #'
50 | #' If the number of objects in the cache exceeds `max_n`, then objects will be
51 | #' removed from the cache according to the eviction policy, which is set with
52 | #' the `evict` parameter. Objects will be removed so that the number of items
53 | #' is `max_n`.
54 | #'
55 | #' If the size of the objects in the cache exceeds `max_size`, then objects
56 | #' will be removed from the cache. Objects will be removed from the cache so
57 | #' that the total size remains under `max_size`. Note that the size is
58 | #' calculated using the size of the files, not the size of disk space used by
59 | #' the files --- these two values can differ because files are stored in
60 | #' blocks on disk. For example, if the block size is 4096 bytes, then a file
61 | #' that is one byte in size will take 4096 bytes on disk.
62 | #'
63 | #' Another time that objects can be removed from the cache is when `get()` is
64 | #' called. If the target object is older than `max_age`, it will be removed
65 | #' and the cache will report it as a missing value.
66 | #'
67 | #' @section Eviction policies:
68 | #'
69 | #' If `max_n` or `max_size` are used, then objects will be removed from the
70 | #' cache according to an eviction policy. The available eviction policies are:
71 | #'
72 | #' \describe{
73 | #' \item{`"lru"`}{
74 | #' Least Recently Used. The least recently used objects will be removed.
75 | #' This uses the filesystem's mtime property. When "lru" is used, each time
76 | #' `get()` is called, it will update the file's mtime using
77 | #' [Sys.setFileTime()]. Note that on some platforms, the resolution of
78 | #' [Sys.setFileTime()] may be low, one or two seconds.
79 | #' }
80 | #' \item{`"fifo"`}{
81 | #' First-in-first-out. The oldest objects will be removed.
82 | #' }
83 | #' }
84 | #'
85 | #' Both of these policies use files' mtime. Note that some filesystems (notably
86 | #' FAT) have poor mtime resolution. (atime is not used because support for atime
87 | #' is worse than mtime.)
88 | #'
89 | #'
90 | #' @section Sharing among multiple processes:
91 | #'
92 | #' The directory for a cache_disk can be shared among multiple R processes. To
93 | #' do this, each R process should have a cache_disk object that uses the same
94 | #' directory. Each cache_disk will do pruning independently of the others, so
95 | #' if they have different pruning parameters, then one cache_disk may remove
96 | #' cached objects before another cache_disk would do so.
97 | #'
98 | #' Even though it is possible for multiple processes to share a cache_disk
99 | #' directory, this should not be done on networked file systems, because the
100 | #' slow performance of networked file systems can cause problems. If you need
101 | #' a high-performance shared cache, you can use one built on a database like
102 | #' Redis, SQLite, mySQL, or similar.
103 | #'
104 | #' When multiple processes share a cache directory, there are some potential
105 | #' race conditions. For example, if your code calls `exists(key)` to check if
106 | #' an object is in the cache, and then calls `get(key)`, the object may be
107 | #' removed from the cache in between those two calls, and `get(key)` will
108 | #' throw an error. Instead of calling the two functions, it is better to
109 | #' simply call `get(key)`, and check that the returned object is not a
110 | #' `key_missing()` object, using `is.key_missing()`. This effectively tests
111 | #' for existence and gets the object in one operation.
112 | #'
113 | #' It is also possible for one process to prune objects at the same time
114 | #' that another process is trying to prune objects. If this happens, you may
115 | #' see a warning from `file.remove()` failing to remove a file that has
116 | #' already been deleted.
117 | #'
118 | #'
119 | #' @section Methods:
120 | #'
121 | #' A disk cache object has the following methods:
122 | #'
123 | #' \describe{
124 | #' \item{`get(key, missing)`}{
125 | #' Returns the value associated with `key`. If the key is not in the
126 | #' cache, then it evaluates the expression specified by `missing` and
127 | #' returns the value. If `missing` is specified here, then it will
128 | #' override the default that was set when the `cache_disk` object was
129 | #' created. See section Missing Keys for more information.
130 | #' }
131 | #' \item{`set(key, value)`}{
132 | #' Stores the `key`-`value` pair in the cache.
133 | #' }
134 | #' \item{`exists(key)`}{
135 | #' Returns `TRUE` if the cache contains the key, otherwise
136 | #' `FALSE`.
137 | #' }
138 | #' \item{`remove(key)`}{
139 | #' Removes `key` from the cache, if it exists in the cache. If the key is
140 | #' not in the cache, this does nothing.
141 | #' }
142 | #' \item{`size()`}{
143 | #' Returns the number of items currently in the cache.
144 | #' }
145 | #' \item{`keys()`}{
146 | #' Returns a character vector of all keys currently in the cache.
147 | #' }
148 | #' \item{`reset()`}{
149 | #' Clears all objects from the cache.
150 | #' }
151 | #' \item{`destroy()`}{
152 | #' Clears all objects in the cache, and removes the cache directory from
153 | #' disk.
154 | #' }
155 | #' \item{`prune()`}{
156 | #' Prunes the cache, using the parameters specified by `max_size`,
157 | #' `max_age`, `max_n`, and `evict`.
158 | #' }
159 | #' }
160 | #'
161 | #' @param dir Directory to store files for the cache. If `NULL` (the default) it
162 | #' will create and use a temporary directory.
163 | #' @param read_fn The function used to read the values from disk. If `NULL`
164 | #' (the default) it will use `readRDS`.
165 | #' @param write_fn The function used to write the values to disk. If `NULL`
166 | #' (the default) it will use `saveRDS`.
167 | #' @param extension The file extension to use for files on disk.
168 | #' @param max_age Maximum age of files in cache before they are evicted, in
169 | #' seconds. Use `Inf` for no age limit.
170 | #' @param max_size Maximum size of the cache, in bytes. If the cache exceeds
171 | #' this size, cached objects will be removed according to the value of
172 | #' `evict`. Use `Inf` for no size limit. The default is 1 gigabyte.
173 | #' @param max_n Maximum number of objects in the cache. If the number of objects
174 | #' exceeds this value, then cached objects will be removed according to the
175 | #' value of `evict`. Use `Inf` for no limit of number of items.
176 | #' @param evict The eviction policy to use to decide which objects are removed
177 | #' when a cache pruning occurs. Currently, `"lru"` and `"fifo"` are supported.
178 | #' @param destroy_on_finalize If `TRUE`, then when the cache_disk object is
179 | #' garbage collected, the cache directory and all objects inside of it will be
180 | #' deleted from disk. If `FALSE` (the default), it will do nothing when
181 | #' finalized.
182 | #' @param missing A value to return when `get(key)` is called but the key is not
183 | #' present in the cache. The default is a [key_missing()] object. It is
184 | #' actually an expression that is evaluated each time there is a cache miss.
185 | #' See section Missing keys for more information.
186 | #' @param prune_rate How often to prune the cache. See section Cache Pruning for
187 | #' more information.
188 | #' @param warn_ref_objects Should a warning be emitted when a reference is
189 | #' stored in the cache? This can be useful because serializing and
190 | #' deserializing a reference object (such as environments and external
191 | #' pointers) can lead to unexpected behavior.
192 | #' @param logfile An optional filename or connection object to which logging
193 | #' information will be written. To log to the console, use `stderr()` or
194 | #' `stdout()`.
195 | #'
196 | #' @return A disk caching object, with class `cache_disk`.
197 | #' @importFrom rlang enquo eval_tidy as_quosure
198 | #' @export
cache_disk <- function(
  dir = NULL,
  max_size = 1024 * 1024 ^ 2,
  max_age = Inf,
  max_n = Inf,
  evict = c("lru", "fifo"),
  destroy_on_finalize = FALSE,
  read_fn = NULL,
  write_fn = NULL,
  extension = ".rds",
  missing = key_missing(),
  prune_rate = 20,
  warn_ref_objects = FALSE,
  logfile = NULL
) {
  # The cache is implemented as a closure: the variables ending in `_` below
  # are the private state, and the functions defined further down are the
  # methods that are returned to the caller in a list.

  # ============================================================================
  # Constants
  # ============================================================================
  # Number of seconds after which a call to set() triggers a prune, regardless
  # of the value of the throttle counter.
  PRUNE_THROTTLE_TIME_LIMIT <- 5

  # ============================================================================
  # Logging
  # ============================================================================
  # This needs to be defined first, because it's used in initialization.
  # Appends a timestamped line to `logfile_`; does nothing if no logfile is set.
  log_ <- function(text) {
    if (is.null(logfile_)) return()

    text <- paste0(format(Sys.time(), "[%Y-%m-%d %H:%M:%OS3] cache_disk "), text)
    cat(text, sep = "\n", file = logfile_, append = TRUE)
  }

  # ============================================================================
  # Initialization
  # ============================================================================
  if (is.null(dir)) {
    dir <- tempfile("cache_disk-")
  }
  if (!is.numeric(max_size)) stop("max_size must be a number. Use `Inf` for no limit.")
  if (!is.numeric(max_age))  stop("max_age must be a number. Use `Inf` for no limit.")
  if (!is.numeric(max_n))    stop("max_n must be a number. Use `Inf` for no limit.")

  if (!dir.exists(dir)) {
    # log_(paste0("initialize: Creating ", dir))
    dir.create(dir, recursive = TRUE)
  }

  logfile_             <- logfile
  dir_                 <- normalizePath(dir)
  extension_           <- extension
  # Turn an extension like ".rds" into a regex like "\\.rds$", which is used to
  # list the cache files in `dir_` and to strip the extension from file names.
  extension_regex_     <- sub("\\.(.*)", "\\\\.\\1$", extension)
  read_fn_             <- read_fn
  write_fn_            <- write_fn
  max_size_            <- max_size
  max_age_             <- max_age
  max_n_               <- max_n
  evict_               <- match.arg(evict)
  destroy_on_finalize_ <- destroy_on_finalize
  # Capture `missing` unevaluated, so that it is evaluated on each cache miss.
  missing_             <- enquo(missing)
  prune_rate_          <- prune_rate

  destroyed_ <- FALSE


  if (is.null(read_fn_)) {
    read_fn_ <- readRDS
  }

  if (is.null(write_fn_)) {
    # Default writer: serialize to a temporary file in the same directory and
    # then rename it to the final name, so that the target file is not written
    # to directly.
    write_fn_ <- local({
      # Set to TRUE by the refhook when saveRDS() encounters a reference
      # object while serializing `value`.
      ref_object <- FALSE
      function(value, file) {
        temp_file <- paste0(file, "-temp-", random_hex(16))
        on.exit({
          # Reset the flag after every write. It lives in the enclosing
          # environment and is shared across calls, so without the reset a
          # reference object seen once would make every later call warn.
          ref_object <<- FALSE
          # If the rename succeeded this is a no-op; otherwise it cleans up
          # the leftover temporary file.
          unlink(temp_file)
        }, add = TRUE)

        saveRDS(value, file = temp_file,
          refhook = function(x) {
            ref_object <<- TRUE
            NULL
          }
        )
        file.rename(temp_file, file)
        if (warn_ref_objects && ref_object) {
          log_(paste0('set: value is a reference object'))
          warning("A reference object was cached in a serialized format. The restored object may not work as expected.")
        }
      }
    })
  }

  # Start the prune throttle counter with a random number from 0 to
  # `prune_rate - 1` (0-19 with the default). This is so that, in the case
  # where multiple cache_disk objects that point to the same directory are
  # created and discarded after just a few uses each, pruning will still occur.
  prune_throttle_counter_ <- sample.int(prune_rate_, 1) - 1
  prune_last_time_        <- as.numeric(Sys.time())

  if (destroy_on_finalize_) {
    # The finalizer is attached to this function's environment, which holds
    # the cache's state and methods.
    reg.finalizer(
      environment(),
      function(e) { e$destroy() }
    )
  }

  # ============================================================================
  # Public methods
  # ============================================================================
  # Return the value stored under `key`, or the result of evaluating `missing`
  # if the value cannot be read.
  get <- function(key, missing = missing_) {
    log_(paste0('get: key "', key, '"'))
    is_destroyed(throw = TRUE)
    validate_key(key)

    # Remove the object first if it has exceeded max_age, so that it is
    # reported as missing below.
    maybe_prune_single_(key)

    filename <- key_to_filename_(key)

    # Instead of calling exists() before fetching the value, just try to
    # fetch the value. This reduces the risk of a race condition when
    # multiple processes share a cache.
    read_error <- FALSE
    tryCatch(
      {
        value <- suppressWarnings(read_fn_(filename))
        if (evict_ == "lru") {
          # For LRU eviction, the file's mtime records the last access.
          Sys.setFileTime(filename, Sys.time())
        }
      },
      error = function(e) {
        read_error <<- TRUE
      }
    )
    if (read_error) {
      log_(paste0('get: key "', key, '" is missing'))
      missing <- as_quosure(missing)
      return(eval_tidy(missing))
    }

    log_(paste0('get: key "', key, '" found'))
    value
  }

  # Store `value` under `key`, then prune (subject to throttling).
  set <- function(key, value) {
    log_(paste0('set: key "', key, '"'))
    is_destroyed(throw = TRUE)
    validate_key(key)

    file <- key_to_filename_(key)

    save_error <- FALSE
    tryCatch(
      {
        write_fn_(value, file)
      },
      error = function(e) {
        save_error <<- TRUE
      }
    )
    if (save_error) {
      log_(paste0('set: key "', key, '" error'))
      stop('Error setting value for key "', key, '".')
    }

    prune_throttled_()
    invisible(TRUE)
  }

  # Return TRUE if a file for `key` exists in the cache directory.
  exists <- function(key) {
    is_destroyed(throw = TRUE)
    validate_key(key)
    file.exists(key_to_filename_(key))
  }

  # Return all keys in the cache
  keys <- function() {
    is_destroyed(throw = TRUE)
    files <- dir(dir_, extension_regex_)
    sub(extension_regex_, "", files)
  }

  # Remove the file for `key`, if there is one.
  remove <- function(key) {
    log_(paste0('remove: key "', key, '"'))
    is_destroyed(throw = TRUE)
    validate_key(key)
    # Remove file; use unlink() instead of file.remove() because it won't
    # warn if the file doesn't exist.
    unlink(key_to_filename_(key))
    invisible(TRUE)
  }

  # Remove all files with the cache's extension from the cache directory.
  reset <- function() {
    log_(paste0('reset'))
    is_destroyed(throw = TRUE)
    file.remove(dir(dir_, extension_regex_, full.names = TRUE))
    invisible(TRUE)
  }

  # Apply the max_age, max_n and max_size limits, in that order.
  prune <- function() {
    # TODO: It would be good to add parameters `n` and `size`, so that the
    # cache can be pruned to `max_n - n` and `max_size - size` before adding
    # an object. Right now we prune after adding the object, so the cache
    # can temporarily grow past the limits. The reason we don't do this now
    # is because it is expensive to find the size of the serialized object
    # before adding it.

    log_('prune')
    is_destroyed(throw = TRUE)

    current_time <- Sys.time()

    filenames <- dir(dir_, extension_regex_, full.names = TRUE)
    info <- file.info(filenames, extra_cols = FALSE)
    info <- info[info$isdir == FALSE, ]
    info$name <- rownames(info)
    rownames(info) <- NULL
    # Files could be removed between the dir() and file.info() calls. The
    # entire row for such files will have NA values. Remove those rows.
    info <- info[!is.na(info$size), ]

    # 1. Remove any files where the age exceeds max age.
    if (is.finite(max_age_)) {
      timediff <- as.numeric(current_time - info$mtime, units = "secs")
      rm_idx <- timediff > max_age_
      if (any(rm_idx)) {
        log_(paste0("prune max_age: Removing ", paste(info$name[rm_idx], collapse = ", ")))
        rm_success <- file.remove(info$name[rm_idx])
        # This maps rm_success back into the TRUEs in the rm_idx vector.
        # If (for example) rm_idx is c(F,T,F,T,T) and rm_success is c(T,F,T),
        # then this line modifies rm_idx to be c(F,T,F,F,T).
        rm_idx[rm_idx] <- rm_success
        info <- info[!rm_idx, ]
      }
    }

    # Sort objects by priority. The sorting is done in a function which can be
    # called multiple times but only does the work the first time.
    info_is_sorted <- FALSE
    ensure_info_is_sorted <- function() {
      if (info_is_sorted) return()

      # Newest mtime first, so the files to evict are at the end.
      info <<- info[order(info$mtime, decreasing = TRUE), ]
      info_is_sorted <<- TRUE
    }

    # 2. Remove files if there are too many.
    if (is.finite(max_n_) && nrow(info) > max_n_) {
      ensure_info_is_sorted()
      rm_idx <- seq_len(nrow(info)) > max_n_
      log_(paste0("prune max_n: Removing ", paste(info$name[rm_idx], collapse = ", ")))
      rm_success <- file.remove(info$name[rm_idx])
      rm_idx[rm_idx] <- rm_success
      info <- info[!rm_idx, ]
    }

    # 3. Remove files if cache is too large.
    if (is.finite(max_size_) && sum(info$size) > max_size_) {
      ensure_info_is_sorted()
      # Keep the newest files whose cumulative size fits within max_size_.
      cum_size <- cumsum(info$size)
      rm_idx <- cum_size > max_size_
      log_(paste0("prune max_size: Removing ", paste(info$name[rm_idx], collapse = ", ")))
      rm_success <- file.remove(info$name[rm_idx])
      rm_idx[rm_idx] <- rm_success
      info <- info[!rm_idx, ]
    }

    prune_last_time_ <<- as.numeric(current_time)

    invisible(TRUE)
  }

  # Return the number of files with the cache's extension in the directory.
  size <- function() {
    is_destroyed(throw = TRUE)
    length(dir(dir_, extension_regex_))
  }

  # Return a list describing the cache's configuration and pruning state.
  info <- function() {
    list(
      dir = dir_,
      max_size = max_size_,
      max_age = max_age_,
      max_n = max_n_,
      evict = evict_,
      destroy_on_finalize = destroy_on_finalize_,
      missing = missing_,
      prune_rate = prune_rate_,
      logfile = logfile_,
      prune_throttle_counter = prune_throttle_counter_,
      prune_last_time = as.POSIXct(prune_last_time_, origin = "1970-01-01")
    )
  }

  # Delete the cache's files and directory. Returns (invisibly) FALSE if the
  # cache was already destroyed, TRUE otherwise.
  destroy <- function() {
    if (is_destroyed()) {
      return(invisible(FALSE))
    }

    log_(paste0("destroy: Removing ", dir_))
    # First create a sentinel file so that other processes sharing this
    # cache know that the cache is to be destroyed. This is needed because
    # the recursive unlink is not atomic: another process can add a file to
    # the directory after unlink starts removing files but before it removes
    # the directory, and when that happens, the directory removal will fail.
    file.create(file.path(dir_, "__destroyed__"))
    # Remove all the extension files. This will not remove the sentinel file.
    file.remove(dir(dir_, extension_regex_, full.names = TRUE))
    # Next remove dir recursively, including sentinel file.
    unlink(dir_, recursive = TRUE)
    destroyed_ <<- TRUE
    invisible(TRUE)
  }

  # Check whether the cache has been destroyed. With `throw = TRUE`, raise an
  # error if it has; otherwise return the destroyed flag.
  is_destroyed <- function(throw = FALSE) {
    if (!dir.exists(dir_) ||
        file.exists(file.path(dir_, "__destroyed__")))
    {
      # It's possible for another process to destroy a shared cache directory
      destroyed_ <<- TRUE
    }

    if (throw) {
      if (destroyed_) {
        stop("Attempted to use cache which has been destroyed:\n  ", dir_)
      }

    } else {
      destroyed_
    }
  }

  # ============================================================================
  # Private methods
  # ============================================================================
  # Map a key to the full path of its file in the cache directory.
  key_to_filename_ <- function(key) {
    validate_key(key)
    # Additional validation. This 80-char limit is arbitrary, and is
    # intended to avoid hitting a filename length limit on Windows.
    # Note that the check rejects keys longer than 80 characters, so a key of
    # exactly 80 characters is accepted.
    if (nchar(key) > 80) {
      stop("Invalid key: key must have fewer than 80 characters.")
    }
    file.path(dir_, paste0(key, extension_))
  }

  # A wrapper for prune() that throttles it, because prune() can be expensive
  # due to filesystem operations. This function will prune only once every
  # `prune_rate` times it is called, or if it has been more than 5 seconds since
  # the last time the cache was actually pruned, whichever is first. In the
  # future, the behavior may be customizable.
  prune_throttled_ <- function() {
    # Count the number of times this function has been called since the
    # counter was last reset.
    prune_throttle_counter_ <<- prune_throttle_counter_ + 1

    if (prune_throttle_counter_ >= prune_rate_ ||
        as.numeric(Sys.time()) - prune_last_time_ > PRUNE_THROTTLE_TIME_LIMIT)
    {
      prune()
      prune_throttle_counter_ <<- 0
    }
  }

  # Prunes a single object if it exceeds max_age. If the object does not
  # exceed max_age, or if the object doesn't exist, do nothing.
  maybe_prune_single_ <- function(key) {
    filepath <- file.path(dir_, paste0(key, extension_))
    info <- file.info(filepath, extra_cols = FALSE)
    # file.info() gives NA for a file that does not exist.
    if (is.na(info$mtime)) return()

    timediff <- as.numeric(Sys.time()) - as.numeric(info$mtime)
    if (timediff > max_age_) {
      log_(paste0("pruning single object exceeding max_age: Removing ", key))
      unlink(filepath)
    }
  }


  # ============================================================================
  # Returned object
  # ============================================================================
  structure(
    list(
      get = get,
      set = set,
      exists = exists,
      keys = keys,
      remove = remove,
      reset = reset,
      prune = prune,
      size = size,
      destroy = destroy,
      is_destroyed = is_destroyed,
      info = info
    ),
    class = c("cache_disk", "cachem")
  )
}
595 |
--------------------------------------------------------------------------------
/R/cache-layered.R:
--------------------------------------------------------------------------------
#' Compose any number of cache objects into a new, layered cache object
#'
#' Note that `cache_layered` is currently experimental.
#'
#' The layers are consulted in the order in which they are supplied. `get()`
#' returns the value from the first layer that has the key, and copies that
#' value into every layer that was searched before it. If no layer has the
#' key, `get()` returns whatever the last layer returned for the miss; a
#' layered cache with no layers at all returns [key_missing()]. `set()`,
#' `remove()`, and `reset()` are applied to every layer and return `TRUE`
#' invisibly. `exists()` is `TRUE` if any layer has the key, and `keys()`
#' returns the union of the keys of all layers.
#'
#' @param ... Cache objects to compose into a new, layered cache object.
#' @inheritParams cache_disk
#'
#' @return A layered caching object, with class `cache_layered`.
#' @examples
#'
#' # Make a layered cache from a small memory cache and large disk cache
#' m <- cache_mem(max_size = 100 * 1024^2)
#' d <- cache_disk(max_size = 2 * 1024^3)
#' cl <- cache_layered(m, d)
#' @export
cache_layered <- function(..., logfile = NULL) {
  caches <- list(...)
  logfile_ <- logfile

  # ============================================================================
  # Logging
  # ============================================================================
  # This needs to be defined first, because it's used in initialization.
  log_ <- function(text) {
    if (is.null(logfile_)) return()

    text <- paste0(format(Sys.time(), "[%Y-%m-%d %H:%M:%OS3] cache_layered "), text)
    cat(text, sep = "\n", file = logfile_, append = TRUE)
  }

  get <- function(key) {
    log_(paste0("Get: ", key))
    # Start from the missing-key sentinel, so that a layered cache without any
    # layers reports a miss rather than returning NULL (which callers testing
    # with is.key_missing() would mistake for a cached NULL value).
    value <- key_missing()
    # Search down the caches for the object
    for (i in seq_along(caches)) {
      value <- caches[[i]]$get(key)

      if (!is.key_missing(value)) {
        log_(paste0("Get from ", class(caches[[i]])[1], "... hit"))
        # Set the value in any caches where we searched and missed.
        for (j in seq_len(i - 1)) {
          caches[[j]]$set(key, value)
        }
        break
      } else {
        log_(paste0("Get from ", class(caches[[i]])[1], "... miss"))
      }
    }

    value
  }

  set <- function(key, value) {
    for (cache in caches) {
      cache$set(key, value)
    }
    invisible(TRUE)
  }

  exists <- function(key) {
    for (cache in caches) {
      if (cache$exists(key)) {
        return(TRUE)
      }
    }
    FALSE
  }

  keys <- function() {
    all_keys <- lapply(caches, function(cache) {
      cache$keys()
    })
    # as.character() turns the NULL that unlist() gives for "no layers" into
    # an empty character vector, so the return type is always character.
    unique(as.character(unlist(all_keys)))
  }

  remove <- function(key) {
    for (cache in caches) {
      cache$remove(key)
    }
    invisible(TRUE)
  }

  reset <- function() {
    for (cache in caches) {
      cache$reset()
    }
    invisible(TRUE)
  }

  # Returns the list of underlying cache objects, in layer order.
  get_caches <- function() {
    caches
  }

  structure(
    list(
      get = get,
      set = set,
      exists = exists,
      keys = keys,
      remove = remove,
      reset = reset,
      get_caches = get_caches
    ),
    class = c("cache_layered", "cachem")
  )
}
103 |
104 |
--------------------------------------------------------------------------------
/R/cache-mem.R:
--------------------------------------------------------------------------------
1 | #' Create a memory cache object
2 | #'
3 | #' A memory cache object is a key-value store that saves the values in an
4 | #' environment. Objects can be stored and retrieved using the `get()` and
5 | #' `set()` methods. Objects are automatically pruned from the cache according to
6 | #' the parameters `max_size`, `max_age`, `max_n`, and `evict`.
7 | #'
8 | #' In a `cache_mem`, R objects are stored directly in the cache; they are
9 | #' *not* serialized before being stored in the cache. This contrasts with other
10 | #' cache types, like [cache_disk()], where objects are serialized, and the
11 | #' serialized object is cached. This can result in some differences of behavior.
12 | #' For example, as long as an object is stored in a cache_mem, it will not be
13 | #' garbage collected.
14 | #'
15 | #' @section Missing keys:
16 | #'
17 | #' The `missing` parameter controls what happens when `get()` is called with a
18 | #' key that is not in the cache (a cache miss). The default behavior is to
19 | #' return a [key_missing()] object. This is a *sentinel value* that indicates
20 | #' that the key was not present in the cache. You can test if the returned
21 | #' value represents a missing key by using the [is.key_missing()] function.
22 | #' You can also have `get()` return a different sentinel value, like `NULL`.
23 | #' If you want to throw an error on a cache miss, you can do so by providing
24 | #' an expression for `missing`, as in `missing = stop("Missing key")`.
25 | #'
26 | #' When the cache is created, you can supply a value for `missing`, which sets
27 | #' the default value to be returned for missing values. It can also be
28 | #' overridden when `get()` is called, by supplying a `missing` argument. For
29 | #' example, if you use `cache$get("mykey", missing = NULL)`, it will return
30 | #' `NULL` if the key is not in the cache.
31 | #'
32 | #' The `missing` parameter is actually an expression which is evaluated each
33 | #' time there is a cache miss. A quosure (from the rlang package) can be used.
34 | #'
35 | #' If you use this, the code that calls `get()` should be wrapped with
36 | #' [tryCatch()] to gracefully handle missing keys.
37 | #'
38 | #'
39 | #' @section Cache pruning:
40 | #'
41 | #' Cache pruning occurs when `set()` is called, or it can be invoked manually
42 | #' by calling `prune()`.
43 | #'
44 | #' When a pruning occurs, if there are any objects that are older than
45 | #' `max_age`, they will be removed.
46 | #'
47 | #' The `max_size` and `max_n` parameters are applied to the cache as a whole,
48 | #' in contrast to `max_age`, which is applied to each object individually.
49 | #'
50 | #' If the number of objects in the cache exceeds `max_n`, then objects will be
51 | #' removed from the cache according to the eviction policy, which is set with
52 | #' the `evict` parameter. Objects will be removed so that the number of items
53 | #' is `max_n`.
54 | #'
55 | #' If the size of the objects in the cache exceeds `max_size`, then objects
56 | #' will be removed from the cache. Objects will be removed from the cache so
57 | #' that the total size remains under `max_size`.
58 | #'
59 | #' Another time that objects can be removed from the cache is when `get()` is
60 | #' called. If the target object is older than `max_age`, it will be removed
61 | #' and the cache will report it as a missing value.
62 | #'
63 | #' @section Eviction policies:
64 | #'
65 | #' If `max_n` or `max_size` are used, then objects will be removed
66 | #' from the cache according to an eviction policy. The available eviction
67 | #' policies are:
68 | #'
69 | #' \describe{
70 | #' \item{`"lru"`}{
71 | #' Least Recently Used. The least recently used objects will be removed.
72 | #' }
73 | #' \item{`"fifo"`}{
74 | #' First-in-first-out. The oldest objects will be removed.
75 | #' }
76 | #' }
77 | #'
78 | #' @section Methods:
79 | #'
80 | #' A memory cache object has the following methods:
81 | #'
82 | #' \describe{
83 | #' \item{`get(key, missing)`}{
84 | #' Returns the value associated with `key`. If the key is not in the
85 | #' cache, then it evaluates the expression specified by `missing` and
86 | #' returns the value. If `missing` is specified here, then it will
87 | #' override the default that was set when the `cache_mem` object was
88 | #' created. See section Missing Keys for more information.
89 | #' }
90 | #' \item{`set(key, value)`}{
91 | #' Stores the `key`-`value` pair in the cache.
92 | #' }
93 | #' \item{`exists(key)`}{
94 | #' Returns `TRUE` if the cache contains the key, otherwise
95 | #' `FALSE`.
96 | #' }
97 | #' \item{`remove(key)`}{
98 | #' Removes `key` from the cache, if it exists in the cache. If the key is
99 | #' not in the cache, this does nothing.
100 | #' }
101 | #' \item{`size()`}{
102 | #' Returns the number of items currently in the cache.
103 | #' }
104 | #' \item{`keys()`}{
105 | #' Returns a character vector of all keys currently in the cache.
106 | #' }
107 | #' \item{`reset()`}{
108 | #' Clears all objects from the cache.
109 | #' }
110 | #' \item{`destroy()`}{
111 | #' Clears all objects in the cache, and removes the cache directory from
112 | #' disk.
113 | #' }
114 | #' \item{`prune()`}{
115 | #' Prunes the cache, using the parameters specified by `max_size`,
116 | #' `max_age`, `max_n`, and `evict`.
117 | #' }
118 | #' }
119 | #'
120 | #' @param max_size Maximum size of the cache, in bytes. If the cache exceeds
121 | #' this size, cached objects will be removed according to the value of
122 | #' `evict`. Use `Inf` for no size limit. The default is 512 megabytes.
123 | #' @inheritParams cache_disk
124 | #'
125 | #' @return A memory caching object, with class `cache_mem`.
126 | #' @importFrom utils object.size
127 | #' @export
128 | cache_mem <- function(
129 | max_size = 512 * 1024 ^ 2,
130 | max_age = Inf,
131 | max_n = Inf,
132 | evict = c("lru", "fifo"),
133 | missing = key_missing(),
134 | logfile = NULL)
135 | {
# ============================================================================
# Constants
# ============================================================================
# When TRUE, prune(), size(), and compact_() run internal consistency checks
# and stop if the bookkeeping does not add up.
DEBUG <- TRUE
# Length that reset() gives the storage vectors; compact_() never allocates
# fewer slots than this.
INITIAL_SIZE <- 64L
# Head-room factor used when compacting: the compacted storage is sized to
# hold COMPACT_MULT times the number of items present. For example, 75 items
# with a multiplier of 2 are compacted into a capacity of 150 items.
COMPACT_MULT <- 2
# Selects one of two strategies for keeping entries in eviction order (atime
# for "lru", mtime for "fifo"):
#   TRUE  - each get()/set() keeps the data in the correct time order, at the
#           cost of the metadata log growing by one entry per call (and
#           needing to be compacted occasionally).
#   FALSE - metadata entries are updated in place, so the log grows more
#           slowly, but the entries are not kept in time order; instead the
#           metadata is sorted by atime/mtime every time prune() runs (and
#           prune() is called by set()).
# The overall behavior is the same; only the performance characteristics
# differ. Sorting data of a reasonable size (up to around 1e5) is fast in R.
# For larger numbers of items it may be better to set this to TRUE.
MAINTAIN_TIME_SORT <- FALSE

# ============================================================================
# Initialization
# ============================================================================
if (!is.numeric(max_size)) {
  stop("max_size must be a number. Use `Inf` for no limit.")
}
if (!is.numeric(max_age)) {
  stop("max_age must be a number. Use `Inf` for no limit.")
}
if (!is.numeric(max_n)) {
  stop("max_n must be a number. Use `Inf` for no limit.")
}

max_size_ <- max_size
max_age_  <- max_age
max_n_    <- max_n
evict_    <- match.arg(evict)
# Captured unevaluated, so that the expression is evaluated on each miss.
missing_  <- enquo(missing)
logfile_  <- logfile

# A limit of `Inf` switches the corresponding kind of pruning off.
PRUNE_BY_SIZE <- is.finite(max_size_)
PRUNE_BY_AGE  <- is.finite(max_age_)
PRUNE_BY_N    <- is.finite(max_n_)

# ============================================================================
# Internal state
# ============================================================================
# Keys, values, and metadata are kept in columnar form: key_, value_, size_,
# mtime_, and atime_ are parallel vectors, one "row" per entry. Separate
# vectors are used rather than a data frame because modifying and growing
# vectors is much faster than the same operations on data frames.
#
# The layout is column-first because a row-first layout is much slower for
# the work that pruning needs, such as sorting by atime and taking a
# cumulative sum of sizes.
#
# For fast get() performance there is also key_idx_map_, which maps each key
# to its "row" index in those vectors.
#
# An older version of this code kept the value together with its metadata
# (size, mtime, atime) in a fastmap object, but pruning performed poorly:
# it had to fetch the metadata of every object, sort by atime (if
# evict="lru"), and then take a cumulative sum of sizes. Fetching all the
# metadata was slow, as was converting the row-first result to column-first
# data. The current column-first approach is much, much faster.
key_idx_map_ <- fastmap()

# These values are set in the reset() method.
key_ <- value_ <- size_ <- mtime_ <- atime_ <- NULL

total_n_    <- NULL  # Total number of items
total_size_ <- NULL  # Total number of bytes used
last_idx_   <- NULL  # Most recent (and largest) index used
212 |
213 | # ============================================================================
214 | # Public methods
215 | # ============================================================================
216 |
# Empties the cache: clears the key-to-index map, re-allocates every storage
# vector at INITIAL_SIZE blank slots, and zeroes the counters. Returns TRUE
# invisibly.
reset <- function() {
  log_("reset")
  key_idx_map_$reset()

  blank_times <- rep(NA_real_, INITIAL_SIZE)
  key_   <<- rep(NA_character_, INITIAL_SIZE)
  value_ <<- vector("list", INITIAL_SIZE)
  size_  <<- blank_times
  mtime_ <<- blank_times
  atime_ <<- blank_times

  last_idx_   <<- 0L
  total_size_ <<- 0
  total_n_    <<- 0L

  invisible(TRUE)
}
231 |
# Returns the value stored under `key`. On a cache miss -- the key is not
# present, or (when there is a max_age) its entry is too old, in which case
# the entry is removed first -- the `missing` expression is evaluated and its
# value is returned instead. `missing` defaults to the expression supplied
# when the cache was created.
get <- function(key, missing = missing_) {
  log_(paste0('get: key "', key, '"'))
  validate_key(key)

  idx <- key_idx_map_$get(key)
  hit <- !is.null(idx)

  if (!hit) {
    log_(paste0('get: key "', key, '" is missing'))

  } else if (PRUNE_BY_AGE && as.numeric(Sys.time()) - mtime_[idx] > max_age_) {
    # The entry exists but has outlived max_age: drop it and treat this as a
    # miss.
    log_(paste0("pruning single object exceeding max_age: Removing ", key))
    remove_(key)
    hit <- FALSE
  }

  if (!hit) {
    return(eval_tidy(as_quosure(missing)))
  }

  log_(paste0('get: key "', key, '" found'))

  # Fetch the value before touching the atime, because updating the atime can
  # move entries around when MAINTAIN_TIME_SORT is TRUE.
  value <- value_[[idx]]
  update_atime_(key)
  value
}
264 |
# Stores `value` under `key`, replacing any existing entry for that key, and
# then prunes the cache. `value` may be any R object, including NULL. Returns
# TRUE invisibly.
set <- function(key, value) {
  log_(paste0('set: key "', key, '"'))
  validate_key(key)

  time <- as.numeric(Sys.time())

  if (PRUNE_BY_SIZE) {
    # Reported size is rough! See ?object.size.
    size <- as.numeric(object.size(value))
    total_size_ <<- total_size_ + size
  } else {
    # Sizes are only tracked when there is a size limit.
    size <- NA_real_
  }

  old_idx <- key_idx_map_$get(key)

  # We'll set this to TRUE if we need to append to the data; FALSE if we can
  # modify the existing entry in place.
  append <- NULL

  if (!is.null(old_idx)) {
    # There's an existing entry with this key, so its old size no longer
    # counts toward the total.
    if (PRUNE_BY_SIZE) {
      total_size_ <<- total_size_ - size_[old_idx]
    }

    if (MAINTAIN_TIME_SORT && old_idx != last_idx_) {
      # To keep the rows in time order the entry has to move to the end, so
      # clear out its current row; a new one is appended below.
      append <- TRUE

      key_  [old_idx] <<- NA_character_
      value_[old_idx] <<- list(NULL)
      size_ [old_idx] <<- NA_real_
      mtime_[old_idx] <<- NA_real_
      atime_[old_idx] <<- NA_real_

    } else {
      append <- FALSE
    }

  } else {
    append <- TRUE
    total_n_ <<- total_n_ + 1L
  }

  if (append) {
    # If we're appending, update the last_idx_ and use it for storage. This
    # can assign past the end of the vector. As of R 3.4, this grows the
    # vector in place if possible, and is generally very fast, because vectors
    # are allocated with extra memory at the end. For older versions of R,
    # this can be very slow because a copy of the whole vector must be made
    # each time.
    last_idx_ <<- last_idx_ + 1L
    key_idx_map_$set(key, last_idx_)
    new_idx <- last_idx_

  } else {
    # Not appending; replace the old item in place.
    new_idx <- old_idx
  }

  key_  [new_idx] <<- key
  # Must use `x[i] <- list(value)` rather than `x[[i]] <- value`: when `value`
  # is NULL the latter deletes the list element (shifting every later value
  # onto the wrong row) instead of storing NULL in it.
  value_[new_idx] <<- list(value)
  size_ [new_idx] <<- size
  mtime_[new_idx] <<- time
  atime_[new_idx] <<- time

  prune()

  invisible(TRUE)
}
336 |
# Returns TRUE if `key` is in the cache, FALSE otherwise. When there is a
# max_age, an entry that has outlived it is removed and reported as absent.
exists <- function(key) {
  validate_key(key)

  # Without an age limit, presence in the key map is the whole answer.
  if (!PRUNE_BY_AGE) {
    return(key_idx_map_$has(key))
  }

  # With an age limit, the entry's mtime has to be checked as well. Looking up
  # the index directly (instead of has() followed by get()) is for
  # performance.
  idx <- key_idx_map_$get(key)
  if (is.null(idx)) {
    return(FALSE)
  }

  expired <- as.numeric(Sys.time()) - mtime_[idx] > max_age_
  if (expired) {
    log_(paste0("pruning single object exceeding max_age: Removing ", key))
    remove_(key)
    return(FALSE)
  }

  TRUE
}
361 |
# Returns the keys currently in the key map.
keys <- function() {
  # Without a max_age, the cache can only exceed max_n or max_size when set()
  # is called, and set() already prunes. With a max_age, entries can expire
  # simply because time has passed, so prune before reporting the keys. (This
  # could be made faster with an option for prune() to prune only by age and
  # not by n or size; that could also avoid sorting the metadata.)
  if (PRUNE_BY_AGE) prune()

  key_idx_map_$keys()
}
375 |
# Public removal method: logs the request, validates the key, and removes the
# key's entry via remove_(). Returns TRUE invisibly.
remove <- function(key) {
  message_text <- paste0('remove: key "', key, '"')
  log_(message_text)

  validate_key(key)
  remove_(key)

  invisible(TRUE)
}
382 |
# Enforces the cache limits, in this order: drop entries older than max_age,
# then drop entries beyond the max_n newest, then drop entries until the total
# size fits within max_size. "Newest" is by atime for evict="lru" and by mtime
# for evict="fifo", as ordered by get_metadata_(). Returns TRUE invisibly.
prune <- function() {
  log_(paste0('prune'))

  # Quick exit: no limit is exceeded, and there is no age limit that could
  # have expired entries since the last prune.
  size_exceeded <- PRUNE_BY_SIZE && total_size_ > max_size_
  n_exceeded    <- PRUNE_BY_N    && total_n_    > max_n_
  if (!size_exceeded && !n_exceeded && !PRUNE_BY_AGE) {
    return(invisible(TRUE))
  }

  info <- get_metadata_()

  if (DEBUG) {
    # Sanity checks: the running totals must agree with the stored metadata.
    if (PRUNE_BY_SIZE && sum(info$size) != total_size_) {
      stop("Size mismatch")
    }
    if (length(info$key) != total_n_) {
      stop("Count mismatch")
    }
  }

  # Drops the flagged rows from every column of the metadata list. (It is a
  # list of vectors rather than a data frame, for performance.)
  drop_rows <- function(meta, gone) {
    lapply(meta, function(column) column[!gone])
  }

  # 1. Remove any objects where the age exceeds max age.
  if (PRUNE_BY_AGE) {
    expired <- as.numeric(Sys.time()) - info$mtime > max_age_
    if (any(expired)) {
      log_(paste0("prune max_age: Removing ", paste(info$key[expired], collapse = ", ")))
      remove_(info$key[expired])
      info <- drop_rows(info, expired)
    }
  }

  # 2. Remove objects if there are too many: everything after the first
  #    max_n rows goes.
  if (PRUNE_BY_N && length(info$key) > max_n_) {
    surplus <- seq_along(info$key) > max_n_
    log_(paste0("prune max_n: Removing ", paste(info$key[surplus], collapse = ", ")))
    remove_(info$key[surplus])
    info <- drop_rows(info, surplus)
  }

  # 3. Remove objects if cache is too large: every row at which the running
  #    total of sizes has gone over max_size goes. This is the last step, so
  #    the metadata does not need trimming afterwards.
  if (PRUNE_BY_SIZE && sum(info$size) > max_size_) {
    overflow <- cumsum(info$size) > max_size_
    log_(paste0("prune max_size: Removing ", paste(info$key[overflow], collapse = ", ")))
    remove_(info$key[overflow])
  }

  invisible(TRUE)
}
448 |
# Returns the number of items currently in the cache.
size <- function() {
  # With a max_age, entries can expire simply because time has passed, so
  # prune first to get an up-to-date count (keys() does the same).
  if (PRUNE_BY_AGE) prune()

  # Sanity check: the running count must agree with the key map.
  if (DEBUG && key_idx_map_$size() != total_n_) {
    stop("n mismatch")
  }

  total_n_
}
459 |
# Returns the settings this cache was created with, as a named list with the
# elements max_size, max_age, max_n, evict, missing, and logfile.
info <- function() {
  setNames(
    list(max_size_, max_age_, max_n_, evict_, missing_, logfile_),
    c("max_size", "max_age", "max_n", "evict", "missing", "logfile")
  )
}
470 |
471 |
472 | # ============================================================================
473 | # Private methods
474 | # ============================================================================
475 |
# Records that `key` has just been accessed. Called by get(). Only the "lru"
# eviction policy uses access times, so for "fifo" this does nothing. It is an
# error to call this for a key that is not in the cache.
update_atime_ <- function(key) {
  if (evict_ != "lru") return()

  idx <- key_idx_map_$get(key)
  time <- as.numeric(Sys.time())

  if (is.null(idx)) {
    stop("Can't update atime because entry doesn't exist")
  }

  if (MAINTAIN_TIME_SORT) {
    if (idx == last_idx_) {
      # last_idx_ entry; simply update time
      atime_[idx] <<- time
    } else {
      # "Move" this entry to the end, so the rows stay in atime order.
      last_idx_ <<- last_idx_ + 1L
      # Add new entry to end. Fast on R 3.4 and above, slow on older versions.
      key_idx_map_$set(key, last_idx_)
      key_  [last_idx_] <<- key
      # Copy the value as a one-element list with `[<-`. Using `[[<-` here
      # would not store anything when the cached value is NULL, leaving
      # value_ out of step with the other columns.
      value_[last_idx_] <<- value_[idx]
      size_ [last_idx_] <<- size_ [idx]
      mtime_[last_idx_] <<- mtime_[idx]
      atime_[last_idx_] <<- time

      # Clear out old entry
      key_  [idx] <<- NA_character_
      value_[idx] <<- list(NULL)
      size_ [idx] <<- NA_real_
      mtime_[idx] <<- NA_real_
      atime_[idx] <<- NA_real_
    }

  } else {
    # Entries stay where they are; prune() sorts by atime when it needs to.
    atime_[idx] <<- time
  }

}
515 |
516 |
# Removes the entries for zero or more keys, then gives compact_() a chance
# to reclaim the freed slots. Keys that are not in the cache are ignored.
remove_ <- function(keys) {
  # A plain loop is used because the per-key results are not needed, and
  # remove_one_() returns NULL for a key that is not present, which a
  # type-checked vapply() would reject with an error.
  for (key in keys) {
    remove_one_(key)
  }

  compact_()
}
526 |
# Removes the entry for a single key: adjusts the running totals, blanks the
# entry's row in every storage vector, and drops the key from the key map.
# Does nothing (and returns NULL) if the key is not in the cache.
remove_one_ <- function(key) {
  row <- key_idx_map_$get(key)
  if (is.null(row)) return()

  # Overall n and size bookkeeping. The size has to be read before the row is
  # blanked below.
  if (PRUNE_BY_SIZE) {
    total_size_ <<- total_size_ - size_[row]
  }
  total_n_ <<- total_n_ - 1L

  # Blank the row; the slot itself stays in place until compact_() runs.
  key_  [row] <<- NA_character_
  value_[row] <<- list(NULL)
  size_ [row] <<- NA_real_
  mtime_[row] <<- NA_real_
  atime_[row] <<- NA_real_

  key_idx_map_$remove(key)
}
549 |
# Squeezes the gaps left by removed entries out of the storage vectors. The
# live entries are copied, in their existing order, to the front of freshly
# allocated vectors, and the key map is updated with their new indices.
# Nothing happens while the used region is no larger than INITIAL_SIZE, or no
# larger than COMPACT_MULT times the number of live items.
compact_ <- function() {
  still_small <- last_idx_ <= INITIAL_SIZE
  if (still_small || last_idx_ <= total_n_ * COMPACT_MULT) {
    return()
  }

  # Rows in the used region that still hold an entry.
  from_idxs <- which(!is.na(key_[seq_len(last_idx_)]))

  if (DEBUG) stopifnot(total_n_ == length(from_idxs))

  capacity <- max(INITIAL_SIZE, ceiling(total_n_ * COMPACT_MULT))
  to_idxs <- seq_len(total_n_)

  # Build each new column: allocate it blank, then copy the live rows to the
  # front.
  packed_key   <- rep_len(NA_character_, capacity)
  packed_value <- vector("list", capacity)
  packed_size  <- rep_len(NA_real_, capacity)
  packed_mtime <- rep_len(NA_real_, capacity)
  packed_atime <- rep_len(NA_real_, capacity)

  packed_key  [to_idxs] <- key_  [from_idxs]
  packed_value[to_idxs] <- value_[from_idxs]
  packed_size [to_idxs] <- size_ [from_idxs]
  packed_mtime[to_idxs] <- mtime_[from_idxs]
  packed_atime[to_idxs] <- atime_[from_idxs]

  # Swap the packed columns in.
  key_   <<- packed_key
  value_ <<- packed_value
  size_  <<- packed_size
  mtime_ <<- packed_mtime
  atime_ <<- packed_atime

  # Point every key in the key-index map at its new row.
  key_idx_map_$mset(.list = setNames(to_idxs, packed_key[to_idxs]))

  last_idx_ <<- total_n_
}
592 |
# Returns the metadata of all live entries (gaps removed) as a list of
# parallel vectors: key, size, mtime, and atime. The rows are ordered newest
# first:
#   If evict=="lru", by atime.
#   If evict=="fifo", by mtime.
get_metadata_ <- function() {
  # Rows in the used region that hold an entry.
  rows <- which(!is.na(mtime_[seq_len(last_idx_)]))

  # When the storage is not maintained in time order, sort the rows by the
  # timestamp that the eviction policy cares about.
  if (!MAINTAIN_TIME_SORT) {
    stamps <- if (evict_ == "lru") atime_ else mtime_
    rows <- rows[order(stamps[rows])]
  }

  # Oldest-first becomes newest-first.
  rows <- rev(rows)

  # A plain list has basically the same structure as a data frame, but avoids
  # data frame slowness.
  list(
    key   = key_  [rows],
    size  = size_ [rows],
    mtime = mtime_[rows],
    atime = atime_[rows]
  )
}
619 |
# Appends one timestamped line, tagged "cache_mem", to the log file. Does
# nothing when no logfile was given.
log_ <- function(text) {
  if (is.null(logfile_)) return()

  stamp <- format(Sys.time(), "[%Y-%m-%d %H:%M:%OS3] cache_mem ")
  cat(paste0(stamp, text), sep = "\n", file = logfile_, append = TRUE)
}
626 |
627 |
# Put the storage vectors and counters into their initial, empty state.
reset()

# ============================================================================
# Returned object
# ============================================================================
# The cache is handed back as a plain list of its public methods, tagged with
# the S3 classes "cache_mem" and "cachem".
cache_obj <- list(
  get = get,
  set = set,
  exists = exists,
  keys = keys,
  remove = remove,
  reset = reset,
  prune = prune,
  size = size,
  info = info
)
class(cache_obj) <- c("cache_mem", "cachem")
cache_obj
647 | }
648 |
--------------------------------------------------------------------------------
/R/cachem-package.R:
--------------------------------------------------------------------------------
1 | #' @docType package
2 | #' @useDynLib cachem, .registration = TRUE
3 | #' @import fastmap
4 | NULL
5 |
--------------------------------------------------------------------------------
/R/cachem.R:
--------------------------------------------------------------------------------
#' @export
format.cachem <- function(x, ...) {
  # First line: every class of the object, each wrapped in angle brackets.
  header <- paste0("<", class(x), ">", collapse = " ")
  # One line per element of the object, as rendered by format_methods().
  method_lines <- paste0(" ", format_methods(x), collapse = "\n")

  paste0(header, "\n", " Methods:\n", method_lines)
}
12 |
# Renders each element of `x` as one string: a function becomes
# "name(arguments)", with the arguments rendered by format_args(); anything
# else is shown as just its name.
format_methods <- function(x) {
  element_names <- names(x)

  describe <- function(i) {
    element <- x[[i]]
    if (!is.function(element)) {
      return(element_names[i])
    }
    paste0(element_names[i], "(", format_args(element), ")")
  }

  vapply(seq_along(x), describe, character(1))
}
26 |
# Renders the formal arguments of function `x` as a single comma-separated
# string. An argument without a default is shown as its name; one with a
# default is shown as "name = default".
format_args <- function(x) {
  fmls <- formals(x)
  arg_names <- names(fmls)
  # An argument with no default converts to the empty string.
  defaults <- as.character(fmls)

  pieces <- vapply(seq_along(arg_names), function(i) {
    if (defaults[i] == "") {
      arg_names[i]
    } else {
      paste0(arg_names[i], " = ", defaults[i])
    }
  }, character(1))

  paste(pieces, collapse = ", ")
}
39 |
#' @export
print.cachem <- function(x, ...) {
  cat(format(x, ...))
  # Print methods conventionally return their argument invisibly, so that the
  # object can still be assigned or piped after being printed.
  invisible(x)
}
44 |
--------------------------------------------------------------------------------
/R/reexports.R:
--------------------------------------------------------------------------------
1 | #' @importFrom fastmap key_missing
2 | #' @export
3 | fastmap::key_missing
4 |
5 | #' @importFrom fastmap is.key_missing
6 | #' @export
7 | fastmap::is.key_missing
8 |
9 |
10 |
--------------------------------------------------------------------------------
/R/utils.R:
--------------------------------------------------------------------------------
# The sixteen lowercase hexadecimal digits, one per element.
hex_digits <- strsplit("0123456789abcdef", "")[[1]]

# Returns a string of `digits` hexadecimal characters, each drawn at random
# (with replacement) from hex_digits.
random_hex <- function(digits = 16) {
  drawn <- sample(hex_digits, size = digits, replace = TRUE)
  paste0(drawn, collapse = "")
}
7 |
8 |
# Removes each directory in `path`, in order. Every directory must exist and
# be empty (hidden files count as contents); otherwise an error is raised at
# that directory and the remaining ones are not processed. An error is also
# raised if the removal itself fails.
dir_remove <- function(path) {
  for (i in seq_along(path)) {
    target <- path[[i]]

    if (!dir.exists(target)) {
      stop("Cannot remove non-existent directory ", target, ".")
    }

    contents <- dir(target, all.files = TRUE, no.. = TRUE)
    if (length(contents) != 0) {
      stop("Cannot remove non-empty directory ", target, ".")
    }

    # unlink() reports failure with a status of 1.
    if (unlink(target, recursive = TRUE) == 1) {
      stop("Error removing directory ", target, ".")
    }
  }
}
23 |
# Returns an absolute version of `path`, a single path that need not exist.
#
# normalizePath() is used where it does something. It can hand the input back
# unchanged, though, and that happens in two different situations:
#   * a relative path that does not exist, which still has to be anchored at
#     the working directory, and
#   * an absolute path that is already in normalized form, which must be
#     returned as is and not be prefixed with the working directory.
absolute_path <- function(path) {
  norm_path <- normalizePath(path, mustWork = FALSE)

  # Absolute means: starts with "/", with a drive letter followed by a slash
  # or backslash, or with a double backslash (UNC path).
  is_absolute <- grepl("^(/|[A-Za-z]:[/\\\\]|\\\\\\\\)", path)

  if (path == norm_path && !is_absolute) {
    file.path(getwd(), path)
  } else {
    norm_path
  }
}
32 |
# Validates a cache key by handing it to the compiled routine
# `C_validate_key` and returning whatever that routine returns.
# NOTE(review): what the routine does for an invalid key (presumably it
# raises an error) and what it returns for a valid one are not visible from
# this file -- confirm against the C source.
validate_key <- function(key) {
  # This C function does the same as `grepl("[^a-z0-9_-]")`, but faster.
  .Call(C_validate_key, key)
}
37 |
--------------------------------------------------------------------------------
/README.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | output:
3 | github_document:
4 | toc: true
5 | toc_depth: 3
6 | editor_options:
7 | chunk_output_type: console
8 | ---
9 |
10 |
11 |
12 | ```{r, include = FALSE}
13 | knitr::opts_chunk$set(
14 | collapse = TRUE,
15 | comment = "#>",
16 | fig.path = "man/figures/README-",
17 | out.width = "100%"
18 | )
19 | ```
20 |
21 | # cachem
22 |
23 |
24 | [](https://github.com/r-lib/cachem/actions)
25 |
26 |
27 | The **cachem** R package provides objects for creating and managing caches. These cache objects are key-value stores, but unlike other basic key-value stores, they have built-in support for memory and age limits so that they won't have unbounded growth.
28 |
29 | The cache objects in **cachem** differ from some other key-value stores in the following ways:
30 |
31 | * The cache objects provide automatic pruning so that they remain within memory limits.
32 | * Fetching a non-existing object returns a sentinel value. An alternative is to simply return `NULL`. This is what R lists and environments do, but it is ambiguous whether the value really is `NULL`, or if it is not present. Another alternative is to throw an exception when fetching a non-existent object. However, this results in more complicated code, as every `get()` needs to be wrapped in a `tryCatch()`.
33 |
34 | ## Installation
35 |
36 | To install the CRAN version:
37 |
38 | ```{r eval=FALSE}
39 | install.packages("cachem")
40 | ```
41 |
42 | You can install the development version from GitHub with:
43 |
44 | ```{r eval=FALSE}
45 | if (!require("remotes")) install.packages("remotes")
46 | remotes::install_github("r-lib/cachem")
47 | ```
48 |
49 | ## Usage
50 |
51 | To create a memory-based cache, call `cache_mem()`.
52 |
53 | ```{r}
54 | library(cachem)
55 | m <- cache_mem()
56 | ```
57 |
58 | Add arbitrary R objects to the cache using `$set(key, value)`:
59 |
60 | ```{r}
61 | m$set("abc123", c("Hello", "world"))
62 | m$set("xyz", function() message("Goodbye"))
63 | ```
64 |
65 | The `key` must be a string consisting of lowercase letters, numbers, and the underscore (`_`) and hyphen (`-`) characters. (Upper-case characters are not allowed because some storage backends do not distinguish between lowercase and uppercase letters.) The `value` can be any R object.
66 |
67 | Get the values with `$get()`:
68 | ``` r
69 | m$get("abc123")
70 | #> [1] "Hello" "world"
71 |
72 | m$get("xyz")
73 | #> function() message("Goodbye")
74 | ```
75 |
76 | If you call `get()` on a key that doesn't exist, it will return a `key_missing()` sentinel value:
77 |
78 | ```{r}
79 | m$get("dog")
80 | ```
81 |
82 | A common usage pattern is to call `get()`, and then check if the result is a `key_missing` object:
83 |
84 | ```{r eval=FALSE}
85 | value <- m$get(key)
86 |
87 | if (is.key_missing(value)) {
88 | # Cache miss - do something
89 | } else {
90 | # Cache hit - do another thing
91 | }
92 | ```
93 |
94 | The reason for doing this (instead of calling `$exists(key)` and then `$get(key)`) is that for some storage backends, there is a potential race condition: the object could be removed from the cache between the `exists()` and `get()` calls. For example:
95 |
96 | * If multiple R processes have `cache_disk`s that share the same directory, one process could remove an object from the cache in between the `exists()` and `get()` calls in another process, resulting in an error.
97 | * If you use a `cache_mem` with a `max_age`, it's possible for an object to be present when you call `exists()`, but for its age to exceed `max_age` by the time `get()` is called. In that case, the `get()` will return a `key_missing()` object.
98 |
99 | ```{r eval=FALSE}
100 | # Avoid this pattern, due to a potential race condition!
101 | if (m$exists(key)) {
102 | value <- m$get(key)
103 | }
104 | ```
105 |
106 |
107 | ## Cache types
108 |
109 | **cachem** comes with two kinds of cache objects: a memory cache, and a disk cache.
110 |
111 | ### `cache_mem()`
112 |
113 | The memory cache stores objects in memory, by simply keeping a reference to each object. To create a memory cache:
114 |
115 | ```{r}
116 | m <- cache_mem()
117 | ```
118 |
119 | The default size of the cache is 200MB, but this can be customized with `max_size`:
120 |
121 | ```{r}
122 | m <- cache_mem(max_size = 10 * 1024^2)
123 | ```
124 |
125 | It may also be useful to set a maximum age of objects. For example, if you only want objects to stay for a maximum of one hour:
126 |
127 | ```{r}
128 | m <- cache_mem(max_size = 10 * 1024^2, max_age = 3600)
129 | ```
130 |
131 | For more about how objects are evicted from the cache, see section [Pruning](#pruning) below.
132 |
133 | An advantage that the memory cache has over the disk cache (and any other type of cache that stores the objects outside of the R process's memory), is that it does not need to serialize objects. Instead, it merely stores references to the objects. This means that it can store objects that other caches cannot, and with more efficient use of memory -- if two objects in the cache share some of their contents (such that they refer to the same sub-object in memory), then `cache_mem` will not create duplicate copies of the contents, as `cache_disk` would, since it serializes the objects with the `serialize()` function.
134 |
135 | Compared to the memory usage, the size _calculation_ is not as intelligent: if there are two objects that share contents, their sizes are computed separately, even if they have items that share the exact same representation in memory. This is done with the `object.size()` function, which does not account for multiple references to the same object in memory.
136 |
137 | In short, a memory cache, if anything, over-counts the amount of memory actually consumed. In practice, this means that if you set a 200MB limit to the size of cache, and the cache _thinks_ it has 200MB of contents, the actual amount of memory consumed could be less than 200MB.
138 |
139 |
140 | Demonstration of memory over-counting from `object.size()`
141 |
142 | ```{r}
143 | # Create a and b which both contain the same numeric vector.
144 | x <- list(rnorm(1e5))
145 | a <- list(1, x)
146 | b <- list(2, x)
147 |
148 | # Add to cache
149 | m$set("a", a)
150 | m$set("b", b)
151 |
152 | # Each object is about 800kB in memory, so the cache_mem() will consider the
153 | # total memory used to be 1600kB.
154 | object.size(m$get("a"))
155 | object.size(m$get("b"))
156 | ```
157 |
158 | For reference, lobstr::obj_size can detect shared objects, and knows that these objects share most of their memory.
159 |
160 | ```{r}
161 | lobstr::obj_size(m$get("a"))
162 | lobstr::obj_size(list(m$get("a"), m$get("b")))
163 | ```
164 |
165 | However, lobstr is not on CRAN, and if obj_size() were used to find the incremental memory used when an object was added to the cache, it would have to walk all objects in the cache every time a single object is added. For these reasons, cache_mem uses `object.size()` to compute the object sizes.
166 |
167 |
168 |
169 | ### `cache_disk()`
170 |
171 | Disk caches are stored in a directory on disk. A disk cache is slower than a memory cache, but can generally be larger. To create one:
172 |
173 | ```{r}
174 | d <- cache_disk()
175 | ```
176 |
177 | By default, it creates a subdirectory of the R process's temp directory, and it will persist until the R process exits.
178 |
179 | ``` r
180 | d$info()$dir
181 | #> "/tmp/Rtmp6h5iB3/cache_disk-d1901b2b615a"
182 | ```
183 |
184 | Like a `cache_mem`, the `max_size`, `max_n`, `max_age` can be customized. See section [Pruning](#pruning) below for more information.
185 |
186 | Each object in the cache is stored as an RDS file on disk, using the `serialize()` function.
187 |
188 | ```{r}
189 | d$set("abc", 100)
190 | d$set("x01", list(1, 2, 3))
191 |
192 | dir(d$info()$dir)
193 | ```
194 |
195 | Since objects in a disk cache are serialized, they are subject to the limitations of the `serialize()` function. For more information, see section [Limitations of serialized objects](#limitations-of-serialized-objects).
196 |
197 | The storage directory can be specified with `dir`; it will be created if necessary.
198 |
199 | ``` r
200 | cache_disk(dir = "cachedir")
201 | ```
202 |
203 | #### Sharing a disk cache among processes
204 |
205 | Multiple R processes can use `cache_disk` objects that share the same cache directory. To do this, simply point each `cache_disk` to the same directory.
206 |
207 |
208 | #### `cache_disk` pruning
209 |
210 | For a `cache_disk`, pruning does not happen on every access, because finding the size of files in the cache directory can take a nontrivial amount of time. By default, pruning happens once every 20 times that `$set()` is called, or if at least five seconds have elapsed since the last pruning. The `prune_rate` controls how many times `$set()` must be called before a pruning occurs. It defaults to 20; smaller values result in more frequent pruning and larger values result in less frequent pruning (but keep in mind pruning always occurs if it has been at least five seconds since the last pruning).
211 |
212 |
213 | #### Cleaning up the cache directory
214 |
215 | The cache directory can be deleted by calling `$destroy()`. After it is destroyed, the cache object can no longer be used.
216 |
217 | ``` r
218 | d$destroy()
219 | d$set("a", 1) # Error
220 | ```
221 |
222 | To create a `cache_disk` that will automatically delete its storage directory when garbage collected, use `destroy_on_finalize=TRUE`:
223 |
224 | ``` r
225 | d <- cache_disk(destroy_on_finalize = TRUE)
226 | d$set("a", 1)
227 |
228 | cachedir <- d$info()$dir
229 | dir(cachedir)
230 | #> [1] "a.rds"
231 |
232 | # Remove reference to d and trigger a garbage collection
233 | rm(d)
234 | gc()
235 |
236 | dir.exists(cachedir)
237 | ```
238 |
239 | #### Using custom serialization functions
240 |
241 | It is possible to use custom serialization functions rather than the default of `writeRDS()` and `readRDS()` with the `write_fn`, `read_fn` and `extension` arguments respectively. This could be used to use alternative serialization formats like [qs](https://github.com/traversc/qs), or specialized object formats [fst](http://www.fstpackage.org/fst/) or parquet.
242 |
243 | ``` r
244 | library(qs)
245 |
246 | d <- cache_disk(read_fn = qs::qread, write_fn = qs::qsave, extension = ".qs")
247 |
248 | d$set("a", list(1, 2, 3))
249 |
250 | cachedir <- d$info()$dir
251 | dir(cachedir)
252 | #> [1] "a.qs"
253 | d$get("a")
254 | #> [[1]]
255 | #> [1] 1
256 | #>
257 | #> [[2]]
258 | #> [1] 2
259 | #>
260 | #> [[3]]
261 | #> [1] 3
262 | ```
263 |
264 | ## Cache API
265 |
266 | `cache_mem()` and `cache_disk()` support all of the methods listed below. If you want to create a compatible caching object, it must have at least the `get()` and `set()` methods:
267 |
268 | * `get(key, missing = missing_)`: Get the object associated with `key`. The `missing` parameter allows customized behavior if the key is not present: it actually is an expression which is evaluated when there is a cache miss, and it could return a value or throw an error.
269 | * `set(key, value)`: Set a key to a value.
270 | * `exists(key)`: Check whether a particular key exists in the cache.
271 | * `remove(key)`: Remove a key-value from the cache.
272 |
273 | Some optional methods:
274 |
275 | * `reset()`: Clear all objects from the cache.
276 | * `keys()`: Return a character vector of all keys in the cache.
277 | * `prune()`: Prune the cache. (Some types of caches may not prune on every access, and may temporarily grow past their limits, until the next pruning is triggered automatically, or manually with this function.)
278 | * `size()`: Return the number of objects in the cache.
280 |
281 | For these methods:
282 |
283 | * `key`: can be any string with lowercase letters, numbers, underscore (`_`) and hyphen (`-`). Some storage backends may not handle very long keys well. For example, with a `cache_disk()`, the key is used as a filename, and on some filesystems, very long filenames may hit limits on path lengths.
284 | * `value`: can be any R object, with some exceptions noted below.
285 |
286 |
287 | #### Limitations of serialized objects
288 |
289 | For any cache that serializes the object for storage outside of the R process -- in other words, any cache other than a `cache_mem()` -- some types of objects will not save and restore as well. Notably, reference objects may consume more memory when restored, since R may not know to deduplicate shared objects. External pointers cannot be serialized, since they point to memory in the R process. See `?serialize` for more information.
290 |
291 |
292 | #### Read-only caches
293 |
294 | It is possible to create a read-only cache by making the `set()`, `remove()`, `reset()`, and `prune()` methods into no-ops. This can be useful if sharing a cache with another R process which can write to the cache. For example, one (or more) processes can write to the cache, and other processes can read from it.
295 |
296 | This function will wrap a cache object in a read-only wrapper. Note, however, that code that uses such a cache must not require that `$set()` actually sets a value in the cache. This is good practice anyway, because with these cache objects, items can be pruned from them at any time.
297 |
298 | ```{r}
299 | cache_readonly_wrap <- function(cache) {
300 | structure(
301 | list(
302 | get = cache$get,
303 | set = function(key, value) NULL,
304 | exists = cache$exists,
305 | keys = cache$keys,
306 | remove = function(key) NULL,
307 | reset = function() NULL,
308 | prune = function() NULL,
309 | size = cache$size
310 | ),
311 | class = c("cache_readonly", class(cache))
312 | )
313 | }
314 |
315 | mr <- cache_readonly_wrap(m)
316 | ```
317 |
318 |
319 | ## Pruning
320 |
321 | The cache objects provided by cachem have automatic pruning. (Note that pruning is not required by the API, so one could implement an API-compatible cache without pruning.)
322 |
323 | This section describes how pruning works for `cache_mem()` and `cache_disk()`.
324 |
325 | When the cache object is created, the maximum size (in bytes) is specified by `max_size`. When the size of objects in the cache exceeds `max_size`, objects will be pruned from the cache.
326 |
327 | When objects are pruned from the cache, which ones are removed is determined by the eviction policy, `evict`:
328 |
329 | * **`lru`**: The least-recently-used objects will be removed from the cache, until it fits within the limit. This is the default and is appropriate for most cases.
330 | * **`fifo`**: The oldest objects will be removed first.
331 |
332 | It is also possible to set the maximum number of items that can be in the cache, with `max_n`. By default this is set to `Inf`, or no limit.
333 |
334 | The `max_age` parameter is somewhat different from `max_size` and `max_n`. The latter two set limits on the cache store as a whole, whereas `max_age` sets limits for each individual item; for each item, if its age exceeds `max_age`, then it will be removed from the cache.
335 |
336 |
337 | ## Layered caches
338 |
339 | Multiple caches can be composed into a single cache, using `cache_layered()`. This can be used to create a multi-level cache. (Note that `cache_layered()` is currently experimental.) For example, we can create a layered cache with a very fast 100MB memory cache and a larger but slower 2GB disk cache:
340 |
341 |
342 | ```{r}
343 | m <- cache_mem(max_size = 100 * 1024^2)
344 | d <- cache_disk(max_size = 2 * 1024^3)
345 |
346 | cl <- cache_layered(m, d)
347 | ```
348 |
349 | The layered cache will have the same API, with `$get()`, `$set()`, and so on, so it can be used interchangeably with other caching objects.
350 |
351 | For this example, we'll recreate the `cache_layered` with logging enabled, so that it will show cache hits and misses.
352 |
353 | ``` r
354 | cl <- cache_layered(m, d, logfile = stderr())
355 |
356 | # Each of the objects generated by rnorm() is about 40 MB
357 | cl$set("a", rnorm(5e6))
358 | cl$set("b", rnorm(5e6))
359 | cl$set("c", rnorm(5e6))
360 |
361 | # View the objects in each of the component caches
362 | m$keys()
363 | #> [1] "c" "b"
364 | d$keys()
365 | #> [1] "a" "b" "c"
366 |
367 | # The layered cache reports having all keys
368 | cl$keys()
369 | #> [1] "c" "b" "a"
370 | ```
371 |
372 | When `$get()` is called, it searches the first cache, and if it's missing there, it searches the next cache, and so on. If not found in any caches, it returns `key_missing()`.
373 |
374 | ``` r
375 | # Get object that exists in the memory cache
376 | x <- cl$get("c")
377 | #> [2020-10-23 13:11:09.985] cache_layered Get: c
378 | #> [2020-10-23 13:11:09.985] cache_layered Get from cache_mem... hit
379 |
380 | # Get object that doesn't exist in the memory cache
381 | x <- cl$get("a")
382 | #> [2020-10-23 13:13:10.968] cache_layered Get: a
383 | #> [2020-10-23 13:13:10.969] cache_layered Get from cache_mem... miss
384 | #> [2020-10-23 13:13:11.329] cache_layered Get from cache_disk... hit
385 |
386 | # Object is not present in any component caches
387 | cl$get("d")
388 | #> [2020-10-23 13:13:40.197] cache_layered Get: d
389 | #> [2020-10-23 13:13:40.197] cache_layered Get from cache_mem... miss
390 | #> [2020-10-23 13:13:40.198] cache_layered Get from cache_disk... miss
391 | #>
392 | ```
393 |
394 | Multiple cache objects can be layered this way. You could even add a cache which uses a remote store, such as a network file system or even AWS S3.
395 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 | - [cachem](#cachem)
3 | - [Installation](#installation)
4 | - [Usage](#usage)
5 | - [Cache types](#cache-types)
6 | - [`cache_mem()`](#cache_mem)
7 | - [`cache_disk()`](#cache_disk)
8 | - [Cache API](#cache-api)
9 | - [Pruning](#pruning)
10 | - [Layered caches](#layered-caches)
11 |
12 |
13 |
14 | # cachem
15 |
16 |
17 |
18 | [](https://github.com/r-lib/cachem/actions)
20 |
21 |
22 | The **cachem** R package provides objects for creating and managing caches.
23 | These cache objects are key-value stores, but unlike other basic
24 | key-value stores, they have built-in support for memory and age limits
25 | so that they won’t have unbounded growth.
26 |
27 | The cache objects in **cachem** differ from some other key-value stores
28 | in the following ways:
29 |
30 | - The cache objects provide automatic pruning so that they remain within
31 | memory limits.
32 | - Fetching a non-existing object returns a sentinel value. An
33 | alternative is to simply return `NULL`. This is what R lists and
34 | environments do, but it is ambiguous whether the value really is
35 | `NULL`, or if it is not present. Another alternative is to throw an
36 | exception when fetching a non-existent object. However, this results
37 | in more complicated code, as every `get()` needs to be wrapped in a
38 | `tryCatch()`.
39 |
40 | ## Installation
41 |
42 | To install the CRAN version:
43 |
44 | ``` r
45 | install.packages("cachem")
46 | ```
47 |
48 | You can install the development version from GitHub with:
49 |
50 | ``` r
51 | if (!require("remotes")) install.packages("remotes")
52 | remotes::install_github("r-lib/cachem")
53 | ```
54 |
55 | ## Usage
56 |
57 | To create a memory-based cache, call `cache_mem()`.
58 |
59 | ``` r
60 | library(cachem)
61 | m <- cache_mem()
62 | ```
63 |
64 | Add arbitrary R objects to the cache using `$set(key, value)`:
65 |
66 | ``` r
67 | m$set("abc123", c("Hello", "world"))
68 | m$set("xyz", function() message("Goodbye"))
69 | ```
70 |
71 | The `key` must be a string consisting of lowercase letters, numbers, and
72 | the underscore (`_`) and hyphen (`-`) characters. (Upper-case characters
73 | are not allowed because some storage backends do not distinguish between
74 | lowercase and uppercase letters.) The `value` can be any R object.
75 |
76 | Get the values with `$get()`:
77 |
78 | ``` r
79 | m$get("abc123")
80 | #> [1] "Hello" "world"
81 |
82 | m$get("xyz")
83 | #> function() message("Goodbye")
84 | ```
85 |
86 | If you call `get()` on a key that doesn’t exist, it will return a
87 | `key_missing()` sentinel value:
88 |
89 | ``` r
90 | m$get("dog")
91 | #>
92 | ```
93 |
94 | A common usage pattern is to call `get()`, and then check if the result
95 | is a `key_missing` object:
96 |
97 | ``` r
98 | value <- m$get(key)
99 |
100 | if (is.key_missing(value)) {
101 | # Cache miss - do something
102 | } else {
103 | # Cache hit - do another thing
104 | }
105 | ```
106 |
107 | The reason for doing this (instead of calling `$exists(key)` and then
108 | `$get(key)`) is that for some storage backends, there is a potential
109 | race condition: the object could be removed from the cache between the
110 | `exists()` and `get()` calls. For example:
111 |
112 | - If multiple R processes have `cache_disk`s that share the same
113 | directory, one process could remove an object from the cache in
114 | between the `exists()` and `get()` calls in another process, resulting
115 | in an error.
116 | - If you use a `cache_mem` with a `max_age`, it’s possible for an object
117 | to be present when you call `exists()`, but for its age to exceed
118 | `max_age` by the time `get()` is called. In that case, the `get()`
119 | will return a `key_missing()` object.
120 |
121 | ``` r
122 | # Avoid this pattern, due to a potential race condition!
123 | if (m$exists(key)) {
124 | value <- m$get(key)
125 | }
126 | ```
127 |
128 | ## Cache types
129 |
130 | **cachem** comes with two kinds of cache objects: a memory cache, and a
131 | disk cache.
132 |
133 | ### `cache_mem()`
134 |
135 | The memory cache stores objects in memory, by simply keeping a
136 | reference to each object. To create a memory cache:
137 |
138 | ``` r
139 | m <- cache_mem()
140 | ```
141 |
142 | The default size of the cache is 200MB, but this can be customized with
143 | `max_size`:
144 |
145 | ``` r
146 | m <- cache_mem(max_size = 10 * 1024^2)
147 | ```
148 |
149 | It may also be useful to set a maximum age of objects. For example, if
150 | you only want objects to stay for a maximum of one hour:
151 |
152 | ``` r
153 | m <- cache_mem(max_size = 10 * 1024^2, max_age = 3600)
154 | ```
155 |
156 | For more about how objects are evicted from the cache, see section
157 | [Pruning](#pruning) below.
158 |
159 | An advantage that the memory cache has over the disk cache (and any
160 | other type of cache that stores the objects outside of the R process’s
161 | memory), is that it does not need to serialize objects. Instead, it
162 | merely stores references to the objects. This means that it can store
163 | objects that other caches cannot, and with more efficient use of memory
164 | – if two objects in the cache share some of their contents (such that
165 | they refer to the same sub-object in memory), then `cache_mem` will not
166 | create duplicate copies of the contents, as `cache_disk` would, since it
167 | serializes the objects with the `serialize()` function.
168 |
169 | Compared to the memory usage, the size *calculation* is not as
170 | intelligent: if there are two objects that share contents, their sizes
171 | are computed separately, even if they have items that share the exact
172 | same representation in memory. This is done with the `object.size()`
173 | function, which does not account for multiple references to the same
174 | object in memory.
175 |
176 | In short, a memory cache, if anything, over-counts the amount of memory
177 | actually consumed. In practice, this means that if you set a 200MB limit
178 | to the size of cache, and the cache *thinks* it has 200MB of contents,
179 | the actual amount of memory consumed could be less than 200MB.
180 |
181 |
182 |
183 | Demonstration of memory over-counting from `object.size()`
184 |
185 |
186 | ``` r
187 | # Create a and b which both contain the same numeric vector.
188 | x <- list(rnorm(1e5))
189 | a <- list(1, x)
190 | b <- list(2, x)
191 |
192 | # Add to cache
193 | m$set("a", a)
194 | m$set("b", b)
195 |
196 | # Each object is about 800kB in memory, so the cache_mem() will consider the
197 | # total memory used to be 1600kB.
198 | object.size(m$get("a"))
199 | #> 800224 bytes
200 | object.size(m$get("b"))
201 | #> 800224 bytes
202 | ```
203 |
204 | For reference, lobstr::obj_size can detect shared objects, and knows
205 | that these objects share most of their memory.
206 |
207 | ``` r
208 | lobstr::obj_size(m$get("a"))
209 | #> 800.22 kB
210 | lobstr::obj_size(list(m$get("a"), m$get("b")))
211 | #> 800.41 kB
212 | ```
213 |
214 | However, lobstr is not on CRAN, and if obj_size() were used to find the
215 | incremental memory used when an object was added to the cache, it would
216 | have to walk all objects in the cache every time a single object is
217 | added. For these reasons, cache_mem uses `object.size()` to compute the
218 | object sizes.
219 |
220 |
221 |
222 | ### `cache_disk()`
223 |
224 | Disk caches are stored in a directory on disk. A disk cache is slower
225 | than a memory cache, but can generally be larger. To create one:
226 |
227 | ``` r
228 | d <- cache_disk()
229 | ```
230 |
231 | By default, it creates a subdirectory of the R process’s temp directory,
232 | and it will persist until the R process exits.
233 |
234 | ``` r
235 | d$info()$dir
236 | #> "/tmp/Rtmp6h5iB3/cache_disk-d1901b2b615a"
237 | ```
238 |
239 | Like a `cache_mem`, the `max_size`, `max_n`, `max_age` can be
240 | customized. See section [Pruning](#pruning) below for more information.
241 |
242 | Each object in the cache is stored as an RDS file on disk, using the
243 | `serialize()` function.
244 |
245 | ``` r
246 | d$set("abc", 100)
247 | d$set("x01", list(1, 2, 3))
248 |
249 | dir(d$info()$dir)
250 | #> [1] "abc.rds" "x01.rds"
251 | ```
252 |
253 | Since objects in a disk cache are serialized, they are subject to the
254 | limitations of the `serialize()` function. For more information, see
255 | section [Limitations of serialized
256 | objects](#limitations-of-serialized-objects).
257 |
258 | The storage directory can be specified with `dir`; it will be created if
259 | necessary.
260 |
261 | ``` r
262 | cache_disk(dir = "cachedir")
263 | ```
264 |
265 | #### Sharing a disk cache among processes
266 |
267 | Multiple R processes can use `cache_disk` objects that share the same
268 | cache directory. To do this, simply point each `cache_disk` to the same
269 | directory.
270 |
271 | #### `cache_disk` pruning
272 |
273 | For a `cache_disk`, pruning does not happen on every access, because
274 | finding the size of files in the cache directory can take a nontrivial
275 | amount of time. By default, pruning happens once every 20 times that
276 | `$set()` is called, or if at least five seconds have elapsed since the
277 | last pruning. The `prune_rate` controls how many times `$set()` must be
278 | called before a pruning occurs. It defaults to 20; smaller values result
279 | in more frequent pruning and larger values result in less frequent
280 | pruning (but keep in mind pruning always occurs if it has been at least
281 | five seconds since the last pruning).
282 |
283 | #### Cleaning up the cache directory
284 |
285 | The cache directory can be deleted by calling `$destroy()`. After it is
286 | destroyed, the cache object can no longer be used.
287 |
288 | ``` r
289 | d$destroy()
290 | d$set("a", 1) # Error
291 | ```
292 |
293 | To create a `cache_disk` that will automatically delete its storage
294 | directory when garbage collected, use `destroy_on_finalize=TRUE`:
295 |
296 | ``` r
297 | d <- cache_disk(destroy_on_finalize = TRUE)
298 | d$set("a", 1)
299 |
300 | cachedir <- d$info()$dir
301 | dir(cachedir)
302 | #> [1] "a.rds"
303 |
304 | # Remove reference to d and trigger a garbage collection
305 | rm(d)
306 | gc()
307 |
308 | dir.exists(cachedir)
309 | ```
310 |
311 | #### Using custom serialization functions
312 |
313 | It is possible to use custom serialization functions rather than the
314 | default of `writeRDS()` and `readRDS()` with the `write_fn`, `read_fn`
315 | and `extension` arguments respectively. This could be used to use
316 | alternative serialization formats like
317 | [qs](https://github.com/traversc/qs), or specialized object formats
318 | [fst](http://www.fstpackage.org/fst/) or parquet.
319 |
320 | ``` r
321 | library(qs)
322 |
323 | d <- cache_disk(read_fn = qs::qread, write_fn = qs::qsave, extension = ".qs")
324 |
325 | d$set("a", list(1, 2, 3))
326 |
327 | cachedir <- d$info()$dir
328 | dir(cachedir)
329 | #> [1] "a.qs"
330 | d$get("a")
331 | #> [[1]]
332 | #> [1] 1
333 | #>
334 | #> [[2]]
335 | #> [1] 2
336 | #>
337 | #> [[3]]
338 | #> [1] 3
339 | ```
340 |
341 | ## Cache API
342 |
343 | `cache_mem()` and `cache_disk()` support all of the methods listed
344 | below. If you want to create a compatible caching object, it must have
345 | at least the `get()` and `set()` methods:
346 |
347 | - `get(key, missing = missing_)`: Get the object associated with `key`.
348 | The `missing` parameter allows customized behavior if the key is not
349 | present: it actually is an expression which is evaluated when there is
350 | a cache miss, and it could return a value or throw an error.
351 | - `set(key, value)`: Set a key to a value.
352 | - `exists(key)`: Check whether a particular key exists in the cache.
353 | - `remove(key)`: Remove a key-value from the cache.
354 |
355 | Some optional methods:
356 |
357 | - `reset()`: Clear all objects from the cache.
358 | - `keys()`: Return a character vector of all keys in the cache.
359 | - `prune()`: Prune the cache. (Some types of caches may not prune on
360 | every access, and may temporarily grow past their limits, until the
361 | next pruning is triggered automatically, or manually with this
362 | function.)
363 | - `size()`: Return the number of objects in the cache.
364 |
365 | For these methods:
366 |
367 | - `key`: can be any string with lowercase letters, numbers, underscore
368 | (`_`) and hyphen (`-`). Some storage backends may not handle very
369 | long keys well. For example, with a `cache_disk()`, the key is used as
370 | a filename, and on some filesystems, very long filenames may hit limits on
371 | path lengths.
372 | - `value`: can be any R object, with some exceptions noted below.
373 |
374 | #### Limitations of serialized objects
375 |
376 | For any cache that serializes the object for storage outside of the R
377 | process – in other words, any cache other than a `cache_mem()` – some
378 | types of objects will not save and restore as well. Notably, reference
379 | objects may consume more memory when restored, since R may not know to
380 | deduplicate shared objects. External pointers cannot be
381 | serialized, since they point to memory in the R process. See
382 | `?serialize` for more information.
383 |
384 | #### Read-only caches
385 |
386 | It is possible to create a read-only cache by making the `set()`,
387 | `remove()`, `reset()`, and `prune()` methods into no-ops. This can be
388 | useful if sharing a cache with another R process which can write to the
389 | cache. For example, one (or more) processes can write to the cache, and
390 | other processes can read from it.
391 |
392 | This function will wrap a cache object in a read-only wrapper. Note,
393 | however, that code that uses such a cache must not require that `$set()`
394 | actually sets a value in the cache. This is good practice anyway,
395 | because with these cache objects, items can be pruned from them at any
396 | time.
397 |
398 | ``` r
399 | cache_readonly_wrap <- function(cache) {
400 | structure(
401 | list(
402 | get = cache$get,
403 | set = function(key, value) NULL,
404 | exists = cache$exists,
405 | keys = cache$keys,
406 | remove = function(key) NULL,
407 | reset = function() NULL,
408 | prune = function() NULL,
409 | size = cache$size
410 | ),
411 | class = c("cache_readonly", class(cache))
412 | )
413 | }
414 |
415 | mr <- cache_readonly_wrap(m)
416 | ```
417 |
418 | ## Pruning
419 |
420 | The cache objects provided by cachem have automatic pruning. (Note that
421 | pruning is not required by the API, so one could implement an
422 | API-compatible cache without pruning.)
423 |
424 | This section describes how pruning works for `cache_mem()` and
425 | `cache_disk()`.
426 |
427 | When the cache object is created, the maximum size (in bytes) is
428 | specified by `max_size`. When the size of objects in the cache exceeds
429 | `max_size`, objects will be pruned from the cache.
430 |
431 | When objects are pruned from the cache, which ones are removed is
432 | determined by the eviction policy, `evict`:
433 |
434 | - **`lru`**: The least-recently-used objects will be removed from the
435 | cache, until it fits within the limit. This is the default and is
436 | appropriate for most cases.
437 | - **`fifo`**: The oldest objects will be removed first.
438 |
439 | It is also possible to set the maximum number of items that can be in
440 | the cache, with `max_n`. By default this is set to `Inf`, or no limit.
441 |
442 | The `max_age` parameter is somewhat different from `max_size` and
443 | `max_n`. The latter two set limits on the cache store as a whole,
444 | whereas `max_age` sets limits for each individual item; for each item,
445 | if its age exceeds `max_age`, then it will be removed from the cache.
446 |
447 | ## Layered caches
448 |
449 | Multiple caches can be composed into a single cache, using
450 | `cache_layered()`. This can be used to create a multi-level cache. (Note
451 | that `cache_layered()` is currently experimental.) For example, we can
452 | create a layered cache with a very fast 100MB memory cache and a larger
453 | but slower 2GB disk cache:
454 |
455 | ``` r
456 | m <- cache_mem(max_size = 100 * 1024^2)
457 | d <- cache_disk(max_size = 2 * 1024^3)
458 |
459 | cl <- cache_layered(m, d)
460 | ```
461 |
462 | The layered cache will have the same API, with `$get()`, `$set()`, and
463 | so on, so it can be used interchangeably with other caching objects.
464 |
465 | For this example, we’ll recreate the `cache_layered` with logging
466 | enabled, so that it will show cache hits and misses.
467 |
468 | ``` r
469 | cl <- cache_layered(m, d, logfile = stderr())
470 |
471 | # Each of the objects generated by rnorm() is about 40 MB
472 | cl$set("a", rnorm(5e6))
473 | cl$set("b", rnorm(5e6))
474 | cl$set("c", rnorm(5e6))
475 |
476 | # View the objects in each of the component caches
477 | m$keys()
478 | #> [1] "c" "b"
479 | d$keys()
480 | #> [1] "a" "b" "c"
481 |
482 | # The layered cache reports having all keys
483 | cl$keys()
484 | #> [1] "c" "b" "a"
485 | ```
486 |
487 | When `$get()` is called, it searches the first cache, and if it’s
488 | missing there, it searches the next cache, and so on. If not found in
489 | any caches, it returns `key_missing()`.
490 |
491 | ``` r
492 | # Get object that exists in the memory cache
493 | x <- cl$get("c")
494 | #> [2020-10-23 13:11:09.985] cache_layered Get: c
495 | #> [2020-10-23 13:11:09.985] cache_layered Get from cache_mem... hit
496 |
497 | # Get object that doesn't exist in the memory cache
498 | x <- cl$get("a")
499 | #> [2020-10-23 13:13:10.968] cache_layered Get: a
500 | #> [2020-10-23 13:13:10.969] cache_layered Get from cache_mem... miss
501 | #> [2020-10-23 13:13:11.329] cache_layered Get from cache_disk... hit
502 |
503 | # Object is not present in any component caches
504 | cl$get("d")
505 | #> [2020-10-23 13:13:40.197] cache_layered Get: d
506 | #> [2020-10-23 13:13:40.197] cache_layered Get from cache_mem... miss
507 | #> [2020-10-23 13:13:40.198] cache_layered Get from cache_disk... miss
508 | #> <Key Missing>
509 | ```
510 |
511 | Multiple cache objects can be layered this way. You could even add a
512 | cache which uses a remote store, such as a network file system or even
513 | AWS S3.
514 |
--------------------------------------------------------------------------------
/_pkgdown.yml:
--------------------------------------------------------------------------------
1 | url: https://cachem.r-lib.org
2 |
3 | template:
4 | bootstrap: 5
5 |
--------------------------------------------------------------------------------
/cachem.Rproj:
--------------------------------------------------------------------------------
1 | Version: 1.0
2 |
3 | RestoreWorkspace: No
4 | SaveWorkspace: No
5 | AlwaysSaveHistory: Default
6 |
7 | EnableCodeIndexing: Yes
8 | UseSpacesForTab: Yes
9 | NumSpacesForTab: 2
10 | Encoding: UTF-8
11 |
12 | RnwWeave: Sweave
13 | LaTeX: pdfLaTeX
14 |
15 | AutoAppendNewline: Yes
16 | StripTrailingWhitespace: Yes
17 |
18 | BuildType: Package
19 | PackageUseDevtools: Yes
20 | PackageInstallArgs: --no-multiarch --with-keep.source
21 | PackageRoxygenize: rd,collate,namespace
22 |
--------------------------------------------------------------------------------
/cran-comments.md:
--------------------------------------------------------------------------------
1 | ## Comments
2 |
3 | #### 2021-8-19
4 |
5 | Bug fixes.
6 |
7 | Thank you,
8 | Winston
9 |
10 |
11 | ## Test environments
12 |
13 | * GitHub Actions - https://github.com/r-lib/cachem/pull/16/checks
14 | * macOS
15 | * devel, release
16 | * windows
17 | * release, 3.6
18 | * ubuntu20
19 | * devel, release, oldrel/1, oldrel/2, oldrel/3, oldrel/4
20 | * devtools::
21 | * check_win_devel()
22 | * check_win_release()
23 | * check_win_oldrelease()
24 |
25 | 0 errors ✔ | 0 warnings ✔ | 0 notes ✔
26 |
27 |
28 | ## revdepcheck results
29 |
30 | We checked 6 reverse dependencies, comparing R CMD check results across CRAN and dev versions of this package.
31 |
32 | * We saw 0 new problems
33 | * We failed to check 0 packages
34 |
--------------------------------------------------------------------------------
/man/cache_disk.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/cache-disk.R
3 | \name{cache_disk}
4 | \alias{cache_disk}
5 | \title{Create a disk cache object}
6 | \usage{
7 | cache_disk(
8 | dir = NULL,
9 | max_size = 1024 * 1024^2,
10 | max_age = Inf,
11 | max_n = Inf,
12 | evict = c("lru", "fifo"),
13 | destroy_on_finalize = FALSE,
14 | read_fn = NULL,
15 | write_fn = NULL,
16 | extension = ".rds",
17 | missing = key_missing(),
18 | prune_rate = 20,
19 | warn_ref_objects = FALSE,
20 | logfile = NULL
21 | )
22 | }
23 | \arguments{
24 | \item{dir}{Directory to store files for the cache. If \code{NULL} (the default) it
25 | will create and use a temporary directory.}
26 |
27 | \item{max_size}{Maximum size of the cache, in bytes. If the cache exceeds
28 | this size, cached objects will be removed according to the value of the
29 | \code{evict}. Use \code{Inf} for no size limit. The default is 1 gigabyte.}
30 |
31 | \item{max_age}{Maximum age of files in cache before they are evicted, in
32 | seconds. Use \code{Inf} for no age limit.}
33 |
34 | \item{max_n}{Maximum number of objects in the cache. If the number of objects
35 | exceeds this value, then cached objects will be removed according to the
36 | value of \code{evict}. Use \code{Inf} for no limit of number of items.}
37 |
38 | \item{evict}{The eviction policy to use to decide which objects are removed
39 | when a cache pruning occurs. Currently, \code{"lru"} and \code{"fifo"} are supported.}
40 |
41 | \item{destroy_on_finalize}{If \code{TRUE}, then when the cache_disk object is
42 | garbage collected, the cache directory and all objects inside of it will be
43 | deleted from disk. If \code{FALSE} (the default), it will do nothing when
44 | finalized.}
45 |
46 | \item{read_fn}{The function used to read the values from disk. If \code{NULL}
47 | (the default) it will use \code{readRDS}.}
48 |
49 | \item{write_fn}{The function used to write the values to disk. If \code{NULL}
50 | (the default) it will use \code{saveRDS}.}
51 |
52 | \item{extension}{The file extension to use for files on disk.}
53 |
54 | \item{missing}{A value to return when \code{get(key)} is called but the key is not
55 | present in the cache. The default is a \code{\link[=key_missing]{key_missing()}} object. It is
56 | actually an expression that is evaluated each time there is a cache miss.
57 | See section Missing keys for more information.}
58 |
59 | \item{prune_rate}{How often to prune the cache. See section Cache Pruning for
60 | more information.}
61 |
62 | \item{warn_ref_objects}{Should a warning be emitted when a reference is
63 | stored in the cache? This can be useful because serializing and
64 | deserializing a reference object (such as environments and external
65 | pointers) can lead to unexpected behavior.}
66 |
67 | \item{logfile}{An optional filename or connection object to where logging
68 | information will be written. To log to the console, use \code{stderr()} or
69 | \code{stdout()}.}
70 | }
71 | \value{
72 | A disk caching object, with class \code{cache_disk}.
73 | }
74 | \description{
75 | A disk cache object is a key-value store that saves the values as files in a
76 | directory on disk. Objects can be stored and retrieved using the \code{get()} and
77 | \code{set()} methods. Objects are automatically pruned from the cache according to
78 | the parameters \code{max_size}, \code{max_age}, \code{max_n}, and \code{evict}.
79 | }
80 | \section{Missing keys}{
81 |
82 |
83 | The \code{missing} parameter controls what happens when \code{get()} is called with a
84 | key that is not in the cache (a cache miss). The default behavior is to
85 | return a \code{\link[=key_missing]{key_missing()}} object. This is a \emph{sentinel value} that indicates
86 | that the key was not present in the cache. You can test if the returned
87 | value represents a missing key by using the \code{\link[=is.key_missing]{is.key_missing()}} function.
88 | You can also have \code{get()} return a different sentinel value, like \code{NULL}.
89 | If you want to throw an error on a cache miss, you can do so by providing
90 | an expression for \code{missing}, as in \code{missing = stop("Missing key")}.
91 |
92 | When the cache is created, you can supply a value for \code{missing}, which sets
93 | the default value to be returned for missing values. It can also be
94 | overridden when \code{get()} is called, by supplying a \code{missing} argument. For
95 | example, if you use \code{cache$get("mykey", missing = NULL)}, it will return
96 | \code{NULL} if the key is not in the cache.
97 |
98 | The \code{missing} parameter is actually an expression which is evaluated each
99 | time there is a cache miss. A quosure (from the rlang package) can be used.
100 |
101 | If you use this, the code that calls \code{get()} should be wrapped with
102 | \code{\link[=tryCatch]{tryCatch()}} to gracefully handle missing keys.
103 | }
104 |
105 | \section{Cache pruning}{
106 |
107 |
108 | Cache pruning occurs when \code{set()} is called, or it can be invoked manually
109 | by calling \code{prune()}.
110 |
111 | The disk cache will throttle the pruning so that it does not happen on
112 | every call to \code{set()}, because the filesystem operations for checking the
113 | status of files can be slow. Instead, it will prune once in every
114 | \code{prune_rate} calls to \code{set()}, or if at least 5 seconds have elapsed since
115 | the last prune occurred, whichever is first.
116 |
117 | When a pruning occurs, if there are any objects that are older than
118 | \code{max_age}, they will be removed.
119 |
120 | The \code{max_size} and \code{max_n} parameters are applied to the cache as a whole,
121 | in contrast to \code{max_age}, which is applied to each object individually.
122 |
123 | If the number of objects in the cache exceeds \code{max_n}, then objects will be
124 | removed from the cache according to the eviction policy, which is set with
125 | the \code{evict} parameter. Objects will be removed so that the number of items
126 | is \code{max_n}.
127 |
128 | If the size of the objects in the cache exceeds \code{max_size}, then objects
129 | will be removed from the cache. Objects will be removed from the cache so
130 | that the total size remains under \code{max_size}. Note that the size is
131 | calculated using the size of the files, not the size of disk space used by
132 | the files --- these two values can differ because files are stored in
133 | blocks on disk. For example, if the block size is 4096 bytes, then a file
134 | that is one byte in size will take 4096 bytes on disk.
135 |
136 | Another time that objects can be removed from the cache is when \code{get()} is
137 | called. If the target object is older than \code{max_age}, it will be removed
138 | and the cache will report it as a missing value.
139 | }
140 |
141 | \section{Eviction policies}{
142 |
143 |
144 | If \code{max_n} or \code{max_size} are used, then objects will be removed from the
145 | cache according to an eviction policy. The available eviction policies are:
146 |
147 | \describe{
148 | \item{\code{"lru"}}{
149 | Least Recently Used. The least recently used objects will be removed.
150 | This uses the filesystem's mtime property. When "lru" is used, each time
151 | \code{get()} is called, it will update the file's mtime using
152 | \code{\link[=Sys.setFileTime]{Sys.setFileTime()}}. Note that on some platforms, the resolution of
153 | \code{\link[=Sys.setFileTime]{Sys.setFileTime()}} may be low, one or two seconds.
154 | }
155 | \item{\code{"fifo"}}{
156 | First-in-first-out. The oldest objects will be removed.
157 | }
158 | }
159 |
160 | Both of these policies use files' mtime. Note that some filesystems (notably
161 | FAT) have poor mtime resolution. (atime is not used because support for atime
162 | is worse than mtime.)
163 | }
164 |
165 | \section{Sharing among multiple processes}{
166 |
167 |
168 | The directory for a cache_disk can be shared among multiple R processes. To
169 | do this, each R process should have a cache_disk object that uses the same
170 | directory. Each cache_disk will do pruning independently of the others, so
171 | if they have different pruning parameters, then one cache_disk may remove
172 | cached objects before another cache_disk would do so.
173 |
174 | Even though it is possible for multiple processes to share a cache_disk
175 | directory, this should not be done on networked file systems, because the
176 | slow performance of networked file systems can cause problems. If you need
177 | a high-performance shared cache, you can use one built on a database like
178 | Redis, SQLite, mySQL, or similar.
179 |
180 | When multiple processes share a cache directory, there are some potential
181 | race conditions. For example, if your code calls \code{exists(key)} to check if
182 | an object is in the cache, and then calls \code{get(key)}, the object may be
183 | removed from the cache in between those two calls, and \code{get(key)} will
184 | throw an error. Instead of calling the two functions, it is better to
185 | simply call \code{get(key)}, and check that the returned object is not a
186 | \code{key_missing()} object, using \code{is.key_missing()}. This effectively tests
187 | for existence and gets the object in one operation.
188 |
189 | It is also possible for one process to prune objects at the same time
190 | that another process is trying to prune objects. If this happens, you may
191 | see a warning from \code{file.remove()} failing to remove a file that has
192 | already been deleted.
193 | }
194 |
195 | \section{Methods}{
196 |
197 |
198 | A disk cache object has the following methods:
199 |
200 | \describe{
201 | \item{\code{get(key, missing)}}{
202 | Returns the value associated with \code{key}. If the key is not in the
203 | cache, then it evaluates the expression specified by \code{missing} and
204 | returns the value. If \code{missing} is specified here, then it will
205 | override the default that was set when the \code{cache_disk} object was
206 | created. See section Missing Keys for more information.
207 | }
208 | \item{\code{set(key, value)}}{
209 | Stores the \code{key}-\code{value} pair in the cache.
210 | }
211 | \item{\code{exists(key)}}{
212 | Returns \code{TRUE} if the cache contains the key, otherwise
213 | \code{FALSE}.
214 | }
215 | \item{\code{remove(key)}}{
216 | Removes \code{key} from the cache, if it exists in the cache. If the key is
217 | not in the cache, this does nothing.
218 | }
219 | \item{\code{size()}}{
220 | Returns the number of items currently in the cache.
221 | }
222 | \item{\code{keys()}}{
223 | Returns a character vector of all keys currently in the cache.
224 | }
225 | \item{\code{reset()}}{
226 | Clears all objects from the cache.
227 | }
228 | \item{\code{destroy()}}{
229 | Clears all objects in the cache, and removes the cache directory from
230 | disk.
231 | }
232 | \item{\code{prune()}}{
233 | Prunes the cache, using the parameters specified by \code{max_size},
234 | \code{max_age}, \code{max_n}, and \code{evict}.
235 | }
236 | }
237 | }
238 |
239 |
--------------------------------------------------------------------------------
/man/cache_layered.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/cache-layered.R
3 | \name{cache_layered}
4 | \alias{cache_layered}
5 | \title{Compose any number of cache objects into a new, layered cache object}
6 | \usage{
7 | cache_layered(..., logfile = NULL)
8 | }
9 | \arguments{
10 | \item{...}{Cache objects to compose into a new, layered cache object.}
11 |
12 | \item{logfile}{An optional filename or connection object to where logging
13 | information will be written. To log to the console, use \code{stderr()} or
14 | \code{stdout()}.}
15 | }
16 | \value{
17 | A layered caching object, with class \code{cache_layered}.
18 | }
19 | \description{
20 | Note that \code{cache_layered} is currently experimental.
21 | }
22 | \examples{
23 |
24 | # Make a layered cache from a small memory cache and large disk cache
25 | m <- cache_mem(max_size = 100 * 1024^2)
26 | d <- cache_disk(max_size = 2 * 1024^3)
27 | cl <- cache_layered(m, d)
28 | }
29 |
--------------------------------------------------------------------------------
/man/cache_mem.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/cache-mem.R
3 | \name{cache_mem}
4 | \alias{cache_mem}
5 | \title{Create a memory cache object}
6 | \usage{
7 | cache_mem(
8 | max_size = 512 * 1024^2,
9 | max_age = Inf,
10 | max_n = Inf,
11 | evict = c("lru", "fifo"),
12 | missing = key_missing(),
13 | logfile = NULL
14 | )
15 | }
16 | \arguments{
17 | \item{max_size}{Maximum size of the cache, in bytes. If the cache exceeds
18 | this size, cached objects will be removed according to the value of the
19 | \code{evict}. Use \code{Inf} for no size limit. The default is 512 megabytes.}
20 |
21 | \item{max_age}{Maximum age of files in cache before they are evicted, in
22 | seconds. Use \code{Inf} for no age limit.}
23 |
24 | \item{max_n}{Maximum number of objects in the cache. If the number of objects
25 | exceeds this value, then cached objects will be removed according to the
26 | value of \code{evict}. Use \code{Inf} for no limit of number of items.}
27 |
28 | \item{evict}{The eviction policy to use to decide which objects are removed
29 | when a cache pruning occurs. Currently, \code{"lru"} and \code{"fifo"} are supported.}
30 |
31 | \item{missing}{A value to return when \code{get(key)} is called but the key is not
32 | present in the cache. The default is a \code{\link[=key_missing]{key_missing()}} object. It is
33 | actually an expression that is evaluated each time there is a cache miss.
34 | See section Missing keys for more information.}
35 |
36 | \item{logfile}{An optional filename or connection object to where logging
37 | information will be written. To log to the console, use \code{stderr()} or
38 | \code{stdout()}.}
39 | }
40 | \value{
41 | A memory caching object, with class \code{cache_mem}.
42 | }
43 | \description{
44 | A memory cache object is a key-value store that saves the values in an
45 | environment. Objects can be stored and retrieved using the \code{get()} and
46 | \code{set()} methods. Objects are automatically pruned from the cache according to
47 | the parameters \code{max_size}, \code{max_age}, \code{max_n}, and \code{evict}.
48 | }
49 | \details{
50 | In a \code{cache_mem}, R objects are stored directly in the cache; they are
51 | \emph{not} serialized before being stored in the cache. This contrasts with other
52 | cache types, like \code{\link[=cache_disk]{cache_disk()}}, where objects are serialized, and the
53 | serialized object is cached. This can result in some differences of behavior.
54 | For example, as long as an object is stored in a cache_mem, it will not be
55 | garbage collected.
56 | }
57 | \section{Missing keys}{
58 |
59 |
60 | The \code{missing} parameter controls what happens when \code{get()} is called with a
61 | key that is not in the cache (a cache miss). The default behavior is to
62 | return a \code{\link[=key_missing]{key_missing()}} object. This is a \emph{sentinel value} that indicates
63 | that the key was not present in the cache. You can test if the returned
64 | value represents a missing key by using the \code{\link[=is.key_missing]{is.key_missing()}} function.
65 | You can also have \code{get()} return a different sentinel value, like \code{NULL}.
66 | If you want to throw an error on a cache miss, you can do so by providing
67 | an expression for \code{missing}, as in \code{missing = stop("Missing key")}.
68 |
69 | When the cache is created, you can supply a value for \code{missing}, which sets
70 | the default value to be returned for missing values. It can also be
71 | overridden when \code{get()} is called, by supplying a \code{missing} argument. For
72 | example, if you use \code{cache$get("mykey", missing = NULL)}, it will return
73 | \code{NULL} if the key is not in the cache.
74 |
75 | The \code{missing} parameter is actually an expression which is evaluated each
76 | time there is a cache miss. A quosure (from the rlang package) can be used.
77 |
78 | If you use this, the code that calls \code{get()} should be wrapped with
79 | \code{\link[=tryCatch]{tryCatch()}} to gracefully handle missing keys.
80 | }
81 |
82 | \section{Cache pruning}{
83 | Cache pruning occurs when \code{set()} is called, or it can be invoked manually
84 | by calling \code{prune()}.
85 |
86 | When a pruning occurs, if there are any objects that are older than
87 | \code{max_age}, they will be removed.
88 |
89 | The \code{max_size} and \code{max_n} parameters are applied to the cache as a whole,
90 | in contrast to \code{max_age}, which is applied to each object individually.
91 |
92 | If the number of objects in the cache exceeds \code{max_n}, then objects will be
93 | removed from the cache according to the eviction policy, which is set with
94 | the \code{evict} parameter. Objects will be removed so that the number of items
95 | is \code{max_n}.
96 |
97 | If the size of the objects in the cache exceeds \code{max_size}, then objects
98 | will be removed from the cache. Objects will be removed from the cache so
99 | that the total size remains under \code{max_size}.
100 |
101 | Another time that objects can be removed from the cache is when \code{get()} is
102 | called. If the target object is older than \code{max_age}, it will be removed
103 | and the cache will report it as a missing value.
104 | }
105 |
106 | \section{Eviction policies}{
107 |
108 |
109 | If \code{max_n} or \code{max_size} are used, then objects will be removed
110 | from the cache according to an eviction policy. The available eviction
111 | policies are:
112 |
113 | \describe{
114 | \item{\code{"lru"}}{
115 | Least Recently Used. The least recently used objects will be removed.
116 | }
117 | \item{\code{"fifo"}}{
118 | First-in-first-out. The oldest objects will be removed.
119 | }
120 | }
121 | }
122 |
123 | \section{Methods}{
124 |
125 |
126 | A memory cache object has the following methods:
127 |
128 | \describe{
129 | \item{\code{get(key, missing)}}{
130 | Returns the value associated with \code{key}. If the key is not in the
131 | cache, then it evaluates the expression specified by \code{missing} and
132 | returns the value. If \code{missing} is specified here, then it will
133 | override the default that was set when the \code{cache_mem} object was
134 | created. See section Missing Keys for more information.
135 | }
136 | \item{\code{set(key, value)}}{
137 | Stores the \code{key}-\code{value} pair in the cache.
138 | }
139 | \item{\code{exists(key)}}{
140 | Returns \code{TRUE} if the cache contains the key, otherwise
141 | \code{FALSE}.
142 | }
143 | \item{\code{remove(key)}}{
144 | Removes \code{key} from the cache, if it exists in the cache. If the key is
145 | not in the cache, this does nothing.
146 | }
147 | \item{\code{size()}}{
148 | Returns the number of items currently in the cache.
149 | }
150 | \item{\code{keys()}}{
151 | Returns a character vector of all keys currently in the cache.
152 | }
153 | \item{\code{reset()}}{
154 | Clears all objects from the cache.
155 | }
156 | \item{\code{destroy()}}{
157 | Clears all objects in the cache, and removes the cache directory from
158 | disk.
159 | }
160 | \item{\code{prune()}}{
161 | Prunes the cache, using the parameters specified by \code{max_size},
162 | \code{max_age}, \code{max_n}, and \code{evict}.
163 | }
164 | }
165 | }
166 |
167 |
--------------------------------------------------------------------------------
/man/reexports.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/reexports.R
3 | \docType{import}
4 | \name{reexports}
5 | \alias{reexports}
6 | \alias{key_missing}
7 | \alias{is.key_missing}
8 | \title{Objects exported from other packages}
9 | \keyword{internal}
10 | \description{
11 | These objects are imported from other packages. Follow the links
12 | below to see their documentation.
13 |
14 | \describe{
15 | \item{fastmap}{\code{\link[fastmap:key_missing]{is.key_missing}}, \code{\link[fastmap]{key_missing}}}
16 | }}
17 |
18 |
--------------------------------------------------------------------------------
/revdep/.gitignore:
--------------------------------------------------------------------------------
1 | checks
2 | library
3 | checks.noindex
4 | library.noindex
5 | cloud.noindex
6 | data.sqlite
7 | *.html
8 |
--------------------------------------------------------------------------------
/revdep/README.md:
--------------------------------------------------------------------------------
1 | # Platform
2 |
3 | |field |value |
4 | |:--------|:----------------------------|
5 | |version |R version 4.0.2 (2020-06-22) |
6 | |os |macOS 10.16 |
7 | |system |x86_64, darwin17.0 |
8 | |ui |X11 |
9 | |language |(EN) |
10 | |collate |en_US.UTF-8 |
11 | |ctype |en_US.UTF-8 |
12 | |tz |America/New_York |
13 | |date |2021-08-17 |
14 |
15 | # Dependencies
16 |
17 | |package |old |new |Δ |
18 | |:-------|:-----|:-----|:--|
19 | |cachem |1.0.5 |1.0.6 |* |
20 |
21 | # Revdeps
22 |
23 | ## All (6)
24 |
25 | |package |version |error |warning |note |
26 | |:--------------------------------|:-------|:-----|:-------|:----|
27 | |aquodom |0.1.0 | | | |
28 | |ffscrapr |1.4.5 | | | |
29 | |memoise |2.0.0 | | | |
30 | |[nflreadr](problems.md#nflreadr) |1.0.0 | | |1 |
31 | |[shiny](problems.md#shiny) |1.6.0 | | |1 |
32 | |[slackr](problems.md#slackr) |3.0.0 | | |1 |
33 |
34 |
--------------------------------------------------------------------------------
/revdep/cran.md:
--------------------------------------------------------------------------------
1 | ## revdepcheck results
2 |
3 | We checked 6 reverse dependencies, comparing R CMD check results across CRAN and dev versions of this package.
4 |
5 | * We saw 0 new problems
6 | * We failed to check 0 packages
7 |
8 |
--------------------------------------------------------------------------------
/revdep/failures.md:
--------------------------------------------------------------------------------
1 | *Wow, no problems at all. :)*
--------------------------------------------------------------------------------
/revdep/problems.md:
--------------------------------------------------------------------------------
1 | # nflreadr
2 |
3 |
4 |
5 | * Version: 1.0.0
6 | * GitHub: https://github.com/nflverse/nflreadr
7 | * Source code: https://github.com/cran/nflreadr
8 | * Date/Publication: 2021-08-09 14:40:02 UTC
9 | * Number of recursive dependencies: 70
10 |
11 | Run `revdep_details(, "nflreadr")` for more info
12 |
13 |
14 |
15 | ## In both
16 |
17 | * checking dependencies in R code ... NOTE
18 | ```
19 | Namespace in Imports field not imported from: ‘dplyr’
20 | All declared Imports should be used.
21 | ```
22 |
23 | # shiny
24 |
25 |
26 |
27 | * Version: 1.6.0
28 | * GitHub: https://github.com/rstudio/shiny
29 | * Source code: https://github.com/cran/shiny
30 | * Date/Publication: 2021-01-25 21:50:02 UTC
31 | * Number of recursive dependencies: 104
32 |
33 | Run `revdep_details(, "shiny")` for more info
34 |
35 |
36 |
37 | ## In both
38 |
39 | * checking installed package size ... NOTE
40 | ```
41 | installed size is 12.1Mb
42 | sub-directories of 1Mb or more:
43 | R 2.0Mb
44 | www 8.8Mb
45 | ```
46 |
47 | # slackr
48 |
49 |
50 |
51 | * Version: 3.0.0
52 | * GitHub: https://github.com/mrkaye97/slackr
53 | * Source code: https://github.com/cran/slackr
54 | * Date/Publication: 2021-08-07 19:30:02 UTC
55 | * Number of recursive dependencies: 85
56 |
57 | Run `revdep_details(, "slackr")` for more info
58 |
59 |
60 |
61 | ## In both
62 |
63 | * checking dependencies in R code ... NOTE
64 | ```
65 | Namespaces in Imports field not imported from:
66 | ‘methods’ ‘reprex’
67 | All declared Imports should be used.
68 | ```
69 |
70 |
--------------------------------------------------------------------------------
/src/.gitignore:
--------------------------------------------------------------------------------
1 | *.o
2 | *.so
3 | *.dll
4 |
--------------------------------------------------------------------------------
/src/cache.c:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 | #include
4 |
5 | /* Validate a cache key: key_r must be a one-element character vector whose value
6 |  * is not NA or blank and uses only [0-9a-z_-]. Returns TRUE, else raises an R error. */
7 | SEXP C_validate_key(SEXP key_r) {
8 |   if (TYPEOF(key_r) != STRSXP || Rf_length(key_r) != 1) {
9 |     Rf_error("key must be a one-element character vector");
10 |   }
11 |   SEXP key_c = STRING_ELT(key_r, 0);
12 |   if (key_c == NA_STRING || Rf_StringBlank(key_c)) {
13 |     Rf_error("key must not be \"\" or NA");
14 |   }
15 |   const char* s = R_CHAR(key_c);
16 |   static const char cset[] = "1234567890abcdefghijklmnopqrstuvwxyz_-";
17 |   /* strspn() counts the leading allowed chars; a valid key runs up to the NUL. */
18 |   if (s[strspn(s, cset)] != '\0') {
19 |     Rf_error("Invalid key: %s. Only lowercase letters, numbers, '_', and '-' are allowed.", s);
20 |   }
21 |   return Rf_ScalarLogical(TRUE);
22 | }
23 |
24 |
--------------------------------------------------------------------------------
/src/init.c:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 | #include // for NULL
4 | #include
5 | #include
6 |
7 | /* .Call calls: C routines that are invoked from R through the .Call interface */
8 | extern SEXP C_validate_key(SEXP);
9 | /* Registration table: one {name, function pointer, argument count} row per routine, ended by an all-NULL row. */
10 | static const R_CallMethodDef CallEntries[] = {
11 | {"C_validate_key", (DL_FUNC) &C_validate_key, 1},
12 | {NULL, NULL, 0}
13 | };
14 | /* Library init routine: registers CallEntries as the .Call routines, then turns off dynamic symbol lookup. */
15 | attribute_visible void R_init_cachem(DllInfo *dll)
16 | {
17 | R_registerRoutines(dll, NULL, CallEntries, NULL, NULL);
18 | R_useDynamicSymbols(dll, FALSE);
19 | }
20 |
--------------------------------------------------------------------------------
/tests/testthat.R:
--------------------------------------------------------------------------------
1 | library(testthat)
2 | library(cachem)
3 |
4 | test_check("cachem")
5 |
--------------------------------------------------------------------------------
/tests/testthat/helper-utils.R:
--------------------------------------------------------------------------------
1 | # TRUE when the GITHUB_ACTIONS environment variable is set to a non-empty value.
2 | is_on_github_actions <- function() {
3 |   Sys.getenv("GITHUB_ACTIONS") != ""
4 | }
--------------------------------------------------------------------------------
/tests/testthat/test-cache-disk.R:
--------------------------------------------------------------------------------
1 |
2 | cache_disk_deterministic <- function(...) {
3 |   cache <- cache_disk(...)
4 |
5 |   # The throttle counter normally starts at a random value; these tests need it
6 |   # to be deterministic, so reset it in the environment that $set() closes over.
7 |   assign("prune_throttle_counter_", 0, envir = environment(cache$set))
8 |
9 |   cache
10 | }
11 |
12 |
13 | test_that("cache_disk: handling missing values", {
14 | d <- cache_disk() # Default `missing`: a miss is expected to give a key_missing() object
15 | expect_true(is.key_missing(d$get("abcd")))
16 | d$set("a", 100)
17 | expect_identical(d$get("a"), 100)
18 | expect_identical(d$get("y", missing = NULL), NULL)
19 | expect_error(
20 | d$get("y", missing = stop("Missing key")),
21 | "^Missing key$",
22 | )
23 | # Cache-level default of NULL; a `missing` passed to get() should still override it.
24 | d <- cache_disk(missing = NULL)
25 | expect_true(is.null(d$get("abcd")))
26 | d$set("a", 100)
27 | expect_identical(d$get("a"), 100)
28 | expect_identical(d$get("y", missing = -1), -1)
29 | expect_error(
30 | d$get("y", missing = stop("Missing key")),
31 | "^Missing key$",
32 | )
33 | # Cache-level default that throws; hits still return values and per-call `missing` wins.
34 | d <- cache_disk(missing = stop("Missing key"))
35 | expect_error(d$get("abcd"), "^Missing key$")
36 | d$set("x", NULL)
37 | d$set("a", 100)
38 | expect_identical(d$get("a"), 100)
39 | expect_error(d$get("y"), "^Missing key$")
40 | expect_identical(d$get("y", missing = NULL), NULL)
41 | expect_true(is.key_missing(d$get("y", missing = key_missing())))
42 | expect_error(
43 | d$get("y", missing = stop("Missing key 2")),
44 | "^Missing key 2$",
45 | )
46 |
47 | # Pass in a quosure (unquoted with !!) as the cache-level `missing` expression
48 | expr <- rlang::quo(stop("Missing key"))
49 | d <- cache_disk(missing = !!expr)
50 | expect_error(d$get("y"), "^Missing key$")
51 | expect_error(d$get("y"), "^Missing key$") # Make sure a second time also throws
52 | })
53 |
54 |
55 | test_that("cache_disk: pruning respects max_n", {
56 | # Timing is apparently unreliable on CRAN, so skip tests there. It's possible
57 | # that a heavily loaded system will have issues with these tests because of
58 | # the time resolution.
59 | skip_on_cran()
60 | delay <- 0.01
61 | # Store five keys in a cache that is capped at three entries.
62 | d <- cache_disk_deterministic(max_n = 3)
63 | # NOTE: The short delays after each item are meant to make tests more reliable
64 | # on CI systems.
65 | d$set("a", rnorm(100)); Sys.sleep(delay)
66 | d$set("b", rnorm(100)); Sys.sleep(delay)
67 | d$set("c", rnorm(100)); Sys.sleep(delay)
68 | d$set("d", rnorm(100)); Sys.sleep(delay)
69 | d$set("e", rnorm(100)); Sys.sleep(delay)
70 | d$prune() # Prune explicitly rather than relying on the pruning done by set()
71 | expect_identical(sort(d$keys()), c("c", "d", "e")) # The three most recently set keys remain
72 | })
73 |
74 | test_that("cache_disk: pruning respects max_size", {
75 | skip_on_cran() # Presumably skipped for the same timing reasons as the max_n test
76 | delay <- 0.01
77 | # Cap the cache's total size at 200; objects are set with a short delay between them.
78 | d <- cache_disk_deterministic(max_size = 200)
79 | d$set("a", rnorm(100)); Sys.sleep(delay)
80 | d$set("b", rnorm(100)); Sys.sleep(delay)
81 | d$set("c", 1); Sys.sleep(delay)
82 | d$prune()
83 | expect_identical(sort(d$keys()), c("c")) # Only the small object is expected to remain
84 | d$set("d", rnorm(100)); Sys.sleep(delay)
85 | # Objects are pruned with oldest first, so even though "c" would fit in the
86 | # cache, it is removed after adding "d" (and "d" is removed as well because it
87 | # doesn't fit).
88 | d$prune()
89 | expect_length(d$keys(), 0)
90 | d$set("e", 2); Sys.sleep(delay)
91 | d$set("f", 3); Sys.sleep(delay)
92 | d$prune()
93 | expect_identical(sort(d$keys()), c("e", "f")) # Both small objects are expected to remain
94 | })
95 |
96 | # Regression test for issue shiny#3033
97 | test_that("cache_disk: pruning respects both max_n and max_size", {
98 | skip_on_cran()
99 | cache <- cache_disk_deterministic(max_n = 3, max_size = 200)
100 |
101 | # Fill the cache with rnorm() vectors so that every object is large. A plain
102 | # vector such as 1:100 is stored very efficiently by R's ALTREP and would not
103 | # exceed max_size; each of these objects has to exceed max_size so that it
104 | # gets pruned.
105 | cache$set("a", rnorm(100))
106 | cache$set("b", rnorm(100))
107 | cache$set("c", rnorm(100))
108 | cache$set("d", rnorm(100))
109 | cache$set("e", rnorm(100))
110 | Sys.sleep(0.1) # Needed on systems with low mtime resolution.
111 | cache$set("f", 1) # Small object; it should survive pruning.
112 | cache$prune()
113 | expect_identical(cache$keys(), "f")
114 | })
115 |
116 | # Return TRUE if Sys.setFileTime() has subsecond resolution, FALSE otherwise.
117 | # The temporary probe file is deleted again before the function returns.
118 | setfiletime_has_subsecond_resolution <- function() {
119 | tmp <- tempfile()
120 | # Remove the probe file on exit, even if one of the calls below fails.
121 | on.exit(unlink(tmp), add = TRUE)
122 | file.create(tmp)
123 | Sys.setFileTime(tmp, Sys.time())
124 | mtime <- as.numeric(file.info(tmp)[["mtime"]])
125 | # A whole-number mtime means the fractional seconds passed to
126 | # Sys.setFileTime() were not kept.
127 | mtime != floor(mtime)
128 | }
129 |
130 | test_that('cache_disk: pruning with evict="lru"', {
131 | skip_on_cran()
132 | delay <- 0.01
133 | # For lru tests, make sure there's sub-second resolution for
134 | # Sys.setFileTime(), because that's what the lru code uses to update times.
135 | skip_if_not(
136 | setfiletime_has_subsecond_resolution(),
137 | "Sys.setFileTime() does not have subsecond resolution on this platform."
138 | )
139 | # Request "lru" explicitly so the test does not depend on the default policy.
140 | d <- cache_disk_deterministic(max_n = 2, evict = "lru")
141 | d$set("a", 1); Sys.sleep(delay)
142 | d$set("b", 1); Sys.sleep(delay)
143 | d$set("c", 1); Sys.sleep(delay)
144 | d$prune()
145 | expect_identical(sort(d$keys()), c("b", "c"))
146 | d$get("b"); Sys.sleep(delay)
147 | d$set("d", 1); Sys.sleep(delay)
148 | d$prune()
149 | expect_identical(sort(d$keys()), c("b", "d"))
150 | d$get("b"); Sys.sleep(delay)
151 | d$set("e", 2); Sys.sleep(delay)
152 | d$get("b"); Sys.sleep(delay)
153 | d$set("f", 3); Sys.sleep(delay)
154 | d$prune()
155 | expect_identical(sort(d$keys()), c("b", "f"))
156 | })
157 |
158 | test_that('cache_disk: pruning with evict="fifo"', {
159 | skip_on_cran()
160 | pause <- 0.01
161 |
162 | cache <- cache_disk_deterministic(max_n = 2, evict = "fifo")
163 | cache$set("a", 1); Sys.sleep(pause)
164 | cache$set("b", 1); Sys.sleep(pause)
165 | cache$set("c", 1); Sys.sleep(pause)
166 | cache$prune()
167 | expect_identical(sort(cache$keys()), c("b", "c"))
168 | cache$get("b")
169 | cache$set("d", 1); Sys.sleep(pause)
170 | cache$prune()
171 | expect_identical(sort(cache$keys()), c("c", "d"))
172 | cache$get("b")
173 | cache$set("e", 2); Sys.sleep(pause)
174 | cache$get("b")
175 | cache$set("f", 3); Sys.sleep(pause)
176 | cache$prune()
177 | expect_identical(sort(cache$keys()), c("e", "f"))
178 | })
179 |
180 |
181 | test_that("cache_disk: pruning throttling", {
182 | skip_on_cran()
183 | pause <- 0.01
184 |
185 | # No pruning takes place while the number of items is below prune_rate AND
186 | # the set() calls all happen within 5 seconds.
187 | cache <- cache_disk_deterministic(max_n = 2, prune_rate = 20)
188 | cache$set("a", 1); Sys.sleep(pause)
189 | cache$set("b", 1); Sys.sleep(pause)
190 | cache$set("c", 1); Sys.sleep(pause)
191 | cache$set("d", 1); Sys.sleep(pause)
192 | expect_identical(sort(cache$keys()), c("a", "b", "c", "d"))
193 |
194 | # With a lower prune_rate value, pruning does take place.
195 | cache <- cache_disk_deterministic(max_n = 2, prune_rate = 3)
196 | cache$set("a", 1); Sys.sleep(pause)
197 | cache$set("b", 1); Sys.sleep(pause)
198 | cache$set("c", 1); Sys.sleep(pause)
199 | expect_identical(sort(cache$keys()), c("b", "c"))
200 | cache$set("d", 1); Sys.sleep(pause)
201 | expect_identical(sort(cache$keys()), c("b", "c", "d"))
202 | cache$set("e", 1); Sys.sleep(pause)
203 | expect_identical(sort(cache$keys()), c("b", "c", "d", "e"))
204 | cache$set("f", 1); Sys.sleep(pause)
205 | expect_identical(sort(cache$keys()), c("e", "f"))
206 | # NOTE(review): "f" already exists, so the keys stay e/f even with no prune.
207 | Sys.sleep(5)
208 | cache$set("f", 1); Sys.sleep(pause)
209 | expect_identical(sort(cache$keys()), c("e", "f"))
210 | })
211 |
212 | test_that("destroy_on_finalize works", {
213 | disk_cache <- cache_disk(destroy_on_finalize = TRUE)
214 | dir_path <- disk_cache$info()$dir
215 | # The directory must exist now and be gone once the cache is collected.
216 | expect_true(dir.exists(dir_path))
217 | rm(disk_cache)
218 | gc()
219 | expect_false(dir.exists(dir_path))
220 | })
221 |
222 |
223 | test_that("Warnings for caching reference objects", {
224 | cache <- cache_disk(warn_ref_objects = TRUE)
225 | expect_warning(cache$set("a", new.env()))
226 | expect_warning(cache$set("a", function() NULL))
227 | expect_warning(cache$set("a", fastmap())) # a fastmap holds an external pointer
228 |
229 | # Without warn_ref_objects, storing reference objects is silent
230 | cache <- cache_disk()
231 | expect_silent(cache$set("a", new.env()))
232 | expect_silent(cache$set("a", function() NULL))
233 | expect_silent(cache$set("a", fastmap()))
234 | })
235 |
236 | test_that("Cache disk can use different formats", {
237 | my_write <- function(...) write.csv(..., row.names = FALSE)
238 | # Values are written with write.csv() and read back with read.csv().
239 | d <- cache_disk(read_fn = read.csv, write_fn = my_write, extension = ".csv")
240 | # Row names are not written, so drop them before comparing.
241 | mt <- mtcars
242 | rownames(mt) <- NULL
243 | d$set("mt", mt)
244 | expect_equal(d$get("mt"), mt)
245 | })
246 |
--------------------------------------------------------------------------------
/tests/testthat/test-cache-mem.R:
--------------------------------------------------------------------------------
1 | # Multiplier applied to the delays in this file. It is raised on GHA because
2 | # the machines there are slow.
3 | time_factor <- if (is_on_github_actions()) 4 else 1
4 |
5 |
6 | test_that("cache_mem: handling missing values", {
7 | mc <- cache_mem()
8 | expect_true(is.key_missing(mc$get("abcd")))
9 | mc$set("a", 100)
10 | expect_identical(mc$get("a"), 100)
11 | expect_identical(mc$get("y", missing = NULL), NULL)
12 | expect_error(
13 | mc$get("y", missing = stop("Missing key")),
14 | "^Missing key$",
15 | )
16 | # Cache created with missing = NULL
17 | mc <- cache_mem(missing = NULL)
18 | expect_true(is.null(mc$get("abcd")))
19 | mc$set("a", 100)
20 | expect_identical(mc$get("a"), 100)
21 | expect_identical(mc$get("y", missing = -1), -1)
22 | expect_error(
23 | mc$get("y", missing = stop("Missing key")),
24 | "^Missing key$",
25 | )
26 | # Cache created with a missing expression that throws
27 | mc <- cache_mem(missing = stop("Missing key"))
28 | expect_error(mc$get("abcd"), "^Missing key$")
29 | mc$set("x", NULL)
30 | mc$set("a", 100)
31 | expect_identical(mc$get("a"), 100)
32 | expect_error(mc$get("y"), "^Missing key$")
33 | expect_identical(mc$get("y", missing = NULL), NULL)
34 | expect_true(is.key_missing(mc$get("y", missing = key_missing())))
35 | expect_error(
36 | mc$get("y", missing = stop("Missing key 2")),
37 | "^Missing key 2$",
38 | )
39 |
40 | # The missing expression can also be supplied as a quosure
41 | missing_quo <- rlang::quo(stop("Missing key"))
42 | mc <- cache_mem(missing = !!missing_quo)
43 | expect_error(mc$get("y"), "^Missing key$")
44 | expect_error(mc$get("y"), "^Missing key$") # A repeated get() must throw as well
45 | })
46 |
47 | test_that("cache_mem: reset", {
48 | cache <- cache_mem()
49 | cache$set("a", "A")
50 | cache$set("b", "B")
51 | cache$reset()
52 | expect_identical(cache$keys(), character())
53 | expect_identical(cache$size(), 0L)
54 | cache$set("c", "C")
55 | expect_identical(cache$keys(), "c")
56 | expect_identical(cache$size(), 1L)
57 | expect_false(cache$exists("a"))
58 | expect_true(cache$exists("c"))
59 | })
60 |
61 | test_that("cache_mem: pruning respects max_n", {
62 | pause <- 0.001 * time_factor
63 | cache <- cache_mem(max_n = 3)
64 | # NOTE: The short pause after each item is meant to make the test more
65 | # reliable on CI systems.
66 | cache$set("a", rnorm(100)); Sys.sleep(pause)
67 | cache$set("b", rnorm(100)); Sys.sleep(pause)
68 | cache$set("c", rnorm(100)); Sys.sleep(pause)
69 | cache$set("d", rnorm(100)); Sys.sleep(pause)
70 | cache$set("e", rnorm(100)); Sys.sleep(pause)
71 | expect_identical(sort(cache$keys()), c("c", "d", "e"))
72 | })
73 |
74 | test_that("cache_mem: pruning respects max_size", {
75 | pause <- 0.001 * time_factor
76 | cache <- cache_mem(max_size = object.size(123) * 3)
77 | cache$set("a", rnorm(100)); Sys.sleep(pause)
78 | cache$set("b", rnorm(100)); Sys.sleep(pause)
79 | cache$set("c", 1); Sys.sleep(pause)
80 | expect_identical(sort(cache$keys()), "c")
81 | cache$set("d", rnorm(100)); Sys.sleep(pause)
82 | # Pruning removes the oldest objects first. "c" would fit in the cache on
83 | # its own, but it is still removed once "d" has been added, and "d" is
84 | # removed too because it does not fit.
85 | expect_length(cache$keys(), 0)
86 | cache$set("e", 2); Sys.sleep(pause)
87 | cache$set("f", 3); Sys.sleep(pause)
88 | expect_identical(sort(cache$keys()), c("e", "f"))
89 | })
90 |
91 | test_that("cache_mem: max_size=Inf", {
92 | unbounded <- cachem::cache_mem(max_size = Inf)
93 | unbounded$set("a", 123)
94 | expect_identical(unbounded$get("a"), 123)
95 | unbounded$prune()
96 | expect_identical(unbounded$get("a"), 123)
97 | })
98 |
99 | test_that("cache_mem: pruning respects both max_n and max_size", {
100 | delay <- 0.001 * time_factor
101 | d <- cache_mem(max_n = 3, max_size = object.size(123) * 3)
102 | # Set some values. Use rnorm so that object size is large; a simple vector
103 | # like 1:100 will be stored very efficiently by R's ALTREP, and won't exceed
104 | # the max_size. We want each of these objects to exceed max_size so that
105 | # they'll be pruned.
106 | d$set("a", rnorm(100)); Sys.sleep(delay)
107 | d$set("b", rnorm(100)); Sys.sleep(delay)
108 | d$set("c", rnorm(100)); Sys.sleep(delay)
109 | d$set("d", rnorm(100)); Sys.sleep(delay)
110 | d$set("e", rnorm(100)); Sys.sleep(delay)
111 | d$set("f", 1); Sys.sleep(delay)
112 | d$set("g", 1); Sys.sleep(delay)
113 | d$set("h", 1); Sys.sleep(delay)
114 | expect_identical(sort(d$keys()), c("f", "g", "h"))
115 |
116 | # This will cause f to be pruned (due to max_n) and g to be pruned (due to
117 | # max_size). The pause uses `delay` so that it scales with time_factor too.
118 | d$set("i", c(2, 3)); Sys.sleep(delay)
119 | expect_identical(sort(d$keys()), c("h", "i"))
120 | })
121 |
122 | test_that('cache_mem: pruning with evict="lru"', {
123 | pause <- 0.001 * time_factor
124 | cache <- cache_mem(max_n = 2)
125 | cache$set("a", 1); Sys.sleep(pause)
126 | cache$set("b", 1); Sys.sleep(pause)
127 | cache$set("c", 1); Sys.sleep(pause)
128 | expect_identical(sort(cache$keys()), c("b", "c"))
129 | cache$get("b")
130 | cache$set("d", 1); Sys.sleep(pause)
131 | expect_identical(sort(cache$keys()), c("b", "d"))
132 | cache$get("b")
133 | cache$set("e", 2); Sys.sleep(pause)
134 | cache$get("b")
135 | cache$set("f", 3); Sys.sleep(pause)
136 | expect_identical(sort(cache$keys()), c("b", "f"))
137 | # Overwriting "b" with set() is expected to keep it in the cache ahead of "c"
138 | cache <- cache_mem(max_n = 2, evict = "lru")
139 | cache$set("a", 1); Sys.sleep(pause)
140 | cache$set("b", 1); Sys.sleep(pause)
141 | cache$set("c", 1); Sys.sleep(pause)
142 | cache$set("b", 2); Sys.sleep(pause)
143 | cache$set("d", 2); Sys.sleep(pause)
144 | expect_identical(sort(cache$keys()), c("b", "d"))
145 | })
146 |
147 | test_that('cache_mem: pruning with evict="fifo"', {
148 | pause <- 0.001 * time_factor
149 | cache <- cache_mem(max_n = 2, evict = "fifo")
150 | cache$set("a", 1); Sys.sleep(pause)
151 | cache$set("b", 1); Sys.sleep(pause)
152 | cache$set("c", 1); Sys.sleep(pause)
153 | expect_identical(sort(cache$keys()), c("b", "c"))
154 | cache$get("b")
155 | cache$set("d", 1); Sys.sleep(pause)
156 | expect_identical(sort(cache$keys()), c("c", "d"))
157 | cache$get("b")
158 | cache$set("e", 2); Sys.sleep(pause)
159 | cache$get("b")
160 | cache$set("f", 3); Sys.sleep(pause)
161 | expect_identical(sort(cache$keys()), c("e", "f"))
162 | # Overwriting "b" with set() is expected to keep it in the cache ahead of "c"
163 | cache <- cache_mem(max_n = 2, evict = "fifo")
164 | cache$set("a", 1); Sys.sleep(pause)
165 | cache$set("b", 1); Sys.sleep(pause)
166 | cache$set("c", 1); Sys.sleep(pause)
167 | cache$set("b", 2); Sys.sleep(pause)
168 | cache$set("d", 2); Sys.sleep(pause)
169 | expect_identical(sort(cache$keys()), c("b", "d"))
170 | })
171 |
172 | test_that("Pruning by max_age", {
173 | skip_on_cran()
174 |
175 | # get() should prune the requested item
176 | cache <- cache_mem(max_age = 0.25 * time_factor)
177 | cache$set("a", 1)
178 | expect_identical(cache$get("a"), 1)
179 | Sys.sleep(0.3 * time_factor)
180 | expect_identical(cache$get("a"), key_missing())
181 | expect_identical(cache$get("x"), key_missing())
182 |
183 | # set() should prune all items
184 | cache <- cache_mem(max_age = 0.25 * time_factor)
185 | cache$set("a", 1)
186 | expect_identical(cache$get("a"), 1)
187 | Sys.sleep(0.3 * time_factor)
188 | cache$set("b", 1)
189 | expect_identical(cache$keys(), "b")
190 |
191 | # exists() should prune the requested item
192 | cache <- cache_mem(max_age = 0.25 * time_factor)
193 | cache$set("a", 1)
194 | expect_identical(cache$get("a"), 1)
195 | expect_true(cache$exists("a"))
196 | expect_false(cache$exists("b"))
197 | Sys.sleep(0.15 * time_factor)
198 | cache$set("b", 1)
199 | expect_true(cache$exists("a"))
200 | expect_true(cache$exists("b"))
201 | Sys.sleep(0.15 * time_factor)
202 | expect_false(cache$exists("a"))
203 | expect_true(cache$exists("b"))
204 |
205 | # keys() should prune all items
206 | cache <- cache_mem(max_age = 0.25 * time_factor)
207 | cache$set("a", 1)
208 | expect_identical(cache$keys(), "a")
209 | Sys.sleep(0.15 * time_factor)
210 | cache$set("b", 1)
211 | Sys.sleep(0.15 * time_factor)
212 | expect_identical(cache$keys(), "b")
213 |
214 | # size() should prune all items
215 | cache <- cache_mem(max_age = 0.25 * time_factor)
216 | cache$set("a", 1)
217 | expect_identical(cache$size(), 1L)
218 | Sys.sleep(0.15 * time_factor)
219 | cache$set("b", 1)
220 | expect_identical(cache$size(), 2L)
221 | Sys.sleep(0.15 * time_factor)
222 | expect_identical(cache$size(), 1L)
223 | })
224 |
225 | test_that("Removed objects can be GC'd", {
226 | mc <- cache_mem()
227 | e <- new.env()
228 | finalized <- FALSE
229 | reg.finalizer(e, function(x) finalized <<- TRUE)
230 | mc$set("e", e); rm(e)
231 | mc$set("x", 1); gc()
232 | expect_false(finalized) # The cache still holds the environment.
233 | expect_true(is.environment(mc$get("e")))
234 | mc$remove("e"); gc() # After removal, gc() should run the finalizer.
235 | expect_true(finalized)
236 | })
237 |
238 | test_that("Pruned objects can be GC'd", {
239 | pause <- 0.001 * time_factor
240 | # The cache has room for exactly one environment plus one number
241 | cache <- cache_mem(max_size = object.size(new.env()) + object.size(1234))
242 | was_finalized <- FALSE
243 | env_obj <- new.env()
244 | reg.finalizer(env_obj, function(obj) was_finalized <<- TRUE)
245 | cache$set("e", env_obj)
246 | rm(env_obj)
247 | cache$set("x", 1)
248 | gc()
249 | expect_false(was_finalized)
250 | expect_true(is.environment(cache$get("e")))
251 |
252 | # Read x so that its atime gets updated
253 | Sys.sleep(pause)
254 | cache$get("x")
255 | Sys.sleep(pause)
256 |
257 | # Adding one more item should cause e to be pruned
258 | cache$set("y", 2)
259 | gc()
260 | expect_true(was_finalized)
261 | expect_true(is.key_missing(cache$get("e")))
262 | })
263 |
264 |
265 | # See https://github.com/r-lib/cachem/issues/47 and https://github.com/r-lib/cachem/pull/48/
266 | test_that("Cache doesn't shrink smaller than INITIAL_SIZE", {
267 | # Besides the size check, this verifies that the storage vectors do not keep
268 | # growing when an empty cache repeatedly has a single item added and then
269 | # removed again.
270 | mc <- cache_mem()
271 | internals <- environment(mc$get)
272 | for (idx in seq_len(internals$INITIAL_SIZE)) {
273 | mc$set(as.character(idx), idx)
274 | mc$remove(as.character(idx))
275 | }
276 | expect_equal(internals$total_n_, 0)
277 | expect_equal(internals$last_idx_, internals$INITIAL_SIZE)
278 | expect_length(internals$key_, internals$INITIAL_SIZE)
279 | expect_length(internals$value_, internals$INITIAL_SIZE)
280 |
281 | # One further item should trigger a compact_()
282 | mc$set("a", 1)
283 | mc$remove("a")
284 |
285 | expect_equal(internals$total_n_, 0)
286 | # Once INITIAL_SIZE has been passed, last_idx_ should be reset even when the
287 | # cache holds no items. Prior to the fix in #48, it could keep growing.
288 | expect_equal(internals$last_idx_, 0)
289 | expect_length(internals$key_, internals$INITIAL_SIZE)
290 | expect_length(internals$value_, internals$INITIAL_SIZE)
291 | })
292 |
--------------------------------------------------------------------------------
/tests/testthat/test-utils.R:
--------------------------------------------------------------------------------
1 |
2 | test_that("validate_key", {
3 | # Keys that validate_key() is expected to accept.
4 | accepted <- c(
5 | "e", "abc", "abcd123-_", "-", "_"
6 | )
7 | for (key in accepted) expect_true(validate_key(key))
8 |
9 | # Keys that validate_key() is expected to reject with an error.
10 | rejected <- c(
11 | "a.b", "a,b", "é", "ABC",
12 | "_A", "!", "a b", "ab\n"
13 | )
14 | for (key in rejected) {
15 | expect_error(validate_key(key))
16 | }
17 | })
18 |
--------------------------------------------------------------------------------