├── .Rbuildignore
├── .github
    ├── .gitignore
    └── workflows
    │   └── R-CMD-check.yaml
├── .gitignore
├── CRAN-RELEASE
├── DESCRIPTION
├── LICENSE
├── LICENSE.md
├── Makefile
├── NAMESPACE
├── NEWS.md
├── R
    ├── cache-disk.R
    ├── cache-layered.R
    ├── cache-mem.R
    ├── cachem-package.R
    ├── cachem.R
    ├── reexports.R
    └── utils.R
├── README.Rmd
├── README.md
├── _pkgdown.yml
├── cachem.Rproj
├── cran-comments.md
├── man
    ├── cache_disk.Rd
    ├── cache_layered.Rd
    ├── cache_mem.Rd
    └── reexports.Rd
├── revdep
    ├── .gitignore
    ├── README.md
    ├── cran.md
    ├── failures.md
    └── problems.md
├── src
    ├── .gitignore
    ├── cache.c
    └── init.c
└── tests
    ├── testthat.R
    └── testthat
        ├── helper-utils.R
        ├── test-cache-disk.R
        ├── test-cache-mem.R
        └── test-utils.R


/.Rbuildignore:
--------------------------------------------------------------------------------
 1 | ^.*\.Rproj$
 2 | ^\.Rproj\.user$
 3 | ^LICENSE\.md$
 4 | ^\.github$
 5 | ^.*\.sublime-project$
 6 | ^README\.Rmd$
 7 | ^_pkgdown\.yml$
 8 | ^docs$
 9 | ^pkgdown$
10 | ^Makefile$
11 | ^cran-comments\.md$
12 | ^revdep$
13 | ^CRAN-RELEASE$
14 | 


--------------------------------------------------------------------------------
/.github/.gitignore:
--------------------------------------------------------------------------------
1 | *.html
2 | 


--------------------------------------------------------------------------------
/.github/workflows/R-CMD-check.yaml:
--------------------------------------------------------------------------------
 1 | # Workflow derived from https://github.com/rstudio/shiny-workflows
 2 | #
 3 | # NOTE: This Shiny team GHA workflow is overkill for most R packages.
 4 | # For most R packages it is better to use https://github.com/r-lib/actions
 5 | on:
 6 |   push:
 7 |     branches: [main, rc-**]
 8 |   pull_request:
 9 |     branches: [main]
10 |   schedule:
11 |     - cron: '0 7 * * 1' # every monday
12 | 
13 | name: Package checks
14 | 
15 | jobs:
16 |   website:
17 |     uses: rstudio/shiny-workflows/.github/workflows/website.yaml@v1
18 |   routine:
19 |     uses: rstudio/shiny-workflows/.github/workflows/routine.yaml@v1
20 |   R-CMD-check:
21 |     uses: rstudio/shiny-workflows/.github/workflows/R-CMD-check.yaml@v1
22 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .Rproj.user
2 | docs
3 | CRAN-RELEASE
4 | 


--------------------------------------------------------------------------------
/CRAN-RELEASE:
--------------------------------------------------------------------------------
1 | This package was submitted to CRAN on 2021-08-19.
2 | Once it is accepted, delete this file and tag the release (commit 9e4b75d).
3 | 


--------------------------------------------------------------------------------
/DESCRIPTION:
--------------------------------------------------------------------------------
 1 | Package: cachem
 2 | Version: 1.1.0
 3 | Title: Cache R Objects with Automatic Pruning
 4 | Description: Key-value stores with automatic pruning. Caches can limit
 5 |     either their total size or the age of the oldest object (or both),
 6 |     automatically pruning objects to maintain the constraints.
 7 | Authors@R: c(
 8 |     person("Winston", "Chang", , "winston@posit.co", c("aut", "cre")),
 9 |     person(family = "Posit Software, PBC", role = c("cph", "fnd")))
10 | License: MIT + file LICENSE
11 | Encoding: UTF-8
12 | ByteCompile: true
13 | URL: https://cachem.r-lib.org/, https://github.com/r-lib/cachem
14 | Imports:
15 |     rlang,
16 |     fastmap (>= 1.2.0)
17 | Suggests:
18 |     testthat
19 | RoxygenNote: 7.2.3
20 | Roxygen: list(markdown = TRUE)
21 | Config/Needs/routine:
22 |     lobstr
23 | Config/Needs/website:
24 |     pkgdown
25 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | YEAR: 2023
2 | COPYRIGHT HOLDER: Posit Software, PBC
3 | 


--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
 1 | # MIT License
 2 | 
 3 | Copyright (c) 2023 Posit Software, PBC. 
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | 
 2 | README.md: README.Rmd
 3 | 	# Instead of just knitr::knit, use rmarkdown::render and remove the .html
 4 | 	# file, because the latter uses pandoc to process the .md file, and it
 5 | 	# supports adding a table of contents.
 6 | 	Rscript -e "rmarkdown::render('$<', output_file = '$@')"
 7 | 	rm -f README.html
 8 | 
 9 | clean:
10 | 	rm -f README.md
11 | 


--------------------------------------------------------------------------------
/NAMESPACE:
--------------------------------------------------------------------------------
 1 | # Generated by roxygen2: do not edit by hand
 2 | 
 3 | S3method(format,cachem)
 4 | S3method(print,cachem)
 5 | export(cache_disk)
 6 | export(cache_layered)
 7 | export(cache_mem)
 8 | export(is.key_missing)
 9 | export(key_missing)
10 | import(fastmap)
11 | importFrom(fastmap,is.key_missing)
12 | importFrom(fastmap,key_missing)
13 | importFrom(rlang,as_quosure)
14 | importFrom(rlang,enquo)
15 | importFrom(rlang,eval_tidy)
16 | importFrom(utils,object.size)
17 | useDynLib(cachem, .registration = TRUE)
18 | 


--------------------------------------------------------------------------------
/NEWS.md:
--------------------------------------------------------------------------------
 1 | # cachem 1.1.0
 2 | 
 3 | * Fixed #47: In some cases, with a `cache_mem`, removing an item could result in the spurious message "nothing to compact" being printed out. (#48)
 4 | 
 5 | # cachem 1.0.8
 6 | 
 7 | * Fixed #38: With a `cache_mem`, `$reset()` did not fully reset the cache, and calling `$prune()` could result in an error. (#39)
 8 | 
 9 | # cachem 1.0.7
10 | 
11 | * `cache_disk()` gains `read_fn`, `write_fn`, and `extension` arguments, to allow specifying custom formats for serializing objects to disk. (@jimhester)
12 | 
13 | # cachem 1.0.6
14 | 
15 | * Fixed #14: Fix off-by-one error when checking pruning throttling counter for `cache_disk`. (#15)
16 | 
17 | * Closed #13: Added documentation for the `remove()` method.
18 | 
19 | 
20 | # cachem 1.0.5
21 | 
22 | * `cache_mem()` and `cache_disk()` now allow `-` and `_` (hyphen and underscore) characters in the keys. (#9)
23 | 
24 | * `cache_disk()` previously did not correctly throttle pruning. (#11)
25 | 
26 | 
27 | # cachem 1.0.4
28 | 
29 | * More pruning speed enhancements for `cache_mem()`. (#7)
30 | 
31 | 
32 | # cachem 1.0.3
33 | 
34 | * Addressed issues with timing-sensitive tests.
35 | 
36 | 
37 | # cachem 1.0.2
38 | 
39 | * Closed #4: Sped up pruning for `cache_mem`. (#5)
40 | 
41 | * Fixed `cache_mem` pruning with `evict="lru"`.
42 | 
43 | 
44 | # cachem 1.0.1
45 | 
46 | * Fixed function declaration of `C_validate_key`.
47 | 
48 | 
49 | # cachem 1.0.0
50 | 
51 | * First CRAN release.
52 | 


--------------------------------------------------------------------------------
/R/cache-disk.R:
--------------------------------------------------------------------------------
  1 | #' Create a disk cache object
  2 | #'
  3 | #' A disk cache object is a key-value store that saves the values as files in a
  4 | #' directory on disk. Objects can be stored and retrieved using the `get()` and
  5 | #' `set()` methods. Objects are automatically pruned from the cache according to
  6 | #' the parameters `max_size`, `max_age`, `max_n`, and `evict`.
  7 | #'
  8 | #'
  9 | #' @section Missing keys:
 10 | #'
 11 | #'   The `missing` parameter controls what happens when `get()` is called with a
 12 | #'   key that is not in the cache (a cache miss). The default behavior is to
 13 | #'   return a [key_missing()] object. This is a *sentinel value* that indicates
 14 | #'   that the key was not present in the cache. You can test if the returned
 15 | #'   value represents a missing key by using the [is.key_missing()] function.
 16 | #'   You can also have `get()` return a different sentinel value, like `NULL`.
 17 | #'   If you want to throw an error on a cache miss, you can do so by providing
 18 | #'   an expression for `missing`, as in `missing = stop("Missing key")`.
 19 | #'
 20 | #'   When the cache is created, you can supply a value for `missing`, which sets
 21 | #'   the default value to be returned for missing values. It can also be
 22 | #'   overridden when `get()` is called, by supplying a `missing` argument. For
 23 | #'   example, if you use `cache$get("mykey", missing = NULL)`, it will return
 24 | #'   `NULL` if the key is not in the cache.
 25 | #'
 26 | #'   The `missing` parameter is actually an expression which is evaluated each
 27 | #'   time there is a cache miss. A quosure (from the rlang package) can be used.
 28 | #'
 29 | #'   If `missing` throws an error, the code that calls `get()` should be
 30 | #'   wrapped with [tryCatch()] to gracefully handle missing keys.
 31 | #'
 32 | #'
 33 | #' @section Cache pruning:
 34 | #'
 35 | #'   Cache pruning occurs when `set()` is called, or it can be invoked manually
 36 | #'   by calling `prune()`.
 37 | #'
 38 | #'   The disk cache will throttle the pruning so that it does not happen on
 39 | #'   every call to `set()`, because the filesystem operations for checking the
 40 | #'   status of files can be slow. Instead, it will prune once in every
 41 | #'   `prune_rate` calls to `set()`, or if at least 5 seconds have elapsed since
 42 | #'   the last prune occurred, whichever is first.
 43 | #'
 44 | #'   When a pruning occurs, if there are any objects that are older than
 45 | #'   `max_age`, they will be removed.
 46 | #'
 47 | #'   The `max_size` and `max_n` parameters are applied to the cache as a whole,
 48 | #'   in contrast to `max_age`, which is applied to each object individually.
 49 | #'
 50 | #'   If the number of objects in the cache exceeds `max_n`, then objects will be
 51 | #'   removed from the cache according to the eviction policy, which is set with
 52 | #'   the `evict` parameter. Objects will be removed so that the number of items
 53 | #'   is `max_n`.
 54 | #'
 55 | #'   If the size of the objects in the cache exceeds `max_size`, then objects
 56 | #'   will be removed from the cache. Objects will be removed from the cache so
 57 | #'   that the total size remains under `max_size`. Note that the size is
 58 | #'   calculated using the size of the files, not the size of disk space used by
 59 | #'   the files --- these two values can differ because files are stored in
 60 | #'   blocks on disk. For example, if the block size is 4096 bytes, then a file
 61 | #'   that is one byte in size will take 4096 bytes on disk.
 62 | #'
 63 | #'   Another time that objects can be removed from the cache is when `get()` is
 64 | #'   called. If the target object is older than `max_age`, it will be removed
 65 | #'   and the cache will report it as a missing value.
 66 | #'
 67 | #' @section Eviction policies:
 68 | #'
 69 | #'   If `max_n` or `max_size` are used, then objects will be removed from the
 70 | #'   cache according to an eviction policy. The available eviction policies are:
 71 | #'
 72 | #'   \describe{
 73 | #'     \item{`"lru"`}{
 74 | #'       Least Recently Used. The least recently used objects will be removed.
 75 | #'       This uses the filesystem's mtime property. When "lru" is used, each
 76 | #'       time `get()` is called, it will update the file's mtime using
 77 | #'       [Sys.setFileTime()]. Note that on some platforms, the resolution of
 78 | #'       [Sys.setFileTime()] may be low, one or two seconds.
 79 | #'     }
 80 | #'     \item{`"fifo"`}{
 81 | #'       First-in-first-out. The oldest objects will be removed.
 82 | #'     }
 83 | #'   }
 84 | #'
 85 | #' Both of these policies use files' mtime. Note that some filesystems (notably
 86 | #' FAT) have poor mtime resolution. (atime is not used because support for atime
 87 | #' is worse than mtime.)
 88 | #'
 89 | #'
 90 | #' @section Sharing among multiple processes:
 91 | #'
 92 | #'   The directory for a cache_disk can be shared among multiple R processes. To
 93 | #'   do this, each R process should have a cache_disk object that uses the same
 94 | #'   directory. Each cache_disk will do pruning independently of the others, so
 95 | #'   if they have different pruning parameters, then one cache_disk may remove
 96 | #'   cached objects before another cache_disk would do so.
 97 | #'
 98 | #'   Even though it is possible for multiple processes to share a cache_disk
 99 | #'   directory, this should not be done on networked file systems, because the
100 | #'   slow performance of networked file systems can cause problems. If you need
101 | #'   a high-performance shared cache, you can use one built on a database like
102 | #'   Redis, SQLite, mySQL, or similar.
103 | #'
104 | #'   When multiple processes share a cache directory, there are some potential
105 | #'   race conditions. For example, if your code calls `exists(key)` to check if
106 | #'   an object is in the cache, and then calls `get(key)`, the object may be
107 | #'   removed from the cache in between those two calls, and `get(key)` will
108 | #'   throw an error. Instead of calling the two functions, it is better to
109 | #'   simply call `get(key)`, and check that the returned object is not a
110 | #'   `key_missing()` object, using `is.key_missing()`. This effectively tests
111 | #'   for existence and gets the object in one operation.
112 | #'
113 | #'   It is also possible for one process to prune objects at the same time
114 | #'   that another process is trying to prune objects. If this happens, you may
115 | #'   see a warning from `file.remove()` failing to remove a file that has
116 | #'   already been deleted.
117 | #'
118 | #'
119 | #' @section Methods:
120 | #'
121 | #'  A disk cache object has the following methods:
122 | #'
123 | #'   \describe{
124 | #'     \item{`get(key, missing)`}{
125 | #'       Returns the value associated with `key`. If the key is not in the
126 | #'       cache, then it evaluates the expression specified by `missing` and
127 | #'       returns the value. If `missing` is specified here, then it will
128 | #'       override the default that was set when the `cache_disk` object was
129 | #'       created. See section Missing Keys for more information.
130 | #'     }
131 | #'     \item{`set(key, value)`}{
132 | #'       Stores the `key`-`value` pair in the cache.
133 | #'     }
134 | #'     \item{`exists(key)`}{
135 | #'       Returns `TRUE` if the cache contains the key, otherwise
136 | #'       `FALSE`.
137 | #'     }
138 | #'     \item{`remove(key)`}{
139 | #'       Removes `key` from the cache, if it exists in the cache. If the key is
140 | #'       not in the cache, this does nothing.
141 | #'     }
142 | #'     \item{`size()`}{
143 | #'       Returns the number of items currently in the cache.
144 | #'     }
145 | #'     \item{`keys()`}{
146 | #'       Returns a character vector of all keys currently in the cache.
147 | #'     }
148 | #'     \item{`reset()`}{
149 | #'       Clears all objects from the cache.
150 | #'     }
151 | #'     \item{`destroy()`}{
152 | #'       Clears all objects in the cache, and removes the cache directory from
153 | #'       disk.
154 | #'     }
155 | #'     \item{`prune()`}{
156 | #'       Prunes the cache, using the parameters specified by `max_size`,
157 | #'       `max_age`, `max_n`, and `evict`.
158 | #'     }
159 | #'   }
160 | #'
161 | #' @param dir Directory to store files for the cache. If `NULL` (the default) it
162 | #'   will create and use a temporary directory.
163 | #' @param read_fn The function used to read the values from disk. If `NULL`
164 | #'   (the default) it will use `readRDS`.
165 | #' @param write_fn The function used to write the values to disk. If `NULL`
166 | #'   (the default) it will use `saveRDS`.
167 | #' @param extension The file extension to use for files on disk.
168 | #' @param max_age Maximum age of files in cache before they are evicted, in
169 | #'   seconds. Use `Inf` for no age limit.
170 | #' @param max_size Maximum size of the cache, in bytes. If the cache exceeds
171 | #'   this size, cached objects will be removed according to the value of
172 | #'   `evict`. Use `Inf` for no size limit. The default is 1 gigabyte.
173 | #' @param max_n Maximum number of objects in the cache. If the number of objects
174 | #'   exceeds this value, then cached objects will be removed according to the
175 | #'   value of `evict`. Use `Inf` for no limit of number of items.
176 | #' @param evict The eviction policy to use to decide which objects are removed
177 | #'   when a cache pruning occurs. Currently, `"lru"` and `"fifo"` are supported.
178 | #' @param destroy_on_finalize If `TRUE`, then when the cache_disk object is
179 | #'   garbage collected, the cache directory and all objects inside of it will be
180 | #'   deleted from disk. If `FALSE` (the default), it will do nothing when
181 | #'   finalized.
182 | #' @param missing A value to return when `get(key)` is called but the key is not
183 | #'   present in the cache. The default is a [key_missing()] object. It is
184 | #'   actually an expression that is evaluated each time there is a cache miss.
185 | #'   See section Missing keys for more information.
186 | #' @param prune_rate How often to prune the cache. See section Cache Pruning for
187 | #'   more information.
188 | #' @param warn_ref_objects Should a warning be emitted when a reference is
189 | #'   stored in the cache? This can be useful because serializing and
190 | #'   deserializing a reference object (such as environments and external
191 | #'   pointers) can lead to unexpected behavior.
192 | #' @param logfile An optional filename or connection object to where logging
193 | #'   information will be written. To log to the console, use `stderr()` or
194 | #'   `stdout()`.
195 | #'
196 | #' @return A disk caching object, with class `cache_disk`.
197 | #' @importFrom rlang enquo eval_tidy as_quosure
198 | #' @export
199 | cache_disk <- function(
200 |   dir = NULL,
201 |   max_size = 1024 * 1024 ^ 2,
202 |   max_age = Inf,
203 |   max_n = Inf,
204 |   evict = c("lru", "fifo"),
205 |   destroy_on_finalize = FALSE,
206 |   read_fn = NULL,
207 |   write_fn = NULL,
208 |   extension = ".rds",
209 |   missing = key_missing(),
210 |   prune_rate = 20,
211 |   warn_ref_objects = FALSE,
212 |   logfile = NULL
213 | ) {
214 |   # ============================================================================
215 |   # Constants
216 |   # ============================================================================
217 |   PRUNE_THROTTLE_TIME_LIMIT <- 5
218 | 
219 |   # ============================================================================
220 |   # Logging
221 |   # ============================================================================
222 |   # This needs to be defined first, because it's used in initialization.
223 |   log_ <- function(text) {
224 |     if (is.null(logfile_)) return()
225 | 
226 |     text <- paste0(format(Sys.time(), "[%Y-%m-%d %H:%M:%OS3] cache_disk "), text)
227 |     cat(text, sep = "\n", file = logfile_, append = TRUE)
228 |   }
229 | 
230 |   # ============================================================================
231 |   # Initialization
232 |   # ============================================================================
233 |   if (is.null(dir)) {
234 |     dir <- tempfile("cache_disk-")
235 |   }
236 |   if (!is.numeric(max_size)) stop("max_size must be a number. Use `Inf` for no limit.")
237 |   if (!is.numeric(max_age))  stop("max_age must be a number. Use `Inf` for no limit.")
238 |   if (!is.numeric(max_n))    stop("max_n must be a number. Use `Inf` for no limit.")
239 | 
240 |   if (!dir.exists(dir)) {
241 |     # log_(paste0("initialize: Creating ", dir))
242 |     dir.create(dir, recursive = TRUE)
243 |   }
244 | 
245 |   logfile_             <- logfile
246 |   dir_                 <- normalizePath(dir)
247 |   extension_           <- extension
248 |   extension_regex_     <- sub("\\.(.*)", "\\\\.\\1$", extension)
249 |   read_fn_             <- read_fn
250 |   write_fn_            <- write_fn
251 |   max_size_            <- max_size
252 |   max_age_             <- max_age
253 |   max_n_               <- max_n
254 |   evict_               <- match.arg(evict)
255 |   destroy_on_finalize_ <- destroy_on_finalize
256 |   missing_             <- enquo(missing)
257 |   prune_rate_          <- prune_rate
258 | 
259 |   destroyed_           <- FALSE
260 | 
261 | 
262 |   if (is.null(read_fn_)) {
263 |     read_fn_ <- readRDS
264 |   }
265 | 
266 |   if (is.null(write_fn_)) {
267 |     # Default writer: saveRDS() to a temp file, then rename it to `file`.
268 |     # `ref_object` records whether saveRDS()'s refhook saw a reference object.
269 |     write_fn_ <- local({
270 |       ref_object <- FALSE
271 |       function(value, file) {
272 |         on.exit({
273 |           # Reset the flag so one write cannot trigger the warning on the next.
274 |           ref_object <<- FALSE
275 |           unlink(temp_file)
276 |         })
277 |         temp_file <- paste0(file, "-temp-", random_hex(16))
278 |         saveRDS(value, file = temp_file,
279 |           refhook = function(x) { ref_object <<- TRUE; NULL }
280 |         )
281 |         file.rename(temp_file, file)
282 |         if (warn_ref_objects && ref_object) {
283 |           log_(paste0('set: value is a reference object'))
284 |           warning("A reference object was cached in a serialized format. The restored object may not work as expected.")
285 |         }
286 |       }
287 |     })
288 |   }
289 | 
290 |   # Start the prune throttle counter at a random value from 0 to
291 |   # `prune_rate - 1`. This is so that, in the case where multiple cache_disk
292 |   # objects that point to the same directory are created and discarded after
293 |   # just a few uses each, pruning will still occur.
294 |   prune_throttle_counter_ <- sample.int(prune_rate_, 1) - 1
295 |   prune_last_time_        <- as.numeric(Sys.time())
296 | 
297 |   if (destroy_on_finalize_) {
298 |     reg.finalizer(
299 |       environment(),
300 |       function(e) { e$destroy() }
301 |     )
302 |   }
303 | 
304 |   # ============================================================================
305 |   # Public methods
306 |   # ============================================================================
307 |   get <- function(key, missing = missing_) {
308 |     log_(paste0('get: key "', key, '"'))
309 |     is_destroyed(throw = TRUE)
310 |     validate_key(key)
311 | 
312 |     maybe_prune_single_(key)
313 | 
314 |     filename <- key_to_filename_(key)
315 | 
316 |     # Instead of calling exists() before fetching the value, just try to
317 |     # fetch the value. This reduces the risk of a race condition when
318 |     # multiple processes share a cache.
319 |     read_error <- FALSE
320 |     tryCatch(
321 |       {
322 |         value <- suppressWarnings(read_fn_(filename))
323 |         if (evict_ == "lru"){
324 |           Sys.setFileTime(filename, Sys.time())
325 |         }
326 |       },
327 |       error = function(e) {
328 |         read_error <<- TRUE
329 |       }
330 |     )
331 |     if (read_error) {
332 |       log_(paste0('get: key "', key, '" is missing'))
333 |       missing <- as_quosure(missing)
334 |       return(eval_tidy(missing))
335 |     }
336 | 
337 |     log_(paste0('get: key "', key, '" found'))
338 |     value
339 |   }
340 | 
341 |   set <- function(key, value) {
342 |     log_(paste0('set: key "', key, '"'))
343 |     is_destroyed(throw = TRUE)
344 |     validate_key(key)
345 | 
346 |     file <- key_to_filename_(key)
347 | 
348 |     save_error <- FALSE
349 |     tryCatch(
350 |       {
351 |         write_fn_(value, file)
352 |       },
353 |       error = function(e) {
354 |         save_error <<- TRUE
355 |       }
356 |     )
357 |     if (save_error) {
358 |       log_(paste0('set: key "', key, '" error'))
359 |       stop('Error setting value for key "', key, '".')
360 |     }
361 | 
362 |     prune_throttled_()
363 |     invisible(TRUE)
364 |   }
365 | 
366 |   exists <- function(key) {
367 |     is_destroyed(throw = TRUE)
368 |     validate_key(key)
369 |     file.exists(key_to_filename_(key))
370 |   }
371 | 
372 |   # Return all keys in the cache
373 |   keys <- function() {
374 |     is_destroyed(throw = TRUE)
375 |     files <- dir(dir_, extension_regex_)
376 |     sub(extension_regex_, "", files)
377 |   }
378 | 
379 |   remove <- function(key) {
380 |     log_(paste0('remove: key "', key, '"'))
381 |     is_destroyed(throw = TRUE)
382 |     validate_key(key)
383 |     # Remove file; use unlink() instead of file.remove() because it won't
384 |     # warn if the file doesn't exist.
385 |     unlink(key_to_filename_(key))
386 |     invisible(TRUE)
387 |   }
388 | 
389 |   reset <- function() {
390 |     log_(paste0('reset'))
391 |     is_destroyed(throw = TRUE)
392 |     file.remove(dir(dir_, extension_regex_, full.names = TRUE))
393 |     invisible(TRUE)
394 |   }
395 | 
396 |   prune <- function() {
397 |     # TODO: It would be good to add parameters `n` and `size`, so that the
398 |     # cache can be pruned to `max_n - n` and `max_size - size` before adding
399 |     # an object. Right now we prune after adding the object, so the cache
400 |     # can temporarily grow past the limits. The reason we don't do this now
401 |     # is because it is expensive to find the size of the serialized object
402 |     # before adding it.
403 | 
404 |     log_('prune')
405 |     is_destroyed(throw = TRUE)
406 | 
407 |     current_time <- Sys.time()
408 | 
409 |     filenames <- dir(dir_, extension_regex_, full.names = TRUE)
410 |     info <- file.info(filenames, extra_cols = FALSE)
411 |     info <- info[info$isdir == FALSE, ]
412 |     info$name <- rownames(info)
413 |     rownames(info) <- NULL
414 |     # Files could be removed between the dir() and file.info() calls. The
415 |     # entire row for such files will have NA values. Remove those rows.
416 |     info <- info[!is.na(info$size), ]
417 | 
418 |     # 1. Remove any files where the age exceeds max age.
419 |     if (is.finite(max_age_)) {
420 |       timediff <- as.numeric(current_time - info$mtime, units = "secs")
421 |       rm_idx <- timediff > max_age_
422 |       if (any(rm_idx)) {
423 |         log_(paste0("prune max_age: Removing ", paste(info$name[rm_idx], collapse = ", ")))
424 |         rm_success <- file.remove(info$name[rm_idx])
425 |         # This maps rm_success back into the TRUEs in the rm_idx vector.
426 |         # If (for example) rm_idx is c(F,T,F,T,T) and rm_success is c(T,F,T),
427 |         # then this line modifies rm_idx to be c(F,T,F,F,T).
428 |         rm_idx[rm_idx] <- rm_success
429 |         info <- info[!rm_idx, ]
430 |       }
431 |     }
432 | 
433 |     # Sort objects by priority. The sorting is done in a function which can be
434 |     # called multiple times but only does the work the first time.
435 |     info_is_sorted <- FALSE
436 |     ensure_info_is_sorted <- function() {
437 |       if (info_is_sorted) return()
438 | 
439 |       info <<- info[order(info$mtime, decreasing = TRUE), ]
440 |       info_is_sorted <<- TRUE
441 |     }
442 | 
443 |     # 2. Remove files if there are too many.
444 |     if (is.finite(max_n_) && nrow(info) > max_n_) {
445 |       ensure_info_is_sorted()
446 |       rm_idx <- seq_len(nrow(info)) > max_n_
447 |       log_(paste0("prune max_n: Removing ", paste(info$name[rm_idx], collapse = ", ")))
448 |       rm_success <- file.remove(info$name[rm_idx])
449 |       rm_idx[rm_idx] <- rm_success
450 |       info <- info[!rm_idx, ]
451 |     }
452 | 
453 |     # 3. Remove files if cache is too large.
454 |     if (is.finite(max_size_) && sum(info$size) > max_size_) {
455 |       ensure_info_is_sorted()
456 |       cum_size <- cumsum(info$size)
457 |       rm_idx <- cum_size > max_size_
458 |       log_(paste0("prune max_size: Removing ", paste(info$name[rm_idx], collapse = ", ")))
459 |       rm_success <- file.remove(info$name[rm_idx])
460 |       rm_idx[rm_idx] <- rm_success
461 |       info <- info[!rm_idx, ]
462 |     }
463 | 
464 |     prune_last_time_ <<- as.numeric(current_time)
465 | 
466 |     invisible(TRUE)
467 |   }
468 | 
469 |   size <- function() {
470 |     is_destroyed(throw = TRUE)
471 |     length(dir(dir_, extension_regex_))
472 |   }
473 | 
474 |   info <- function() {
475 |     list(
476 |       dir = dir_,
477 |       max_size = max_size_,
478 |       max_age = max_age_,
479 |       max_n = max_n_,
480 |       evict = evict_,
481 |       destroy_on_finalize = destroy_on_finalize_,
482 |       missing = missing_,
483 |       prune_rate = prune_rate,
484 |       logfile = logfile_,
485 |       prune_throttle_counter = prune_throttle_counter_,
486 |       prune_last_time = as.POSIXct(prune_last_time_, origin = "1970-01-01")
487 |     )
488 |   }
489 | 
490 |   destroy <- function() {
491 |     if (is_destroyed()) {
492 |       return(invisible(FALSE))
493 |     }
494 | 
495 |     log_(paste0("destroy: Removing ", dir_))
496 |     # First create a sentinel file so that other processes sharing this
497 |     # cache know that the cache is to be destroyed. This is needed because
498 |     # the recursive unlink is not atomic: another process can add a file to
499 |     # the directory after unlink starts removing files but before it removes
500 |     # the directory, and when that happens, the directory removal will fail.
501 |     file.create(file.path(dir_, "__destroyed__"))
502 |     # Remove all the extension files. This will not remove the sentinel file.
503 |     file.remove(dir(dir_, extension_regex_, full.names = TRUE))
504 |     # Next remove dir recursively, including sentinel file.
505 |     unlink(dir_, recursive = TRUE)
506 |     destroyed_ <<- TRUE
507 |     invisible(TRUE)
508 |   }
509 | 
510 |   is_destroyed <- function(throw = FALSE) {
511 |     if (!dir.exists(dir_) ||
512 |         file.exists(file.path(dir_, "__destroyed__")))
513 |     {
514 |       # It's possible for another process to destroy a shared cache directory
515 |       destroyed_ <<- TRUE
516 |     }
517 | 
518 |     if (throw) {
519 |       if (destroyed_) {
520 |         stop("Attempted to use cache which has been destroyed:\n  ", dir_)
521 |       }
522 | 
523 |     } else {
524 |       destroyed_
525 |     }
526 |   }
527 | 
528 |   # ============================================================================
529 |   # Private methods
530 |   # ============================================================================
531 |   key_to_filename_ <- function(key) {  # Returns the path of the key's file.
532 |     validate_key(key)
533 |     # Additional validation. The 80-char limit (inclusive) is arbitrary, and is
534 |     # intended to avoid hitting a filename length limit on Windows.
535 |     if (nchar(key) > 80) {
536 |       stop("Invalid key: key must have no more than 80 characters.")
537 |     }
538 |     file.path(dir_, paste0(key, extension_))
539 |   }
540 | 
541 |   # Throttled front end for prune(), which can be expensive due to filesystem
542 |   # operations. Every call bumps a counter; prune() actually runs only when
543 |   # the counter reaches `prune_rate_`, or when more than
544 |   # PRUNE_THROTTLE_TIME_LIMIT seconds have passed since `prune_last_time_`,
545 |   # whichever comes first. In the future, the behavior may be customizable.
546 |   prune_throttled_ <- function() {
547 |     prune_throttle_counter_ <<- prune_throttle_counter_ + 1
548 | 
549 |     called_enough <- prune_throttle_counter_ >= prune_rate_
550 |     if (called_enough ||
551 |         as.numeric(Sys.time()) - prune_last_time_ > PRUNE_THROTTLE_TIME_LIMIT)
552 |     {
553 |       prune()
554 |       prune_throttle_counter_ <<- 0
555 |     }
556 |   }
557 | 
558 |   # Removes the file backing `key` when it is older than max_age_. Nothing
559 |   # happens if the file is within max_age_ or does not exist.
560 |   maybe_prune_single_ <- function(key) {
561 |     path <- file.path(dir_, paste0(key, extension_))
562 |     mtime <- file.info(path, extra_cols = FALSE)$mtime
563 |     # file.info() reports an NA mtime for a path that does not exist.
564 |     if (is.na(mtime)) {
565 |       return()
566 |     }
567 |     age <- as.numeric(Sys.time()) - as.numeric(mtime)
568 |     if (age > max_age_) {
569 |       log_(paste0("pruning single object exceeding max_age: Removing ", key))
570 |       unlink(path)
571 |     }
572 |   }
573 | 
574 | 
575 |   # ============================================================================
576 |   # Returned object
577 |   # ============================================================================
578 |   structure(
579 |     list(
580 |       get = get,
581 |       set = set,
582 |       exists = exists,
583 |       keys = keys,
584 |       remove = remove,
585 |       reset = reset,
586 |       prune = prune,
587 |       size = size,
588 |       destroy = destroy,
589 |       is_destroyed = is_destroyed,
590 |       info = info
591 |     ),
592 |     class = c("cache_disk", "cachem")
593 |   )
594 | }
595 | 


--------------------------------------------------------------------------------
/R/cache-layered.R:
--------------------------------------------------------------------------------
  1 | #' Compose any number of cache objects into a new, layered cache object
  2 | #'
  3 | #' Note that `cache_layered` is currently experimental.
  4 | #'
  5 | #' @param ... Cache objects to compose into a new, layered cache object.
  6 | #' @inheritParams cache_disk
  7 | #'
  8 | #' @return A layered caching object, with class `cache_layered`.
  9 | #' @examples
 10 | #'
 11 | #' # Make a layered cache from a small memory cache and large disk cache
 12 | #' m <- cache_mem(max_size = 100 * 1024^2)
 13 | #' d <- cache_disk(max_size = 2 * 1024^3)
 14 | #' cl <- cache_layered(m, d)
 15 | #' @export
 16 | cache_layered <- function(..., logfile = NULL) {
 17 |   caches <- list(...)
 18 |   logfile_ <- logfile
 19 | 
 20 |   # ============================================================================
 21 |   # Logging
 22 |   # ============================================================================
 23 |   # Appends one timestamped line to `logfile_`; does nothing when it is NULL.
 24 |   log_ <- function(text) {
 25 |     if (is.null(logfile_)) return()
 26 | 
 27 |     stamp <- format(Sys.time(), "[%Y-%m-%d %H:%M:%OS3] cache_layered ")
 28 |     cat(paste0(stamp, text), sep = "\n", file = logfile_, append = TRUE)
 29 |   }
 30 | 
 31 |   # Looks the key up layer by layer, in the order the caches were supplied. The
 32 |   # first hit wins; if every layer misses, the last layer's result is returned.
 33 |   get <- function(key) {
 34 |     log_(paste0("Get: ", key))
 35 |     value <- NULL
 36 |     for (i in seq_along(caches)) {
 37 |       layer <- caches[[i]]
 38 |       value <- layer$get(key)
 39 |       hit <- !is.key_missing(value)
 40 |       outcome <- if (hit) "... hit" else "... miss"
 41 |       log_(paste0("Get from ", class(layer)[1], outcome))
 42 |       if (hit) {
 43 |         # Backfill the earlier layers, all of which were searched and missed.
 44 |         for (upper in caches[seq_len(i - 1)]) {
 45 |           upper$set(key, value)
 46 |         }
 47 |         break
 48 |       }
 49 |     }
 50 |     value
 51 |   }
 52 | 
 53 |   # Writes the key-value pair to every layer.
 54 |   set <- function(key, value) {
 55 |     for (layer in caches) {
 56 |       layer$set(key, value)
 57 |     }
 58 |   }
 59 | 
 60 |   # TRUE as soon as any layer reports the key; FALSE if none does.
 61 |   exists <- function(key) {
 62 |     for (layer in caches) {
 63 |       found <- layer$exists(key)
 64 |       if (found) return(TRUE)
 65 |     }
 66 |     FALSE
 67 |   }
 68 | 
 69 |   # Union of the keys held by the layers, without duplicates.
 70 |   keys <- function() {
 71 |     per_layer <- lapply(caches, function(layer) layer$keys())
 72 |     unique(unlist(per_layer))
 73 |   }
 74 | 
 75 |   # Removes the key from every layer.
 76 |   remove <- function(key) {
 77 |     for (layer in caches) {
 78 |       layer$remove(key)
 79 |     }
 80 |   }
 81 | 
 82 |   # Empties every layer.
 83 |   reset <- function() {
 84 |     for (layer in caches) {
 85 |       layer$reset()
 86 |     }
 87 |   }
 88 | 
 89 |   # Returns the list of underlying cache objects, in layer order.
 90 |   get_caches <- function() caches
 91 | 
 92 |   methods <- list(
 93 |     get = get,
 94 |     set = set,
 95 |     exists = exists,
 96 |     keys = keys,
 97 |     remove = remove,
 98 |     reset = reset,
 99 |     get_caches = get_caches
100 |   )
101 |   structure(methods, class = c("cache_layered", "cachem"))
102 | }
103 | 
104 | 


--------------------------------------------------------------------------------
/R/cache-mem.R:
--------------------------------------------------------------------------------
  1 | #' Create a memory cache object
  2 | #'
  3 | #' A memory cache object is a key-value store that saves the values in an
  4 | #' environment. Objects can be stored and retrieved using the `get()` and
  5 | #' `set()` methods. Objects are automatically pruned from the cache according to
  6 | #' the parameters `max_size`, `max_age`, `max_n`, and `evict`.
  7 | #'
  8 | #' In a `cache_mem`, R objects are stored directly in the cache; they are
  9 | #' *not* serialized before being stored in the cache. This contrasts with other
 10 | #' cache types, like [cache_disk()], where objects are serialized, and the
 11 | #' serialized object is cached. This can result in some differences of behavior.
 12 | #' For example, as long as an object is stored in a cache_mem, it will not be
 13 | #' garbage collected.
 14 | #'
 15 | #' @section Missing keys:
 16 | #'
 17 | #'   The `missing` parameter controls what happens when `get()` is called with a
 18 | #'   key that is not in the cache (a cache miss). The default behavior is to
 19 | #'   return a [key_missing()] object. This is a *sentinel value* that indicates
 20 | #'   that the key was not present in the cache. You can test if the returned
 21 | #'   value represents a missing key by using the [is.key_missing()] function.
 22 | #'   You can also have `get()` return a different sentinel value, like `NULL`.
 23 | #'   If you want to throw an error on a cache miss, you can do so by providing
 24 | #'   an expression for `missing`, as in `missing = stop("Missing key")`.
 25 | #'
 26 | #'   When the cache is created, you can supply a value for `missing`, which sets
 27 | #'   the default value to be returned for missing values. It can also be
 28 | #'   overridden when `get()` is called, by supplying a `missing` argument. For
 29 | #'   example, if you use `cache$get("mykey", missing = NULL)`, it will return
 30 | #'   `NULL` if the key is not in the cache.
 31 | #'
 32 | #'   The `missing` parameter is actually an expression which is evaluated each
 33 | #'   time there is a cache miss. A quosure (from the rlang package) can be used.
 34 | #'
 35 | #'   If you use this, the code that calls `get()` should be wrapped with
 36 | #'   [tryCatch()] to gracefully handle missing keys.
 37 | #'
 38 | #'
 39 | #'   @section Cache pruning:
 40 | #'
 41 | #'   Cache pruning occurs when `set()` is called, or it can be invoked manually
 42 | #'   by calling `prune()`.
 43 | #'
 44 | #'   When a pruning occurs, if there are any objects that are older than
 45 | #'   `max_age`, they will be removed.
 46 | #'
 47 | #'   The `max_size` and `max_n` parameters are applied to the cache as a whole,
 48 | #'   in contrast to `max_age`, which is applied to each object individually.
 49 | #'
 50 | #'   If the number of objects in the cache exceeds `max_n`, then objects will be
 51 | #'   removed from the cache according to the eviction policy, which is set with
 52 | #'   the `evict` parameter. Objects will be removed so that the number of items
 53 | #'   is `max_n`.
 54 | #'
 55 | #'   If the size of the objects in the cache exceeds `max_size`, then objects
 56 | #'   will be removed from the cache. Objects will be removed from the cache so
 57 | #'   that the total size remains under `max_size`.
 58 | #'
 59 | #'   Another time that objects can be removed from the cache is when `get()` is
 60 | #'   called. If the target object is older than `max_age`, it will be removed
 61 | #'   and the cache will report it as a missing value.
 62 | #'
 63 | #' @section Eviction policies:
 64 | #'
 65 | #' If `max_n` or `max_size` are used, then objects will be removed
 66 | #' from the cache according to an eviction policy. The available eviction
 67 | #' policies are:
 68 | #'
 69 | #'   \describe{
 70 | #'     \item{`"lru"`}{
 71 | #'       Least Recently Used. The least recently used objects will be removed.
 72 | #'     }
 73 | #'     \item{`"fifo"`}{
 74 | #'       First-in-first-out. The oldest objects will be removed.
 75 | #'     }
 76 | #'   }
 77 | #'
 78 | #' @section Methods:
 79 | #'
 80 | #'  A memory cache object has the following methods:
 81 | #'
 82 | #'   \describe{
 83 | #'     \item{`get(key, missing)`}{
 84 | #'       Returns the value associated with `key`. If the key is not in the
 85 | #'       cache, then it evaluates the expression specified by `missing` and
 86 | #'       returns the value. If `missing` is specified here, then it will
 87 | #'       override the default that was set when the `cache_mem` object was
 88 | #'       created. See section Missing Keys for more information.
 89 | #'     }
 90 | #'     \item{`set(key, value)`}{
 91 | #'       Stores the `key`-`value` pair in the cache.
 92 | #'     }
 93 | #'     \item{`exists(key)`}{
 94 | #'       Returns `TRUE` if the cache contains the key, otherwise
 95 | #'       `FALSE`.
 96 | #'     }
 97 | #'     \item{`remove(key)`}{
 98 | #'       Removes `key` from the cache, if it exists in the cache. If the key is
 99 | #'       not in the cache, this does nothing.
100 | #'     }
101 | #'     \item{`size()`}{
102 | #'       Returns the number of items currently in the cache.
103 | #'     }
104 | #'     \item{`keys()`}{
105 | #'       Returns a character vector of all keys currently in the cache.
106 | #'     }
107 | #'     \item{`reset()`}{
108 | #'       Clears all objects from the cache.
109 | #'     }
110 | #'     \item{`destroy()`}{
111 | #'       Clears all objects in the cache, and removes the cache directory from
112 | #'       disk.
113 | #'     }
114 | #'     \item{`prune()`}{
115 | #'       Prunes the cache, using the parameters specified by `max_size`,
116 | #'       `max_age`, `max_n`, and `evict`.
117 | #'     }
118 | #'   }
119 | #'
120 | #' @param max_size Maximum size of the cache, in bytes. If the cache exceeds
121 | #'   this size, cached objects will be removed according to the value of
122 | #'   `evict`. Use `Inf` for no size limit. The default is 512 megabytes.
123 | #' @inheritParams cache_disk
124 | #'
125 | #' @return A memory caching object, with class `cache_mem`.
126 | #' @importFrom utils object.size
127 | #' @export
128 | cache_mem <- function(
129 |   max_size = 512 * 1024 ^ 2,
130 |   max_age = Inf,
131 |   max_n = Inf,
132 |   evict = c("lru", "fifo"),
133 |   missing = key_missing(),
134 |   logfile = NULL)
135 | {
136 |   # ============================================================================
137 |   # Constants
138 |   # ============================================================================
139 |   DEBUG         <- TRUE
140 |   INITIAL_SIZE  <- 64L
141 |   # When compacting, how much space should be reserved? For example, if there
142 |   # are 75 items in the cache when it is compacted and COMPACT_MULT is 2, then
143 |   # the data store will be compacted to have a capacity of 150 items.
144 |   COMPACT_MULT  <- 2
145 |   # If TRUE, the data will be kept in the correct atime (for lru) or mtime (for
146 |   # fifo) order each time get() or set() is called, though the metadata log will
147 |   # grow by one entry each time (it will also occasionally be compacted). If
148 |   # FALSE, the metadata entry will be kept in place (so the metadata log won't
149 |   # grow as quickly), but the atimes/mtimes will not be kept in order; instead,
150 |   # the metadata will be sorted by atime/mtime each time prune() is called (and
151 |   # prune() is called by set()). The overall behavior is the same, but there are
152 |   # somewhat different performance characteristics. The tradeoff is either
153 |   # growing the log for every get() (and needing to occasionally compact it), or
154 |   # having to sort it every time set() is called. Sorting data of a reasonable
155 |   # size (up to around 1e5) is fast in R. For larger numbers of items it may be
156 |   # better to set this to TRUE.
157 |   MAINTAIN_TIME_SORT <- FALSE
158 | 
159 |   # ============================================================================
160 |   # Initialization
161 |   # ============================================================================
162 |   if (!is.numeric(max_size)) stop("max_size must be a number. Use `Inf` for no limit.")
163 |   if (!is.numeric(max_age))  stop("max_age must be a number. Use `Inf` for no limit.")
164 |   if (!is.numeric(max_n))    stop("max_n must be a number. Use `Inf` for no limit.")
165 | 
166 |   max_size_     <- max_size
167 |   max_age_      <- max_age
168 |   max_n_        <- max_n
169 |   evict_        <- match.arg(evict)
170 |   missing_      <- enquo(missing)
171 |   logfile_      <- logfile
172 | 
173 |   PRUNE_BY_SIZE <- is.finite(max_size_)
174 |   PRUNE_BY_AGE  <- is.finite(max_age_)
175 |   PRUNE_BY_N    <- is.finite(max_n_)
176 | 
177 |   # ============================================================================
178 |   # Internal state
179 |   # ============================================================================
180 |   # The keys, values, and metadata are stored in columnar format. The vectors
181 |   # key_, value_, size_, mtime_, and atime_ are the columns. Separate vectors
182 |   # are used instead of a data frame, because operations for modifying and
183 |   # growing vectors are much faster than the same operations on data frames.
184 |   #
185 |   # It uses a column-first format because a row-first format is much slower for
186 |   # doing the manipulations and computations that are needed for pruning, such
187 |   # as sorting by atime, and calculating a cumulative sum of sizes.
188 |   #
189 |   # For fast get() performance, there is also key_idx_map_, which maps between
190 |   # the key, and the "row" index in our "data frame".
191 |   #
192 |   # An older version of this code stored the value along with metadata (size,
193 |   # mtime, and atime) in a fastmap object, but this had poor performance for
194 |   # pruning operations. This is because, for pruning, it needs to fetch the
195 |   # metadata for all objects, then sort by atime (if evict="lru"), then take a
196 |   # cumulative sum of sizes. Fetching the metadata for all objects was slow, as
197 |   # was converting the resulting row-first data into column-first data. The
198 |   # current column-first approach is much, much faster.
199 |   key_idx_map_  <- fastmap()
200 | 
201 |   # These values are set in the reset() method.
202 |   key_          <- NULL
203 |   value_        <- NULL
204 |   size_         <- NULL
205 |   mtime_        <- NULL
206 |   atime_        <- NULL
207 | 
208 |   total_n_      <- NULL  # Total number of items
209 |   total_size_   <- NULL  # Total number of bytes used
210 |   last_idx_     <- NULL  # Most recent (and largest) index used
211 | 
212 | 
213 |   # ============================================================================
214 |   # Public methods
215 |   # ============================================================================
216 | 
217 |   reset <- function() {
218 |     # Empties the cache: clears the key map and reallocates every column at
219 |     # its initial capacity. Returns TRUE invisibly.
220 |     log_("reset")
221 |     key_idx_map_$reset()
222 |     key_   <<- rep_len(NA_character_, INITIAL_SIZE)
223 |     value_ <<- vector("list", INITIAL_SIZE)
224 |     size_  <<- mtime_ <<- atime_ <<- rep_len(NA_real_, INITIAL_SIZE)
225 | 
226 |     total_n_    <<- 0L
227 |     total_size_ <<- 0
228 |     last_idx_   <<- 0L
229 |     invisible(TRUE)
230 |   }
231 | 
232 |   get <- function(key, missing = missing_) {
233 |     log_(paste0('get: key "', key, '"'))
234 |     validate_key(key)
235 | 
236 |     idx <- key_idx_map_$get(key)
237 |     found <- !is.null(idx)
238 | 
239 |     if (!found) {
240 |       log_(paste0('get: key "', key, '" is missing'))
241 |     } else if (PRUNE_BY_AGE &&
242 |                as.numeric(Sys.time()) - mtime_[idx] > max_age_)
243 |     {
244 |       # The entry exists but has outlived max_age: remove it and treat the
245 |       # lookup as a miss.
246 |       log_(paste0("pruning single object exceeding max_age: Removing ", key))
247 |       remove_(key)
248 |       found <- FALSE
249 |     }
250 | 
251 |     if (!found) {
252 |       # `missing` is evaluated only here, i.e. only on a cache miss.
253 |       return(eval_tidy(as_quosure(missing)))
254 |     }
255 | 
256 |     log_(paste0('get: key "', key, '" found'))
257 | 
258 |     # Read the value before updating atime, since that update can move items
259 |     # around when MAINTAIN_TIME_SORT is TRUE.
260 |     value <- value_[[idx]]
261 |     update_atime_(key)
262 |     value
263 |   }
264 | 
265 |   set <- function(key, value) {
266 |     # Stores `value` under `key`, replacing any existing entry, then prunes.
267 |     # Returns TRUE invisibly.
268 |     log_(paste0('set: key "', key, '"'))
269 |     validate_key(key)
270 |     time <- as.numeric(Sys.time())
271 | 
272 |     if (PRUNE_BY_SIZE) {
273 |       # Reported size is rough! See ?object.size.
274 |       size <- as.numeric(object.size(value))
275 |       total_size_ <<- total_size_ + size
276 |     } else {
277 |       size <- NA_real_
278 |     }
279 | 
280 |     old_idx <- key_idx_map_$get(key)
281 | 
282 |     # We'll set this to TRUE if we need to append to the data; FALSE if we can
283 |     # modify the existing entry in place.
284 |     append <- NULL
285 | 
286 |     if (!is.null(old_idx)) {
287 |       # The old entry's size no longer counts toward the total.
288 |       if (PRUNE_BY_SIZE) {
289 |         total_size_ <<- total_size_ - size_[old_idx]
290 |       }
291 | 
292 |       if (MAINTAIN_TIME_SORT  &&  old_idx != last_idx_) {
293 |         # Keep rows in time order: clear the old row and append a new one.
294 |         append <- TRUE
295 | 
296 |         key_  [old_idx] <<- NA_character_
297 |         value_[old_idx] <<- list(NULL)
298 |         size_ [old_idx] <<- NA_real_
299 |         mtime_[old_idx] <<- NA_real_
300 |         atime_[old_idx] <<- NA_real_
301 | 
302 |       } else {
303 |         append <- FALSE
304 |       }
305 | 
306 |     } else {
307 |       append <- TRUE
308 |       total_n_ <<- total_n_ + 1L
309 |     }
310 | 
311 |     if (append) {
312 |       # Use the next index for storage. This assigns past the end of the
313 |       # vectors; as of R 3.4 that grows them in place when possible and is
314 |       # fast. On older versions of R it can be slow (whole-vector copy).
315 |       last_idx_ <<- last_idx_ + 1L
316 |       key_idx_map_$set(key, last_idx_)
317 |       new_idx <- last_idx_
318 | 
319 |     } else {
320 |       # Not appending; replace the old item in place.
321 |       new_idx <- old_idx
322 |     }
323 | 
324 |     key_  [new_idx]   <<- key
325 |     # Use `[<-` with a one-element list rather than `[[<-`: assigning NULL with
326 |     # `[[<-` would delete the list element (shifting every later value)
327 |     # instead of storing NULL.
328 |     value_[new_idx]   <<- list(value)
329 |     size_ [new_idx]   <<- size
330 |     mtime_[new_idx]   <<- time
331 |     atime_[new_idx]   <<- time
332 | 
333 |     prune()
334 |     invisible(TRUE)
335 |   }
336 | 
337 |   exists <- function(key) {
338 |     validate_key(key)
339 | 
340 |     # Without an age limit, membership in the key map is the whole answer.
341 |     if (!PRUNE_BY_AGE) {
342 |       return(key_idx_map_$has(key))
343 |     }
344 | 
345 |     # With an age limit, an entry that has outlived max_age is removed here
346 |     # and reported as absent. The index is fetched directly (rather than
347 |     # calling has() first) for performance.
348 |     idx <- key_idx_map_$get(key)
349 |     if (is.null(idx)) {
350 |       return(FALSE)
351 |     }
352 | 
353 |     age <- as.numeric(Sys.time()) - mtime_[idx]
354 |     if (age > max_age_) {
355 |       log_(paste0("pruning single object exceeding max_age: Removing ", key))
356 |       remove_(key)
357 |       return(FALSE)
358 |     }
359 |     TRUE
360 |   }
361 | 
362 |   keys <- function() {
363 |     # When there's no max_age, pruning is only needed when set() is called,
364 |     # because that's the only way for max_n or max_size to be exceeded. But
365 |     # when there is a max_age, entries can expire simply because time has
366 |     # passed, so prune before listing the keys.
367 |     # (This could be made faster by giving prune() an option to prune only
368 |     # by age, and not by n or size. It could also avoid sorting the
369 |     # metadata.)
370 |     if (PRUNE_BY_AGE) prune()
371 | 
372 |     all_keys <- key_idx_map_$keys()
373 |     all_keys
374 |   }
375 | 
376 |   remove <- function(key) {  # Removes `key` if present; TRUE invisibly.
377 |     log_(paste0("remove: key \"", key, "\""))
378 |     validate_key(key)
379 |     remove_(key)
380 |     invisible(TRUE)
381 |   }
382 | 
383 |   prune <- function() {
384 |     # Enforces max_age, then max_n, then max_size, in that order. Returns TRUE
385 |     # invisibly.
386 |     log_("prune")
387 | 
388 |     # Quick check to see if we need to prune at all: with no max_age, there
389 |     # is nothing to do unless the size or count limit is exceeded.
390 |     over_size <- PRUNE_BY_SIZE && total_size_ > max_size_
391 |     over_n    <- PRUNE_BY_N    && total_n_    > max_n_
392 |     if (!over_size && !over_n && !PRUNE_BY_AGE) {
393 |       return(invisible(TRUE))
394 |     }
395 | 
396 |     # Metadata of all live entries (a list of parallel vectors).
397 |     info <- get_metadata_()
398 | 
399 |     if (DEBUG) {
400 |       # Sanity checks on the running totals.
401 |       if (PRUNE_BY_SIZE && sum(info$size) != total_size_) {
402 |         stop("Size mismatch")
403 |       }
404 |       if (length(info$key) != total_n_) {
405 |         stop("Count mismatch")
406 |       }
407 |     }
408 | 
409 |     # Drops the flagged rows from every metadata column. (The metadata is a
410 |     # list of vectors rather than a data frame, for performance.)
411 |     keep_rows <- function(columns, drop) {
412 |       lapply(columns, function(column) column[!drop])
413 |     }
414 | 
415 |     # 1. Remove any objects where the age exceeds max age.
416 |     if (PRUNE_BY_AGE) {
417 |       now <- as.numeric(Sys.time())
418 |       rm_idx <- now - info$mtime > max_age_
419 |       if (any(rm_idx)) {
420 |         expired <- info$key[rm_idx]
421 |         log_(paste0("prune max_age: Removing ", paste(expired, collapse = ", ")))
422 |         remove_(expired)
423 |         info <- keep_rows(info, rm_idx)
424 |       }
425 |     }
426 | 
427 |     # 2. Remove objects if there are too many.
428 |     if (PRUNE_BY_N && length(info$key) > max_n_) {
429 |       rm_idx <- seq_along(info$key) > max_n_
430 |       surplus <- info$key[rm_idx]
431 |       log_(paste0("prune max_n: Removing ", paste(surplus, collapse = ", ")))
432 |       remove_(surplus)
433 |       info <- keep_rows(info, rm_idx)
434 |     }
435 | 
436 |     # 3. Remove objects if cache is too large. Rows are kept, in metadata
437 |     # order, while the running total of their sizes stays within max_size.
438 |     if (PRUNE_BY_SIZE && sum(info$size) > max_size_) {
439 |       rm_idx <- cumsum(info$size) > max_size_
440 |       oversize <- info$key[rm_idx]
441 |       log_(paste0("prune max_size: Removing ", paste(oversize, collapse = ", ")))
442 |       remove_(oversize)
443 |       # No need to trim the metadata here: this is the last pruning step.
444 |     }
445 | 
446 |     invisible(TRUE)
447 |   }
448 | 
449 |   size <- function() {
450 |     # With a max_age, entries can expire merely because time has passed, so
451 |     # prune before counting (same reasoning as in keys()).
452 |     if (PRUNE_BY_AGE) prune()
453 | 
454 |     if (DEBUG && key_idx_map_$size() != total_n_) {
455 |       stop("n mismatch")
456 |     }
457 |     total_n_
458 |   }
459 | 
460 |   info <- function() {
461 |     # Reports the settings this cache was created with, as a named list.
462 |     settings <- list(
463 |       max_size = max_size_, max_age = max_age_,
464 |       max_n = max_n_, evict = evict_,
465 |       missing = missing_, logfile = logfile_
466 |     )
467 | 
468 |     settings
469 |   }
470 | 
471 | 
472 |   # ============================================================================
473 |   # Private methods
474 |   # ============================================================================
475 | 
476 |   # Records the current time as the entry's atime. Called by get(); only the
477 |   # "lru" eviction policy uses atime, so this is a no-op for "fifo".
478 |   update_atime_ <- function(key) {
479 |     if (evict_ != "lru") return()
480 | 
481 |     idx <- key_idx_map_$get(key)
482 |     time <- as.numeric(Sys.time())
483 | 
484 |     if (is.null(idx)) {
485 |       stop("Can't update atime because entry doesn't exist")
486 |     }
487 | 
488 |     if (!MAINTAIN_TIME_SORT || idx == last_idx_) {
489 |       # Rows are not kept in time order, or this entry is already the last
490 |       # row: simply update the time in place.
491 |       atime_[idx] <<- time
492 | 
493 |     } else {
494 |       # "Move" this entry to the end by appending a copy of its row. Appending
495 |       # is fast on R 3.4 and above, slow on older versions.
496 |       last_idx_ <<- last_idx_ + 1L
497 |       key_idx_map_$set(key, last_idx_)
498 |       key_  [last_idx_] <<- key
499 |       # Copy the value list-to-list with `[`. With `[[<-`, a stored NULL would
500 |       # not be copied: assigning NULL that way removes (or never creates) the
501 |       # element instead of storing it.
502 |       value_[last_idx_] <<- value_[idx]
503 |       size_ [last_idx_] <<- size_ [idx]
504 |       mtime_[last_idx_] <<- mtime_[idx]
505 |       atime_[last_idx_] <<- time
506 | 
507 |       # Clear out old entry
508 |       key_  [idx] <<- NA_character_
509 |       value_[idx] <<- list(NULL)
510 |       size_ [idx] <<- NA_real_
511 |       mtime_[idx] <<- NA_real_
512 |       atime_[idx] <<- NA_real_
513 |     }
514 |   }
515 | 
516 | 
517 |   remove_ <- function(keys) {
518 |     # Removes each of `keys` (a character vector), then lets compact_() decide
519 |     # whether to compact the storage. A plain loop is used because
520 |     # remove_one_() returns NULL for an absent key, which would violate a
521 |     # vapply() logical(1) template and raise an error.
522 |     for (key in keys) remove_one_(key)
523 | 
524 |     compact_()
525 |   }
526 | 
527 |   remove_one_ <- function(key) {
528 |     # Removes a single key: updates the running totals, blanks the entry's
529 |     # row, and drops the key from the key-index map. Does nothing (returns
530 |     # NULL) if the key is not present.
531 |     idx <- key_idx_map_$get(key)
532 |     if (is.null(idx)) return()
533 | 
534 |     # Overall n and size bookkeeping. This must happen before the row is
535 |     # blanked, because it reads size_[idx].
536 |     if (PRUNE_BY_SIZE) {
537 |       total_size_ <<- total_size_ - size_[idx]
538 |     }
539 |     total_n_ <<- total_n_ - 1L
540 | 
541 |     # Blank out the row; the gap may be reclaimed later by compact_().
542 |     key_  [idx] <<- NA_character_
543 |     value_[idx] <<- list(NULL)
544 |     size_ [idx] <<- NA_real_
545 |     mtime_[idx] <<- NA_real_
546 |     atime_[idx] <<- NA_real_
547 |     key_idx_map_$remove(key)
548 |   }
549 | 
550 |   compact_ <- function() {
551 |     # Rebuilds the storage columns without gaps, but only once the used
552 |     # portion has grown past INITIAL_SIZE and past COMPACT_MULT times the
553 |     # number of live entries.
554 |     needs_compacting <-
555 |       last_idx_ > INITIAL_SIZE  &&  last_idx_ > total_n_ * COMPACT_MULT
556 |     if (!needs_compacting) {
557 |       return()
558 |     }
559 | 
560 |     # Rows whose key is not NA are the live ones.
561 |     from_idxs <- which(!is.na(key_[seq_len(last_idx_)]))
562 |     if (DEBUG) {
563 |       stopifnot(total_n_ == length(from_idxs))
564 |     }
565 | 
566 |     # Live rows are packed into positions 1..total_n_ of the new columns.
567 |     to_idxs  <- seq_len(total_n_)
568 |     new_size <- max(INITIAL_SIZE, ceiling(total_n_ * COMPACT_MULT))
569 | 
570 |     # Copies the live rows of `column` to the front of `blank`, a freshly
571 |     # allocated, all-empty vector of the new capacity.
572 |     repack <- function(column, blank) {
573 |       blank[to_idxs] <- column[from_idxs]
574 |       blank
575 |     }
576 | 
577 |     # Replace old vectors with compacted ones.
578 |     key_   <<- repack(key_,   rep_len(NA_character_, new_size))
579 |     value_ <<- repack(value_, vector("list",         new_size))
580 |     size_  <<- repack(size_,  rep_len(NA_real_,      new_size))
581 |     mtime_ <<- repack(mtime_, rep_len(NA_real_,      new_size))
582 |     atime_ <<- repack(atime_, rep_len(NA_real_,      new_size))
583 | 
584 |     # Update the index values in the key-index map.
585 |     args <- to_idxs
586 |     names(args) <- key_[to_idxs]
587 |     key_idx_map_$mset(.list = args)
588 | 
589 |     # Everything up to total_n_ is now in use, with no gaps.
590 |     last_idx_ <<- total_n_
591 |   }
592 | 
593 |   # Returns the metadata of the live entries as a list of parallel vectors
594 |   # (key, size, mtime, atime), with gaps removed. When MAINTAIN_TIME_SORT is
595 |   # FALSE, the rows are first sorted by atime (evict == "lru") or by mtime
596 |   # (evict == "fifo"). In either case the row order is then reversed.
597 |   get_metadata_ <- function() {
598 |     # Rows with a non-NA mtime are the live entries.
599 |     live <- which(!is.na(mtime_[seq_len(last_idx_)]))
600 | 
601 |     if (!MAINTAIN_TIME_SORT) {
602 |       # The rows are not kept in time order, so sort them here.
603 |       sort_times <- if (evict_ == "lru") atime_ else mtime_
604 |       live <- live[order(sort_times[live])]
605 |     }
606 | 
607 |     # Reverse, so that the last row comes first.
608 |     live <- rev(live)
609 | 
610 |     # Return a plain list with the same basic structure as a data frame, to
611 |     # avoid data frame slowness.
612 |     list(
613 |       key   = key_  [live],
614 |       size  = size_ [live],
615 |       mtime = mtime_[live],
616 |       atime = atime_[live]
617 |     )
618 |   }
619 | 
620 |   log_ <- function(text) {
621 |     if (is.null(logfile_)) return()
622 | 
623 |     stamp <- format(Sys.time(), "[%Y-%m-%d %H:%M:%OS3] cache_mem ")
624 |     cat(paste0(stamp, text), sep = "\n", file = logfile_, append = TRUE)
625 |   }
626 | 
627 | 
628 |   reset()
629 | 
630 |   # ============================================================================
631 |   # Returned object
632 |   # ============================================================================
633 |   structure(
634 |     list(
635 |       get = get,
636 |       set = set,
637 |       exists = exists,
638 |       keys = keys,
639 |       remove = remove,
640 |       reset = reset,
641 |       prune = prune,
642 |       size = size,
643 |       info = info
644 |     ),
645 |     class = c("cache_mem", "cachem")
646 |   )
647 | }
648 | 


--------------------------------------------------------------------------------
/R/cachem-package.R:
--------------------------------------------------------------------------------
1 | #' @docType package
2 | #' @useDynLib cachem, .registration = TRUE
3 | #' @import fastmap
4 | NULL
5 | 


--------------------------------------------------------------------------------
/R/cachem.R:
--------------------------------------------------------------------------------
 1 | #' @export
 2 | format.cachem <- function(x, ...) {
 3 |   # First line: each class of `x` in angle brackets, separated by spaces.
 4 |   header <- paste0("<", class(x), ">", collapse = " ")
 5 | 
 6 |   # Then one indented line per element of `x`, as rendered by
 7 |   # format_methods().
 8 |   method_lines <- paste0("    ", format_methods(x), collapse = "\n")
 9 | 
10 |   paste0(header, "\n", "  Methods:\n", method_lines)
11 | }
12 | 
13 | format_methods <- function(x) {
14 |   # Renders each element of `x` as its name, followed by the argument list
15 |   # in parentheses when the element is a function.
16 |   element_names <- names(x)
17 |   describe <- function(i) {
18 |     name <- element_names[i]
19 |     if (!is.function(x[[i]])) {
20 |       return(name)
21 |     }
22 |     paste0(name, "(", format_args(x[[i]]), ")")
23 |   }
24 |   vapply(seq_along(x), describe, character(1))
25 | }
26 | 
27 | # Render the formals of function `x` as one comma-separated string. Arguments
28 | # without a default show their name only; defaults are deparsed, so strings
29 | # keep their quotes and a `""` default is not mistaken for "no default".
30 | format_args <- function(x) {
31 |   fmls <- formals(x)
32 |   args <- vapply(names(fmls), function(name) {
33 |     # Test in place: binding the empty symbol to a variable would error later.
34 |     if (identical(fmls[[name]], quote(expr = ))) return(name)
35 |     paste0(name, " = ", paste(deparse(fmls[[name]]), collapse = " "))
36 |   }, character(1))
37 |   paste(args, collapse = ", ")
38 | }
39 | 
40 | # Print a cache object: emit its format() text, then hand `x` back invisibly,
41 | # as print methods are expected to do.
42 | #' @export
43 | print.cachem <- function(x, ...) {
44 |   cat(format(x, ...))
45 |   invisible(x)
46 | }


--------------------------------------------------------------------------------
/R/reexports.R:
--------------------------------------------------------------------------------
 1 | #' @importFrom fastmap key_missing
 2 | #' @export
 3 | fastmap::key_missing
 4 | 
 5 | #' @importFrom fastmap is.key_missing
 6 | #' @export
 7 | fastmap::is.key_missing
 8 | 
 9 | 
10 | 


--------------------------------------------------------------------------------
/R/utils.R:
--------------------------------------------------------------------------------
 1 | # Alphabet of lowercase hex digits; random_hex() joins `digits` random draws.
 2 | hex_digits <- c(as.character(0:9), letters[1:6])
 3 | random_hex <- function(digits = 16) {
 4 |   picks <- sample.int(length(hex_digits), digits, replace = TRUE)
 5 |   paste0(hex_digits[picks], collapse = "")
 6 | }
 7 | 
 8 | 
 9 | # Delete each directory in `path`; it must exist and be empty, else error.
10 | dir_remove <- function(path) {
11 |   for (target in path) {
12 |     if (!dir.exists(target)) {
13 |       stop("Cannot remove non-existent directory ", target, ".")
14 |     }
15 |     if (length(dir(target, all.files = TRUE, no.. = TRUE)) > 0) {
16 |       stop("Cannot remove non-empty directory ", target, ".")
17 |     }
18 |     if (unlink(target, recursive = TRUE) == 1) {
19 |       stop("Error removing directory ", target, ".")
20 |     }
21 |   }
22 | }
23 | 
24 | # Make `path` absolute (it need not exist). normalizePath() returns an
25 | # unresolvable path unchanged, so only a still-relative result gets getwd()
26 | # prepended; "/", "~", drive-letter and UNC prefixes count as absolute.
27 | absolute_path <- function(path) {
28 |   norm_path <- normalizePath(path, mustWork = FALSE)
29 |   is_abs <- grepl("^(/|~|[A-Za-z]:[/\\\\]|\\\\\\\\)", norm_path)
30 |   if (is_abs) norm_path else file.path(getwd(), path)
31 | }
32 | 
33 | validate_key <- function(key) {
34 |   # Compiled C routine; does the same as `grepl("[^a-z0-9_-]")`, but faster.
35 |   .Call(C_validate_key, key)
36 | }
37 | 


--------------------------------------------------------------------------------
/README.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | output:
  3 |   github_document:
  4 |     toc: true
  5 |     toc_depth: 3
  6 | editor_options:
  7 |   chunk_output_type: console
  8 | ---
  9 | 
 10 | <!-- README.md is generated from README.Rmd. Please edit that file -->
 11 | 
 12 | ```{r, include = FALSE}
 13 | knitr::opts_chunk$set(
 14 |   collapse = TRUE,
 15 |   comment = "#>",
 16 |   fig.path = "man/figures/README-",
 17 |   out.width = "100%"
 18 | )
 19 | ```
 20 | 
 21 | # cachem
 22 | 
 23 | <!-- badges: start -->
 24 | [![R build status](https://github.com/r-lib/cachem/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/r-lib/cachem/actions)
 25 | <!-- badges: end -->
 26 | 
 27 | The **cachem** R package provides objects for creating and managing caches. These cache objects are key-value stores, but unlike other basic key-value stores, they have built-in support for memory and age limits so that they won't have unbounded growth.
 28 | 
 29 | The cache objects in **cachem** differ from some other key-value stores in the following ways:
 30 | 
 31 | * The cache objects provide automatic pruning so that they remain within memory limits.
 32 | * Fetching a non-existing object returns a sentinel value. An alternative is to simply return `NULL`. This is what R lists and environments do, but it is ambiguous whether the value really is `NULL`, or if it is not present. Another alternative is to throw an exception when fetching a non-existent object. However, this results in more complicated code, as every `get()` needs to be wrapped in a `tryCatch()`.
 33 | 
 34 | ## Installation
 35 | 
 36 | To install the CRAN version:
 37 | 
 38 | ```{r eval=FALSE}
 39 | install.packages("cachem")
 40 | ```
 41 | 
 42 | You can install the development version from GitHub with:
 43 | 
 44 | ```{r eval=FALSE}
 45 | if (!require("remotes")) install.packages("remotes")
 46 | remotes::install_github("r-lib/cachem")
 47 | ```
 48 | 
 49 | ## Usage
 50 | 
 51 | To create a memory-based cache, call `cache_mem()`.
 52 | 
 53 | ```{r}
 54 | library(cachem)
 55 | m <- cache_mem()
 56 | ```
 57 | 
 58 | Add arbitrary R objects to the cache using `$set(key, value)`:
 59 | 
 60 | ```{r}
 61 | m$set("abc123", c("Hello", "world"))
 62 | m$set("xyz", function() message("Goodbye"))
 63 | ```
 64 | 
 65 | The `key` must be a string consisting of lowercase letters, numbers, and the underscore (`_`) and hyphen (`-`) characters. (Upper-case characters are not allowed because some storage backends do not distinguish between lowercase and uppercase letters.) The `value` can be any R object.
 66 | 
 67 | Get the values with `$get()`:
 68 | ``` r
 69 | m$get("abc123")
 70 | #> [1] "Hello" "world"
 71 | 
 72 | m$get("xyz")
 73 | #> function() message("Goodbye")
 74 | ```
 75 | 
 76 | If you call `get()` on a key that doesn't exist, it will return a `key_missing()` sentinel value:
 77 | 
 78 | ```{r}
 79 | m$get("dog")
 80 | ```
 81 | 
 82 | A common usage pattern is to call `get()`, and then check if the result is a `key_missing` object:
 83 | 
 84 | ```{r eval=FALSE}
 85 | value <- m$get(key)
 86 | 
 87 | if (is.key_missing(value)) {
 88 |   # Cache miss - do something
 89 | } else {
 90 |   # Cache hit - do another thing
 91 | }
 92 | ```
 93 | 
 94 | The reason for doing this (instead of calling `$exists(key)` and then `$get(key)`) is that for some storage backends, there is a potential race condition: the object could be removed from the cache between the `exists()` and `get()` calls. For example:
 95 | 
 96 | * If multiple R processes have `cache_disk`s that share the same directory, one process could remove an object from the cache in between the `exists()` and `get()` calls in another process, resulting in an error.
 97 | * If you use a `cache_mem` with a `max_age`, it's possible for an object to be present when you call `exists()`, but for its age to exceed `max_age` by the time `get()` is called. In that case, the `get()` will return a `key_missing()` object.
 98 | 
 99 | ```{r eval=FALSE}
100 | # Avoid this pattern, due to a potential race condition!
101 | if (m$exists(key)) {
102 |   value <- m$get(key)
103 | }
104 | ```
105 | 
106 | 
107 | ## Cache types
108 | 
109 | **cachem** comes with two kinds of cache objects: a memory cache, and a disk cache.
110 | 
111 | ### `cache_mem()`
112 | 
113 | The memory cache stores objects in memory, by simply keeping a reference to each object. To create a memory cache:
114 | 
115 | ```{r}
116 | m <- cache_mem()
117 | ```
118 | 
119 | The default size of the cache is 200MB, but this can be customized with `max_size`:
120 | 
121 | ```{r}
122 | m <- cache_mem(max_size = 10 * 1024^2)
123 | ```
124 | 
125 | It may also be useful to set a maximum age of objects. For example, if you only want objects to stay for a maximum of one hour:
126 | 
127 | ```{r}
128 | m <- cache_mem(max_size = 10 * 1024^2, max_age = 3600)
129 | ```
130 | 
131 | For more about how objects are evicted from the cache, see section [Pruning](#pruning) below.
132 | 
133 | An advantage that the memory cache has over the disk cache (and any other type of cache that stores the objects outside of the R process's memory), is that it does not need to serialize objects. Instead, it merely stores references to the objects. This means that it can store objects that other caches cannot, and with more efficient use of memory -- if two objects in the cache share some of their contents (such that they refer to the same sub-object in memory), then `cache_mem` will not create duplicate copies of the contents, as `cache_disk` would, since it serializes the objects with the `serialize()` function.
134 | 
135 | Compared to the memory usage, the size _calculation_ is not as intelligent: if there are two objects that share contents, their sizes are computed separately, even if they have items that share the exact same representation in memory. This is done with the `object.size()` function, which does not account for multiple references to the same object in memory.
136 | 
137 | In short, a memory cache, if anything, over-counts the amount of memory actually consumed. In practice, this means that if you set a 200MB limit to the size of cache, and the cache _thinks_ it has 200MB of contents, the actual amount of memory consumed could be less than 200MB.
138 | 
139 | <details>
140 | <summary>Demonstration of memory over-counting from `object.size()`</summary>
141 | 
142 | ```{r}
143 | # Create a and b which both contain the same numeric vector.
144 | x <- list(rnorm(1e5))
145 | a <- list(1, x)
146 | b <- list(2, x)
147 | 
148 | # Add to cache
149 | m$set("a", a)
150 | m$set("b", b)
151 | 
152 | # Each object is about 800kB in memory, so the cache_mem() will consider the
153 | # total memory used to be 1600kB.
154 | object.size(m$get("a"))
155 | object.size(m$get("b"))
156 | ```
157 | 
158 | For reference, lobstr::obj_size can detect shared objects, and knows that these objects share most of their memory.
159 | 
160 | ```{r}
161 | lobstr::obj_size(m$get("a"))
162 | lobstr::obj_size(list(m$get("a"), m$get("b")))
163 | ```
164 | 
165 | However, lobstr is not on CRAN, and if obj_size() were used to find the incremental memory used when an object was added to the cache, it would have to walk all objects in the cache every time a single object is added. For these reasons, cache_mem uses `object.size()` to compute the object sizes.
166 | 
167 | </details>
168 | 
169 | ### `cache_disk()`
170 | 
171 | Disk caches are stored in a directory on disk. A disk cache is slower than a memory cache, but can generally be larger. To create one:
172 | 
173 | ```{r}
174 | d <- cache_disk()
175 | ```
176 | 
177 | By default, it creates a subdirectory of the R process's temp directory, and it will persist until the R process exits.
178 | 
179 | ``` r
180 | d$info()$dir
181 | #>  "/tmp/Rtmp6h5iB3/cache_disk-d1901b2b615a"
182 | ```
183 | 
184 | Like a `cache_mem`, the `max_size`, `max_n`, `max_age` can be customized. See section [Pruning](#pruning) below for more information.
185 | 
186 | Each object in the cache is stored as an RDS file on disk, using the `serialize()` function.
187 | 
188 | ```{r}
189 | d$set("abc", 100)
190 | d$set("x01", list(1, 2, 3))
191 | 
192 | dir(d$info()$dir)
193 | ```
194 | 
195 | Since objects in a disk cache are serialized, they are subject to the limitations of the `serialize()` function. For more information, see section [Limitations of serialized objects](#limitations-of-serialized-objects).
196 | 
197 | The storage directory can be specified with `dir`; it will be created if necessary.
198 | 
199 | ``` r
200 | cache_disk(dir = "cachedir")
201 | ```
202 | 
203 | #### Sharing a disk cache among processes
204 | 
205 | Multiple R processes can use `cache_disk` objects that share the same cache directory. To do this, simply point each `cache_disk` to the same directory.
206 | 
207 | 
208 | #### `cache_disk` pruning
209 | 
210 | For a `cache_disk`, pruning does not happen on every access, because finding the size of files in the cache directory can take a nontrivial amount of time. By default, pruning happens once every 20 times that `$set()` is called, or if at least five seconds have elapsed since the last pruning. The `prune_rate` controls how many times `$set()` must be called before a pruning occurs. It defaults to 20; smaller values result in more frequent pruning and larger values result in less frequent pruning (but keep in mind pruning always occurs if it has been at least five seconds since the last pruning).
211 | 
212 | 
213 | #### Cleaning up the cache directory
214 | 
215 | The cache directory can be deleted by calling `$destroy()`. After it is destroyed, the cache object can no longer be used.
216 | 
217 | ``` r
218 | d$destroy()
219 | d$set("a", 1)  # Error
220 | ```
221 | 
222 | To create a `cache_disk` that will automatically delete its storage directory when garbage collected, use `destroy_on_finalize=TRUE`:
223 | 
224 | ``` r
225 | d <- cache_disk(destroy_on_finalize = TRUE)
226 | d$set("a", 1)
227 | 
228 | cachedir <- d$info()$dir
229 | dir(cachedir)
230 | #> [1] "a.rds"
231 | 
232 | # Remove reference to d and trigger a garbage collection
233 | rm(d)
234 | gc()
235 | 
236 | dir.exists(cachedir)
237 | ```
238 | 
239 | #### Using custom serialization functions
240 | 
241 | It is possible to use custom serialization functions rather than the default of `saveRDS()` and `readRDS()` with the `write_fn`, `read_fn` and `extension` arguments respectively. This could be used to use alternative serialization formats like [qs](https://github.com/traversc/qs), or specialized object formats like [fst](http://www.fstpackage.org/fst/) or parquet.
242 | 
243 | ``` r
244 | library(qs)
245 | 
246 | d <- cache_disk(read_fn = qs::qread, write_fn = qs::qsave, extension = ".qs")
247 | 
248 | d$set("a", list(1, 2, 3))
249 | 
250 | cachedir <- d$info()$dir
251 | dir(cachedir)
252 | #> [1] "a.qs"
253 | d$get("a")
254 | #> [[1]]
255 | #> [1] 1
256 | #>
257 | #> [[2]]
258 | #> [1] 2
259 | #>
260 | #> [[3]]
261 | #> [1] 3
262 | ```
263 | 
264 | ## Cache API
265 | 
266 | `cache_mem()` and `cache_disk()` support all of the methods listed below. If you want to create a compatible caching object, it must have at least the `get()` and `set()` methods:
267 | 
268 | * `get(key, missing = missing_)`: Get the object associated with `key`. The `missing` parameter allows customized behavior if the key is not present: it actually is an expression which is evaluated when there is a cache miss, and it could return a value or throw an error.
269 | * `set(key, value)`: Set a key to a value.
270 | * `exists(key)`: Check whether a particular key exists in the cache.
271 | * `remove(key)`: Remove a key-value from the cache.
272 | 
273 | Some optional methods:
274 | 
275 | * `reset()`: Clear all objects from the cache.
276 | * `keys()`: Return a character vector of all keys in the cache.
277 | * `prune()`: Prune the cache. (Some types of caches may not prune on every access, and may temporarily grow past their limits, until the next pruning is triggered automatically, or manually with this function.)
278 | * `size()`: Return the number of objects in the cache.
280 | 
281 | For these methods:
282 | 
283 | * `key`: can be any string with lowercase letters, numbers, underscore (`_`) and hyphen (`-`). Some storage backends may not handle very long keys well. For example, with a `cache_disk()`, the key is used as a filename, and on some filesystems, very long filenames may hit limits on path lengths.
284 | * `value`: can be any R object, with some exceptions noted below.
285 | 
286 | 
287 | #### Limitations of serialized objects
288 | 
289 | For any cache that serializes the object for storage outside of the R process -- in other words, any cache other than a `cache_mem()` -- some types of objects will not save and restore as well. Notably, reference objects may consume more memory when restored, since R may not know to deduplicate shared objects. External pointers cannot be serialized, since they point to memory in the R process. See `?serialize` for more information.
290 | 
291 | 
292 | #### Read-only caches
293 | 
294 | It is possible to create a read-only cache by making the `set()`, `remove()`, `reset()`, and `prune()` methods into no-ops. This can be useful if sharing a cache with another R process which can write to the cache. For example, one (or more) processes can write to the cache, and other processes can read from it.
295 | 
296 | This function will wrap a cache object in a read-only wrapper. Note, however, that code that uses such a cache must not require that `$set()` actually sets a value in the cache. This is good practice anyway, because with these cache objects, items can be pruned from them at any time.
297 | 
298 | ```{r}
299 | cache_readonly_wrap <- function(cache) {
300 |   structure(
301 |     list(
302 |       get = cache$get,
303 |       set = function(key, value) NULL,
304 |       exists = cache$exists,
305 |       keys = cache$keys,
306 |       remove = function(key) NULL,
307 |       reset = function() NULL,
308 |       prune = function() NULL,
309 |       size = cache$size
310 |     ),
311 |     class = c("cache_readonly", class(cache))
312 |   )
313 | }
314 | 
315 | mr <- cache_readonly_wrap(m)
316 | ```
317 | 
318 | 
319 | ## Pruning
320 | 
321 | The cache objects provided by cachem have automatic pruning. (Note that pruning is not required by the API, so one could implement an API-compatible cache without pruning.)
322 | 
323 | This section describes how pruning works for `cache_mem()` and `cache_disk()`.
324 | 
325 | When the cache object is created, the maximum size (in bytes) is specified by `max_size`. When the size of objects in the cache exceeds `max_size`, objects will be pruned from the cache.
326 | 
327 | When objects are pruned from the cache, which ones are removed is determined by the eviction policy, `evict`:
328 | 
329 | * **`lru`**: The least-recently-used objects will be removed from the cache, until it fits within the limit. This is the default and is appropriate for most cases.
330 | * **`fifo`**: The oldest objects will be removed first.
331 | 
332 | It is also possible to set the maximum number of items that can be in the cache, with `max_n`. By default this is set to `Inf`, or no limit.
333 | 
334 | The `max_age` parameter is somewhat different from `max_size` and `max_n`. The latter two set limits on the cache store as a whole, whereas `max_age` sets limits for each individual item; for each item, if its age exceeds `max_age`, then it will be removed from the cache.
335 | 
336 | 
337 | ## Layered caches
338 | 
339 | Multiple caches can be composed into a single cache, using `cache_layered()`. This can be used to create a multi-level cache. (Note that `cache_layered()` is currently experimental.) For example, we can create a layered cache with a very fast 100MB memory cache and a larger but slower 2GB disk cache:
340 | 
341 | 
342 | ```{r}
343 | m <- cache_mem(max_size = 100 * 1024^2)
344 | d <- cache_disk(max_size = 2 * 1024^3)
345 | 
346 | cl <- cache_layered(m, d)
347 | ```
348 | 
349 | The layered cache will have the same API, with `$get()`, `$set()`, and so on, so it can be used interchangeably with other caching objects.
350 | 
351 | For this example, we'll recreate the `cache_layered` with logging enabled, so that it will show cache hits and misses.
352 | 
353 | ``` r
354 | cl <- cache_layered(m, d, logfile = stderr())
355 | 
356 | # Each of the objects generated by rnorm() is about 40 MB
357 | cl$set("a", rnorm(5e6))
358 | cl$set("b", rnorm(5e6))
359 | cl$set("c", rnorm(5e6))
360 | 
361 | # View the objects in each of the component caches
362 | m$keys()
363 | #> [1] "c" "b"
364 | d$keys()
365 | #> [1] "a" "b" "c"
366 | 
367 | # The layered cache reports having all keys
368 | cl$keys()
369 | #> [1] "c" "b" "a"
370 | ```
371 | 
372 | When `$get()` is called, it searches the first cache, and if it's missing there, it searches the next cache, and so on. If not found in any caches, it returns `key_missing()`.
373 | 
374 | ``` r
375 | # Get object that exists in the memory cache
376 | x <- cl$get("c")
377 | #> [2020-10-23 13:11:09.985] cache_layered Get: c
378 | #> [2020-10-23 13:11:09.985] cache_layered Get from cache_mem... hit
379 | 
380 | # Get object that doesn't exist in the memory cache
381 | x <- cl$get("a")
382 | #> [2020-10-23 13:13:10.968] cache_layered Get: a
383 | #> [2020-10-23 13:13:10.969] cache_layered Get from cache_mem... miss
384 | #> [2020-10-23 13:13:11.329] cache_layered Get from cache_disk... hit
385 | 
386 | # Object is not present in any component caches
387 | cl$get("d")
388 | #> [2020-10-23 13:13:40.197] cache_layered Get: d
389 | #> [2020-10-23 13:13:40.197] cache_layered Get from cache_mem... miss
390 | #> [2020-10-23 13:13:40.198] cache_layered Get from cache_disk... miss
391 | #> <Key Missing>
392 | ```
393 | 
394 | Multiple cache objects can be layered this way. You could even add a cache which uses a remote store, such as a network file system or even AWS S3.
395 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | 
  2 | - [cachem](#cachem)
  3 |   - [Installation](#installation)
  4 |   - [Usage](#usage)
  5 |   - [Cache types](#cache-types)
  6 |     - [`cache_mem()`](#cache_mem)
  7 |     - [`cache_disk()`](#cache_disk)
  8 |   - [Cache API](#cache-api)
  9 |   - [Pruning](#pruning)
 10 |   - [Layered caches](#layered-caches)
 11 | 
 12 | <!-- README.md is generated from README.Rmd. Please edit that file -->
 13 | 
 14 | # cachem
 15 | 
 16 | <!-- badges: start -->
 17 | 
 18 | [![R build
 19 | status](https://github.com/r-lib/cachem/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/r-lib/cachem/actions)
 20 | <!-- badges: end -->
 21 | 
 22 | The **cachem** R package provides objects for creating and managing
 23 | caches. These cache objects are key-value stores, but unlike other basic
 24 | key-value stores, they have built-in support for memory and age limits
 25 | so that they won’t have unbounded growth.
 26 | 
 27 | The cache objects in **cachem** differ from some other key-value stores
 28 | in the following ways:
 29 | 
 30 | - The cache objects provide automatic pruning so that they remain within
 31 |   memory limits.
 32 | - Fetching a non-existing object returns a sentinel value. An
 33 |   alternative is to simply return `NULL`. This is what R lists and
 34 |   environments do, but it is ambiguous whether the value really is
 35 |   `NULL`, or if it is not present. Another alternative is to throw an
 36 |   exception when fetching a non-existent object. However, this results
 37 |   in more complicated code, as every `get()` needs to be wrapped in a
 38 |   `tryCatch()`.
 39 | 
 40 | ## Installation
 41 | 
 42 | To install the CRAN version:
 43 | 
 44 | ``` r
 45 | install.packages("cachem")
 46 | ```
 47 | 
 48 | You can install the development version from GitHub with:
 49 | 
 50 | ``` r
 51 | if (!require("remotes")) install.packages("remotes")
 52 | remotes::install_github("r-lib/cachem")
 53 | ```
 54 | 
 55 | ## Usage
 56 | 
 57 | To create a memory-based cache, call `cache_mem()`.
 58 | 
 59 | ``` r
 60 | library(cachem)
 61 | m <- cache_mem()
 62 | ```
 63 | 
 64 | Add arbitrary R objects to the cache using `$set(key, value)`:
 65 | 
 66 | ``` r
 67 | m$set("abc123", c("Hello", "world"))
 68 | m$set("xyz", function() message("Goodbye"))
 69 | ```
 70 | 
 71 | The `key` must be a string consisting of lowercase letters, numbers, and
 72 | the underscore (`_`) and hyphen (`-`) characters. (Upper-case characters
 73 | are not allowed because some storage backends do not distinguish between
 74 | lowercase and uppercase letters.) The `value` can be any R object.
 75 | 
 76 | Get the values with `$get()`:
 77 | 
 78 | ``` r
 79 | m$get("abc123")
 80 | #> [1] "Hello" "world"
 81 | 
 82 | m$get("xyz")
 83 | #> function() message("Goodbye")
 84 | ```
 85 | 
 86 | If you call `get()` on a key that doesn’t exist, it will return a
 87 | `key_missing()` sentinel value:
 88 | 
 89 | ``` r
 90 | m$get("dog")
 91 | #> <Key Missing>
 92 | ```
 93 | 
 94 | A common usage pattern is to call `get()`, and then check if the result
 95 | is a `key_missing` object:
 96 | 
 97 | ``` r
 98 | value <- m$get(key)
 99 | 
100 | if (is.key_missing(value)) {
101 |   # Cache miss - do something
102 | } else {
103 |   # Cache hit - do another thing
104 | }
105 | ```
106 | 
107 | The reason for doing this (instead of calling `$exists(key)` and then
108 | `$get(key)`) is that for some storage backends, there is a potential
109 | race condition: the object could be removed from the cache between the
110 | `exists()` and `get()` calls. For example:
111 | 
112 | - If multiple R processes have `cache_disk`s that share the same
113 |   directory, one process could remove an object from the cache in
114 |   between the `exists()` and `get()` calls in another process, resulting
115 |   in an error.
116 | - If you use a `cache_mem` with a `max_age`, it’s possible for an object
117 |   to be present when you call `exists()`, but for its age to exceed
118 |   `max_age` by the time `get()` is called. In that case, the `get()`
119 |   will return a `key_missing()` object.
120 | 
121 | ``` r
122 | # Avoid this pattern, due to a potential race condition!
123 | if (m$exists(key)) {
124 |   value <- m$get(key)
125 | }
126 | ```
127 | 
128 | ## Cache types
129 | 
130 | **cachem** comes with two kinds of cache objects: a memory cache, and a
131 | disk cache.
132 | 
133 | ### `cache_mem()`
134 | 
135 | The memory cache stores objects in memory, by simply keeping a
136 | reference to each object. To create a memory cache:
137 | 
138 | ``` r
139 | m <- cache_mem()
140 | ```
141 | 
142 | The default size of the cache is 200MB, but this can be customized with
143 | `max_size`:
144 | 
145 | ``` r
146 | m <- cache_mem(max_size = 10 * 1024^2)
147 | ```
148 | 
149 | It may also be useful to set a maximum age of objects. For example, if
150 | you only want objects to stay for a maximum of one hour:
151 | 
152 | ``` r
153 | m <- cache_mem(max_size = 10 * 1024^2, max_age = 3600)
154 | ```
155 | 
156 | For more about how objects are evicted from the cache, see section
157 | [Pruning](#pruning) below.
158 | 
159 | An advantage that the memory cache has over the disk cache (and any
160 | other type of cache that stores the objects outside of the R process’s
161 | memory), is that it does not need to serialize objects. Instead, it
162 | merely stores references to the objects. This means that it can store
163 | objects that other caches cannot, and with more efficient use of memory
164 | – if two objects in the cache share some of their contents (such that
165 | they refer to the same sub-object in memory), then `cache_mem` will not
166 | create duplicate copies of the contents, as `cache_disk` would, since it
167 | serializes the objects with the `serialize()` function.
168 | 
169 | Compared to the memory usage, the size *calculation* is not as
170 | intelligent: if there are two objects that share contents, their sizes
171 | are computed separately, even if they have items that share the exact
172 | same representation in memory. This is done with the `object.size()`
173 | function, which does not account for multiple references to the same
174 | object in memory.
175 | 
176 | In short, a memory cache, if anything, over-counts the amount of memory
177 | actually consumed. In practice, this means that if you set a 200MB limit
178 | to the size of cache, and the cache *thinks* it has 200MB of contents,
179 | the actual amount of memory consumed could be less than 200MB.
180 | 
181 | <details>
182 | <summary>
183 | Demonstration of memory over-counting from `object.size()`
184 | </summary>
185 | 
186 | ``` r
187 | # Create a and b which both contain the same numeric vector.
188 | x <- list(rnorm(1e5))
189 | a <- list(1, x)
190 | b <- list(2, x)
191 | 
192 | # Add to cache
193 | m$set("a", a)
194 | m$set("b", b)
195 | 
196 | # Each object is about 800kB in memory, so the cache_mem() will consider the
197 | # total memory used to be 1600kB.
198 | object.size(m$get("a"))
199 | #> 800224 bytes
200 | object.size(m$get("b"))
201 | #> 800224 bytes
202 | ```
203 | 
204 | For reference, lobstr::obj_size can detect shared objects, and knows
205 | that these objects share most of their memory.
206 | 
207 | ``` r
208 | lobstr::obj_size(m$get("a"))
209 | #> 800.22 kB
210 | lobstr::obj_size(list(m$get("a"), m$get("b")))
211 | #> 800.41 kB
212 | ```
213 | 
214 | However, lobstr is not on CRAN, and if obj_size() were used to find the
215 | incremental memory used when an object was added to the cache, it would
216 | have to walk all objects in the cache every time a single object is
217 | added. For these reasons, cache_mem uses `object.size()` to compute the
218 | object sizes.
219 | 
220 | </details>
221 | 
222 | ### `cache_disk()`
223 | 
224 | Disk caches are stored in a directory on disk. A disk cache is slower
225 | than a memory cache, but can generally be larger. To create one:
226 | 
227 | ``` r
228 | d <- cache_disk()
229 | ```
230 | 
231 | By default, it creates a subdirectory of the R process’s temp directory,
232 | and it will persist until the R process exits.
233 | 
234 | ``` r
235 | d$info()$dir
236 | #>  "/tmp/Rtmp6h5iB3/cache_disk-d1901b2b615a"
237 | ```
238 | 
239 | Like a `cache_mem`, the `max_size`, `max_n`, `max_age` can be
240 | customized. See section [Pruning](#pruning) below for more information.
241 | 
242 | Each object in the cache is stored as an RDS file on disk, using the
243 | `serialize()` function.
244 | 
245 | ``` r
246 | d$set("abc", 100)
247 | d$set("x01", list(1, 2, 3))
248 | 
249 | dir(d$info()$dir)
250 | #> [1] "abc.rds" "x01.rds"
251 | ```
252 | 
253 | Since objects in a disk cache are serialized, they are subject to the
254 | limitations of the `serialize()` function. For more information, see
255 | section [Limitations of serialized
256 | objects](#limitations-of-serialized-objects).
257 | 
258 | The storage directory can be specified with `dir`; it will be created if
259 | necessary.
260 | 
261 | ``` r
262 | cache_disk(dir = "cachedir")
263 | ```
264 | 
265 | #### Sharing a disk cache among processes
266 | 
267 | Multiple R processes can use `cache_disk` objects that share the same
268 | cache directory. To do this, simply point each `cache_disk` to the same
269 | directory.
270 | 
271 | #### `cache_disk` pruning
272 | 
273 | For a `cache_disk`, pruning does not happen on every access, because
274 | finding the size of files in the cache directory can take a nontrivial
275 | amount of time. By default, pruning happens once every 20 times that
276 | `$set()` is called, or if at least five seconds have elapsed since the
277 | last pruning. The `prune_rate` controls how many times `$set()` must be
278 | called before a pruning occurs. It defaults to 20; smaller values result
279 | in more frequent pruning and larger values result in less frequent
280 | pruning (but keep in mind pruning always occurs if it has been at least
281 | five seconds since the last pruning).
282 | 
283 | #### Cleaning up the cache directory
284 | 
285 | The cache directory can be deleted by calling `$destroy()`. After it is
286 | destroyed, the cache object can no longer be used.
287 | 
288 | ``` r
289 | d$destroy()
290 | d$set("a", 1)  # Error
291 | ```
292 | 
293 | To create a `cache_disk` that will automatically delete its storage
294 | directory when garbage collected, use `destroy_on_finalize=TRUE`:
295 | 
296 | ``` r
297 | d <- cache_disk(destroy_on_finalize = TRUE)
298 | d$set("a", 1)
299 | 
300 | cachedir <- d$info()$dir
301 | dir(cachedir)
302 | #> [1] "a.rds"
303 | 
304 | # Remove reference to d and trigger a garbage collection
305 | rm(d)
306 | gc()
307 | 
308 | dir.exists(cachedir)
309 | ```
310 | 
311 | #### Using custom serialization functions
312 | 
313 | It is possible to use custom serialization functions rather than the
314 | default of `saveRDS()` and `readRDS()` with the `write_fn`, `read_fn`
315 | and `extension` arguments respectively. This could be used to use
316 | alternative serialization formats like
317 | [qs](https://github.com/traversc/qs), or specialized object formats
318 | like [fst](http://www.fstpackage.org/fst/) or parquet.
319 | 
320 | ``` r
321 | library(qs)
322 | 
323 | d <- cache_disk(read_fn = qs::qread, write_fn = qs::qsave, extension = ".qs")
324 | 
325 | d$set("a", list(1, 2, 3))
326 | 
327 | cachedir <- d$info()$dir
328 | dir(cachedir)
329 | #> [1] "a.qs"
330 | d$get("a")
331 | #> [[1]]
332 | #> [1] 1
333 | #>
334 | #> [[2]]
335 | #> [1] 2
336 | #>
337 | #> [[3]]
338 | #> [1] 3
339 | ```
340 | 
341 | ## Cache API
342 | 
343 | `cache_mem()` and `cache_disk()` support all of the methods listed
344 | below. If you want to create a compatible caching object, it must have
345 | at least the `get()` and `set()` methods:
346 | 
347 | - `get(key, missing = missing_)`: Get the object associated with `key`.
348 |   The `missing` parameter allows customized behavior if the key is not
349 |   present: it actually is an expression which is evaluated when there is
350 |   a cache miss, and it could return a value or throw an error.
351 | - `set(key, value)`: Set a key to a value.
352 | - `exists(key)`: Check whether a particular key exists in the cache.
353 | - `remove(key)`: Remove a key-value from the cache.
354 | 
355 | Some optional methods:
356 | 
357 | - `reset()`: Clear all objects from the cache.
358 | - `keys()`: Return a character vector of all keys in the cache.
359 | - `prune()`: Prune the cache. (Some types of caches may not prune on
360 |   every access, and may temporarily grow past their limits, until the
361 |   next pruning is triggered automatically, or manually with this
362 |   function.)
363 | - `size()`: Return the number of objects in the cache.
364 | 
365 | For these methods:
366 | 
367 | - `key`: can be any string with lowercase letters, numbers, underscore
368 |   (`_`) and hyphen (`-`). Some storage backends may not handle very
369 |   long keys well. For example, with a `cache_disk()`, the key is used as
370 |   a filename, and on some filesystems, very long filenames may hit limits
371 |   on path lengths.
372 | - `value`: can be any R object, with some exceptions noted below.
373 | 
374 | #### Limitations of serialized objects
375 | 
376 | For any cache that serializes the object for storage outside of the R
377 | process – in other words, any cache other than a `cache_mem()` – some
378 | types of objects will not save and restore as well. Notably, reference
379 | objects may consume more memory when restored, since R may not know to
380 | deduplicate shared objects. External pointers cannot be
381 | serialized, since they point to memory in the R process. See
382 | `?serialize` for more information.
383 | 
384 | #### Read-only caches
385 | 
386 | It is possible to create a read-only cache by making the `set()`,
387 | `remove()`, `reset()`, and `prune()` methods into no-ops. This can be
388 | useful if sharing a cache with another R process which can write to the
389 | cache. For example, one (or more) processes can write to the cache, and
390 | other processes can read from it.
391 | 
392 | This function will wrap a cache object in a read-only wrapper. Note,
393 | however, that code that uses such a cache must not require that `$set()`
394 | actually sets a value in the cache. This is good practice anyway,
395 | because with these cache objects, items can be pruned from them at any
396 | time.
397 | 
398 | ``` r
399 | cache_readonly_wrap <- function(cache) {
400 |   structure(
401 |     list(
402 |       get = cache$get,
403 |       set = function(key, value) NULL,
404 |       exists = cache$exists,
405 |       keys = cache$keys,
406 |       remove = function(key) NULL,
407 |       reset = function() NULL,
408 |       prune = function() NULL,
409 |       size = cache$size
410 |     ),
411 |     class = c("cache_readonly", class(cache))
412 |   )
413 | }
414 | 
415 | mr <- cache_readonly_wrap(m)
416 | ```
417 | 
418 | ## Pruning
419 | 
420 | The cache objects provided by cachem have automatic pruning. (Note that
421 | pruning is not required by the API, so one could implement an
422 | API-compatible cache without pruning.)
423 | 
424 | This section describes how pruning works for `cache_mem()` and
425 | `cache_disk()`.
426 | 
427 | When the cache object is created, the maximum size (in bytes) is
428 | specified by `max_size`. When the size of objects in the cache exceeds
429 | `max_size`, objects will be pruned from the cache.
430 | 
431 | When objects are pruned from the cache, which ones are removed is
432 | determined by the eviction policy, `evict`:
433 | 
434 | - **`lru`**: The least-recently-used objects will be removed from the
435 |   cache, until it fits within the limit. This is the default and is
436 |   appropriate for most cases.
437 | - **`fifo`**: The oldest objects will be removed first.
438 | 
439 | It is also possible to set the maximum number of items that can be in
440 | the cache, with `max_n`. By default this is set to `Inf`, or no limit.
441 | 
442 | The `max_age` parameter is somewhat different from `max_size` and
443 | `max_n`. The latter two set limits on the cache store as a whole,
444 | whereas `max_age` sets limits for each individual item; for each item,
445 | if its age exceeds `max_age`, then it will be removed from the cache.
446 | 
447 | ## Layered caches
448 | 
449 | Multiple caches can be composed into a single cache, using
450 | `cache_layered()`. This can be used to create a multi-level cache. (Note
451 | that `cache_layered()` is currently experimental.) For example, we can
452 | create a layered cache with a very fast 100MB memory cache and a larger
453 | but slower 2GB disk cache:
454 | 
455 | ``` r
456 | m <- cache_mem(max_size = 100 * 1024^2)
457 | d <- cache_disk(max_size = 2 * 1024^3)
458 | 
459 | cl <- cache_layered(m, d)
460 | ```
461 | 
462 | The layered cache will have the same API, with `$get()`, `$set()`, and
463 | so on, so it can be used interchangeably with other caching objects.
464 | 
465 | For this example, we’ll recreate the `cache_layered` with logging
466 | enabled, so that it will show cache hits and misses.
467 | 
468 | ``` r
469 | cl <- cache_layered(m, d, logfile = stderr())
470 | 
471 | # Each of the objects generated by rnorm() is about 40 MB
472 | cl$set("a", rnorm(5e6))
473 | cl$set("b", rnorm(5e6))
474 | cl$set("c", rnorm(5e6))
475 | 
476 | # View the objects in each of the component caches
477 | m$keys()
478 | #> [1] "c" "b"
479 | d$keys()
480 | #> [1] "a" "b" "c"
481 | 
482 | # The layered cache reports having all keys
483 | cl$keys()
484 | #> [1] "c" "b" "a"
485 | ```
486 | 
487 | When `$get()` is called, it searches the first cache, and if it’s
488 | missing there, it searches the next cache, and so on. If not found in
489 | any caches, it returns `key_missing()`.
490 | 
491 | ``` r
492 | # Get object that exists in the memory cache
493 | x <- cl$get("c")
494 | #> [2020-10-23 13:11:09.985] cache_layered Get: c
495 | #> [2020-10-23 13:11:09.985] cache_layered Get from cache_mem... hit
496 | 
497 | # Get object that doesn't exist in the memory cache
498 | x <- cl$get("a")
499 | #> [2020-10-23 13:13:10.968] cache_layered Get: a
500 | #> [2020-10-23 13:13:10.969] cache_layered Get from cache_mem... miss
501 | #> [2020-10-23 13:13:11.329] cache_layered Get from cache_disk... hit
502 | 
503 | # Object is not present in any component caches
504 | cl$get("d")
505 | #> [2020-10-23 13:13:40.197] cache_layered Get: d
506 | #> [2020-10-23 13:13:40.197] cache_layered Get from cache_mem... miss
507 | #> [2020-10-23 13:13:40.198] cache_layered Get from cache_disk... miss
508 | #> <Key Missing>
509 | ```
510 | 
511 | Multiple cache objects can be layered this way. You could even add a
512 | cache which uses a remote store, such as a network file system or even
513 | AWS S3.
514 | 


--------------------------------------------------------------------------------
/_pkgdown.yml:
--------------------------------------------------------------------------------
1 | url: https://cachem.r-lib.org
2 | 
3 | template:
4 |   bootstrap: 5
5 | 


--------------------------------------------------------------------------------
/cachem.Rproj:
--------------------------------------------------------------------------------
 1 | Version: 1.0
 2 | 
 3 | RestoreWorkspace: No
 4 | SaveWorkspace: No
 5 | AlwaysSaveHistory: Default
 6 | 
 7 | EnableCodeIndexing: Yes
 8 | UseSpacesForTab: Yes
 9 | NumSpacesForTab: 2
10 | Encoding: UTF-8
11 | 
12 | RnwWeave: Sweave
13 | LaTeX: pdfLaTeX
14 | 
15 | AutoAppendNewline: Yes
16 | StripTrailingWhitespace: Yes
17 | 
18 | BuildType: Package
19 | PackageUseDevtools: Yes
20 | PackageInstallArgs: --no-multiarch --with-keep.source
21 | PackageRoxygenize: rd,collate,namespace
22 | 


--------------------------------------------------------------------------------
/cran-comments.md:
--------------------------------------------------------------------------------
 1 | ## Comments
 2 | 
 3 | #### 2021-8-19
 4 | 
 5 | Bug fixes.
 6 | 
 7 | Thank you,
 8 | Winston
 9 | 
10 | 
11 | ## Test environments
12 | 
13 | * GitHub Actions - https://github.com/r-lib/cachem/pull/16/checks
14 |   * macOS
15 |     * devel, release
16 |   * windows
17 |     * release, 3.6
18 |   * ubuntu20
19 |     * devel, release, oldrel/1, oldrel/2, oldrel/3, oldrel/4
20 | * devtools::
21 |   * check_win_devel()
22 |   * check_win_release()
23 |   * check_win_oldrelease()
24 | 
25 | 0 errors ✔ | 0 warnings ✔ | 0 notes ✔
26 | 
27 | 
28 | ## revdepcheck results
29 | 
30 | We checked 6 reverse dependencies, comparing R CMD check results across CRAN and dev versions of this package.
31 | 
32 |  * We saw 0 new problems
33 |  * We failed to check 0 packages
34 | 


--------------------------------------------------------------------------------
/man/cache_disk.Rd:
--------------------------------------------------------------------------------
  1 | % Generated by roxygen2: do not edit by hand
  2 | % Please edit documentation in R/cache-disk.R
  3 | \name{cache_disk}
  4 | \alias{cache_disk}
  5 | \title{Create a disk cache object}
  6 | \usage{
  7 | cache_disk(
  8 |   dir = NULL,
  9 |   max_size = 1024 * 1024^2,
 10 |   max_age = Inf,
 11 |   max_n = Inf,
 12 |   evict = c("lru", "fifo"),
 13 |   destroy_on_finalize = FALSE,
 14 |   read_fn = NULL,
 15 |   write_fn = NULL,
 16 |   extension = ".rds",
 17 |   missing = key_missing(),
 18 |   prune_rate = 20,
 19 |   warn_ref_objects = FALSE,
 20 |   logfile = NULL
 21 | )
 22 | }
 23 | \arguments{
 24 | \item{dir}{Directory to store files for the cache. If \code{NULL} (the default) it
 25 | will create and use a temporary directory.}
 26 | 
 27 | \item{max_size}{Maximum size of the cache, in bytes. If the cache exceeds
 28 | this size, cached objects will be removed according to the value of the
 29 | \code{evict}. Use \code{Inf} for no size limit. The default is 1 gigabyte.}
 30 | 
 31 | \item{max_age}{Maximum age of files in cache before they are evicted, in
 32 | seconds. Use \code{Inf} for no age limit.}
 33 | 
 34 | \item{max_n}{Maximum number of objects in the cache. If the number of objects
 35 | exceeds this value, then cached objects will be removed according to the
 36 | value of \code{evict}. Use \code{Inf} for no limit of number of items.}
 37 | 
 38 | \item{evict}{The eviction policy to use to decide which objects are removed
 39 | when a cache pruning occurs. Currently, \code{"lru"} and \code{"fifo"} are supported.}
 40 | 
 41 | \item{destroy_on_finalize}{If \code{TRUE}, then when the cache_disk object is
 42 | garbage collected, the cache directory and all objects inside of it will be
 43 | deleted from disk. If \code{FALSE} (the default), it will do nothing when
 44 | finalized.}
 45 | 
 46 | \item{read_fn}{The function used to read the values from disk. If \code{NULL}
 47 | (the default) it will use \code{readRDS}.}
 48 | 
 49 | \item{write_fn}{The function used to write the values to disk. If \code{NULL}
 50 | (the default) it will use \code{saveRDS}.}
 51 | 
 52 | \item{extension}{The file extension to use for files on disk.}
 53 | 
 54 | \item{missing}{A value to return when \code{get(key)} is called but the key is not
 55 | present in the cache. The default is a \code{\link[=key_missing]{key_missing()}} object. It is
 56 | actually an expression that is evaluated each time there is a cache miss.
 57 | See section Missing keys for more information.}
 58 | 
 59 | \item{prune_rate}{How often to prune the cache. See section Cache Pruning for
 60 | more information.}
 61 | 
 62 | \item{warn_ref_objects}{Should a warning be emitted when a reference is
 63 | stored in the cache? This can be useful because serializing and
 64 | deserializing a reference object (such as environments and external
 65 | pointers) can lead to unexpected behavior.}
 66 | 
 67 | \item{logfile}{An optional filename or connection object to where logging
 68 | information will be written. To log to the console, use \code{stderr()} or
 69 | \code{stdout()}.}
 70 | }
 71 | \value{
 72 | A disk caching object, with class \code{cache_disk}.
 73 | }
 74 | \description{
 75 | A disk cache object is a key-value store that saves the values as files in a
 76 | directory on disk. Objects can be stored and retrieved using the \code{get()} and
 77 | \code{set()} methods. Objects are automatically pruned from the cache according to
 78 | the parameters \code{max_size}, \code{max_age}, \code{max_n}, and \code{evict}.
 79 | }
 80 | \section{Missing keys}{
 81 | 
 82 | 
 83 | The \code{missing} parameter controls what happens when \code{get()} is called with a
 84 | key that is not in the cache (a cache miss). The default behavior is to
 85 | return a \code{\link[=key_missing]{key_missing()}} object. This is a \emph{sentinel value} that indicates
 86 | that the key was not present in the cache. You can test if the returned
 87 | value represents a missing key by using the \code{\link[=is.key_missing]{is.key_missing()}} function.
 88 | You can also have \code{get()} return a different sentinel value, like \code{NULL}.
 89 | If you want to throw an error on a cache miss, you can do so by providing
 90 | an expression for \code{missing}, as in \code{missing = stop("Missing key")}.
 91 | 
 92 | When the cache is created, you can supply a value for \code{missing}, which sets
 93 | the default value to be returned for missing values. It can also be
 94 | overridden when \code{get()} is called, by supplying a \code{missing} argument. For
 95 | example, if you use \code{cache$get("mykey", missing = NULL)}, it will return
 96 | \code{NULL} if the key is not in the cache.
 97 | 
 98 | The \code{missing} parameter is actually an expression which is evaluated each
 99 | time there is a cache miss. A quosure (from the rlang package) can be used.
100 | 
101 | If you use this, the code that calls \code{get()} should be wrapped with
102 | \code{\link[=tryCatch]{tryCatch()}} to gracefully handle missing keys.
103 | }
104 | 
105 | \section{Cache pruning}{
106 | 
107 | 
108 | Cache pruning occurs when \code{set()} is called, or it can be invoked manually
109 | by calling \code{prune()}.
110 | 
111 | The disk cache will throttle the pruning so that it does not happen on
112 | every call to \code{set()}, because the filesystem operations for checking the
113 | status of files can be slow. Instead, it will prune once in every
114 | \code{prune_rate} calls to \code{set()}, or if at least 5 seconds have elapsed since
115 | the last prune occurred, whichever is first.
116 | 
117 | When a pruning occurs, if there are any objects that are older than
118 | \code{max_age}, they will be removed.
119 | 
120 | The \code{max_size} and \code{max_n} parameters are applied to the cache as a whole,
121 | in contrast to \code{max_age}, which is applied to each object individually.
122 | 
123 | If the number of objects in the cache exceeds \code{max_n}, then objects will be
124 | removed from the cache according to the eviction policy, which is set with
125 | the \code{evict} parameter. Objects will be removed so that the number of items
126 | is \code{max_n}.
127 | 
128 | If the size of the objects in the cache exceeds \code{max_size}, then objects
129 | will be removed from the cache. Objects will be removed from the cache so
130 | that the total size remains under \code{max_size}. Note that the size is
131 | calculated using the size of the files, not the size of disk space used by
132 | the files --- these two values can differ because files are stored in
133 | blocks on disk. For example, if the block size is 4096 bytes, then a file
134 | that is one byte in size will take 4096 bytes on disk.
135 | 
136 | Another time that objects can be removed from the cache is when \code{get()} is
137 | called. If the target object is older than \code{max_age}, it will be removed
138 | and the cache will report it as a missing value.
139 | }
140 | 
141 | \section{Eviction policies}{
142 | 
143 | 
144 | If \code{max_n} or \code{max_size} are used, then objects will be removed from the
145 | cache according to an eviction policy. The available eviction policies are:
146 | 
147 | \describe{
148 | \item{\code{"lru"}}{
149 | Least Recently Used. The least recently used objects will be removed.
150 | This uses the filesystem's mtime property. When "lru" is used, each time
151 | \code{get()} is called, it will update the file's mtime using
152 | \code{\link[=Sys.setFileTime]{Sys.setFileTime()}}. Note that on some platforms, the resolution of
153 | \code{\link[=Sys.setFileTime]{Sys.setFileTime()}} may be low, one or two seconds.
154 | }
155 | \item{\code{"fifo"}}{
156 | First-in-first-out. The oldest objects will be removed.
157 | }
158 | }
159 | 
160 | Both of these policies use files' mtime. Note that some filesystems (notably
161 | FAT) have poor mtime resolution. (atime is not used because support for atime
162 | is worse than mtime.)
163 | }
164 | 
165 | \section{Sharing among multiple processes}{
166 | 
167 | 
168 | The directory for a cache_disk can be shared among multiple R processes. To
169 | do this, each R process should have a cache_disk object that uses the same
170 | directory. Each cache_disk will do pruning independently of the others, so
171 | if they have different pruning parameters, then one cache_disk may remove
172 | cached objects before another cache_disk would do so.
173 | 
174 | Even though it is possible for multiple processes to share a cache_disk
175 | directory, this should not be done on networked file systems, because the
176 | slow performance of networked file systems can cause problems. If you need
177 | a high-performance shared cache, you can use one built on a database like
178 | Redis, SQLite, mySQL, or similar.
179 | 
180 | When multiple processes share a cache directory, there are some potential
181 | race conditions. For example, if your code calls \code{exists(key)} to check if
182 | an object is in the cache, and then calls \code{get(key)}, the object may be
183 | removed from the cache in between those two calls, and \code{get(key)} will
184 | throw an error. Instead of calling the two functions, it is better to
185 | simply call \code{get(key)}, and check that the returned object is not a
186 | \code{key_missing()} object, using \code{is.key_missing()}. This effectively tests
187 | for existence and gets the object in one operation.
188 | 
189 | It is also possible for one process to prune objects at the same time
190 | that another process is trying to prune objects. If this happens, you may
191 | see a warning from \code{file.remove()} failing to remove a file that has
192 | already been deleted.
193 | }
194 | 
195 | \section{Methods}{
196 | 
197 | 
198 | A disk cache object has the following methods:
199 | 
200 | \describe{
201 | \item{\code{get(key, missing)}}{
202 | Returns the value associated with \code{key}. If the key is not in the
203 | cache, then it evaluates the expression specified by \code{missing} and
204 | returns the value. If \code{missing} is specified here, then it will
205 | override the default that was set when the \code{cache_disk} object was
206 | created. See section Missing Keys for more information.
207 | }
208 | \item{\code{set(key, value)}}{
209 | Stores the \code{key}-\code{value} pair in the cache.
210 | }
211 | \item{\code{exists(key)}}{
212 | Returns \code{TRUE} if the cache contains the key, otherwise
213 | \code{FALSE}.
214 | }
215 | \item{\code{remove(key)}}{
216 | Removes \code{key} from the cache, if it exists in the cache. If the key is
217 | not in the cache, this does nothing.
218 | }
219 | \item{\code{size()}}{
220 | Returns the number of items currently in the cache.
221 | }
222 | \item{\code{keys()}}{
223 | Returns a character vector of all keys currently in the cache.
224 | }
225 | \item{\code{reset()}}{
226 | Clears all objects from the cache.
227 | }
228 | \item{\code{destroy()}}{
229 | Clears all objects in the cache, and removes the cache directory from
230 | disk.
231 | }
232 | \item{\code{prune()}}{
233 | Prunes the cache, using the parameters specified by \code{max_size},
234 | \code{max_age}, \code{max_n}, and \code{evict}.
235 | }
236 | }
237 | }
238 | 
239 | 


--------------------------------------------------------------------------------
/man/cache_layered.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/cache-layered.R
 3 | \name{cache_layered}
 4 | \alias{cache_layered}
 5 | \title{Compose any number of cache objects into a new, layered cache object}
 6 | \usage{
 7 | cache_layered(..., logfile = NULL)
 8 | }
 9 | \arguments{
10 | \item{...}{Cache objects to compose into a new, layered cache object.}
11 | 
12 | \item{logfile}{An optional filename or connection object to where logging
13 | information will be written. To log to the console, use \code{stderr()} or
14 | \code{stdout()}.}
15 | }
16 | \value{
17 | A layered caching object, with class \code{cache_layered}.
18 | }
19 | \description{
20 | Note that \code{cache_layered} is currently experimental.
21 | }
22 | \examples{
23 | 
24 | # Make a layered cache from a small memory cache and large disk cache
25 | m <- cache_mem(max_size = 100 * 1024^2)
26 | d <- cache_disk(max_size = 2 * 1024^3)
27 | cl <- cache_layered(m, d)
28 | }
29 | 


--------------------------------------------------------------------------------
/man/cache_mem.Rd:
--------------------------------------------------------------------------------
  1 | % Generated by roxygen2: do not edit by hand
  2 | % Please edit documentation in R/cache-mem.R
  3 | \name{cache_mem}
  4 | \alias{cache_mem}
  5 | \title{Create a memory cache object}
  6 | \usage{
  7 | cache_mem(
  8 |   max_size = 512 * 1024^2,
  9 |   max_age = Inf,
 10 |   max_n = Inf,
 11 |   evict = c("lru", "fifo"),
 12 |   missing = key_missing(),
 13 |   logfile = NULL
 14 | )
 15 | }
 16 | \arguments{
 17 | \item{max_size}{Maximum size of the cache, in bytes. If the cache exceeds
 18 | this size, cached objects will be removed according to the value of the
 19 | \code{evict}. Use \code{Inf} for no size limit. The default is 512 megabytes.}
 20 | 
 21 | \item{max_age}{Maximum age of files in cache before they are evicted, in
 22 | seconds. Use \code{Inf} for no age limit.}
 23 | 
 24 | \item{max_n}{Maximum number of objects in the cache. If the number of objects
 25 | exceeds this value, then cached objects will be removed according to the
 26 | value of \code{evict}. Use \code{Inf} for no limit of number of items.}
 27 | 
 28 | \item{evict}{The eviction policy to use to decide which objects are removed
 29 | when a cache pruning occurs. Currently, \code{"lru"} and \code{"fifo"} are supported.}
 30 | 
 31 | \item{missing}{A value to return when \code{get(key)} is called but the key is not
 32 | present in the cache. The default is a \code{\link[=key_missing]{key_missing()}} object. It is
 33 | actually an expression that is evaluated each time there is a cache miss.
 34 | See section Missing keys for more information.}
 35 | 
 36 | \item{logfile}{An optional filename or connection object to where logging
 37 | information will be written. To log to the console, use \code{stderr()} or
 38 | \code{stdout()}.}
 39 | }
 40 | \value{
 41 | A memory caching object, with class \code{cache_mem}.
 42 | }
 43 | \description{
 44 | A memory cache object is a key-value store that saves the values in an
 45 | environment. Objects can be stored and retrieved using the \code{get()} and
 46 | \code{set()} methods. Objects are automatically pruned from the cache according to
 47 | the parameters \code{max_size}, \code{max_age}, \code{max_n}, and \code{evict}.
 48 | }
 49 | \details{
 50 | In a \code{cache_mem}, R objects are stored directly in the cache; they are
 51 | \emph{not} serialized before being stored in the cache. This contrasts with other
 52 | cache types, like \code{\link[=cache_disk]{cache_disk()}}, where objects are serialized, and the
 53 | serialized object is cached. This can result in some differences of behavior.
 54 | For example, as long as an object is stored in a cache_mem, it will not be
 55 | garbage collected.
 56 | }
 57 | \section{Missing keys}{
 58 | 
 59 | 
 60 | The \code{missing} parameter controls what happens when \code{get()} is called with a
 61 | key that is not in the cache (a cache miss). The default behavior is to
 62 | return a \code{\link[=key_missing]{key_missing()}} object. This is a \emph{sentinel value} that indicates
 63 | that the key was not present in the cache. You can test if the returned
 64 | value represents a missing key by using the \code{\link[=is.key_missing]{is.key_missing()}} function.
 65 | You can also have \code{get()} return a different sentinel value, like \code{NULL}.
 66 | If you want to throw an error on a cache miss, you can do so by providing
 67 | an expression for \code{missing}, as in \code{missing = stop("Missing key")}.
 68 | 
 69 | When the cache is created, you can supply a value for \code{missing}, which sets
 70 | the default value to be returned for missing values. It can also be
 71 | overridden when \code{get()} is called, by supplying a \code{missing} argument. For
 72 | example, if you use \code{cache$get("mykey", missing = NULL)}, it will return
 73 | \code{NULL} if the key is not in the cache.
 74 | 
 75 | The \code{missing} parameter is actually an expression which is evaluated each
 76 | time there is a cache miss. A quosure (from the rlang package) can be used.
 77 | 
 78 | If you use this, the code that calls \code{get()} should be wrapped with
 79 | \code{\link[=tryCatch]{tryCatch()}} to gracefully handle missing keys.
 80 | }
 81 | \section{Cache pruning}{
 82 | 
 83 | Cache pruning occurs when \code{set()} is called, or it can be invoked manually
 84 | by calling \code{prune()}.
 85 | 
 86 | When a pruning occurs, if there are any objects that are older than
 87 | \code{max_age}, they will be removed.
 88 | 
 89 | The \code{max_size} and \code{max_n} parameters are applied to the cache as a whole,
 90 | in contrast to \code{max_age}, which is applied to each object individually.
 91 | 
 92 | If the number of objects in the cache exceeds \code{max_n}, then objects will be
 93 | removed from the cache according to the eviction policy, which is set with
 94 | the \code{evict} parameter. Objects will be removed so that the number of items
 95 | is \code{max_n}.
 96 | 
 97 | If the size of the objects in the cache exceeds \code{max_size}, then objects
 98 | will be removed from the cache. Objects will be removed from the cache so
 99 | that the total size remains under \code{max_size}.
100 | 
101 | Another time that objects can be removed from the cache is when \code{get()} is
102 | called. If the target object is older than \code{max_age}, it will be removed
103 | and the cache will report it as a missing value.
104 | }
105 | 
106 | \section{Eviction policies}{
107 | 
108 | 
109 | If \code{max_n} or \code{max_size} are used, then objects will be removed
110 | from the cache according to an eviction policy. The available eviction
111 | policies are:
112 | 
113 | \describe{
114 | \item{\code{"lru"}}{
115 | Least Recently Used. The least recently used objects will be removed.
116 | }
117 | \item{\code{"fifo"}}{
118 | First-in-first-out. The oldest objects will be removed.
119 | }
120 | }
121 | }
122 | 
123 | \section{Methods}{
124 | 
125 | 
126 | A memory cache object has the following methods:
127 | 
128 | \describe{
129 | \item{\code{get(key, missing)}}{
130 | Returns the value associated with \code{key}. If the key is not in the
131 | cache, then it evaluates the expression specified by \code{missing} and
132 | returns the value. If \code{missing} is specified here, then it will
133 | override the default that was set when the \code{cache_mem} object was
134 | created. See section Missing Keys for more information.
135 | }
136 | \item{\code{set(key, value)}}{
137 | Stores the \code{key}-\code{value} pair in the cache.
138 | }
139 | \item{\code{exists(key)}}{
140 | Returns \code{TRUE} if the cache contains the key, otherwise
141 | \code{FALSE}.
142 | }
143 | \item{\code{remove(key)}}{
144 | Removes \code{key} from the cache, if it exists in the cache. If the key is
145 | not in the cache, this does nothing.
146 | }
147 | \item{\code{size()}}{
148 | Returns the number of items currently in the cache.
149 | }
150 | \item{\code{keys()}}{
151 | Returns a character vector of all keys currently in the cache.
152 | }
153 | \item{\code{reset()}}{
154 | Clears all objects from the cache.
155 | }
156 | \item{\code{destroy()}}{
157 | Clears all objects in the cache, and removes the cache directory from
158 | disk.
159 | }
160 | \item{\code{prune()}}{
161 | Prunes the cache, using the parameters specified by \code{max_size},
162 | \code{max_age}, \code{max_n}, and \code{evict}.
163 | }
164 | }
165 | }
166 | 
167 | 


--------------------------------------------------------------------------------
/man/reexports.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/reexports.R
 3 | \docType{import}
 4 | \name{reexports}
 5 | \alias{reexports}
 6 | \alias{key_missing}
 7 | \alias{is.key_missing}
 8 | \title{Objects exported from other packages}
 9 | \keyword{internal}
10 | \description{
11 | These objects are imported from other packages. Follow the links
12 | below to see their documentation.
13 | 
14 | \describe{
15 |   \item{fastmap}{\code{\link[fastmap:key_missing]{is.key_missing}}, \code{\link[fastmap]{key_missing}}}
16 | }}
17 | 
18 | 


--------------------------------------------------------------------------------
/revdep/.gitignore:
--------------------------------------------------------------------------------
1 | checks
2 | library
3 | checks.noindex
4 | library.noindex
5 | cloud.noindex
6 | data.sqlite
7 | *.html
8 | 


--------------------------------------------------------------------------------
/revdep/README.md:
--------------------------------------------------------------------------------
 1 | # Platform
 2 | 
 3 | |field    |value                        |
 4 | |:--------|:----------------------------|
 5 | |version  |R version 4.0.2 (2020-06-22) |
 6 | |os       |macOS  10.16                 |
 7 | |system   |x86_64, darwin17.0           |
 8 | |ui       |X11                          |
 9 | |language |(EN)                         |
10 | |collate  |en_US.UTF-8                  |
11 | |ctype    |en_US.UTF-8                  |
12 | |tz       |America/New_York             |
13 | |date     |2021-08-17                   |
14 | 
15 | # Dependencies
16 | 
17 | |package |old   |new   |Δ  |
18 | |:-------|:-----|:-----|:--|
19 | |cachem  |1.0.5 |1.0.6 |*  |
20 | 
21 | # Revdeps
22 | 
23 | ## All (6)
24 | 
25 | |package                          |version |error |warning |note |
26 | |:--------------------------------|:-------|:-----|:-------|:----|
27 | |aquodom                          |0.1.0   |      |        |     |
28 | |ffscrapr                         |1.4.5   |      |        |     |
29 | |memoise                          |2.0.0   |      |        |     |
30 | |[nflreadr](problems.md#nflreadr) |1.0.0   |      |        |1    |
31 | |[shiny](problems.md#shiny)       |1.6.0   |      |        |1    |
32 | |[slackr](problems.md#slackr)     |3.0.0   |      |        |1    |
33 | 
34 | 


--------------------------------------------------------------------------------
/revdep/cran.md:
--------------------------------------------------------------------------------
1 | ## revdepcheck results
2 | 
3 | We checked 6 reverse dependencies, comparing R CMD check results across CRAN and dev versions of this package.
4 | 
5 |  * We saw 0 new problems
6 |  * We failed to check 0 packages
7 | 
8 | 


--------------------------------------------------------------------------------
/revdep/failures.md:
--------------------------------------------------------------------------------
1 | *Wow, no problems at all. :)*


--------------------------------------------------------------------------------
/revdep/problems.md:
--------------------------------------------------------------------------------
 1 | # nflreadr
 2 | 
 3 | <details>
 4 | 
 5 | * Version: 1.0.0
 6 | * GitHub: https://github.com/nflverse/nflreadr
 7 | * Source code: https://github.com/cran/nflreadr
 8 | * Date/Publication: 2021-08-09 14:40:02 UTC
 9 | * Number of recursive dependencies: 70
10 | 
11 | Run `revdep_details(, "nflreadr")` for more info
12 | 
13 | </details>
14 | 
15 | ## In both
16 | 
17 | *   checking dependencies in R code ... NOTE
18 |     ```
19 |     Namespace in Imports field not imported from: ‘dplyr’
20 |       All declared Imports should be used.
21 |     ```
22 | 
23 | # shiny
24 | 
25 | <details>
26 | 
27 | * Version: 1.6.0
28 | * GitHub: https://github.com/rstudio/shiny
29 | * Source code: https://github.com/cran/shiny
30 | * Date/Publication: 2021-01-25 21:50:02 UTC
31 | * Number of recursive dependencies: 104
32 | 
33 | Run `revdep_details(, "shiny")` for more info
34 | 
35 | </details>
36 | 
37 | ## In both
38 | 
39 | *   checking installed package size ... NOTE
40 |     ```
41 |       installed size is 12.1Mb
42 |       sub-directories of 1Mb or more:
43 |         R     2.0Mb
44 |         www   8.8Mb
45 |     ```
46 | 
47 | # slackr
48 | 
49 | <details>
50 | 
51 | * Version: 3.0.0
52 | * GitHub: https://github.com/mrkaye97/slackr
53 | * Source code: https://github.com/cran/slackr
54 | * Date/Publication: 2021-08-07 19:30:02 UTC
55 | * Number of recursive dependencies: 85
56 | 
57 | Run `revdep_details(, "slackr")` for more info
58 | 
59 | </details>
60 | 
61 | ## In both
62 | 
63 | *   checking dependencies in R code ... NOTE
64 |     ```
65 |     Namespaces in Imports field not imported from:
66 |       ‘methods’ ‘reprex’
67 |       All declared Imports should be used.
68 |     ```
69 | 
70 | 


--------------------------------------------------------------------------------
/src/.gitignore:
--------------------------------------------------------------------------------
1 | *.o
2 | *.so
3 | *.dll
4 | 


--------------------------------------------------------------------------------
/src/cache.c:
--------------------------------------------------------------------------------
 1 | #include <R.h>
 2 | #include <Rdefines.h>
 3 | #include <string.h>
 4 | 
 5 | SEXP C_validate_key(SEXP key_r) {
 6 |   if (TYPEOF(key_r) != STRSXP || Rf_length(key_r) != 1) {
 7 |     Rf_error("key must be a one-element character vector");
 8 |   }
 9 |   SEXP key_c = STRING_ELT(key_r, 0);
10 |   if (key_c == NA_STRING || Rf_StringBlank(key_c)) {
11 |     Rf_error("key must be not be \"\" or NA");
12 |   }
13 | 
14 |   const char* s = R_CHAR(key_c);
15 |   char cset[] = "1234567890abcdefghijklmnopqrstuvwxyz_-";
16 |   int i = strspn(s, cset);
17 |   if (i != strlen(s)) {
18 |     Rf_error("Invalid key: %s. Only lowercase letters and numbers are allowed.", s);
19 |   }
20 | 
21 |   return Rf_ScalarLogical(TRUE);
22 | }
23 | 
24 | 


--------------------------------------------------------------------------------
/src/init.c:
--------------------------------------------------------------------------------
 1 | #include <R.h>
 2 | #include <Rinternals.h>
 3 | #include <stdlib.h> // for NULL
 4 | #include <R_ext/Rdynload.h>
 5 | #include <R_ext/Visibility.h>
 6 | 
 7 | /* .Call entry points (C_validate_key is defined in cache.c) */
 8 | extern SEXP C_validate_key(SEXP);
 9 | /* Registration table; each entry is {name, function pointer, number of arguments}, ended by a NULL entry. */
10 | static const R_CallMethodDef CallEntries[] = {
11 |   {"C_validate_key",                (DL_FUNC) &C_validate_key,         1},
12 |   {NULL, NULL, 0}
13 | };
14 | /* Library init hook: registers the .Call routines above and turns off dynamic symbol lookup for this DLL. */
15 | attribute_visible void R_init_cachem(DllInfo *dll)
16 | {
17 |   R_registerRoutines(dll, NULL, CallEntries, NULL, NULL);
18 |   R_useDynamicSymbols(dll, FALSE);
19 | }
20 | 


--------------------------------------------------------------------------------
/tests/testthat.R:
--------------------------------------------------------------------------------
1 | library(testthat)
2 | library(cachem)
3 | 
4 | test_check("cachem")
5 | 


--------------------------------------------------------------------------------
/tests/testthat/helper-utils.R:
--------------------------------------------------------------------------------
1 | # TRUE when the GITHUB_ACTIONS environment variable is set to a non-empty value.
2 | is_on_github_actions <- function() {
3 |   Sys.getenv("GITHUB_ACTIONS") != ""
4 | }


--------------------------------------------------------------------------------
/tests/testthat/test-cache-disk.R:
--------------------------------------------------------------------------------
  1 | # Test helper: build a cache_disk(...) whose prune throttle counter starts at 0.
  2 | cache_disk_deterministic <- function(...) {
  3 |   d <- cache_disk(...)
  4 | 
  5 |   # Normally the throttle counter starts with a random value, but for these
  6 |   # tests we need it deterministic, so overwrite it in d$set's environment.
  7 |   environment(d$set)$prune_throttle_counter_ <- 0
  8 | 
  9 |   d
 10 | }
 11 | 
 12 | 
 13 | test_that("cache_disk: handling missing values", {
 14 |   d <- cache_disk()
 15 |   expect_true(is.key_missing(d$get("abcd")))
 16 |   d$set("a", 100)
 17 |   expect_identical(d$get("a"), 100)
 18 |   expect_identical(d$get("y", missing = NULL), NULL)
 19 |   expect_error(
 20 |     d$get("y", missing = stop("Missing key")),
 21 |     "^Missing key$",
 22 |   )
 23 | 
 24 |   d <- cache_disk(missing = NULL)
 25 |   expect_true(is.null(d$get("abcd")))
 26 |   d$set("a", 100)
 27 |   expect_identical(d$get("a"), 100)
 28 |   expect_identical(d$get("y", missing = -1), -1)
 29 |   expect_error(
 30 |     d$get("y", missing = stop("Missing key")),
 31 |     "^Missing key$",
 32 |   )
 33 | 
 34 |   d <- cache_disk(missing = stop("Missing key"))
 35 |   expect_error(d$get("abcd"), "^Missing key$")
 36 |   d$set("x", NULL)
 37 |   d$set("a", 100)
 38 |   expect_identical(d$get("a"), 100)
 39 |   expect_error(d$get("y"), "^Missing key$")
 40 |   expect_identical(d$get("y", missing = NULL), NULL)
 41 |   expect_true(is.key_missing(d$get("y", missing = key_missing())))
 42 |   expect_error(
 43 |     d$get("y", missing = stop("Missing key 2")),
 44 |     "^Missing key 2$",
 45 |   )
 46 | 
 47 |   # Pass in a quosure
 48 |   expr <- rlang::quo(stop("Missing key"))
 49 |   d <- cache_disk(missing = !!expr)
 50 |   expect_error(d$get("y"), "^Missing key$")
 51 |   expect_error(d$get("y"), "^Missing key$") # Make sure a second time also throws
 52 | })
 53 | 
 54 | 
 55 | test_that("cache_disk: pruning respects max_n", {
 56 |   # Timing is apparently unreliable on CRAN, so skip tests there. It's possible
 57 |   # that a heavily loaded system will have issues with these tests because of
 58 |   # the time resolution.
 59 |   skip_on_cran()
 60 |   delay <- 0.01
 61 | 
 62 |   d <- cache_disk_deterministic(max_n = 3)
 63 |   # NOTE: The short delays after each item are meant to make tests more
 64 |   # reliable on CI systems.
 65 |   d$set("a", rnorm(100)); Sys.sleep(delay)
 66 |   d$set("b", rnorm(100)); Sys.sleep(delay)
 67 |   d$set("c", rnorm(100)); Sys.sleep(delay)
 68 |   d$set("d", rnorm(100)); Sys.sleep(delay)
 69 |   d$set("e", rnorm(100)); Sys.sleep(delay)
 70 |   d$prune()
 71 |   expect_identical(sort(d$keys()), c("c", "d", "e"))
 72 | })
 73 | 
 74 | test_that("cache_disk: pruning respects max_size", {
 75 |   skip_on_cran()
 76 |   delay <- 0.01
 77 | 
 78 |   d <- cache_disk_deterministic(max_size = 200)
 79 |   d$set("a", rnorm(100)); Sys.sleep(delay)
 80 |   d$set("b", rnorm(100)); Sys.sleep(delay)
 81 |   d$set("c", 1);          Sys.sleep(delay)
 82 |   d$prune()
 83 |   expect_identical(sort(d$keys()), c("c"))
 84 |   d$set("d", rnorm(100)); Sys.sleep(delay)
 85 |   # Objects are pruned with oldest first, so even though "c" would fit in the
 86 |   # cache, it is removed after adding "d" (and "d" is removed as well because it
 87 |   # doesn't fit).
 88 |   d$prune()
 89 |   expect_length(d$keys(), 0)
 90 |   d$set("e", 2);          Sys.sleep(delay)
 91 |   d$set("f", 3);          Sys.sleep(delay)
 92 |   d$prune()
 93 |   expect_identical(sort(d$keys()), c("e", "f"))
 94 | })
 95 | 
 96 | # Issue shiny#3033
 97 | test_that("cache_disk: pruning respects both max_n and max_size", {
 98 |   skip_on_cran()
 99 |   d <- cache_disk_deterministic(max_n = 3, max_size = 200)
100 | 
101 |   # Set some values. Use rnorm so that object size is large; a simple vector
102 |   # like 1:100 will be stored very efficiently by R's ALTREP, and won't exceed
103 |   # the max_size. We want each of these objects to exceed max_size so that
104 |   # they'll be pruned.
105 |   d$set("a", rnorm(100))
106 |   d$set("b", rnorm(100))
107 |   d$set("c", rnorm(100))
108 |   d$set("d", rnorm(100))
109 |   d$set("e", rnorm(100))
110 |   Sys.sleep(0.1)  # For systems that have low mtime resolution.
111 |   d$set("f", 1)   # This object is small and shouldn't be pruned.
112 |   d$prune()
113 |   expect_identical(d$keys(), "f")
114 | })
115 | 
116 | # Return TRUE if Sys.setFileTime() has subsecond resolution, FALSE otherwise.
117 | # Probes by stamping a temp file with the current time and reading it back.
118 | setfiletime_has_subsecond_resolution <- function() {
119 |   tmp <- tempfile()
120 |   file.create(tmp)
121 |   # Clean up the probe file so repeated calls don't litter the temp dir.
122 |   on.exit(unlink(tmp), add = TRUE)
123 |   Sys.setFileTime(tmp, Sys.time())
124 |   mtime <- as.numeric(file.info(tmp)[["mtime"]])
125 |   # If the stored mtime has a fractional part, sub-second precision was
126 |   # preserved; a whole number means it was truncated to seconds.
127 |   mtime != floor(mtime)
128 | }
129 | 
130 | test_that('cache_disk: pruning with evict="lru"', {
131 |   skip_on_cran()
132 |   delay <- 0.01
133 |   # For lru tests, make sure there's sub-second resolution for
134 |   # Sys.setFileTime(), because that's what the lru code uses to update times.
135 |   skip_if_not(
136 |     setfiletime_has_subsecond_resolution(),
137 |     "Sys.setFileTime() does not have subsecond resolution on this platform."
138 |   )
139 | 
140 |   d <- cache_disk_deterministic(max_n = 2)
141 |   d$set("a", 1); Sys.sleep(delay)
142 |   d$set("b", 1); Sys.sleep(delay)
143 |   d$set("c", 1); Sys.sleep(delay)
144 |   d$prune()
145 |   expect_identical(sort(d$keys()), c("b", "c"))
146 |   d$get("b");    Sys.sleep(delay)
147 |   d$set("d", 1); Sys.sleep(delay)
148 |   d$prune()
149 |   expect_identical(sort(d$keys()), c("b", "d"))
150 |   d$get("b");    Sys.sleep(delay)
151 |   d$set("e", 2); Sys.sleep(delay)
152 |   d$get("b");    Sys.sleep(delay)
153 |   d$set("f", 3); Sys.sleep(delay)
154 |   d$prune()
155 |   expect_identical(sort(d$keys()), c("b", "f"))
156 | })
157 | 
158 | test_that('cache_disk: pruning with evict="fifo"', {
159 |   skip_on_cran()
160 |   delay <- 0.01
161 | 
162 |   d <- cache_disk_deterministic(max_n = 2, evict = "fifo")
163 |   d$set("a", 1); Sys.sleep(delay)
164 |   d$set("b", 1); Sys.sleep(delay)
165 |   d$set("c", 1); Sys.sleep(delay)
166 |   d$prune()
167 |   expect_identical(sort(d$keys()), c("b", "c"))
168 |   d$get("b")
169 |   d$set("d", 1); Sys.sleep(delay)
170 |   d$prune()
171 |   expect_identical(sort(d$keys()), c("c", "d"))
172 |   d$get("b")
173 |   d$set("e", 2); Sys.sleep(delay)
174 |   d$get("b")
175 |   d$set("f", 3); Sys.sleep(delay)
176 |   d$prune()
177 |   expect_identical(sort(d$keys()), c("e", "f"))
178 | })
179 | 
180 | 
181 | test_that("cache_disk: pruning throttling", {
182 |   skip_on_cran()
183 |   delay <- 0.01
184 | 
185 |   # Pruning won't happen when the number of items is less than prune_rate AND
186 |   # the set() calls happen within 5 seconds.
187 |   d <- cache_disk_deterministic(max_n = 2, prune_rate = 20)
188 |   d$set("a", 1); Sys.sleep(delay)
189 |   d$set("b", 1); Sys.sleep(delay)
190 |   d$set("c", 1); Sys.sleep(delay)
191 |   d$set("d", 1); Sys.sleep(delay)
192 |   expect_identical(sort(d$keys()), c("a", "b", "c", "d"))
193 | 
194 |   # Pruning will happen with a lower prune_rate value.
195 |   d <- cache_disk_deterministic(max_n = 2, prune_rate = 3)
196 |   d$set("a", 1); Sys.sleep(delay)
197 |   d$set("b", 1); Sys.sleep(delay)
198 |   d$set("c", 1); Sys.sleep(delay)
199 |   expect_identical(sort(d$keys()), c("b", "c"))
200 |   d$set("d", 1); Sys.sleep(delay)
201 |   expect_identical(sort(d$keys()), c("b", "c", "d"))
202 |   d$set("e", 1); Sys.sleep(delay)
203 |   expect_identical(sort(d$keys()), c("b", "c", "d", "e"))
204 |   d$set("f", 1); Sys.sleep(delay)
205 |   expect_identical(sort(d$keys()), c("e", "f"))
206 | 
207 |   Sys.sleep(5)
208 |   d$set("f", 1); Sys.sleep(delay)
209 |   expect_identical(sort(d$keys()), c("e", "f"))
210 | })
211 | 
212 | test_that("destroy_on_finalize works", {
213 |   d <- cache_disk(destroy_on_finalize = TRUE)
214 |   cache_dir <- d$info()$dir
215 | 
216 |   expect_true(dir.exists(cache_dir))
217 |   rm(d)
218 |   gc()
219 |   expect_false(dir.exists(cache_dir))
220 | })
221 | 
222 | 
223 | test_that("Warnings for caching reference objects", {
224 |   d <- cache_disk(warn_ref_objects = TRUE)
225 |   expect_warning(d$set("a", new.env()))
226 |   expect_warning(d$set("a", function() NULL))
227 |   expect_warning(d$set("a", fastmap()))  # fastmap objects contain an external pointer
228 | 
229 |   # Default is to not warn on ref objects
230 |   d <- cache_disk()
231 |   expect_silent(d$set("a", new.env()))
232 |   expect_silent(d$set("a", function() NULL))
233 |   expect_silent(d$set("a", fastmap()))
234 | })
235 | 
236 | test_that("Cache disk can use different formats", {
237 |   my_write <- function(...) write.csv(..., row.names = FALSE)
238 |   # Use CSV files for storage by supplying custom read/write functions.
239 |   d <- cache_disk(read_fn = read.csv, write_fn = my_write, extension = ".csv")
240 |   # Row names are not written (row.names = FALSE), so drop them before comparing.
241 |   mt <- mtcars
242 |   rownames(mt) <- NULL
243 |   d$set("mt", mt)
244 |   expect_equal(d$get("mt"), mt)
245 | })
246 | 


--------------------------------------------------------------------------------
/tests/testthat/test-cache-mem.R:
--------------------------------------------------------------------------------
  1 | time_factor <- 1
  2 | # Do things slower on GHA because of slow machines
  3 | if (is_on_github_actions()) time_factor <- 4
  4 | 
  5 | 
  6 | test_that("cache_mem: handling missing values", {
  7 |   d <- cache_mem()
  8 |   expect_true(is.key_missing(d$get("abcd")))
  9 |   d$set("a", 100)
 10 |   expect_identical(d$get("a"), 100)
 11 |   expect_identical(d$get("y", missing = NULL), NULL)
 12 |   expect_error(
 13 |     d$get("y", missing = stop("Missing key")),
 14 |     "^Missing key$",
 15 |   )
 16 | 
 17 |   d <- cache_mem(missing = NULL)
 18 |   expect_true(is.null(d$get("abcd")))
 19 |   d$set("a", 100)
 20 |   expect_identical(d$get("a"), 100)
 21 |   expect_identical(d$get("y", missing = -1), -1)
 22 |   expect_error(
 23 |     d$get("y", missing = stop("Missing key")),
 24 |     "^Missing key$",
 25 |   )
 26 | 
 27 |   d <- cache_mem(missing = stop("Missing key"))
 28 |   expect_error(d$get("abcd"), "^Missing key$")
 29 |   d$set("x", NULL)
 30 |   d$set("a", 100)
 31 |   expect_identical(d$get("a"), 100)
 32 |   expect_error(d$get("y"), "^Missing key$")
 33 |   expect_identical(d$get("y", missing = NULL), NULL)
 34 |   expect_true(is.key_missing(d$get("y", missing = key_missing())))
 35 |   expect_error(
 36 |     d$get("y", missing = stop("Missing key 2")),
 37 |     "^Missing key 2$",
 38 |   )
 39 | 
 40 |   # Pass in a quosure
 41 |   expr <- rlang::quo(stop("Missing key"))
 42 |   d <- cache_mem(missing = !!expr)
 43 |   expect_error(d$get("y"), "^Missing key$")
 44 |   expect_error(d$get("y"), "^Missing key$") # Make sure a second time also throws
 45 | })
 46 | 
 47 | test_that("cache_mem: reset", {
 48 |   mc <- cache_mem()
 49 |   mc$set("a", "A")
 50 |   mc$set("b", "B")
 51 |   mc$reset()
 52 |   expect_identical(mc$keys(), character())
 53 |   expect_identical(mc$size(), 0L)
 54 |   mc$set("c", "C")
 55 |   expect_identical(mc$keys(), "c")
 56 |   expect_identical(mc$size(), 1L)
 57 |   expect_false(mc$exists("a"))
 58 |   expect_true(mc$exists("c"))
 59 | })
 60 | 
 61 | test_that("cache_mem: pruning respects max_n", {
 62 |   delay <- 0.001 * time_factor
 63 |   d <- cache_mem(max_n = 3)
 64 |   # NOTE: The short delays after each item are meant to make tests more
 65 |   # reliable on CI systems.
 66 |   d$set("a", rnorm(100)); Sys.sleep(delay)
 67 |   d$set("b", rnorm(100)); Sys.sleep(delay)
 68 |   d$set("c", rnorm(100)); Sys.sleep(delay)
 69 |   d$set("d", rnorm(100)); Sys.sleep(delay)
 70 |   d$set("e", rnorm(100)); Sys.sleep(delay)
 71 |   expect_identical(sort(d$keys()), c("c", "d", "e"))
 72 | })
 73 | 
 74 | test_that("cache_mem: pruning respects max_size", {
 75 |   delay <- 0.001 * time_factor
 76 |   d <- cache_mem(max_size = object.size(123) * 3)
 77 |   d$set("a", rnorm(100)); Sys.sleep(delay)
 78 |   d$set("b", rnorm(100)); Sys.sleep(delay)
 79 |   d$set("c", 1);          Sys.sleep(delay)
 80 |   expect_identical(sort(d$keys()), c("c"))
 81 |   d$set("d", rnorm(100)); Sys.sleep(delay)
 82 |   # Objects are pruned with oldest first, so even though "c" would fit in the
 83 |   # cache, it is removed after adding "d" (and "d" is removed as well because it
 84 |   # doesn't fit).
 85 |   expect_length(d$keys(), 0)
 86 |   d$set("e", 2);          Sys.sleep(delay)
 87 |   d$set("f", 3);          Sys.sleep(delay)
 88 |   expect_identical(sort(d$keys()), c("e", "f"))
 89 | })
 90 | 
 91 | test_that("cache_mem: max_size=Inf", {
 92 |   mc <- cachem::cache_mem(max_size = Inf)
 93 |   mc$set("a", 123)
 94 |   expect_identical(mc$get("a"), 123)
 95 |   mc$prune()
 96 |   expect_identical(mc$get("a"), 123)
 97 | })
 98 | 
 99 | test_that("cache_mem: pruning respects both max_n and max_size", {
100 |   delay <- 0.001 * time_factor
101 |   d <- cache_mem(max_n = 3, max_size = object.size(123) * 3)
102 |   # Set some values. Use rnorm so that object size is large; a simple vector
103 |   # like 1:100 will be stored very efficiently by R's ALTREP, and won't exceed
104 |   # the max_size. We want each of these objects to exceed max_size so that
105 |   # they'll be pruned.
106 |   d$set("a", rnorm(100)); Sys.sleep(delay)
107 |   d$set("b", rnorm(100)); Sys.sleep(delay)
108 |   d$set("c", rnorm(100)); Sys.sleep(delay)
109 |   d$set("d", rnorm(100)); Sys.sleep(delay)
110 |   d$set("e", rnorm(100)); Sys.sleep(delay)
111 |   d$set("f", 1);          Sys.sleep(delay)
112 |   d$set("g", 1);          Sys.sleep(delay)
113 |   d$set("h", 1);          Sys.sleep(delay)
114 |   expect_identical(sort(d$keys()), c("f", "g", "h"))
115 | 
116 |   # This will cause f to be pruned (due to max_n) and g to be pruned (due to
117 |   # max_size).
118 |   d$set("i", c(2, 3));    Sys.sleep(delay)
119 |   expect_identical(sort(d$keys()), c("h", "i"))
120 | })
121 | 
122 | test_that('cache_mem: pruning with evict="lru"', {
123 |   delay <- 0.001 * time_factor
124 |   d <- cache_mem(max_n = 2)
125 |   d$set("a", 1); Sys.sleep(delay)
126 |   d$set("b", 1); Sys.sleep(delay)
127 |   d$set("c", 1); Sys.sleep(delay)
128 |   expect_identical(sort(d$keys()), c("b", "c"))
129 |   d$get("b")
130 |   d$set("d", 1); Sys.sleep(delay)
131 |   expect_identical(sort(d$keys()), c("b", "d"))
132 |   d$get("b")
133 |   d$set("e", 2); Sys.sleep(delay)
134 |   d$get("b")
135 |   d$set("f", 3); Sys.sleep(delay)
136 |   expect_identical(sort(d$keys()), c("b", "f"))
137 | 
138 |   d <- cache_mem(max_n = 2, evict = "lru")
139 |   d$set("a", 1); Sys.sleep(delay)
140 |   d$set("b", 1); Sys.sleep(delay)
141 |   d$set("c", 1); Sys.sleep(delay)
142 |   d$set("b", 2); Sys.sleep(delay)
143 |   d$set("d", 2); Sys.sleep(delay)
144 |   expect_identical(sort(d$keys()), c("b", "d"))
145 | })
146 | 
147 | test_that('cache_mem: pruning with evict="fifo"', {
148 |   delay <- 0.001 * time_factor
149 |   d <- cache_mem(max_n = 2, evict = "fifo")
150 |   d$set("a", 1); Sys.sleep(delay)
151 |   d$set("b", 1); Sys.sleep(delay)
152 |   d$set("c", 1); Sys.sleep(delay)
153 |   expect_identical(sort(d$keys()), c("b", "c"))
154 |   d$get("b")
155 |   d$set("d", 1); Sys.sleep(delay)
156 |   expect_identical(sort(d$keys()), c("c", "d"))
157 |   d$get("b")
158 |   d$set("e", 2); Sys.sleep(delay)
159 |   d$get("b")
160 |   d$set("f", 3); Sys.sleep(delay)
161 |   expect_identical(sort(d$keys()), c("e", "f"))
162 | 
163 |   d <- cache_mem(max_n = 2, evict = "fifo")
164 |   d$set("a", 1); Sys.sleep(delay)
165 |   d$set("b", 1); Sys.sleep(delay)
166 |   d$set("c", 1); Sys.sleep(delay)
167 |   d$set("b", 2); Sys.sleep(delay)
168 |   d$set("d", 2); Sys.sleep(delay)
169 |   expect_identical(sort(d$keys()), c("b", "d"))
170 | })
171 | 
172 | test_that("Pruning by max_age", {
173 |   skip_on_cran()
174 | 
175 |   # Should prune target item on get()
176 |   d <- cache_mem(max_age = 0.25*time_factor)
177 |   d$set("a", 1)
178 |   expect_identical(d$get("a"), 1)
179 |   Sys.sleep(0.3*time_factor)
180 |   expect_identical(d$get("a"), key_missing())
181 |   expect_identical(d$get("x"), key_missing())
182 | 
183 |   # Should prune all items on set()
184 |   d <- cache_mem(max_age = 0.25*time_factor)
185 |   d$set("a", 1)
186 |   expect_identical(d$get("a"), 1)
187 |   Sys.sleep(0.3*time_factor)
188 |   d$set("b", 1)
189 |   expect_identical(d$keys(), "b")
190 | 
191 |   # Should prune target item on exists()
192 |   d <- cache_mem(max_age = 0.25*time_factor)
193 |   d$set("a", 1)
194 |   expect_identical(d$get("a"), 1)
195 |   expect_true(d$exists("a"))
196 |   expect_false(d$exists("b"))
197 |   Sys.sleep(0.15*time_factor)
198 |   d$set("b", 1)
199 |   expect_true(d$exists("a"))
200 |   expect_true(d$exists("b"))
201 |   Sys.sleep(0.15*time_factor)
202 |   expect_false(d$exists("a"))
203 |   expect_true(d$exists("b"))
204 | 
205 |   # Should prune all items on keys()
206 |   d <- cache_mem(max_age = 0.25*time_factor)
207 |   d$set("a", 1)
208 |   expect_identical(d$keys(), "a")
209 |   Sys.sleep(0.15*time_factor)
210 |   d$set("b", 1)
211 |   Sys.sleep(0.15*time_factor)
212 |   expect_identical(d$keys(), "b")
213 | 
214 |   # Should prune all items on size()
215 |   d <- cache_mem(max_age = 0.25*time_factor)
216 |   d$set("a", 1)
217 |   expect_identical(d$size(), 1L)
218 |   Sys.sleep(0.15*time_factor)
219 |   d$set("b", 1)
220 |   expect_identical(d$size(), 2L)
221 |   Sys.sleep(0.15*time_factor)
222 |   expect_identical(d$size(), 1L)
223 | })
224 | 
225 | test_that("Removed objects can be GC'd", {
226 |   mc <- cache_mem()
227 |   e <- new.env()
228 |   finalized <- FALSE
229 |   reg.finalizer(e, function(x) finalized <<- TRUE)
230 |   mc$set("e", e)
231 |   rm(e)
232 |   mc$set("x", 1)
233 |   gc()
234 |   expect_false(finalized)
235 |   expect_true(is.environment(mc$get("e")))
236 | })
237 | 
238 | test_that("Pruned objects can be GC'd", {
239 |   delay <- 0.001 * time_factor
240 |   # Cache is large enough to hold one environment and one number
241 |   mc <- cache_mem(max_size = object.size(new.env()) + object.size(1234))
242 |   e <- new.env()
243 |   finalized <- FALSE
244 |   reg.finalizer(e, function(x) finalized <<- TRUE)
245 |   mc$set("e", e)
246 |   rm(e)
247 |   mc$set("x", 1)
248 |   gc()
249 |   expect_false(finalized)
250 |   expect_true(is.environment(mc$get("e")))
251 | 
252 |   # Get x so that the atime is updated
253 |   Sys.sleep(delay)
254 |   mc$get("x")
255 |   Sys.sleep(delay)
256 | 
257 |   # e should be pruned when we add another item
258 |   mc$set("y", 2)
259 |   gc()
260 |   expect_true(finalized)
261 |   expect_true(is.key_missing(mc$get("e")))
262 | })
263 | 
264 | 
265 | # For https://github.com/r-lib/cachem/issues/47, https://github.com/r-lib/cachem/pull/48/
266 | test_that("Cache doesn't shrink smaller than INITIAL_SIZE", {
267 |   # This test also makes sure that the cache doesn't keep adding elements to the
268 |   # storage vectors when there are zero items, then an item is added and
269 |   # removed, repeatedly.
270 |   m <- cache_mem()
271 |   e <- environment(m$get)
272 |   for (i in seq_len(e$INITIAL_SIZE)) {
273 |     m$set(as.character(i), i)
274 |     m$remove(as.character(i))
275 |   }
276 |   expect_equal(e$total_n_, 0)
277 |   expect_equal(e$last_idx_, e$INITIAL_SIZE)
278 |   expect_length(e$key_, e$INITIAL_SIZE)
279 |   expect_length(e$value_, e$INITIAL_SIZE)
280 | 
281 |   # Adding one more item should trigger a compact_()
282 |   m$set("a", 1)
283 |   m$remove("a")
284 | 
285 |   expect_equal(e$total_n_, 0)
286 |   # last_idx_ should be reset after we pass the INITIAL_SIZE, even if there are
287 |   # no items in the cache. Prior to the fix in #48, it could keep growing.
288 |   expect_equal(e$last_idx_, 0)
289 |   expect_length(e$key_, e$INITIAL_SIZE)
290 |   expect_length(e$value_, e$INITIAL_SIZE)
291 | })
292 | 


--------------------------------------------------------------------------------
/tests/testthat/test-utils.R:
--------------------------------------------------------------------------------
 1 | 
 2 | test_that("validate_key", {
 3 |   expect_true(validate_key("e"))
 4 |   expect_true(validate_key("abc"))
 5 |   expect_true(validate_key("abcd123-_"))
 6 |   expect_true(validate_key("-"))
 7 |   expect_true(validate_key("_"))
 8 | 
 9 |   expect_error(validate_key("a.b"))
10 |   expect_error(validate_key("a,b"))
11 |   expect_error(validate_key("é"))
12 |   expect_error(validate_key("ABC"))
13 |   expect_error(validate_key("_A"))
14 |   expect_error(validate_key("!"))
15 |   expect_error(validate_key("a b"))
16 |   expect_error(validate_key("ab\n"))
17 | })
18 | 


--------------------------------------------------------------------------------