├── .Rbuildignore ├── .github ├── .gitignore └── workflows │ ├── check-full.yaml │ ├── lint.yaml │ ├── pkgdown.yaml │ └── test-coverage.yaml ├── .gitignore ├── DESCRIPTION ├── LICENSE ├── NAMESPACE ├── NEWS ├── R ├── cache.R ├── getPointer.R ├── lru.R ├── map.R └── memo-description.r ├── README.md ├── man ├── cache_stats.Rd ├── hashmap.Rd ├── lru_cache.Rd ├── memo-package.Rd ├── memo.Rd └── strategies.Rd ├── src ├── getPointer.c ├── init.c ├── vadr.c └── vadr.h ├── tests ├── testthat.R └── testthat │ ├── test-cache.R │ ├── test-hashmap.R │ └── test-lru.R └── vignettes └── README.Rmd /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^.*\.Rproj$ 2 | ^\.Rproj\.user$ 3 | ^\.travis\.yml 4 | ^\.github$ 5 | -------------------------------------------------------------------------------- /.github/.gitignore: -------------------------------------------------------------------------------- 1 | *.html 2 | -------------------------------------------------------------------------------- /.github/workflows/check-full.yaml: -------------------------------------------------------------------------------- 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples 2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help 3 | # 4 | # NOTE: This workflow is overkill for most R packages and 5 | # check-standard.yaml is likely a better choice. 6 | # usethis::use_github_action("check-standard") will install it. 7 | on: 8 | push: 9 | branches: [main, master] 10 | pull_request: 11 | branches: [main, master] 12 | 13 | name: R-CMD-check 14 | 15 | jobs: 16 | R-CMD-check: 17 | runs-on: ${{ matrix.config.os }} 18 | 19 | name: ${{ matrix.config.os }} (${{ matrix.config.r }}) 20 | 21 | strategy: 22 | fail-fast: false 23 | matrix: 24 | config: 25 | - {os: macos-latest, r: 'release'} 26 | 27 | - {os: windows-latest, r: 'release'} 28 | # Use 3.6 to trigger usage of RTools35 29 | - {os: windows-latest, r: '3.6'} 30 | # use 4.1 to check with rtools40's older compiler 31 | - {os: windows-latest, r: '4.1'} 32 | 33 | - {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'} 34 | - {os: ubuntu-latest, r: 'release'} 35 | - {os: ubuntu-latest, r: 'oldrel-1'} 36 | - {os: ubuntu-latest, r: 'oldrel-2'} 37 | - {os: ubuntu-latest, r: 'oldrel-3'} 38 | - {os: ubuntu-latest, r: 'oldrel-4'} 39 | 40 | env: 41 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 42 | R_KEEP_PKG_SOURCE: yes 43 | 44 | steps: 45 | - uses: actions/checkout@v3 46 | 47 | - uses: r-lib/actions/setup-pandoc@v2 48 | 49 | - uses: r-lib/actions/setup-r@v2 50 | with: 51 | r-version: ${{ matrix.config.r }} 52 | http-user-agent: ${{ matrix.config.http-user-agent }} 53 | use-public-rspm: true 54 | 55 | - uses: r-lib/actions/setup-r-dependencies@v2 56 | with: 57 | extra-packages: any::rcmdcheck 58 | needs: check 59 | 60 | - uses: r-lib/actions/check-r-package@v2 61 | with: 62 | upload-snapshots: true 63 | -------------------------------------------------------------------------------- /.github/workflows/lint.yaml: -------------------------------------------------------------------------------- 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples 2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help 3 | on: 4 | push: 5 | branches: [main, master] 6 | pull_request: 7 | branches: [main, master] 8 | 9 | name: lint 10 | 11 | jobs: 12 | lint: 13 | runs-on: ubuntu-latest 14 | env: 15 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 16 | steps: 17 | - uses: actions/checkout@v3 18 | 19 | - uses: r-lib/actions/setup-r@v2 20 | with: 21 | use-public-rspm: true 22 | 23 | - uses: r-lib/actions/setup-r-dependencies@v2 24 | with: 25 | extra-packages: any::lintr, local::. 26 | needs: lint 27 | 28 | - name: Lint 29 | run: lintr::lint_package() 30 | shell: Rscript {0} 31 | env: 32 | LINTR_ERROR_ON_LINT: true 33 | -------------------------------------------------------------------------------- /.github/workflows/pkgdown.yaml: -------------------------------------------------------------------------------- 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples 2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help 3 | on: 4 | push: 5 | branches: [main, master] 6 | pull_request: 7 | branches: [main, master] 8 | release: 9 | types: [published] 10 | workflow_dispatch: 11 | 12 | name: pkgdown 13 | 14 | jobs: 15 | pkgdown: 16 | runs-on: ubuntu-latest 17 | # Only restrict concurrency for non-PR jobs 18 | concurrency: 19 | group: pkgdown-${{ github.event_name != 'pull_request' || github.run_id }} 20 | env: 21 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 22 | steps: 23 | - uses: actions/checkout@v3 24 | 25 | - uses: r-lib/actions/setup-pandoc@v2 26 | 27 | - uses: r-lib/actions/setup-r@v2 28 | with: 29 | use-public-rspm: true 30 | 31 | - uses: r-lib/actions/setup-r-dependencies@v2 32 | with: 33 | extra-packages: any::pkgdown, local::. 34 | needs: website 35 | 36 | - name: Build site 37 | run: pkgdown::build_site_github_pages(new_process = FALSE, install = FALSE) 38 | shell: Rscript {0} 39 | 40 | - name: Deploy to GitHub pages 🚀 41 | if: github.event_name != 'pull_request' 42 | uses: JamesIves/github-pages-deploy-action@v4.4.1 43 | with: 44 | clean: false 45 | branch: gh-pages 46 | folder: docs 47 | -------------------------------------------------------------------------------- /.github/workflows/test-coverage.yaml: -------------------------------------------------------------------------------- 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples 2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help 3 | on: 4 | push: 5 | branches: [main, master] 6 | pull_request: 7 | branches: [main, master] 8 | 9 | name: test-coverage 10 | 11 | jobs: 12 | test-coverage: 13 | runs-on: ubuntu-latest 14 | env: 15 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 16 | 17 | steps: 18 | - uses: actions/checkout@v3 19 | 20 | - uses: r-lib/actions/setup-r@v2 21 | with: 22 | use-public-rspm: true 23 | 24 | - uses: r-lib/actions/setup-r-dependencies@v2 25 | with: 26 | extra-packages: any::covr 27 | needs: coverage 28 | 29 | - name: Test coverage 30 | run: | 31 | covr::codecov( 32 | quiet = FALSE, 33 | clean = FALSE, 34 | install_path = file.path(Sys.getenv("RUNNER_TEMP"), "package") 35 | ) 36 | shell: Rscript {0} 37 | 38 | - name: Show testthat output 39 | if: always() 40 | run: | 41 | ## -------------------------------------------------------------------- 42 | find ${{ runner.temp }}/package -name 'testthat.Rout*' -exec cat '{}' \; || true 43 | shell: bash 44 | 45 | - name: Upload test results 46 | if: failure() 47 | uses: actions/upload-artifact@v3 48 | with: 49 | name: coverage-test-failures 50 | path: ${{ runner.temp }}/package 51 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.fig 2 | *.rds 3 | *.asc 4 | *.mat 5 | *.dep 6 | *.so 7 | *.pdf 8 | *.eps 9 | *.dvi 10 | *.Rdata 11 | *.rdata 12 | *.RData 13 | *.DONE 14 | *.INIT 15 | *.files 16 | *.aux 17 | *.bbl 18 | *.log 19 | *.map 20 | *.pygen 21 | \#* 22 | .\#* 23 | monk.makefile 24 | *~ 25 | *.pyc 26 | *.o 27 | *.sqlite-journal 28 | last.dump.rda 29 | .Rhistory 30 | scripts.txt 31 | tikzdict* 32 | dependencies 33 | .Rdata 34 | tickets 35 | Rlibs 36 | cache 37 | figure 38 | *.html 39 | *.md 40 | eachplots 41 | descriptions 42 | collections 43 | dbtickets 44 | auto 45 | *.tar.gz 46 | *.products 47 | *.out 48 | *.rel 49 | *.sqlite 50 | *.sources 51 | *.Rcheck 52 | *.gen 53 | unexcluded.txt 54 | filelist.txt 55 | poolplots 56 | Rdata 57 | unpacked 58 | common 59 | common_adj 60 | stripped 61 | datafiles 62 | eyemovements 63 | session_figures 64 | pools 65 | *.prv 66 | *.blg 67 | DONE 68 | .Rproj.user 69 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: memo 2 | Type: Package 3 | Title: In-Memory Caching of Repeated Computations (Memoization) 4 | Version: 1.1.1 5 | Date: 2023-12-11 6 | Author: Peter Meilstrup 7 | Maintainer: Peter Meilstrup 8 | Description: A simple in-memory, LRU cache that can be wrapped 9 | around any function to memoize it. The cache is keyed on a hash of the input data (using 'digest') or on pointer equivalence. 10 | License: MIT + file LICENSE 11 | Encoding: UTF-8 12 | Imports: 13 | digest 14 | Suggests: 15 | testthat (>= 0.2), 16 | knitr, 17 | rmarkdown 18 | Collate: 19 | 'lru.R' 20 | 'cache.R' 21 | 'getPointer.R' 22 | 'map.R' 23 | 'memo-description.r' 24 | VignetteBuilder: knitr 25 | RoxygenNote: 7.2.3 26 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | YEAR: 2016 2 | COPYRIGHT HOLDER: Peter Meilstrup -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | S3method("[",hashmap) 4 | S3method("[<-",hashmap) 5 | S3method("[[",hashmap) 6 | S3method("[[<-",hashmap) 7 | S3method(dropKey,hashmap) 8 | S3method(hasKey,hashmap) 9 | S3method(keys,hashmap) 10 | S3method(to_pairs,hashmap) 11 | S3method(values,hashmap) 12 | export(cache_stats) 13 | export(digest_key) 14 | export(dropKey) 15 | export(from_pairs) 16 | export(hasKey) 17 | export(hashmap) 18 | export(keys) 19 | export(lru_cache) 20 | export(memo) 21 | export(permanent_cache) 22 | export(pointer_key) 23 | export(to_pairs) 24 | export(values) 25 | import(digest) 26 | useDynLib(memo,"_string_reps") 27 | -------------------------------------------------------------------------------- /NEWS: -------------------------------------------------------------------------------- 1 | memo v1.1.1 (Release date: 2023-12-11) 2 | ================ 3 | 4 | * Correct issues raised by CRAN check: 5 | * Always cast argument of 'printf("%p", )' to '(void *)' 6 | 7 | memo v1.1 (Release date: 2023-02-10) 8 | ================ 9 | 10 | Changes: 11 | 12 | * Introduced "hashmap" class 13 | * Correct issues raised by CRAN check: 14 | * Changed uses of 'sprintf' to 'snprintf' 15 | 16 | memo v1.0.2 (Release date: 2022-04-05) 17 | ================ 18 | 19 | Changes: 20 | * Updates to documentation 21 | 22 | memo v1.0.1 (Release date: 2018-01-01) 23 | ================ 24 | 25 | Changes: 26 | 27 | * Addressed potential protection issues raised by rchk 28 | * Update to use MARK_NOT_MUTABLE instead of NAMED. 29 | 30 | memo v1.0.0 (Release date: 2016-08-22) 31 | ================ 32 | 33 | Initial release. 34 | -------------------------------------------------------------------------------- /R/cache.R: -------------------------------------------------------------------------------- 1 | #' Memoize a function. 2 | #' @param fn A function to wrap. It should be a pure function (i.e. it should 3 | #' not cause side effects, and should not depend on any variables that may 4 | #' change.) It should not be a nonstandard-evaluating function. All arguments 5 | #' will be forced by the wrapper. 6 | #' @param cache A cache to use. Defaults to a new instance of 7 | #' \code{\link{lru_cache}}. Caches may be shared between memoized functions. 8 | #' @param key A hashing strategy. The default "\code{\link{hybrid_key}}" 9 | #' first checks for pointer equivalence and then falls back to using a 10 | #' hash of the arguments. `pointer_key` uses just pointer equivalence, 11 | #' and `digest_key` always performs a hash. 12 | #' @param ... Further arguments passed on to key. 13 | #' @export 14 | memo <- function(fn, cache=lru_cache(5000), key=hybrid_key, ...) { 15 | force(fn) 16 | key <- match.fun(key) 17 | key(fn, cache, ...) 18 | } 19 | 20 | #' Strategies for caching items. 21 | #' 22 | #' The function \code{\link{memo}} accepts an argument `key` which 23 | #' specifies the keying strategy. 24 | #' 25 | #' @param fn A function whose results should be cached. 26 | #' @param cache A cache object. 27 | #' @return A memoized function. 28 | #' @name strategies 29 | NULL 30 | 31 | #' \code{digest_key} computes a key by hashing the contents of the object using 32 | #' the digest package. No attempt is made to avoid MD5 hash collisions. 33 | #' @rdname strategies 34 | #' @import digest 35 | #' @export 36 | digest_key <- function(fn, cache, digest=digest::digest) { 37 | delayedAssign("fn_digest", digest(fn)) 38 | function(...) { 39 | key <- paste0(c(fn_digest, digest(list(...))), collapse=";") 40 | cache(key, fn(...)) 41 | } 42 | } 43 | 44 | #' The \code{pointer_key} strategy uses object identity, 45 | #' that is, pointer equivalence. This can be faster because hte 46 | #' entire object need not be hashed. However, this strategy is only 47 | #' useful when the function is called repeatedly on the same 48 | #' object rather than merely identical objects. Also be aware that 49 | #' the cache will hold on to the values of the arguments, to prevent 50 | #' them being garbage collected. 51 | #' @rdname strategies 52 | #' @export 53 | pointer_key <- function(fn, cache) { 54 | delayedAssign("fn_digest", string_reps(list(fn))) 55 | function(...) { 56 | l <- list(...) 57 | key <- paste0(c(fn_digest, string_reps(list(...))), collapse=";") 58 | # hold onto the argument list while the cache remembers the pointer value 59 | cache(key, list(fn(...), l))[[1]] 60 | } 61 | } 62 | 63 | #' The \code{hybrid_key} strategy first tries to key on object 64 | #' identity and then falls back on computing the md5 digest. 65 | #' This may use two cache slots per result. 66 | #' @param digest A digest function to use. 67 | #' @rdname strategies 68 | hybrid_key <- function(fn, cache, digest=function(x) digest::digest(x, "md5")) { 69 | delayedAssign("fn_digest", digest(fn)) 70 | function(...) { 71 | l = list(...) 72 | predigest <- paste0(c(fn_digest, string_reps(l)), collapse=";") 73 | # also hold on to the argument list 74 | digest <- cache(predigest, 75 | list(paste0(fn_digest, digest(l), collapse=";"), 76 | l) 77 | )[[1]] 78 | cache(digest, fn(...)) 79 | } 80 | } 81 | -------------------------------------------------------------------------------- /R/getPointer.R: -------------------------------------------------------------------------------- 1 | # Return some canonical identifying strings for each of a list of objects. 2 | # For scalars, the values are directly represented. 3 | # For scalar strings, we use the pointer to the interned CHARSXP. 4 | # For other objects, we use the pointer. 5 | # If the list has names, we represent store STRSXP pointers to the names 6 | # Because it is possible for objects-pointed-to to be GCed and replaced with 7 | # different objects, the calling code is responsible for holding on to 8 | # references to the objects (see test-cache.R). 9 | #' @useDynLib memo _string_reps 10 | string_reps <- function(list) { 11 | .Call(`_string_reps`, list) 12 | } 13 | -------------------------------------------------------------------------------- /R/lru.R: -------------------------------------------------------------------------------- 1 | #' `basic_cache` makes a cache that does not expire old entries. 2 | #' It should be used in situations where you know the number of 3 | #' things to remember is bounded. 4 | #' @rdname lru_cache 5 | #' @export 6 | permanent_cache <- function() { 7 | cache <- new.env(parent=emptyenv()) 8 | 9 | hits <- 0L 10 | misses <- 0L 11 | expired <- 0L 12 | used <- 0L 13 | size <- Inf 14 | 15 | function(key, value, action="cache", ifnotfound=NULL) { 16 | switch(action, 17 | exists=exists(key, cache), 18 | cache=if(exists(key, cache)) { 19 | hits <<- hits+1L 20 | cache[[key]] 21 | } else { 22 | misses <<- misses+1 23 | used <<- used+1L 24 | cache[[key]] <<- value 25 | }, 26 | get=if(exists(key, cache)) { 27 | hits <<- hits+1L 28 | } else { 29 | misses <- misses+1L 30 | ifnotfound 31 | }, 32 | set={ 33 | if(!exists(key, cache)) {used <- used+1L} 34 | cache[[key]] <<- value 35 | }, 36 | rm=if(exists(key, cache)) { 37 | used <<- used-1L 38 | expired <<- expired+1L 39 | rm(list=key, envir=cache) 40 | }) 41 | } 42 | } 43 | 44 | #' Construct a cache with least-recently-used policy. 45 | #' @param size The maximum number of results to keep. 46 | #' @return A function f(key, value) which takes a string in the first 47 | #' parameter and a lazily evaluated value in the second. `f` 48 | #' will use the string key to retrieve a value from the cache, or 49 | #' return the matching item from the cache, or force the second 50 | #' argument and return that, remembering the result on future calls. 51 | #' 52 | #' When the number of entries in the cache exceeds \code{size}, the least 53 | #' recently accessed entries are removed. 54 | #' @export 55 | lru_cache <- function(size = 10000) { 56 | lru <- new.env(hash=TRUE, parent=emptyenv(), size=size) 57 | pred <- new.env(hash=TRUE, parent=emptyenv(), size=size) 58 | succ <- new.env(hash=TRUE, parent=emptyenv(), size=size) 59 | 60 | hits <- 0 61 | misses <- 0 62 | expired <- 0 63 | used <- 0 64 | 65 | pred$TAIL <- "HEAD" 66 | succ$HEAD <- "TAIL" 67 | 68 | function(key, value) { 69 | #value lazily forced if not found 70 | if (exists(key, lru)) { 71 | hits <<- hits+1 72 | #move accessed value to front 73 | new_succ <- succ[[key]] 74 | new_pred <- pred[[key]] 75 | succ[[new_pred]] <<- new_succ 76 | pred[[new_succ]] <<- new_pred 77 | pred[[succ$HEAD]] <<- key 78 | pred[[key]] <<- "HEAD" 79 | succ[[key]] <<- succ$HEAD 80 | succ$HEAD <<- key 81 | lru[[key]] 82 | } else { 83 | misses <<- misses+1 84 | lru[[key]] <<- value 85 | #drop if entries exceeded 86 | while (used >= size) { 87 | last <- pred$TAIL 88 | succ[[pred[[last]]]] <<- "TAIL" 89 | pred$TAIL <<- pred[[last]] 90 | rm(list=last, envir=lru) 91 | rm(list=last, envir=pred) 92 | rm(list=last, envir=succ) 93 | expired <<- expired + 1 94 | used <<- used - 1 95 | } 96 | succ[[key]] <<- succ$HEAD 97 | pred[[succ$HEAD]] <<- key 98 | succ$HEAD <<- key 99 | pred[[key]] <<- "HEAD" 100 | used <<- used + 1 101 | value 102 | } 103 | } 104 | } 105 | 106 | #' Report cache statistics. 107 | #' 108 | #' @param fn A memoized function that was created by \code{\link{memo}}. 109 | #' @return A list with labels "size", "used", "hits", "misses", "expired" 110 | #' counting the number of slots in the cache, the number of slots currently 111 | #' used, the number of times a previous result was recalled, a new result was 112 | #' recorded, and a result was dropped. 113 | #' @export 114 | cache_stats <- function(fn) { 115 | hitdata <- mget(c("size", "used", "hits", "misses", "expired"), 116 | environment(environment(fn)$cache)) 117 | as.list(hitdata) 118 | } 119 | -------------------------------------------------------------------------------- /R/map.R: -------------------------------------------------------------------------------- 1 | cached_digest <- function(...) digest(list(...), algo="md5") 2 | 3 | .onLoad <- function(libname, pkgname) { 4 | cached_digest <<- memo(cached_digest, key=pointer_key) 5 | } 6 | 7 | #' A reference-valued, key-value store. 8 | #' 9 | #' [hashmap()] constructs a hashmap, which is an object that behaves 10 | #' like an [environment] but can key on arbitrary objects rather than 11 | #' just characters. 12 | #' 13 | #' You can use multiple indices in a hashmap; the effect is similar to 14 | #' indexing on a list containing all keys. 15 | #' 16 | #' Type is significant; for instance, float `1` and integer `1L` are 17 | #' considered distinct indices. It is also permitted to index on NULL, 18 | #' NA, or the empty string. 19 | #' 20 | #' The `memo` package hashmap has a performance optimization over 21 | #' other implementations of this concept, in that the md5 digest is 22 | #' memoized on scalar and pointer values. That means that if you 23 | #' lookup using keys that are pointer-identical to previously seen 24 | #' keys, it will skip computing the digest a second time. Indexing 25 | #' using scalar values will also bypass the md5 hash. 26 | #' 27 | #' @return `hashmap()` returns a newly constructed hashmap. 28 | #' @author Peter Meilstrup 29 | #' @export 30 | hashmap <- function() { 31 | structure( 32 | list( 33 | keys=new.env(parent=emptyenv()), 34 | vals=new.env(parent=emptyenv()), 35 | digest=cached_digest), 36 | class="hashmap") 37 | } 38 | 39 | #' @exportS3Method "[" hashmap 40 | #' @rdname hashmap 41 | #' @param x a hashmap object. 42 | `[.hashmap` <- function(x, ...) { 43 | mapply(`[[.hashmap`, ..., MoreArgs=list(x=x), SIMPLIFY=FALSE) 44 | } 45 | 46 | #' The `[` and `[<-` methods work in terms of a list formed by 47 | #' iterating over the given indices in parallel; for instance 48 | #' `x[c(2, 8), c(3, 9)]` will be equivalent to `list(x[[2, 3]], 49 | #' x[[3, 9]])`. 50 | #' @param value A replacement value for `[[`; for '[', a 51 | #' sequence of replacement values. 52 | #' @rdname hashmap 53 | #' @exportS3Method "[<-" hashmap 54 | `[<-.hashmap` <- function(x, ..., value) { 55 | mapply(`[[<-.hashmap`, ..., value=value, MoreArgs=list(x=x), SIMPLIFY=FALSE) 56 | x 57 | } 58 | 59 | #' @exportS3Method "[[" hashmap 60 | #' @rdname hashmap 61 | `[[.hashmap` <- function(x, ...) { 62 | digestfn <- x$digest 63 | dig <- digestfn(...) # just writing x$digest(...) makes CRAN check complain??? 64 | if (exists(dig, envir=x$keys)) { 65 | stopifnot(identical(x$keys[[dig]], list(...))) 66 | x$vals[[dig]] 67 | } else NULL 68 | } 69 | 70 | #' @exportS3Method "[[<-" hashmap 71 | #' @rdname hashmap 72 | `[[<-.hashmap` <- function(x, ..., value) { 73 | digestfn <- x$digest 74 | dig <- digestfn(...) # just writing x$digest(...) makes CRAN check complain??? 75 | x$keys[[dig]] <- list(...) 76 | x$vals[[dig]] <- value 77 | x 78 | } 79 | 80 | #' @export 81 | #' @rdname hashmap 82 | keys <- function(x, ...) UseMethod("keys") 83 | 84 | #' @exportS3Method 85 | keys.hashmap <- function(x, ...) { 86 | lapply(sort(names(x$keys)), function(k) x$keys[[k]]) 87 | } 88 | 89 | #' @export 90 | #' @rdname hashmap 91 | values <- function(x, ...) UseMethod("values") 92 | 93 | #' @exportS3Method 94 | values.hashmap <- function(x, ...) { 95 | lapply(sort(names(x$keys)), function(k) x$vals[[k]]) 96 | } 97 | 98 | #' @export 99 | #' @return `pairs(x)` extracts from a hashmap a list of pairs, each 100 | #' pair being of the form `list(key=, val=)`. 101 | #' @rdname hashmap 102 | to_pairs <- function(x, ...) UseMethod("to_pairs") 103 | 104 | #' @exportS3Method 105 | to_pairs.hashmap <- function(x, ...) { 106 | lapply(sort(names(x$keys)), function(k) list(key=x$keys[[k]], value=x$vals[[k]])) 107 | } 108 | 109 | #' @export 110 | #' @param pairs A list of pairs, the first element is treated as key 111 | #' and the second as value. 112 | #' @rdname hashmap 113 | from_pairs <- function(pairs) { 114 | hm <- hashmap() 115 | lapply(pairs, function(x) { 116 | dig <- hm$digest(x[[1]]) 117 | hm$keys[[dig]] <- x[[1]] 118 | hm$vals[[dig]] <- x[[2]] 119 | }) 120 | hm 121 | } 122 | 123 | #' @export 124 | #' @rdname hashmap 125 | #' @param ... Any number of indices. 126 | #' @return `hasKey(x)` returns TRUE if there is a key with the same 127 | #' digest as `...` that compares [identical()] 128 | hasKey <- function(x, ...) UseMethod("hasKey") 129 | 130 | #' @exportS3Method 131 | hasKey.hashmap <- function(x, ...) { 132 | digest <- x$digest # CRAN complains about x$digest(...) 133 | exists(digest(...), envir=x$keys) 134 | } 135 | 136 | #' The base R behavior of deleting keys using `x[[key]] <- NULL` is 137 | #' explicitly _not_ supported. Instead, use `dropKey(x, ...)`. 138 | #' @rdname hashmap 139 | #' @export 140 | dropKey <- function(x, ...) UseMethod("dropKey") 141 | 142 | #' @exportS3Method 143 | dropKey.hashmap <- function(x, ...) { 144 | digest <- x$digest # why does CRAN complain about x$digest(...) 145 | dig <- digest(...) 146 | if (exists(dig, envir=x$keys)) { 147 | rm(list=dig, envir=x$keys) 148 | rm(list=dig, envir=x$vals) 149 | } 150 | invisible(hashmap) 151 | } 152 | -------------------------------------------------------------------------------- /R/memo-description.r: -------------------------------------------------------------------------------- 1 | #' In-memory caching of repeated computations, by pointer equivalence. 2 | #' 3 | #' The `memo` package implements a cache that can be used to avoid repeated 4 | #' computations of functions. The cache lookup is based on object 5 | #' identity (i.e. pointer equivalence) which is suited for functions 6 | #' like accessors or other functions that are called repeatedly on the 7 | #' same object. 8 | #' 9 | #' @aliases memo-package 10 | #' @author Peter Meilstrup 11 | "_PACKAGE" 12 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | #memo 2 | ====== 3 | 4 | [![](https://www.r-pkg.org/badges/version/async?color=purple)](https://cran.r-project.org/package=memo) 5 | [![pkgdown](https://github.com/crowding/memo/workflows/pkgdown/badge.svg)](https://github.com/crowding/memo/actions) 6 | [![R-CMD-check](https://github.com/crowding/memo/workflows/R-CMD-check/badge.svg)](https://github.com/crowding/memo/actions) 7 | [![test-coverage](https://github.com/crowding/memo/workflows/test-coverage/badge.svg)](https://github.com/crowding/memo/actions) 8 | 9 | The `memo` package implements a simple in-memory cache for the results of a function. If you have an expensive function that is being called repeatedly with the same inputs, `memo` can help. 10 | 11 | ## Fibonnacci example 12 | 13 | 14 | ```r 15 | fib <- function(n) if (n <= 1) 1 else fib(n-1) + fib(n-2) 16 | sapply(0:9, fib) 17 | ``` 18 | 19 | ``` 20 | ## [1] 1 1 2 3 5 8 13 21 34 55 21 | ``` 22 | 23 | This recursive implementation corresponds closely to the way the sequence is defined in math texts, but it has a performance problem. The problem is that as you ask for values further down the sequence, the computation becomes inordinately slow due to recursion. To demonstrate the issue, we can try counting every time `fib` is 24 | called: 25 | 26 | 27 | ```r 28 | count <- 0 29 | fib <- function(n) { 30 | count <<- count+1 31 | if (n <= 1) 1 else fib(n-1) + fib(n-2) 32 | } 33 | 34 | counted_fib <- function(n) { 35 | count <<- 0 36 | c(n=n, result=fib(n), calls=count) 37 | } 38 | 39 | t(sapply(0:16, counted_fib)) 40 | ``` 41 | 42 | ``` 43 | ## n result calls 44 | ## [1,] 0 1 1 45 | ## [2,] 1 1 1 46 | ## [3,] 2 2 3 47 | ## [4,] 3 3 5 48 | ## [5,] 4 5 9 49 | ## [6,] 5 8 15 50 | ## [7,] 6 13 25 51 | ## [8,] 7 21 41 52 | ## [9,] 8 34 67 53 | ## [10,] 9 55 109 54 | ## [11,] 10 89 177 55 | ## [12,] 11 144 287 56 | ## [13,] 12 233 465 57 | ## [14,] 13 377 753 58 | ## [15,] 14 610 1219 59 | ## [16,] 15 987 1973 60 | ## [17,] 16 1597 3193 61 | ``` 62 | 63 | The number of calls increases unreasonably. This is because, for instance, `fib(6)` calls both `fib(5)` and `fib(4)`, but `fib(5)` also calls `fib(4)`. The second call to `fib(4)` is wasted work. And this pattern goes on -- the two calls to `fib(4)` lead to _four_ calls to `fib(2)`. Every time you increment `n` by one, the number of calls roughly doubles. (Clearly, there are more efficient algorithms for computing the Fibbonacci sequence, but this is a toy example, where `fib` stands in for some expensive function that is being called repeatedly.) 64 | 65 | One way to cut down on wasted effort would be to check whether `fib(n)` has already been computed for a given `n`. If it has, `fib` can just return that value instead of starting over. This is called "memoizing." The `memo` package can [automatically][] create a memoized version of a given function, just by wrapping the function definition in `memo()`: 66 | 67 | [automatically]: https://en.wikipedia.org/wiki/Memoization#Automatic_memoization 68 | 69 | 70 | ```r 71 | library(memo) 72 | 73 | count <- 0 74 | fib <- memo(function(n) { 75 | count <<- count+1 76 | if (n <= 1) 1 else fib(n-1) + fib(n-2) 77 | }) 78 | 79 | counted_fib(16) 80 | ``` 81 | 82 | ``` 83 | ## n result calls 84 | ## 16 1597 17 85 | ``` 86 | Now, computing `fib(16)` only takes 17 calls. And if we call again, it remembers the previous answer and doesn't make any new calls: 87 | 88 | ```r 89 | counted_fib(16) 90 | ``` 91 | 92 | ``` 93 | ## n result calls 94 | ## 16 1597 0 95 | ``` 96 | Each successive value then only takes two calls: 97 | 98 | ```r 99 | t(sapply(17:30, counted_fib)) 100 | ``` 101 | 102 | ``` 103 | ## n result calls 104 | ## [1,] 17 2584 1 105 | ## [2,] 18 4181 2 106 | ## [3,] 19 6765 2 107 | ## [4,] 20 10946 2 108 | ## [5,] 21 17711 2 109 | ## [6,] 22 28657 2 110 | ## [7,] 23 46368 2 111 | ## [8,] 24 75025 2 112 | ## [9,] 25 121393 2 113 | ## [10,] 26 196418 2 114 | ## [11,] 27 317811 2 115 | ## [12,] 28 514229 2 116 | ## [13,] 29 832040 2 117 | ## [14,] 30 1346269 2 118 | ``` 119 | 120 | The tradeoff for this speedup is the memory used to store previous results. By default `memo` will remember the 5000 most recently used results; to adjust that limit you can change the `cache` option: 121 | 122 | 123 | ```r 124 | fib <- memo(cache=lru_cache(5000), function () {...}) 125 | ``` 126 | 127 | The Fibonacci sequence being kind of a toy example, memoization has a variety of uses, such as: 128 | 129 | * Caching the results of expensive database queries, for instance in Shiny apps where many users may make identical queries. 130 | * Algorithms for path finding (dynamic programming) and parsing. 131 | * Simulations such as [Cellular automata](https://en.wikipedia.org/wiki/Hashlife). 132 | -------------------------------------------------------------------------------- /man/cache_stats.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/lru.R 3 | \name{cache_stats} 4 | \alias{cache_stats} 5 | \title{Report cache statistics.} 6 | \usage{ 7 | cache_stats(fn) 8 | } 9 | \arguments{ 10 | \item{fn}{A memoized function that was created by \code{\link{memo}}.} 11 | } 12 | \value{ 13 | A list with labels "size", "used", "hits", "misses", "expired" 14 | counting the number of slots in the cache, the number of slots currently 15 | used, the number of times a previous result was recalled, a new result was 16 | recorded, and a result was dropped. 17 | } 18 | \description{ 19 | Report cache statistics. 20 | } 21 | -------------------------------------------------------------------------------- /man/hashmap.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/map.R 3 | \name{hashmap} 4 | \alias{hashmap} 5 | \alias{[.hashmap} 6 | \alias{[<-.hashmap} 7 | \alias{[[.hashmap} 8 | \alias{[[<-.hashmap} 9 | \alias{keys} 10 | \alias{values} 11 | \alias{to_pairs} 12 | \alias{from_pairs} 13 | \alias{hasKey} 14 | \alias{dropKey} 15 | \title{A reference-valued, key-value store.} 16 | \usage{ 17 | hashmap() 18 | 19 | \method{[}{hashmap}(x, ...) 20 | 21 | \method{[}{hashmap}(x, ...) <- value 22 | 23 | \method{[[}{hashmap}(x, ...) 24 | 25 | \method{[[}{hashmap}(x, ...) <- value 26 | 27 | keys(x, ...) 28 | 29 | values(x, ...) 30 | 31 | to_pairs(x, ...) 32 | 33 | from_pairs(pairs) 34 | 35 | hasKey(x, ...) 36 | 37 | dropKey(x, ...) 38 | } 39 | \arguments{ 40 | \item{x}{a hashmap object.} 41 | 42 | \item{...}{Any number of indices.} 43 | 44 | \item{value}{A replacement value for `[[`; for '[', a 45 | sequence of replacement values.} 46 | 47 | \item{pairs}{A list of pairs, the first element is treated as key 48 | and the second as value.} 49 | } 50 | \value{ 51 | `hashmap()` returns a newly constructed hashmap. 52 | 53 | `pairs(x)` extracts from a hashmap a list of pairs, each 54 | pair being of the form `list(key=, val=)`. 55 | 56 | `hasKey(x)` returns TRUE if there is a key with the same 57 | digest as `...` that compares [identical()] 58 | } 59 | \description{ 60 | [hashmap()] constructs a hashmap, which is an object that behaves 61 | like an [environment] but can key on arbitrary objects rather than 62 | just characters. 63 | } 64 | \details{ 65 | You can use multiple indices in a hashmap; the effect is similar to 66 | indexing on a list containing all keys. 67 | 68 | Type is significant; for instance, float `1` and integer `1L` are 69 | considered distinct indices. It is also permitted to index on NULL, 70 | NA, or the empty string. 71 | 72 | The `memo` package hashmap has a performance optimization over 73 | other implementations of this concept, in that the md5 digest is 74 | memoized on scalar and pointer values. That means that if you 75 | lookup using keys that are pointer-identical to previously seen 76 | keys, it will skip computing the digest a second time. Indexing 77 | using scalar values will also bypass the md5 hash. 78 | } 79 | \author{ 80 | Peter Meilstrup 81 | } 82 | -------------------------------------------------------------------------------- /man/lru_cache.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/lru.R 3 | \name{permanent_cache} 4 | \alias{permanent_cache} 5 | \alias{lru_cache} 6 | \title{`basic_cache` makes a cache that does not expire old entries. 7 | It should be used in situations where you know the number of 8 | things to remember is bounded.} 9 | \usage{ 10 | permanent_cache() 11 | 12 | lru_cache(size = 10000) 13 | } 14 | \arguments{ 15 | \item{size}{The maximum number of results to keep.} 16 | } 17 | \value{ 18 | A function f(key, value) which takes a string in the first 19 | parameter and a lazily evaluated value in the second. `f` 20 | will use the string key to retrieve a value from the cache, or 21 | return the matching item from the cache, or force the second 22 | argument and return that, remembering the result on future calls. 23 | 24 | When the number of entries in the cache exceeds \code{size}, the least 25 | recently accessed entries are removed. 26 | } 27 | \description{ 28 | `basic_cache` makes a cache that does not expire old entries. 29 | It should be used in situations where you know the number of 30 | things to remember is bounded. 31 | 32 | Construct a cache with least-recently-used policy. 33 | } 34 | -------------------------------------------------------------------------------- /man/memo-package.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/memo-description.r 3 | \docType{package} 4 | \name{memo-package} 5 | \alias{memo-package} 6 | \alias{_PACKAGE} 7 | \title{In-memory caching of repeated computations, by pointer equivalence.} 8 | \description{ 9 | The `memo` package implements a cache that can be used to avoid repeated 10 | computations of functions. The cache lookup is based on object 11 | identity (i.e. pointer equivalence) which is suited for functions 12 | like accessors or other functions that are called repeatedly on the 13 | same object. 14 | } 15 | \author{ 16 | Peter Meilstrup 17 | } 18 | -------------------------------------------------------------------------------- /man/memo.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/cache.R 3 | \name{memo} 4 | \alias{memo} 5 | \title{Memoize a function.} 6 | \usage{ 7 | memo(fn, cache = lru_cache(5000), key = hybrid_key, ...) 8 | } 9 | \arguments{ 10 | \item{fn}{A function to wrap. It should be a pure function (i.e. it should 11 | not cause side effects, and should not depend on any variables that may 12 | change.) It should not be a nonstandard-evaluating function. All arguments 13 | will be forced by the wrapper.} 14 | 15 | \item{cache}{A cache to use. Defaults to a new instance of 16 | \code{\link{lru_cache}}. Caches may be shared between memoized functions.} 17 | 18 | \item{key}{A hashing strategy. The default "\code{\link{hybrid_key}}" 19 | first checks for pointer equivalence and then falls back to using a 20 | hash of the arguments. `pointer_key` uses just pointer equivalence, 21 | and `digest_key` always performs a hash.} 22 | 23 | \item{...}{Further arguments passed on to key.} 24 | } 25 | \description{ 26 | Memoize a function. 27 | } 28 | -------------------------------------------------------------------------------- /man/strategies.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/cache.R 3 | \name{strategies} 4 | \alias{strategies} 5 | \alias{digest_key} 6 | \alias{pointer_key} 7 | \alias{hybrid_key} 8 | \title{Strategies for caching items.} 9 | \usage{ 10 | digest_key(fn, cache, digest = digest::digest) 11 | 12 | pointer_key(fn, cache) 13 | 14 | hybrid_key(fn, cache, digest = function(x) digest::digest(x, "md5")) 15 | } 16 | \arguments{ 17 | \item{fn}{A function whose results should be cached.} 18 | 19 | \item{cache}{A cache object.} 20 | 21 | \item{digest}{A digest function to use.} 22 | } 23 | \value{ 24 | A memoized function. 25 | } 26 | \description{ 27 | The function \code{\link{memo}} accepts an argument `key` which 28 | specifies the keying strategy. 29 | } 30 | -------------------------------------------------------------------------------- /src/getPointer.c: -------------------------------------------------------------------------------- 1 | #include "vadr.h" 2 | 3 | SEXP stringify_item(SEXP, char *, char *); 4 | int snprintdouble(char *, size_t, double); 5 | SEXP weakref_(SEXP, SEXP); 6 | 7 | #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L && !defined(__MINGW32__) 8 | 9 | int snprintdouble(char *buf, size_t n, double arg) { 10 | return snprintf(buf, n, "%la", arg); 11 | } 12 | 13 | #else 14 | 15 | /* hack for stdlibs that don't support "%a" printf conversion */ 16 | int snprintdouble(char *buf, size_t n, double arg) { 17 | int written = 0; 18 | union { 19 | double f; 20 | unsigned char ch[sizeof(double) / sizeof(char)]; 21 | } data; 22 | data.f = arg; 23 | 24 | for (int i = 0; i < sizeof(double) / sizeof(char); i++) { 25 | int chars = snprintf(buf, n, "%02x", data.ch[i]); 26 | written += chars; 27 | buf += chars; 28 | n -= chars; 29 | } 30 | return written; 31 | } 32 | 33 | #endif 34 | 35 | 36 | 37 | /* Return some canonical identifying strings for each of a list of objects. 38 | * For numeric scalars, the values are directly represented. 39 | * For scalar strings, we use the pointer to the interned CHARSXP. 40 | * For other objects, we use the pointer, after setting NAMED=2. 41 | * If the list has names, we represent the pointer values on the names. 42 | * Because it is possible for objects-pointed-to to be GCed and replaced with 43 | * different objects, the calling code is responsible for holding on to 44 | * references to the objects (see test-cache.R). 45 | */ 46 | SEXP _string_reps(SEXP list) { 47 | assert_type(list, VECSXP); 48 | 49 | int length = LENGTH(list); 50 | SEXP in_names = PROTECT(getAttrib(list, R_NamesSymbol)); 51 | SEXP out_reps = PROTECT(allocVector(STRSXP, length)); 52 | SEXP out_names; 53 | 54 | if (in_names != R_NilValue) { 55 | assert_type3(in_names, STRSXP, "names attribute should be a character vector"); 56 | if (LENGTH(in_names) < length) { 57 | in_names = R_NilValue; 58 | out_names = R_NilValue; 59 | } else { 60 | PROTECT(out_names = allocVector(STRSXP, length)); 61 | } 62 | } else { 63 | out_names = R_NilValue; 64 | } 65 | 66 | for (int i = 0; i < length; i++) { 67 | SEXP item = VECTOR_ELT(list, i); 68 | /* worst case, element has a tag and is a CLOSXP, so 4 pointers and change: 69 | "c0x0123456789abcdef=c_0x0123456789abcdef/0x0123456789abcdef/0x01234567abcdef/0x01234567abcdef" = 97 chars*/ 70 | char buf[128]; 71 | char *bufptr = buf; 72 | char *end = bufptr + sizeof(buf) - 1; 73 | 74 | if (in_names != R_NilValue) { 75 | SEXP name = STRING_ELT(in_names, i); 76 | if (name != R_BlankString) { 77 | bufptr += snprintf(bufptr, end-bufptr, "c%p=", R_CHAR(name)); 78 | } 79 | } 80 | 81 | stringify_item(item, bufptr, end); 82 | SET_STRING_ELT(out_reps, i, mkChar(buf)); 83 | } 84 | 85 | if (in_names != R_NilValue) { 86 | setAttrib(out_names, R_NamesSymbol, in_names); 87 | UNPROTECT(1); 88 | } 89 | 90 | UNPROTECT(2); 91 | return(out_reps); 92 | } 93 | 94 | /* Construct a string identifying some SEXP, either as a scalar value or as a pointer. 95 | If we use its pointer, mark the item immutable. 96 | Return that pointer, or R_NilValue. */ 97 | SEXP stringify_item(SEXP item, char *bufptr, char* end) { 98 | int done = 0; 99 | PROTECT_INDEX ix; 100 | SEXP item_ptr; 101 | PROTECT_WITH_INDEX(item_ptr = R_NilValue, &ix); 102 | while(!done) { 103 | switch (TYPEOF(item)) { 104 | case PROMSXP: 105 | /* if we have a promise, drill down. */ 106 | item = PRCODE(item); 107 | break; 108 | case CHARSXP: 109 | /* interned string, represent its pointer */ 110 | REPROTECT(item_ptr = item, ix); 111 | bufptr += snprintf(bufptr, end-bufptr, "c%p", CHAR(item_ptr)); 112 | done = 1; 113 | break; 114 | case REALSXP: 115 | case INTSXP: 116 | case STRSXP: 117 | case LGLSXP: 118 | /* we have a code literal. represent it canonically, 119 | and don't hold a ref to a scalar. */ 120 | if (LENGTH(item) == 0) { 121 | switch(TYPEOF(item)) { 122 | case REALSXP: bufptr += snprintf(bufptr, end-bufptr, "r0"); break; 123 | case INTSXP: bufptr += snprintf(bufptr, end-bufptr, "i0"); break; 124 | case LGLSXP: bufptr += snprintf(bufptr, end-bufptr, "l0"); break; 125 | case STRSXP: bufptr += snprintf(bufptr, end-bufptr, "s0"); break; 126 | default: error("Unexpected type %s (this shouldn't happen)", type2char(TYPEOF(item))); 127 | } 128 | } else if (LENGTH(item) == 1) { 129 | switch(TYPEOF(item)) { 130 | case REALSXP: 131 | bufptr += snprintf(bufptr, end-bufptr, "r"); 132 | bufptr += snprintdouble(bufptr, end-bufptr, REAL(item)[0]); 133 | break; 134 | case INTSXP: bufptr += snprintf(bufptr, end-bufptr, "i%x", INTEGER(item)[0]); break; 135 | case LGLSXP: bufptr += snprintf(bufptr, end-bufptr, "l%x", LOGICAL(item)[0]); break; 136 | case STRSXP: 137 | REPROTECT(item_ptr = STRING_ELT(item, 0), ix); 138 | bufptr += snprintf(bufptr, end-bufptr, "s%p", CHAR(item_ptr)); break; 139 | default: error("Unexpected type %s (this shouldn't happen)", type2char(TYPEOF(item))); 140 | } 141 | } else { 142 | /* for non-scalar vectors, represent the pointer */ 143 | REPROTECT(item_ptr = item, ix); 144 | bufptr += snprintf(bufptr, end-bufptr, "v%p", (void *)item_ptr); 145 | } 146 | done = 1; 147 | break; 148 | case VECSXP: 149 | REPROTECT(item_ptr = item, ix); 150 | bufptr += snprintf(bufptr, end-bufptr, "l%p", (void *)item_ptr); 151 | done = 1; 152 | break; 153 | case CLOSXP: 154 | REPROTECT(item_ptr = item, ix); 155 | bufptr += snprintf(bufptr, end-bufptr, "c_%p/%p/%p", 156 | (void *) FORMALS(item), 157 | (void *) BODY(item), 158 | (void *) CLOENV(item)); 159 | done = 1; 160 | break; 161 | case SYMSXP: 162 | case LANGSXP: 163 | case EXPRSXP: 164 | case BCODESXP: 165 | case BUILTINSXP: 166 | case SPECIALSXP: 167 | case NILSXP: 168 | /* We have an expression-ish, represent its pointer. */ 169 | REPROTECT(item_ptr = item, ix); 170 | bufptr += snprintf(bufptr, end-bufptr, "e%p", (void *)item_ptr); 171 | done = 1; 172 | break; 173 | default: 174 | error("Unexpected type %s", type2char(TYPEOF(item))); 175 | } 176 | } 177 | if (item_ptr != R_NilValue) { 178 | MARK_NOT_MUTABLE(item_ptr); 179 | } 180 | UNPROTECT(1); 181 | return item_ptr; 182 | } 183 | 184 | /* 185 | -*- previewing-build-command: '(previewing-run-R-unit-tests) 186 | */ 187 | -------------------------------------------------------------------------------- /src/init.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include // for NULL 4 | #include 5 | #include "vadr.h" 6 | 7 | /* FIXME: 8 | Check these declarations against the C/Fortran source code. 9 | */ 10 | 11 | static const R_CallMethodDef CallEntries[] = { 12 | {"_string_reps", (DL_FUNC) &_string_reps, 2}, 13 | {NULL, NULL, 0} 14 | }; 15 | 16 | void R_init_msgpack(DllInfo *dll) 17 | { 18 | R_registerRoutines(dll, NULL, CallEntries, NULL, NULL); 19 | R_useDynamicSymbols(dll, FALSE); 20 | } 21 | -------------------------------------------------------------------------------- /src/vadr.c: -------------------------------------------------------------------------------- 1 | #include "vadr.h" 2 | 3 | /* Assert that some object is a type. */ 4 | void assert_type(SEXP x, SEXPTYPE type) { 5 | if (TYPEOF(x) != type) { 6 | error("Expected %s, got %s", type2char(type), type2char(TYPEOF(x))); 7 | } 8 | } 9 | 10 | /* As above, with extra words about what thing is to be an expected type */ 11 | void assert_type3(SEXP x, SEXPTYPE type, const char *what) { 12 | if (TYPEOF(x) != type) { 13 | error("Expected %s in %s, got %s", 14 | type2char(type), what, type2char(TYPEOF(x))); 15 | } 16 | } 17 | 18 | int recycle_length(int i, int j) { 19 | if (MIN(i,j) == 0) return 0; 20 | int n = MAX(i,j); 21 | if ((n%i != 0) || (n%j != 0)) { 22 | warning("Longer vector length is not a multiple of shorter vector length"); 23 | } 24 | return n; 25 | } 26 | -------------------------------------------------------------------------------- /src/vadr.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #define MIN(x,y) ((x) < (y) ? (x) : (y)) 5 | #define MAX(x,y) ((x) > (y) ? (x) : (y)) 6 | #define MIN3(x, y, z) (MIN(x,(MIN(y,z)))) 7 | #define MAX3(x, y, z) (MAX(x,(MAX(y,z)))) 8 | 9 | void assert_type(SEXP, SEXPTYPE); 10 | void assert_type3(SEXP, SEXPTYPE, const char *); 11 | int recycle_length(int i, int j); 12 | SEXP allocate_dots(int length); 13 | 14 | SEXP _string_reps(SEXP); 15 | -------------------------------------------------------------------------------- /tests/testthat.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(memo) 3 | 4 | test_check("memo") 5 | -------------------------------------------------------------------------------- /tests/testthat/test-cache.R: -------------------------------------------------------------------------------- 1 | context("cache") 2 | library("digest") 3 | 4 | `%is%` <- expect_equal 5 | 6 | # For these tests we record "signals" when arguments are forced or an error occurs. 7 | signals <- c() 8 | signal <- function(x=".") { 9 | signals <<- c(signals, x) 10 | x 11 | } 12 | 13 | try_finally <- function(setup = NULL, code = NULL, teardown = NULL) { 14 | force(setup) 15 | on.exit(teardown) 16 | result <- code 17 | } 18 | 19 | with_clean_signals <- function(code = NULL) { 20 | try_finally( 21 | setup = { 22 | old_signals <- signals 23 | signals <<- c()}, 24 | teardown = { 25 | signals <<- old_signals 26 | }, 27 | code = code) 28 | } 29 | 30 | expect_signal <- function (code, pattern=".+") { 31 | with_clean_signals({ 32 | force(code) 33 | expect_match(paste0(signals, collapse=""), pattern) 34 | }) 35 | } 36 | 37 | expect_no_signal <- function(code, pattern="^$") { 38 | with_clean_signals({ 39 | force(code) 40 | expect_match(paste0(signals, collapse=""), pattern) 41 | }) 42 | } 43 | 44 | test_that("memo() memoizes a function.", { 45 | f <- memo(function(x) {signal(); x*2}) 46 | a <- 1:5 47 | b <- a 48 | expect_signal(f(a) %is% seq(2, 10, 2)) 49 | expect_no_signal(f(b) %is% seq(2, 10, 2)) 50 | }) 51 | 52 | test_that("Memoization is based on memory address, not value", { 53 | f <- memo(function(x) {signal(); x*2}, key="pointer_key") 54 | a <- 1:5 55 | f(a) 56 | #A sufficiently clever implementation of R may make the following fail. 57 | c <- a + 1 - 1 # i.e. identical object but a new copy. 58 | expect_signal(f(c) %is% seq(2, 10, 2)) 59 | }) 60 | 61 | test_that("Pointer Memoization does compare scalars by value.", { 62 | a <- 123481233783240 63 | b <- 123481233700000 + 83240 64 | f <- memo(function(x) {signal(); x*2}, key="pointer_key") 65 | expect_signal(f(a) %is% 246962467566480) 66 | expect_no_signal(f(b) %is% 246962467566480) 67 | }) 68 | 69 | test_that("Digest-based memoisation memoises on content", { 70 | f <- memo(function(x) {signal(); x*2}, key="digest_key") 71 | a <- 1:5 + 0 #R now has range objects???? 72 | expect_signal(f(a)) 73 | c <- a + 1 - 1 # i.e. identical object but a new copy. 74 | expect_no_signal(f(c) %is% seq(2, 10, 2)) 75 | }) 76 | 77 | with_trace <- function(what, tracer, where=topenv(parent.frame())) { 78 | force(where) 79 | function(arg) { 80 | suppressMessages(trace((what), (tracer), (where), print=FALSE)) 81 | tryCatch(arg, finally=suppressMessages(untrace((what), (where)))) 82 | } 83 | } 84 | 85 | test_that("Hybrid falls back on content but limits calls to digest()", local({ 86 | signalDigest <- function(x) {signal("D"); digest(x)} 87 | f <- memo( 88 | function(x) { 89 | signal("E"); 90 | x*2 91 | }, 92 | key="hybrid_key", 93 | digest=signalDigest) #"E" for evaluate 94 | a <- 1:5 + 0 #R now has range objects 95 | expect_signal(f(a), "DDE") 96 | expect_no_signal(f(a)) #digest not computed 97 | c <- a + 1 - 1 # i.e. identical object but a new copy. 98 | expect_signal(f(c) %is% seq(2, 10, 2), "D") #digest computes, not eval 99 | })) 100 | 101 | test_that("pointer and hybrid caches hold on to their arguments", local({ 102 | # test: a large argument can be used in digest_cache and then forgotten. 103 | # Same is not true of pointer_key or hybrid_key. 104 | memused <- function() sum(gc()[,2]) 105 | 106 | observe_keysize <- function(key) { 107 | f <- memo(sum, key=key) 108 | x1 = memused() 109 | arg <- runif(100000) 110 | s <- sum(arg) 111 | f(arg) %is% s 112 | rm(arg) 113 | x2 <- memused() 114 | x2 - x1 115 | } 116 | expect_true(observe_keysize("digest_key") < 0.5) 117 | expect_true(observe_keysize("pointer_key") > 0.5) 118 | expect_true(observe_keysize("hybrid_key") > 0.5) 119 | })) 120 | 121 | test_that("permanent cache", { 122 | 123 | ca <- permanent_cache() 124 | ac <- memo(as.character, cache=ca, key="digest_key") 125 | ac(1) %is% "1" 126 | ac(2) %is% "2" 127 | ac(2) %is% "2" 128 | 129 | cache_stats(ac) %is% list(size=Inf, used=2, hits=1, misses=2, expired=0) 130 | }) 131 | 132 | test_that("permanent cache get/set", { 133 | 134 | ca <- permanent_cache() 135 | ca("one", 1) 136 | ca("two", 2) 137 | ca("three", action="get", ifnotfound=NULL) %is% NULL 138 | ca("two", 3) %is% 2 139 | ca("two", 3, action="set") 140 | ca("two", 4) %is% 3 141 | ca("two", action="rm") #expire 142 | expect_false(ca("two", action="exists")) 143 | ca("two", 4) %is% 4 144 | expect_true(ca("two", action="exists")) 145 | 146 | ac <- memo(as.character, cache=ca, key="digest_key") 147 | cache_stats(ac) %is% list(size=Inf, used=2, hits=2, misses=3, expired=1) 148 | }) 149 | -------------------------------------------------------------------------------- /tests/testthat/test-hashmap.R: -------------------------------------------------------------------------------- 1 | context("hashmap") 2 | 3 | `%is%` <- expect_equal 4 | 5 | test_that("hash uses arbitrary keys", { 6 | 7 | x <- hashmap() 8 | x[[ "1" ]] <- "foo" 9 | x[[ quote(`1`) ]] <- "bar" 10 | x[[ 1 ]] <- "baz" 11 | x[[ 1L ]] <- "qux" 12 | x[[ NULL ]] <- "qux" 13 | x[[ NA ]] <- "quux" 14 | x[[ NA_character_ ]] <- "quuux" 15 | 16 | x[[ "1" ]] %is% "foo" 17 | x[[ quote(`1`) ]] %is% "bar" 18 | x[[ 1 ]] %is% "baz" 19 | x[[ 1L ]] %is% "qux" 20 | x[[ NULL ]] %is% "qux" 21 | x[[ NA ]] %is% "quux" 22 | x[[ NA_character_ ]] %is% "quuux" 23 | 24 | x[list("1", NULL)] %is% list("foo", "qux") 25 | 26 | length(keys(x)) %is% 7 27 | expect_setequal(keys(x), 28 | list(list("1"), list(quote(`1`)), list(1), 29 | list(1L), list(NULL), list(NA), list(NA_character_))) 30 | expect_setequal(values(x), 31 | list("foo", "bar", "baz", "qux", "qux", "quux", "quuux")) 32 | 33 | }) 34 | 35 | test_that("hash [] and []<- and pairs()", { 36 | x <- hashmap() 37 | 38 | x[[ 1, "three" ]] <- "first" 39 | x[[ 4, "seven" ]] <- "second" 40 | x[c(1, 4), list("three", "seven")] %is% list("first", "second") 41 | 42 | x[list(1, 5), c("three", "seven")] <- c("refirst", "third") 43 | x[c(1, 4), list("three", "seven")] %is% list("refirst", "second") 44 | 45 | expect_true(hasKey(x, 1, "three")) 46 | expect_false(hasKey(x, 4, "three")) 47 | 48 | x[list(1, 5), c("three", "seven")] <- c("refirst", "third") 49 | 50 | expect_setequal(to_pairs(x), 51 | list(list(key = list(4, "seven"), value = "second"), 52 | list(key = list(1, "three"), value = "refirst"), 53 | list(key = list(5, "seven"), value = "third"))) 54 | 55 | y <- from_pairs(to_pairs(x)) 56 | expect_setequal(to_pairs(x), to_pairs(y)) 57 | 58 | dropKey(x, 1, "three") 59 | expect_equal(length(keys(x)), 2) 60 | 61 | }) 62 | -------------------------------------------------------------------------------- /tests/testthat/test-lru.R: -------------------------------------------------------------------------------- 1 | context("LRU cache") 2 | 3 | `%is%` <- expect_equal 4 | 5 | test_that("cache stores values", { 6 | store <- lru_cache() 7 | store("foo", 1) 8 | store("bar", 2) 9 | store("baz", 3) 10 | expect_equal(1, store("foo", stop("should not be evaluated"))) 11 | expect_equal(2, store("bar", stop("should not be evaluated"))) 12 | expect_equal(3, store("baz", stop("should not be evaluated"))) 13 | expect_equal(2, store("bar", 4)) 14 | }) 15 | 16 | test_that("cache incrementally expires old values", { 17 | store <- lru_cache(3) 18 | store("foo", 1) 19 | store("bar", 2) 20 | store("baz", 3) 21 | store("qux", 4) 22 | expect_equal(4, store("qux", stop("should not be evaluated"))) 23 | expect_equal(3, store("baz", stop("should not be evaluated"))) 24 | expect_equal(2, store("bar", stop("should not be evaluated"))) 25 | expect_equal(100, store("foo", 100)) 26 | }) 27 | 28 | test_that("lru_cache expires least recently accessed values", { 29 | store <- lru_cache(3) 30 | store("foo", 1) 31 | store("bar", 2) 32 | store("baz", 3) 33 | 34 | #from end of list 35 | expect_equal(1, store("foo", stop("should not be evaluated"))) 36 | expect_equal(4, store("qux", 4)) 37 | expect_equal(100, store("bar", 100)) 38 | 39 | #from middle of list 40 | expect_equal(4, store("qux", stop("should not be evaluated"))) 41 | expect_equal(200, store("baz", 200)) 42 | }) 43 | 44 | test_that("permanent_cache does not expire values", { 45 | store <- permanent_cache() 46 | for (i in 1:10001) { 47 | store(as.character(i), i) 48 | } 49 | fn <- memo(identity, cache=store) 50 | cache_stats(fn)$used %is% 10001 51 | }) 52 | 53 | test_that("cache_stats extracts stats", { 54 | fib <- function(x) if (x <= 1) 1 else fib(x-1) + fib(x-2) 55 | fib <- memo(fib, key=pointer_key) 56 | fib(30) 57 | cache_stats(fib) %is% 58 | list(size = 5000, used = 31, hits = 28, misses = 31, expired = 0) 59 | }) 60 | -------------------------------------------------------------------------------- /vignettes/README.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "The 'memo' package" 3 | author: "Peter Meilstrup" 4 | date: "`r Sys.Date()`" 5 | output: rmarkdown::html_vignette 6 | vignette: > 7 | %\VignetteIndexEntry{The 'memo' package} 8 | %\VignetteEngine{knitr::rmarkdown} 9 | \usepackage[utf8]{inputenc} 10 | --- 11 | #memo 12 | ====== 13 | ```{r include=FALSE} 14 | knitr::opts_chunk$set(cache=FALSE) 15 | ``` 16 | The `memo` package implements a simple in-memory cache for the results of a function. If you have an expensive function that is being called repeatedly with the same inputs, `memo` can help. 17 | 18 | ## Fibonnacci example 19 | 20 | ```{r} 21 | fib <- function(n) if (n <= 1) 1 else fib(n-1) + fib(n-2) 22 | sapply(0:9, fib) 23 | ``` 24 | 25 | This recursive implementation corresponds closely to the way the sequence is defined in math texts, but it has a performance problem. The problem is that as you ask for values further down the sequence, the computation becomes inordinately slow due to recursion. To demonstrate the issue, we can try counting every time `fib` is 26 | called: 27 | 28 | ```{r} 29 | count <- 0 30 | fib <- function(n) { 31 | count <<- count+1 32 | if (n <= 1) 1 else fib(n-1) + fib(n-2) 33 | } 34 | 35 | counted_fib <- function(n) { 36 | count <<- 0 37 | c(n=n, result=fib(n), calls=count) 38 | } 39 | 40 | t(sapply(0:16, counted_fib)) 41 | ``` 42 | 43 | The number of calls increases unreasonably. This is because, for instance, `fib(6)` calls both `fib(5)` and `fib(4)`, but `fib(5)` also calls `fib(4)`. The second call to `fib(4)` is wasted work. And this pattern goes on -- the two calls to `fib(4)` lead to _four_ calls to `fib(2)`. Every time you increment `n` by one, the number of calls roughly doubles. (Clearly, there are more efficient algorithms for computing the Fibbonacci sequence, but this is a toy example, where `fib` stands in for some expensive function that is being called repeatedly.) 44 | 45 | One way to cut down on wasted effort would be to check whether `fib(n)` has already been computed for a given `n`. If it has, `fib` can just return that value instead of starting over. This is called "memoizing." The `memo` package can [automatically][] create a memoized version of a given function, just by wrapping the function definition in `memo()`: 46 | 47 | [automatically]: https://en.wikipedia.org/wiki/Memoization#Automatic_memoization 48 | 49 | ```{r} 50 | library(memo) 51 | 52 | count <- 0 53 | fib <- memo(function(n) { 54 | count <<- count+1 55 | if (n <= 1) 1 else fib(n-1) + fib(n-2) 56 | }) 57 | 58 | counted_fib(16) 59 | ``` 60 | Now, computing `fib(16)` only takes 17 calls. And if we call again, it remembers the previous answer and doesn't make any new calls: 61 | ```{r} 62 | counted_fib(16) 63 | ``` 64 | Each successive value then only takes two calls: 65 | ```{r} 66 | t(sapply(17:30, counted_fib)) 67 | ``` 68 | 69 | The tradeoff for this speedup is the memory used to store previous results. By default `memo` will remember the 5000 most recently used results; to adjust that limit you can change the `cache` option: 70 | 71 | ```{r eval=FALSE} 72 | fib <- memo(cache=lru_cache(5000), function () {...}) 73 | ``` 74 | 75 | The Fibonacci sequence being kind of a toy example, memoization has a variety of uses, such as: 76 | 77 | * Caching the results of expensive database queries, for instance in Shiny apps where many users may make identical queries. 78 | * Algorithms for path finding (dynamic programming) and parsing. 79 | * Simulations such as [Cellular automata](https://en.wikipedia.org/wiki/Hashlife). 80 | --------------------------------------------------------------------------------