├── .Rbuildignore ├── .gitignore ├── .travis.yml ├── DESCRIPTION ├── LICENSE ├── Makefile ├── NAMESPACE ├── R ├── bulk.R ├── common.R ├── config.R ├── env.R ├── environment.R ├── exceptions.R ├── files.R ├── heartbeat.R ├── match_fun.R ├── monitor.R ├── object_cache.R ├── observer.R ├── queue.R ├── redis.R ├── rrqlapply.R ├── status.R ├── task.R ├── task_bundle.R ├── utils.R ├── utils_assert.R ├── worker.R └── worker_spawn.R ├── README.md ├── autodoc.R ├── doc └── how.md ├── docker └── Dockerfile ├── inst └── scripts │ ├── rrqueue_worker │ └── rrqueue_worker_tee ├── man-roxygen ├── observer.yml ├── observer_methods.R ├── queue.yml ├── queue_methods.R ├── task.yml ├── task_bundle.yml ├── task_bundle_methods.R └── task_methods.R ├── man ├── enqueue_bulk.Rd ├── install_scripts.Rd ├── observer.Rd ├── queue.Rd ├── rrqlapply.Rd ├── task.Rd ├── task_bundle.Rd ├── worker.Rd ├── worker_spawn.Rd ├── worker_stop.Rd └── yaml_env.Rd ├── tests ├── testthat.R └── testthat │ ├── config.yml │ ├── config2.yml │ ├── config3.yml │ ├── helper-rrqueue.R │ ├── myfuns.R │ ├── test-files.R │ ├── test-heartbeat.R │ ├── test-match-fun.R │ ├── test-queue.R │ ├── test-rrqlapply.R │ ├── test-task-bundle.R │ └── test-worker.R ├── update_web.sh └── vignettes ├── introduction.Rmd ├── messages.Rmd ├── myfuns.R └── src ├── introduction.R ├── messages.R └── myfuns.R /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^notes\.md$ 2 | ^tmp\.R$ 3 | ^Makefile$ 4 | ^\.travis\.yml$ 5 | ^doc$ 6 | ^ignore$ 7 | ^docker$ 8 | ^man-roxygen$ 9 | ^autodoc.R$ 10 | ^vignettes/src$ 11 | ^inst/staticdocs$ 12 | ^inst/web$ 13 | ^update_web\.sh$ 14 | ^rrqueue_.+\.tar\.gz$ 15 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .Rproj.user 2 | .Rhistory 3 | .RData 4 | notes.md 5 | tmp.R 6 | *.log 7 | ignore 8 | inst/doc 9 | inst/staticdocs 10 | inst/web 11 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | # Sample .travis.yml for R projects from https://github.com/craigcitro/r-travis 2 | 3 | language: c 4 | 5 | sudo: required 6 | 7 | services: 8 | - redis-server 9 | 10 | before_install: 11 | - curl -OL http://raw.github.com/craigcitro/r-travis/master/scripts/travis-tool.sh 12 | - chmod 755 ./travis-tool.sh 13 | - ./travis-tool.sh bootstrap 14 | 15 | # NOTE: libhiredis-dev is for RedisHeartbeat until I get linking working 16 | install: 17 | - ./travis-tool.sh install_deps 18 | - ./travis-tool.sh install_aptget libhiredis-dev 19 | - ./travis-tool.sh install_aptget libcurl4-openssl-dev 20 | - ./travis-tool.sh github_package gaborcsardi/progress 21 | - ./travis-tool.sh github_package ropensci/RedisAPI 22 | - ./travis-tool.sh github_package richfitz/redux 23 | - ./travis-tool.sh github_package richfitz/RedisHeartbeat 24 | - ./travis-tool.sh github_package richfitz/storr 25 | - ./travis-tool.sh github_package richfitz/ids 26 | 27 | script: ./travis-tool.sh run_tests 28 | 29 | after_failure: 30 | - ./travis-tool.sh dump_logs 31 | 32 | notifications: 33 | email: 34 | on_success: change 35 | on_failure: change 36 | slack: bdkd:hY2eBCaH3bbsvNwlWwOfFdfr 37 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: rrqueue 2 
| Title: Scalable Job Queues for R with 'Redis' 3 | Version: 0.2.1 4 | Author: "Rich FitzJohn [aut, cre]" 5 | Maintainer: Richard G. FitzJohn 6 | Description: Scalable queuing system for R, using 'Redis'. 7 | Depends: 8 | R (>= 3.1.0) 9 | License: BSD_2_clause + file LICENSE 10 | LazyData: true 11 | SystemRequirements: Redis 12 | Imports: 13 | R6, 14 | RedisAPI (>= 0.3.0), 15 | RedisHeartbeat (>= 0.3.0), 16 | crayon, 17 | digest, 18 | docopt, 19 | ids (>= 0.0.2), 20 | progress, 21 | redux (>= 0.3.0), 22 | storr (>= 1.0.0) 23 | Suggests: 24 | testthat, 25 | tools, 26 | knitr, 27 | rmarkdown, 28 | yaml 29 | VignetteBuilder: knitr 30 | RoxygenNote: 5.0.1 31 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | YEAR: 2015 2 | COPYRIGHT HOLDER: Richard G. FitzJohn 3 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | PACKAGE := $(shell grep '^Package:' DESCRIPTION | sed -E 's/^Package:[[:space:]]+//') 2 | RSCRIPT = Rscript --no-init-file 3 | 4 | all: install 5 | 6 | test: 7 | ${RSCRIPT} -e 'library(methods); devtools::test()' 8 | 9 | test_all: 10 | REMAKE_TEST_INSTALL_PACKAGES=true make test 11 | 12 | autodoc: 13 | ${RSCRIPT} autodoc.R process 14 | 15 | roxygen: 16 | @mkdir -p man 17 | ${RSCRIPT} -e "library(methods); devtools::document()" 18 | 19 | staticdocs: 20 | @mkdir -p inst/staticdocs 21 | Rscript -e "library(methods); staticdocs::build_site()" 22 | rm -f vignettes/*.html 23 | 24 | website: staticdocs 25 | ./update_web.sh 26 | 27 | install: 28 | R CMD INSTALL . 29 | 30 | build: 31 | R CMD build . 32 | 33 | check: build 34 | _R_CHECK_CRAN_INCOMING_=FALSE R CMD check --as-cran --no-manual `ls -1tr ${PACKAGE}*gz | tail -n1` 35 | @rm -f `ls -1tr ${PACKAGE}*gz | tail -n1` 36 | @rm -rf ${PACKAGE}.Rcheck 37 | 38 | check_all: 39 | REMAKE_TEST_INSTALL_PACKAGES=true make check 40 | 41 | README.md: README.Rmd 42 | Rscript -e 'library(methods); devtools::load_all(); knitr::knit("README.Rmd")' 43 | sed -i.bak 's/[[:space:]]*$$//' README.md 44 | rm -f $@.bak 45 | 46 | vignettes/introduction.Rmd: vignettes/src/introduction.R 47 | ${RSCRIPT} -e 'library(sowsear); sowsear("$<", output="$@")' 48 | vignettes/messages.Rmd: vignettes/src/messages.R 49 | ${RSCRIPT} -e 'library(sowsear); sowsear("$<", output="$@")' 50 | vignettes: vignettes/introduction.Rmd vignettes/messages.Rmd 51 | ${RSCRIPT} -e 'library(methods); devtools::build_vignettes()' 52 | 53 | .PHONY: all test document install vignettes 54 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | S3method(print,worker_info) 4 | export(enqueue_bulk) 5 | export(enqueue_bulk_submit) 6 | export(install_scripts) 7 | export(observer) 8 | export(queue) 9 | export(rrqlapply) 10 | export(rrqlapply_submit) 11 | export(task) 12 | export(task_bundle) 13 | export(worker) 14 | export(worker_spawn) 15 | export(worker_stop) 16 | export(yaml_env) 17 | import(RedisAPI) 18 | importFrom(R6,R6Class) 19 | importFrom(crayon,make_style) 20 | importFrom(digest,digest) 21 | importFrom(progress,progress_bar) 22 | importFrom(storr,storr) 23 | -------------------------------------------------------------------------------- /R/bulk.R: 
-------------------------------------------------------------------------------- 1 | ##' Bulk queuing. Similar in some respects to things like 2 | ##' \code{\link{apply}}. This is an experiment to deal with the 3 | ##' pattern where you have a big pile of parameters in a data.frame to 4 | ##' loop over, by applying a function to each row. 5 | ##' 6 | ##' There are two modes here; selected with \code{do.call}. With 7 | ##' \code{do.call=FALSE}, the default, the function behaves similarly 8 | ##' to \code{apply(X, 1, FUN)}; that is, the function is applied to 9 | ##' each row of the data.frame (as a list): 10 | ##' \code{FUN(as.list(X[1,]))}, \code{FUN(as.list(X[2,]))}, and so on. 11 | ##' The alternative mode (\code{do.call=TRUE}) is where the 12 | ##' \code{data.frame} contains \emph{parameters} to the function 13 | ##' \code{FUN} so is equivalent to \code{FUN(X[1,1], X[1,2], ...)}. This 14 | ##' is similar to (but not implemented as) running: \code{do.call("FUN", 15 | ##' as.list(X[1,]))}. 16 | ##' 17 | ##' Be careful, this one is going to change, including the name 18 | ##' probably. You have been warned. 19 | ##' 20 | ##' @title Bulk queuing 21 | ##' @param X An object to loop over. If a list, we'll loop over the 22 | ##' elements of the list, duplicating the behaviour of 23 | ##' \code{\link{rrqlapply}} except for not handling dots. If a 24 | ##' \code{data.frame} we'll loop over the \emph{rows}. Matrices are 25 | ##' not supported. 26 | ##' 27 | ##' @param FUN A function. Will be found in the same way as 28 | ##' \code{FUN} within \code{\link{rrqlapply}}. 29 | ##' 30 | ##' @param rrq An rrq object 31 | ##' 32 | ##' @param do.call Behave like (but not via) \code{\link{do.call}}; 33 | ##' given an element \code{el}, rather than run \code{FUN(el)} run 34 | ##' \code{FUN(el[[1]], el[[2]], ...)}. 35 | ##' 36 | ##' @param group Name of a group for generated task ids. If not 37 | ##' included, an ID will be generated. 38 | ##' 39 | ##' @param timeout Total length of time to wait for tasks to be 40 | ##' completed. The default is to wait forever (like \code{lapply}). 41 | ##' 42 | ##' @param time_poll Time to poll for tasks. Must be an integer. 43 | ##' Because of how the function is implemented, R will be 44 | ##' unresponsive for this long each iteration (unless results are 45 | ##' returned), so the default of 1s should be reasonable. 46 | ##' 47 | ##' @param delete_tasks Delete tasks on successful finish? 48 | ##' 49 | ##' @param progress_bar Display a progress bar? 50 | ##' 51 | ##' @param env Environment to look in 52 | ##' 53 | ##' @export 54 | enqueue_bulk <- function(X, FUN, rrq, 55 | do.call=FALSE, group=NULL, 56 | timeout=Inf, time_poll=1, delete_tasks=FALSE, 57 | progress_bar=TRUE, env=parent.frame()) { 58 | obj <- enqueue_bulk_submit(X, FUN, rrq, do.call, group, progress_bar, env) 59 | tryCatch(obj$wait(timeout, time_poll, progress_bar), 60 | interrupt=function(e) obj) 61 | } 62 | 63 | ## There's going to be a lot of overlap here with rrqlapply but that's 64 | ## OK for now; we'll work through and remove it shortly. The biggest 65 | ## issue is how to deal with dots. In general I'd rather not have 66 | ## that bit of complexity here. I guess with dots we'd have: 67 | ## 68 | ## f(el, ...) 69 | ## f(el[[1]], el[[2]], ...)
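## A minimal usage sketch of the two modes, with hypothetical names:
## `obj` would be a queue handle (e.g. from queue("myq")) and f, g
## functions available to the workers; assumes a running Redis server
## and at least one worker:
##
## X <- data.frame(a=1:3, b=c(0.1, 0.2, 0.3))
## enqueue_bulk(X, f, obj) # runs f(list(a=1, b=0.1)), etc.
## enqueue_bulk(X, g, obj, do.call=TRUE) # runs g(a=1, b=0.1), etc.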
70 | 71 | ##' @export 72 | ##' @rdname enqueue_bulk 73 | enqueue_bulk_submit <- function(X, FUN, rrq, 74 | do.call=FALSE, group=NULL, 75 | progress_bar=TRUE, env=parent.frame()) { 76 | if (is.data.frame(X)) { 77 | X <- df_to_list(X) 78 | } else if (!is.list(X)) { 79 | stop("X must be a data.frame or list") 80 | } 81 | 82 | fun <- find_fun(FUN, env, rrq) 83 | n <- length(X) 84 | 85 | ## See rrqlapply_submit for treatment of key_complete. The rest of 86 | ## this is a bit more complicated than rrqlapply because we allow 87 | ## switching between f(x) and f(**x). 88 | tasks <- vector("list", length(X)) 89 | e <- environment() 90 | key_complete <- NULL 91 | group <- create_group(group, progress_bar) 92 | p <- progress(total=n, show=progress_bar, prefix="submitting: ") 93 | for (i in seq_len(n)) { 94 | if (do.call) { 95 | expr <- as.call(c(list(fun), X[[i]])) 96 | } else { 97 | expr <- as.call(list(fun, X[[i]])) 98 | } 99 | tasks[[i]] <- rrq$enqueue_(expr, e, key_complete=key_complete, group=group) 100 | if (is.null(key_complete)) { 101 | key_complete <- tasks[[i]]$key_complete 102 | } 103 | p() 104 | } 105 | 106 | task_bundle(rrq, tasks, group, names(X)) 107 | } -------------------------------------------------------------------------------- /R/common.R: -------------------------------------------------------------------------------- 1 | rrqueue_keys <- function(queue_name=NULL, worker_name=NULL) { 2 | if (is.null(queue_name)) { 3 | rrqueue_keys_global() 4 | } else if (is.null(worker_name)) { 5 | c(rrqueue_keys_global(), 6 | rrqueue_keys_queue(queue_name)) 7 | } else { 8 | c(rrqueue_keys_global(), 9 | rrqueue_keys_queue(queue_name), 10 | rrqueue_keys_worker(queue_name, worker_name)) 11 | } 12 | } 13 | 14 | rrqueue_protocol <- function() { 15 | packageVersion("rrqueue")[1, c(1:2)] 16 | } 17 | 18 | rrqueue_keys_global <- function() { 19 | list(rrqueue_queues = "rrqueue:queues") 20 | } 21 | 22 | rrqueue_keys_queue <- function(queue) { 23 | list(queue_name = queue, 24 | 25 | workers_name = sprintf("%s:workers:name", queue), 26 | workers_status = sprintf("%s:workers:status", queue), 27 | workers_task = sprintf("%s:workers:task", queue), 28 | workers_info = sprintf("%s:workers:info", queue), 29 | 30 | tasks_counter = sprintf("%s:tasks:counter", queue), 31 | tasks_expr = sprintf("%s:tasks:expr", queue), 32 | tasks_status = sprintf("%s:tasks:status", queue), 33 | tasks_time_sub = sprintf("%s:tasks:time:sub", queue), 34 | tasks_time_beg = sprintf("%s:tasks:time:beg", queue), 35 | tasks_time_end = sprintf("%s:tasks:time:end", queue), 36 | tasks_worker = sprintf("%s:tasks:worker", queue), 37 | tasks_result = sprintf("%s:tasks:result", queue), 38 | tasks_envir = sprintf("%s:tasks:envir", queue), 39 | tasks_complete = sprintf("%s:tasks:complete", queue), 40 | tasks_redirect = sprintf("%s:tasks:redirect", queue), 41 | tasks_group = sprintf("%s:tasks:group", queue), 42 | 43 | envirs_contents = sprintf("%s:envirs:contents", queue), 44 | envirs_files = sprintf("%s:envirs:files", queue), 45 | 46 | files = sprintf("%s:files", queue), 47 | objects = sprintf("%s:objects", queue)) 48 | } 49 | 50 | ## NOTE: Or alternatively, key_tasks?
51 | rrqueue_key_queue <- function(queue, envir) { 52 | sprintf("%s:tasks:%s:id", queue, envir) 53 | } 54 | 55 | rrqueue_keys_worker <- function(queue, worker) { 56 | list(message = rrqueue_key_worker_message(queue, worker), 57 | response = rrqueue_key_worker_response(queue, worker), 58 | log = rrqueue_key_worker_log(queue, worker), 59 | heartbeat = rrqueue_key_worker_heartbeat(queue, worker), 60 | envir = rrqueue_key_worker_envir(queue, worker)) 61 | } 62 | 63 | ## Special key for worker-specific commands to be published to. 64 | rrqueue_key_worker_message <- function(queue, worker) { 65 | sprintf("%s:workers:%s:message", queue, worker) 66 | } 67 | rrqueue_key_worker_response <- function(queue, worker) { 68 | sprintf("%s:workers:%s:response", queue, worker) 69 | } 70 | rrqueue_key_worker_log <- function(queue, worker) { 71 | sprintf("%s:workers:%s:log", queue, worker) 72 | } 73 | rrqueue_key_worker_heartbeat <- function(queue, worker) { 74 | sprintf("%s:workers:%s:heartbeat", queue, worker) 75 | } 76 | rrqueue_key_worker_envir <- function(queue, worker) { 77 | sprintf("%s:workers:%s:envir", queue, worker) 78 | } 79 | rrqueue_key_task_complete <- function(queue, task_id) { 80 | sprintf("%s:tasks:%s:complete", queue, task_id) 81 | } 82 | rrqueue_key_worker_alive <- function(queue) { 83 | sprintf("%s:workers:alive:%s", queue, ids::adjective_animal()) 84 | } 85 | 86 | ## TODO: come up with a way of scheduling object deletion. Things 87 | ## that are created here should be deleted immediately after the 88 | ## function ends (perhaps on exit). *Objects* should only be deleted 89 | ## if they have no more dangling pointers. 90 | ## 91 | ## So we'll register "groups" and schedule prefix deletion once the 92 | ## group is done. But for now, don't do any of that. 
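## For illustration, with a hypothetical queue "myq" and worker "w1",
## the key helpers above produce keys like:
##
## rrqueue_keys_queue("myq")$tasks_status # "myq:tasks:status"
## rrqueue_key_worker_message("myq", "w1") # "myq:workers:w1:message"
## rrqueue_key_worker_heartbeat("myq", "w1") # "myq:workers:w1:heartbeat"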
93 | prepare_expression <- function(expr) { 94 | fun <- expr[[1]] 95 | args <- expr[-1] 96 | 97 | is_call <- vlapply(args, is.call) 98 | if (any(is_call)) { 99 | stop("complex expressions not yet supported") 100 | } 101 | is_symbol <- vlapply(args, is.symbol) 102 | if (any(is_symbol)) { 103 | object_names <- vcapply(args[is_symbol], as.character) 104 | } else { 105 | object_names <- NULL 106 | } 107 | 108 | list(expr=expr, object_names=object_names) 109 | } 110 | 111 | save_expression <- function(dat, task_id, envir, object_cache) { 112 | object_names <- dat$object_names 113 | if (!is.null(object_names)) { 114 | if (!all(ok <- exists(object_names, envir, inherits=FALSE))) { 115 | stop("not all objects found: ", 116 | paste(object_names[!ok], collapse=", ")) 117 | } 118 | names(object_names) <- paste0(task_object_prefix(task_id), object_names) 119 | dat$object_names <- object_names 120 | 121 | object_cache$import(envir, object_names) 122 | } 123 | 124 | object_to_string(dat) 125 | } 126 | 127 | restore_expression <- function(dat, envir, object_cache) { 128 | dat <- string_to_object(dat) 129 | if (!is.null(object_cache) && length(dat$object_names) > 0L) { 130 | object_cache$export(envir, invert_names(dat$object_names)) 131 | } 132 | dat$expr 133 | } 134 | 135 | parse_worker_name <- function(str) { 136 | res <- strsplit(str, "::", fixed=TRUE) 137 | if (any(viapply(res, length) != 2)) { 138 | stop("parse error") 139 | } 140 | list(host=vcapply(res, "[[", 1), 141 | pid=vcapply(res, "[[", 2)) 142 | } 143 | 144 | parse_worker_log <- function(log) { 145 | re <- "^([0-9]+) ([^ ]+) ?(.*)$" 146 | ok <- grepl(re, log) 147 | if (!all(ok)) { 148 | stop("Corrupt log") 149 | } 150 | time <- as.integer(sub(re, "\\1", log)) 151 | command <- sub(re, "\\2", log) 152 | message <- lstrip(sub(re, "\\3", log)) 153 | data.frame(time, command, message, stringsAsFactors=FALSE) 154 | } 155 | 156 | task_object_prefix <- function(task_id) { 157 | sprintf(".%s:", task_id) 158 | } 159 | 160 | version_info <- function(package=.packageName) { 161 | descr <- packageDescription(package) 162 | version <- package_version(descr$Version) 163 | repository <- descr$Repository 164 | sha <- descr$RemoteSha 165 | list(package=package, 166 | version=version, 167 | repository=repository, 168 | sha=sha) 169 | } 170 | 171 | version_string <- function() { 172 | data <- version_info() 173 | if (!is.null(data$repository)) { 174 | qual <- data$repository 175 | } else if (!is.null(data$sha)) { 176 | qual <- data$sha 177 | } else { 178 | qual <- "LOCAL" 179 | } 180 | sprintf("%s [%s]", data$version, qual) 181 | } 182 | 183 | rrqueue_scripts <- function(con) { 184 | set_hashes <- 'local id = ARGV[table.getn(ARGV)] 185 | for i, k in ipairs(KEYS) do 186 | redis.call("HSET", k, id, ARGV[i]) 187 | end' 188 | 189 | ## Assume that ARGV[1] is the task id and KEYS[1] is the queue. Then 190 | ## ARGV[2..] and KEYS[2..] 
are the key / value pairs 191 | job_submit <- ' 192 | local task_id = ARGV[1] 193 | for i, k in ipairs(KEYS) do 194 | if i > 1 then 195 | redis.call("HSET", k, task_id, ARGV[i]) 196 | end 197 | end 198 | redis.call("RPUSH", KEYS[1], task_id)' 199 | 200 | job_incr <- 'return {redis.call("INCR", KEYS[1]), redis.call("TIME")}' 201 | RedisAPI::redis_scripts(con, 202 | scripts=list(set_hashes=set_hashes, 203 | job_incr=job_incr, 204 | job_submit=job_submit)) 205 | } 206 | 207 | message_prepare <- function(id, command, args) { 208 | object_to_string(list(id=id, command=command, args=args)) 209 | } 210 | response_prepare <- function(id, command, result) { 211 | object_to_string(list(id=id, command=command, result=result)) 212 | } 213 | -------------------------------------------------------------------------------- /R/config.R: -------------------------------------------------------------------------------- 1 | load_config <- function(filename) { 2 | keys_common <- c("queue_name") 3 | keys_worker <- c("heartbeat_period", "heartbeat_expire", "key_worker_alive") 4 | keys_queue <- c("packages", "sources") 5 | 6 | ## Note that this is *not* the defaults to the underlying worker 7 | ## functions; I'll rework the docopt to read those soon. 8 | defaults_worker <- as.list(formals(worker))[c(keys_common, keys_worker)] 9 | defaults_queue <- as.list(formals(queue))[c(keys_common, keys_queue)] 10 | if (!isTRUE(all.equal(defaults_worker[keys_common], 11 | defaults_queue[keys_common]))) { 12 | stop("This is a bug.") 13 | } 14 | defaults <- c(defaults_worker, defaults_queue[keys_queue]) 15 | defaults$queue_name <- NULL 16 | defaults$redis <- list() 17 | 18 | config <- yaml_read(filename) 19 | 20 | extra <- setdiff(names(config), 21 | c(keys_common, keys_worker, keys_queue, "redis")) 22 | if (length(extra) > 0L) { 23 | warning(sprintf("Unknown keys in %s: %s", 24 | filename, paste(extra, collapse=", "))) 25 | } 26 | 27 | ## Some validation: 28 | assert_character_or_null(config$packages) 29 | assert_character_or_null(config$sources) 30 | 31 | ret <- modifyList(defaults, config, keep.null=TRUE) 32 | } 33 | 34 | ## This needs fixing in a few places though I don't think all are 35 | ## tested. Eventually we move away from host/port pairs and go with 36 | ## redis_config. Once that happens the interface here is heaps easier 37 | ## and basically can go away. 38 | ## 39 | ## Testing like in test-worker will be needed for observer/queue which 40 | ## also use this. 41 | tmp_fix_redis_config <- function(cfg) { 42 | if (!is.null(cfg$redis)) { 43 | cfg[paste0("redis_", names(cfg$redis))] <- cfg$redis 44 | cfg$redis <- NULL 45 | } 46 | cfg 47 | } 48 | -------------------------------------------------------------------------------- /R/env.R: -------------------------------------------------------------------------------- 1 | ##' Load environment variables from a yaml file. This is a hack for a 2 | ##' project. It may change and may move package. \code{callr} would 3 | ##' be a better fit probably, but \code{callr} doesn't pull in 4 | ##' \code{yaml} yet so I don't know that it's a good fit. 5 | ##' 6 | ##' The yaml file must be sets of key/value pairs of simple data 7 | ##' types. 
Something like: 8 | ##' 9 | ##' \preformatted{ 10 | ##' REDIS_HOST: localhost 11 | ##' } 12 | ##' 13 | ##' Alternatively, for use with \code{section}, add an extra layer of nesting: 14 | ##' 15 | ##' \preformatted{ 16 | ##' local: 17 | ##' REDIS_HOST: localhost 18 | ##' remote: 19 | ##' REDIS_HOST: redis.marathon.mesos 20 | ##' } 21 | ##' 22 | ##' @title Load environment variables from a yaml file 23 | ##' @param filename Name of the file 24 | ##' @param section An optional section of the file to load 25 | ##' @export 26 | ##' 27 | yaml_env <- function(filename, section=NULL) { 28 | data <- yaml_read(filename) 29 | if (!is.null(section)) { 30 | if (section %in% names(data)) { 31 | data <- data[[section]] 32 | } else { 33 | stop(sprintf("section %s not found", section)) 34 | } 35 | } 36 | check <- function(x) { 37 | is.atomic(x) && length(x) == 1L 38 | } 39 | ok <- vlapply(data, check) 40 | if (!all(ok)) { 41 | stop("Unexpected type for ", paste(names(data)[!ok], collapse=", ")) 42 | } 43 | if (length(data) > 0L) { 44 | do.call("Sys.setenv", data) 45 | } 46 | invisible(names(data)) 47 | } -------------------------------------------------------------------------------- /R/environment.R: -------------------------------------------------------------------------------- 1 | create_environment <- function(packages, sources, 2 | env=new.env(parent=.GlobalEnv)) { 3 | load_packages(packages) 4 | for (file in sources) { 5 | do_source(file, env, chdir=TRUE, keep.source=FALSE) 6 | } 7 | env 8 | } 9 | 10 | ## An alternative here is to trace some of the connection code, but 11 | ## that might be harder. The issue here is that we're going to be 12 | ## missing things like csv files, configuration files, etc, that might 13 | ## determine the state of the system. I don't think that there's much 14 | ## we can do about that though. For cases where we're running locally 15 | ## it'll be fine.
16 | create_environment2 <- function(packages, sources, env, global) { 17 | load_packages(packages) 18 | source_files <- character(0) 19 | for (file in sources) { 20 | source_files <- c( 21 | source_files, 22 | do_source(file, env, chdir=TRUE, keep.source=FALSE, 23 | source_fun=sys_source)) 24 | if (global) { 25 | do_source(file, .GlobalEnv, chdir=TRUE, keep.source=FALSE, 26 | source_fun=sys_source) 27 | } 28 | } 29 | source_files 30 | } 31 | 32 | load_packages <- function(packages) { 33 | for (p in packages) { 34 | library(p, character.only=TRUE, quietly=TRUE) 35 | } 36 | } 37 | 38 | do_source <- function(file, ..., source_fun=sys.source) { 39 | catch_source <- function(e) { 40 | stop(sprintf("while sourcing %s:\n%s", file, e$message), 41 | call.=FALSE) 42 | } 43 | tryCatch(source_fun(file, ...), error=catch_source) 44 | } 45 | 46 | envirs_list <- function(con, keys) { 47 | as.character(con$HKEYS(keys$envirs_contents)) 48 | } 49 | 50 | envirs_contents <- function(con, keys, envir_ids=NULL) { 51 | dat <- from_redis_hash(con, keys$envirs_contents, envir_ids) 52 | nok <- vlapply(dat, is.na) 53 | if (any(nok)) { 54 | stop(sprintf("Environment %s not found", 55 | paste(sprintf("'%s'", envir_ids[nok]), collapse=", "))) 56 | } 57 | lapply(dat, string_to_object) 58 | } 59 | -------------------------------------------------------------------------------- /R/exceptions.R: -------------------------------------------------------------------------------- 1 | WorkerError <- function(worker, message, ..., 2 | task_id=NULL, 3 | task_status=NULL, 4 | class=character(0), 5 | call=NULL) { 6 | structure(list(worker=worker, 7 | task_id=task_id, 8 | task_status=task_status, ..., 9 | message=message, call=call), 10 | class=c(class, "WorkerError", "error", "condition")) 11 | } 12 | 13 | ## This happens with no task 14 | WorkerStop <- function(worker, message) { 15 | WorkerError(worker, message, class="WorkerStop") 16 | } 17 | 18 | ## TODO: these might log more than once? 19 | WorkerTaskMissing <- function(worker, task_id) { 20 | msg <- sprintf("Task %s/%s not found", worker$name, task_id) 21 | worker$log("TASK_MISSING", task_id) 22 | WorkerError(worker, msg, 23 | task_id=task_id, task_status=TASK_MISSING, 24 | class="WorkerTaskMissing") 25 | } 26 | 27 | WorkerTaskError <- function(e) { 28 | class(e) <- c("WorkerTaskError", "try-error", class(e)) 29 | e 30 | } 31 | 32 | UnfetchableTask <- function(task_id, task_status) { 33 | structure(list(task_id=task_id, 34 | task_status=task_status), 35 | class=c("UnfetchableTask", "error", "condition")) 36 | } 37 | -------------------------------------------------------------------------------- /R/files.R: -------------------------------------------------------------------------------- 1 | files_pack <- function(cache, ..., files=c(...)) { 2 | ## For now, assume text files only. 3 | pack1 <- function(filename) { 4 | ## NOTE: this duplicates the content addressable storage in storr, 5 | ## I think. But the label is not going to change and we want to 6 | ## manage the mapping in rrqueue. 
7 | contents <- read_file_to_string(filename) 8 | hash <- hash_string(contents) 9 | cache$set(hash, contents) 10 | setNames(hash, filename) 11 | } 12 | ret <- vcapply(files, pack1) 13 | class(ret) <- "files_pack" 14 | ret 15 | } 16 | 17 | files_unpack <- function(cache, pack, path=tempfile()) { 18 | unpack1 <- function(x) { 19 | filename <- file.path(path, x) 20 | dir.create(dirname(filename), FALSE, TRUE) 21 | contents <- cache$get(pack[[x]]) 22 | write_string_to_file(contents, filename) 23 | } 24 | lapply(names(pack), unpack1) 25 | invisible(path) 26 | } 27 | -------------------------------------------------------------------------------- /R/heartbeat.R: -------------------------------------------------------------------------------- 1 | ## Heartbeat support, but with a slightly different interface to 2 | ## RedisHeartbeat and falling back on something informative if we have 3 | ## no support. 4 | heartbeat <- function(con, key, period, expire) { 5 | RedisHeartbeat::heartbeat(key, period, 6 | expire=expire, value=expire, 7 | con$config()) 8 | } 9 | 10 | heartbeat_time <- function(obj) { 11 | status <- obj$tasks_status() 12 | task_ids <- names(status[status == TASK_RUNNING]) 13 | if (length(task_ids) > 0L) { 14 | w_running <- as.character(obj$con$HMGET(obj$keys$tasks_worker, task_ids)) 15 | key <- rrqueue_key_worker_heartbeat(obj$queue_name, w_running) 16 | d <- data.frame(worker_id=w_running, 17 | task_id=task_ids, 18 | time=vnapply(key, obj$con$PTTL), 19 | stringsAsFactors=FALSE) 20 | rownames(d) <- NULL 21 | } else { 22 | d <- data.frame(worker_id=character(0), 23 | task_id=character(0), 24 | time=numeric(0), 25 | stringsAsFactors=FALSE) 26 | } 27 | d 28 | } 29 | 30 | identify_orphan_tasks <- function(obj) { 31 | d <- heartbeat_time(obj) 32 | i <- d$time == -2 33 | task_id <- d$task_id[i] 34 | worker_id <- d$worker_id[i] 35 | 36 | con <- obj$con 37 | keys <- obj$keys 38 | time <- redis_time(obj$con) 39 | for (i in seq_along(task_id)) { 40 | con$HSET(keys$tasks_time_end, task_id[[i]], time) 41 | con$HSET(keys$tasks_status, task_id[[i]], TASK_ORPHAN) 42 | con$HSET(keys$workers_status, worker_id[[i]], WORKER_LOST) 43 | } 44 | 45 | setNames(task_id, worker_id) 46 | } 47 | -------------------------------------------------------------------------------- /R/match_fun.R: -------------------------------------------------------------------------------- 1 | ## Will be prone to false positives but worth a shot 2 | has_namespace <- function(str) { 3 | grepl("::", str, fixed=TRUE) 4 | } 5 | 6 | split_namespace <- function(str) { 7 | res <- strsplit(str, "::", fixed=TRUE)[[1]] 8 | if (length(res) != 2L) { 9 | stop("Not a namespace-qualified variable") 10 | } 11 | res 12 | } 13 | 14 | exists_function_here <- function(name, envir) { 15 | exists(name, envir, mode="function", inherits=FALSE) 16 | } 17 | exists_function_ns <- function(name, ns) { 18 | if (ns %in% .packages()) { 19 | exists_function_here(name, getNamespace(ns)) 20 | } else { 21 | FALSE 22 | } 23 | } 24 | 25 | ## This is going to search back and find the location of a function by 26 | ## descending through environments recursively. 
27 | find_function_name <- function(name, envir) { 28 | if (identical(envir, emptyenv())) { 29 | stop("Did not find function") 30 | } 31 | if (exists_function_here(name, envir)) { 32 | envir 33 | } else { 34 | find_function_name(name, parent.env(envir)) 35 | } 36 | } 37 | 38 | find_function_value <- function(fun, envir) { 39 | if (identical(envir, emptyenv())) { 40 | stop("Did not find function") 41 | } 42 | name <- find_function_in_envir(fun, envir) 43 | if (!is.null(name)) { 44 | list(name=name, envir=envir) 45 | } else { 46 | find_function_value(fun, parent.env(envir)) 47 | } 48 | } 49 | 50 | ## Determine the name of a function, given its value and an 51 | ## environment to find it in. 52 | find_function_in_envir <- function(fun, envir) { 53 | pos <- ls(envir) 54 | i <- scapply(pos, function(x) identical(fun, envir[[x]]), NULL) 55 | if (is.null(i)) i else pos[[i]] 56 | } 57 | 58 | ## TODO: consider `::` and `:::` as special names? 59 | ## NOTE: This differs from match_fun_symbol because it allows skipping 60 | ## up the search path to identify functions in specific parts of the 61 | ## search path. If a namespace-qualified value is given, we can 62 | ## ignore envir entirely. 63 | match_fun_name <- function(str, envir) { 64 | if (has_namespace(str)) { 65 | ret <- split_namespace(str) 66 | if (!exists_function_ns(ret[[2]], ret[[1]])) { 67 | stop("Did not find function in loaded namespace") 68 | } 69 | ret 70 | } else { 71 | name <- str 72 | fun_envir <- find_function_name(name, envir) 73 | match_fun_sanitise(name, fun_envir) 74 | } 75 | } 76 | 77 | match_fun_symbol <- function(sym, envir) { 78 | name <- as.character(sym) 79 | match_fun_name(name, envir) 80 | } 81 | 82 | ## This one is much harder and might take a while. 83 | ## 84 | ## TODO: Don't deal here with the case that the function is in 85 | ## anything other than the environment that its enclosure points at; 86 | ## that's going to skip memoized functions, etc. It also is going to 87 | ## miss anonymous functions for now. But start with this bit I think. 88 | ## 89 | ## TODO: This is going to miss things like extra attributes added to a 90 | ## function, but that's going in the category of "users making things 91 | ## difficult". 92 | match_fun_value <- function(fun, envir) { 93 | res <- find_function_value(fun, envir) 94 | match_fun_sanitise(res$name, res$envir) 95 | } 96 | 97 | ## TODO: might be worth also passing in 'envir' as the starting 98 | ## environment; then we can determine if we're looking at: 99 | ## namespace 100 | ## global env 101 | ## given env 102 | ## other env 103 | ## TODO: Might also return the environment here as a named list so 104 | ## that we can do some further faffing? 105 | match_fun_sanitise <- function(name, fun_envir) { 106 | ns <- environmentName(fun_envir) 107 | ## Don't treat the global environment specially here: 108 | if (identical(ns, "R_GlobalEnv")) { 109 | ns <- "" 110 | } else { 111 | ## Might be best here to treat all environments as non-namespace 112 | ## unless we get a 'package:' name? 113 | ns <- sub("^package:", "", ns) 114 | } 115 | ret <- c(ns, name) 116 | if (ns == "") { 117 | attr(ret, "envir") <- fun_envir 118 | } 119 | ret 120 | } 121 | 122 | ## TODO: throughout here we'll need to have the functions loaded,
124 | ## 125 | ## TODO: separate out the NSE from here, like this: 126 | ## match_fun <- function(fun, envir) { 127 | ## fun_sub <- substitute(fun) 128 | ## if (is.name(fun_sub)) { 129 | ## match_fun_symbol(fun_sub, envir) 130 | ## } else { 131 | ## match_fun_(fun, envir) 132 | ## } 133 | ## } 134 | match_fun <- function(fun, envir) { 135 | if (is.character(fun)) { 136 | match_fun_name(fun, envir) 137 | } else if (is.function(fun)) { 138 | match_fun_value(fun, envir) 139 | } else { 140 | stop("Invalid input") 141 | } 142 | } 143 | 144 | match_fun_rrqueue <- function(fun, envir, envir_rrqueue) { 145 | dat <- match_fun(fun, envir) 146 | if (dat[[1]] == "") { 147 | ## Now, try to find the function in rrqueue's environment: 148 | ## TODO: This might not really work; we want to look in the right 149 | ## environment here... 150 | if (exists_function_here(dat[[2]], envir_rrqueue)) { 151 | name <- dat[[2]] 152 | ok <- identical(deparse(attr(dat, "envir")[[name]]), 153 | deparse(envir_rrqueue[[name]])) 154 | if (!ok) { 155 | stop("Function found in given and rrqueue environment do not match") 156 | } 157 | } else { 158 | stop("Function not found in rrqueue environment") 159 | } 160 | } 161 | dat 162 | } 163 | 164 | ## TODO: For functions that are not found, we can try and serialise 165 | ## them I think. That's going to work best for things like 166 | ## `function(x) bar(x, a, b)` but it might be hard to pick up all the 167 | ## locals without doing some serious messing around. 168 | -------------------------------------------------------------------------------- /R/monitor.R: -------------------------------------------------------------------------------- 1 | ## TODO: things to monitor specific completeness queues. 2 | monitor_status <- function(obs) { 3 | message(monitor_status_workers(obs)) 4 | message(monitor_status_tasks(obs)) 5 | } 6 | 7 | monitor_status_workers <- function(obs) { 8 | cols <- cbind(c(WORKER_IDLE, "yellow"), 9 | c(WORKER_BUSY, "green"), 10 | c(WORKER_LOST, "red")) 11 | cols <- setNames(cols[2,], cols[1,]) 12 | status <- obs$workers_status() 13 | monitor_status_string("workers", status, cols) 14 | } 15 | 16 | monitor_status_tasks <- function(obs) { 17 | cols <- cbind(c(TASK_PENDING, "grey"), 18 | c(TASK_RUNNING, "green"), 19 | c(TASK_COMPLETE, "blue"), 20 | c(TASK_ERROR, "red"), 21 | c(TASK_ORPHAN, "hotpink"), 22 | c(TASK_REDIRECT, "orange")) 23 | cols <- setNames(cols[2,], cols[1,]) 24 | status <- obs$tasks_status() 25 | monitor_status_string("tasks ", status, cols, 1:4) 26 | } 27 | 28 | ## TODO: Some of the nice colouring code from the worker would spice 29 | ## this up nicely. 30 | monitor_status_string <- function(name, status, cols, i=NULL) { 31 | n <- table(factor(status, levels=names(cols))) 32 | if (!is.null(i)) { 33 | n <- n[i] 34 | } 35 | sprintf("%s [%s] %s", 36 | name, 37 | paste(n, collapse=" | "), 38 | pretty_blocks(status, cols)) 39 | } 40 | 41 | ## No need for a class - just run this function until it gets 42 | ## escaped. 43 | monitor <- function(..., period=10) { 44 | obs <- observer(...) 
45 | repeat { 46 | message(sprintf("[ %s ]", as.character(Sys.time()))) 47 | monitor_status(obs) 48 | Sys.sleep(period) 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /R/object_cache.R: -------------------------------------------------------------------------------- 1 | ##' @importFrom storr storr 2 | object_cache <- function(prefix, con) { 3 | dr <- storr::driver_redis_api(prefix, con) 4 | storr::storr(dr) 5 | } 6 | 7 | ## TODO: weirdly this is the same thing as object_cache, but a 8 | ## different prefix. 9 | file_cache <- function(prefix, con) { 10 | dr <- storr::driver_redis_api(prefix, con) 11 | storr::storr(dr) 12 | } 13 | -------------------------------------------------------------------------------- /R/observer.R: -------------------------------------------------------------------------------- 1 | ##' Creates an observer for an rrqueue. This is the "base class" for 2 | ##' a couple of different objects in rrqueue; notably the 3 | ##' \code{\link{queue}} object. So any method listed here also works 4 | ##' within \code{queue} objects. 5 | ##' 6 | ##' Most of the methods of the \code{observer} object are extremely 7 | ##' simple and involve fetching information from the database about 8 | ##' the state of tasks, environments and workers. 9 | ##' 10 | ##' The method and argument names try to give hints about the sort of 11 | ##' things they expect; a method asking for \code{task_id} expects a 12 | ##' single task identifier, while those asking for \code{task_ids} 13 | ##' expect a vector of task identifiers (and if they have a default 14 | ##' \code{NULL} then will default to returning information for 15 | ##' \emph{all} task identifiers). Similarly, a method starting 16 | ##' \code{task_} applies to one task while a method starting 17 | ##' \code{tasks_} applies to multiple. 18 | ##' 19 | ##' @template observer_methods 20 | ##' @title Creates an observer for an rrqueue 21 | ##' @param queue_name Name of the queue, if not given then it will 22 | ##' check with the given Redis server to see if there is just a 23 | ##' single queue known. In that case we connect to that queue. 24 | ##' Otherwise we error and list possible queues. 25 | ##' @param redis_host Redis hostname 26 | ##' @param redis_port Redis port number 27 | ##' @param config Configuration file of key/value pairs in yaml 28 | ##' format. See the package README for an example. If given, 29 | ##' additional arguments to this function override values in the 30 | ##' file which in turn override defaults of this function. 
31 | ##' @export 32 | observer <- function(queue_name=NULL, 33 | redis_host="127.0.0.1", redis_port=6379, 34 | config=NULL) { 35 | if (!is.null(config)) { 36 | given <- as.list(sys.call())[-1] # -1 is the function name 37 | cfg <- tmp_fix_redis_config(load_config(config)) 38 | dat <- modifyList(cfg, given) 39 | if (is.null(dat$queue_name)) { 40 | stop("queue_name must be given or specified in config") 41 | } 42 | observer(dat$queue_name, dat$redis_host, dat$redis_port, NULL) 43 | } else { 44 | if (is.null(queue_name)) { 45 | queue_names <- queues(redis_host, redis_port) 46 | if (length(queue_names) == 1L) { 47 | queue_name <- queue_names 48 | message("Connecting to queue: ", queue_name) 49 | } else if (length(queue_names) == 0L) { 50 | stop("No queues found") 51 | } else { 52 | stop("More than 1 queue found: specify one of ", 53 | paste(queue_names, collapse=", ")) 54 | } 55 | } 56 | .R6_observer$new(queue_name, redis_host, redis_port) 57 | } 58 | } 59 | 60 | ## I think the correct design pattern is one that is totally dense 61 | ## that takes queue_name, redis_host, redis_port and which sets up the 62 | ## connection and keys. 63 | ## 64 | ## Other things can either inherit from this or compose with it. 65 | ## 66 | ## NOTE: There are no methods here that modify the queue. 67 | .R6_observer <- R6::R6Class( 68 | "observer", 69 | public=list( 70 | queue_name=NULL, 71 | con=NULL, 72 | keys=NULL, 73 | files=NULL, 74 | objects=NULL, 75 | 76 | initialize=function(queue_name, redis_host, redis_port) { 77 | self$queue_name <- queue_name 78 | self$con <- redis_connection(redis_host, redis_port) 79 | self$keys <- rrqueue_keys(self$queue_name) 80 | self$files <- file_cache(self$keys$files, self$con) 81 | self$objects <- object_cache(self$keys$objects, self$con) 82 | }, 83 | 84 | ## 1. 
Tasks: 85 | tasks_list=function() { 86 | tasks_list(self$con, self$keys) 87 | }, 88 | tasks_status=function(task_ids=NULL, follow_redirect=FALSE) { 89 | tasks_status(self$con, self$keys, task_ids, follow_redirect) 90 | }, 91 | tasks_overview=function() { 92 | tasks_overview(self$con, self$keys) 93 | }, 94 | tasks_times=function(task_ids=NULL, unit_elapsed="secs") { 95 | tasks_times(self$con, self$keys, task_ids, unit_elapsed) 96 | }, 97 | tasks_envir=function(task_ids=NULL) { 98 | tasks_envir(self$con, self$keys, task_ids) 99 | }, 100 | task_get=function(task_id) { 101 | task(self, task_id) 102 | }, 103 | task_result=function(task_id, follow_redirect=FALSE) { 104 | task_result(self$con, self$keys, task_id, follow_redirect) 105 | }, 106 | 107 | ## (task groups) 108 | tasks_groups_list=function() { 109 | tasks_groups_list(self$con, self$keys) 110 | }, 111 | tasks_in_groups=function(groups) { 112 | tasks_in_groups(self$con, self$keys, groups) 113 | }, 114 | tasks_lookup_group=function(task_ids=NULL) { 115 | tasks_lookup_group(self$con, self$keys, task_ids) 116 | }, 117 | task_bundle_get=function(groups=NULL, task_ids=NULL) { 118 | task_bundle_get(self, groups, task_ids) 119 | }, 120 | 121 | ## 2: environments 122 | envirs_list=function() { 123 | envirs_list(self$con, self$keys) 124 | }, 125 | envirs_contents=function(envir_ids=NULL) { 126 | envirs_contents(self$con, self$keys, envir_ids) 127 | }, 128 | envir_workers=function(envir_id, worker_ids=NULL) { 129 | envir_workers(self$con, self$keys, envir_id, worker_ids) 130 | }, 131 | 132 | ## 3: workers 133 | workers_len=function() { 134 | workers_len(self$con, self$keys) 135 | }, 136 | workers_list=function() { 137 | workers_list(self$con, self$keys) 138 | }, 139 | workers_list_exited=function() { 140 | workers_list_exited(self$con, self$keys) 141 | }, 142 | workers_status=function(worker_ids=NULL) { 143 | workers_status(self$con, self$keys, worker_ids) 144 | }, 145 | workers_times=function(worker_ids=NULL, unit_elapsed="secs") { 146 | workers_times(self$con, self$keys, worker_ids, unit_elapsed) 147 | }, 148 | workers_log_tail=function(worker_ids=NULL, n=1) { 149 | workers_log_tail(self$con, self$keys, worker_ids, n) 150 | }, 151 | workers_task_id=function(worker_ids=NULL) { 152 | workers_task_id(self$con, self$keys, worker_ids) 153 | }, 154 | ## NOTE: this returns data that is not necessarily fresh: 155 | workers_info=function(worker_ids=NULL) { 156 | workers_info(self$con, self$keys, worker_ids) 157 | }, 158 | worker_envir=function(worker_id) { 159 | worker_envir(self$con, self$keys, worker_id) 160 | }, 161 | workers_running=function(worker_ids=NULL) { 162 | workers_running(self$con, self$keys, worker_ids) 163 | } 164 | )) 165 | -------------------------------------------------------------------------------- /R/redis.R: -------------------------------------------------------------------------------- 1 | ##' @import RedisAPI 2 | redis_connection <- function(con, port=6379) { 3 | if (inherits(con, "redis_api")) { 4 | con 5 | } else if (is.null(con)) { 6 | redux::hiredis(host="127.0.0.1", port=port) 7 | } else if (is.character(con)) { 8 | redux::hiredis(host=con, port=port) 9 | } else { 10 | stop("Cannot create a Redis connection from object") 11 | } 12 | } 13 | 14 | wait_until_hash_field_exists <- function(con, key, field, every=.05, 15 | timeout=as.difftime(5, units="secs")) { 16 | t0 <- Sys.time() 17 | while (Sys.time() - t0 < timeout) { 18 | if (con$HEXISTS(key, field)) { 19 | return() 20 | } 21 | Sys.sleep(every) 22 | } 23 | 
stop(sprintf("field '%s' did not appear in time", field)) 24 | } 25 | 26 | ## Similar to the above, listen on a bunch of hash fields for 27 | ## something to exist. 28 | poll_hash_keys <- function(con, keys, field, wait, every=0.05) { 29 | if (wait <= 0) { 30 | res <- lapply(keys, con$HGET, field) 31 | } else { 32 | timeout <- as.difftime(wait, units="secs") 33 | t0 <- Sys.time() 34 | ok <- logical(length(keys)) 35 | res <- vector("list", length(keys)) 36 | while (Sys.time() - t0 < timeout) { 37 | exists <- as.logical(vnapply(keys[!ok], con$HEXISTS, field)) 38 | if (any(exists)) { 39 | i <- which(!ok)[exists] 40 | res[i] <- lapply(keys[i], con$HGET, field) 41 | ok[i] <- TRUE 42 | if (all(ok)) { 43 | break 44 | } 45 | } 46 | ## This should not be called on the last way through... 47 | Sys.sleep(every) 48 | } 49 | } 50 | names(res) <- keys 51 | res 52 | } 53 | 54 | clean_pttl <- function(x) { 55 | i <- x > 0 56 | x[i] <- x[i] / 1000 57 | x 58 | } 59 | 60 | ## Way more complicated, simulation of BLPOP with no timeout on 61 | ## multiple lists. Not anything safe. 62 | lpop_mult <- function(con, keys) { 63 | for (k in keys) { 64 | res <- con$LPOP(k) 65 | if (!is.null(res)) { 66 | return(list(k, res)) 67 | } 68 | } 69 | NULL 70 | } 71 | -------------------------------------------------------------------------------- /R/rrqlapply.R: -------------------------------------------------------------------------------- 1 | ##' Parallel version of lapply using Redis queuing 2 | ##' @title Parallel version of lapply using Redis queuing 3 | ##' @param X A vector 4 | ##' @param FUN The name of a function to apply to each element of the 5 | ##' list. \emph{this will change!}. 6 | ##' @param rrq An rrq object 7 | ##' @param ... Additional arguments passed to \code{FUN} 8 | ##' @param group Name of a group for generated task ids. If not 9 | ##' included, an ID will be generated. 10 | ##' @param timeout Total length of time to wait for tasks to be 11 | ##' completed. The default is to wait forever (like \code{lapply}). 12 | ##' 13 | ##' @param time_poll Time to poll for tasks. Must be an integer. 14 | ##' Because of how the function is implemented, R will be 15 | ##' unresponsive for this long each iteration (unless results are 16 | ##' returned), so the default of 1s should be reasonable. 17 | ##' @param delete_tasks Delete tasks on successful finish? 18 | ##' @param progress_bar Display a progress bar? 19 | ##' @param env Environment to look in. 20 | ##' @export 21 | rrqlapply <- function(X, FUN, rrq, ..., group=NULL, 22 | timeout=Inf, time_poll=1, delete_tasks=FALSE, 23 | progress_bar=TRUE, env=parent.frame()) { 24 | ## TODO: I've set progress_bar to be true on both submitting and 25 | ## retrieving, but the submit phase *should* be fast enough that 26 | ## it's not necessary. That's not true if we're running Redis over 27 | ## a slow connection though (which we do with the clusterous 28 | ## approach). This adds some overhead but I think it'll do for now. 29 | obj <- rrqlapply_submit(X, FUN, rrq, ..., group=group, 30 | progress_bar=progress_bar, env=env) 31 | tryCatch(obj$wait(timeout, time_poll, progress_bar), 32 | interrupt=function(e) obj) 33 | } 34 | 35 | ##' @export 36 | ##' @rdname rrqlapply 37 | rrqlapply_submit <- function(X, FUN, rrq, ..., group=NULL, 38 | progress_bar=TRUE, env=parent.frame()) { 39 | fun <- find_fun(FUN, env, rrq) 40 | DOTS <- list(...) 
41 | 42 | n <- length(X) 43 | 44 | ## NOTE: the key_complete treatment here avoids possible race 45 | ## condition/implementation dependence by giving all tasks the same 46 | ## key_complete and making that shared with whatever the first gets 47 | ## (which is done via INCR). 48 | tasks <- vector("list", n) 49 | e <- environment() 50 | key_complete <- NULL 51 | group <- create_group(group, progress_bar) 52 | p <- progress(total=n, show=progress_bar, prefix="submitting: ") 53 | for (i in seq_len(n)) { 54 | expr <- as.call(c(list(fun, X[[i]]), DOTS)) 55 | tasks[[i]] <- rrq$enqueue_(expr, e, key_complete=key_complete, group=group) 56 | if (is.null(key_complete)) { 57 | key_complete <- tasks[[i]]$key_complete 58 | } 59 | p() 60 | } 61 | task_bundle(rrq, tasks, group, names(X)) 62 | } 63 | 64 | ## This is hopefully going to be enough: 65 | find_fun <- function(FUN, env, rrq) { 66 | dat <- match_fun_rrqueue(FUN, env, rrq$envir) 67 | if (dat[[1]] == "") { 68 | as.name(dat[[2]]) 69 | } else { 70 | call("::", as.name(dat[[1]]), as.name(dat[[2]])) 71 | } 72 | } -------------------------------------------------------------------------------- /R/status.R: -------------------------------------------------------------------------------- 1 | ## Start of bits for querying the state of the system 2 | ## 3 | ## TODO: This becomes list_queues(), for consistency with the rest of 4 | ## the system. 5 | queues <- function(redis_host=NULL, redis_port=6379) { 6 | con <- redis_connection(redis_host, redis_port) 7 | as.character(con$SMEMBERS("rrqueue:queues")) 8 | } 9 | -------------------------------------------------------------------------------- /R/task.R: -------------------------------------------------------------------------------- 1 | ##' Create a task handle object. This is a "pointer" to a task and 2 | ##' can be used to retrieve information about status, running times, 3 | ##' expression and the result of the task once complete. Generally 4 | ##' you do not need to make a task object as they will be created for 5 | ##' you by things like the \code{task_get} method of the 6 | ##' \code{\link{observer}} and \code{\link{queue}} objects. 7 | ##' 8 | ##' Tasks have a \emph{unique identifier}; these are unique within a 9 | ##' queue and are implemented as an incrementing integer. However, 10 | ##' this is an implementation detail and should not be relied on. The 11 | ##' identifier is represented as a \emph{character string} rather than 12 | ##' an integer in most places. 13 | ##' 14 | ##' Tasks exist in one of a number of \emph{statuses}. See the 15 | ##' \code{status} method below for a list of possible statuses and 16 | ##' their interpretation. 17 | ##' 18 | ##' @template task_methods 19 | ##' @title Create a task handle 20 | ##' @param obj A \code{queue} or \code{observer} object. 21 | ##' @param task_id Task identifier 22 | ##' @param key_complete If known, specify the \code{key_complete}, 23 | ##' otherwise we look it up on creation.
24 | ##' @export 25 | task <- function(obj, task_id, key_complete=NULL) { 26 | .R6_task$new(obj, task_id, key_complete) 27 | } 28 | 29 | ## First, the ideal lifecycle: 30 | ## * after submission a job is pending (time_sub) 31 | TASK_PENDING <- "PENDING" 32 | ## * after it is picked up by a worker it is running (time_beg) 33 | TASK_RUNNING <- "RUNNING" 34 | ## * after it is finished by a worker it is complete or error (time_end) 35 | TASK_COMPLETE <- "COMPLETE" 36 | TASK_ERROR <- "ERROR" 37 | 38 | ## Alternatively: 39 | ## worker node died 40 | TASK_ORPHAN <- "ORPHAN" 41 | ## orphaned task was requeued 42 | TASK_REDIRECT <- "REDIRECT" 43 | ## An unknown task 44 | TASK_MISSING <- "MISSING" 45 | 46 | .R6_task <- R6::R6Class( 47 | "task", 48 | 49 | public=list( 50 | con=NULL, 51 | id=NULL, 52 | keys=NULL, 53 | key_complete=NULL, 54 | 55 | initialize=function(obj, id, key_complete=NULL) { 56 | self$con <- obj$con 57 | self$keys <- obj$keys 58 | 59 | self$id <- as.character(id) 60 | if (is.null(key_complete)) { 61 | key_complete <- self$con$HGET(self$keys$tasks_complete, id) 62 | } 63 | self$key_complete <- key_complete 64 | }, 65 | 66 | ## TODO: new methods: 67 | ## - drop / cancel / kill [hmm] 68 | 69 | ## TODO: These could be active bindings, but that might require a 70 | ## new R6 to CRAN to work nicely. 71 | status=function(follow_redirect=FALSE) { 72 | unname(tasks_status(self$con, self$keys, self$id, follow_redirect)) 73 | }, 74 | 75 | result=function(follow_redirect=FALSE) { 76 | task_result(self$con, self$keys, self$id, follow_redirect) 77 | }, 78 | 79 | expr=function(locals=FALSE) { 80 | task_expr(self$con, self$keys, self$id, 81 | if (locals) object_cache(self$keys$objects, self$con)) 82 | }, 83 | 84 | envir=function() { 85 | unname(tasks_envir(self$con, self$keys, self$id)) 86 | }, 87 | 88 | times=function(unit_elapsed="secs") { 89 | tasks_times(self$con, self$keys, self$id, unit_elapsed) 90 | }, 91 | 92 | wait=function(timeout, every=0.05) { 93 | task_wait(self$con, self$keys, self$id, timeout, every) 94 | } 95 | )) 96 | 97 | ## TODO: This is going to hit status too many times. Don't worry 98 | ## about this for now, but if speed becomes important this is a 99 | ## reasonable place to look. 100 | ## 101 | ## TODO: Scripting this is a little tricky because of the redirect 102 | ## loop; to do that in Lua requires implementing most of the work in 103 | ## tasks_status in Lua; that's going to be more work than is worth it 104 | ## right now, IMO.
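## For illustration, assuming `obj` is an existing observer or queue
## object and "7" is the id of a submitted task, a handle behaves like:
##
## t <- task(obj, "7")
## t$status() # "PENDING", then "RUNNING", then "COMPLETE" or "ERROR"
## t$times() # submitted/started/finished times plus waiting/running
## t$wait(10) # block for up to 10 seconds for the result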
105 | task_result <- function(con, keys, task_id, 106 | follow_redirect=FALSE, sanitise=FALSE) { 107 | status <- tasks_status(con, keys, task_id, follow_redirect=FALSE) 108 | 109 | if (follow_redirect && status == TASK_REDIRECT) { 110 | task_id <- task_redirect_target(con, keys, task_id) 111 | status <- tasks_status(con, keys, task_id, follow_redirect=FALSE) 112 | } 113 | 114 | if (status == TASK_COMPLETE || status == TASK_ERROR) { 115 | string_to_object(con$HGET(keys$tasks_result, task_id)) 116 | } else if (sanitise) { 117 | UnfetchableTask(task_id, status) 118 | } else { 119 | stop(sprintf("task %s is unfetchable: %s", task_id, status)) 120 | } 121 | } 122 | 123 | task_redirect_target <- function(con, keys, task_id) { 124 | to <- con$HGET(keys$tasks_redirect, task_id) 125 | if (is.null(to)) { 126 | task_id 127 | } else { 128 | task_redirect_target(con, keys, to) 129 | } 130 | } 131 | 132 | task_expr <- function(con, keys, task_id, object_cache=NULL) { 133 | task_expr <- con$HGET(keys$tasks_expr, task_id) 134 | if (!is.null(object_cache)) { 135 | e <- new.env(parent=baseenv()) 136 | expr <- restore_expression(task_expr, e, object_cache) 137 | attr(expr, "envir") <- e 138 | expr 139 | } else { 140 | restore_expression(task_expr, NULL, NULL) 141 | } 142 | } 143 | 144 | tasks_list <- function(con, keys) { 145 | as.character(con$HKEYS(keys$tasks_status)) 146 | } 147 | 148 | tasks_status <- function(con, keys, task_ids=NULL, follow_redirect=FALSE) { 149 | ret <- from_redis_hash(con, keys$tasks_status, task_ids, 150 | as.character, TASK_MISSING) 151 | if (follow_redirect) { 152 | task_ids <- names(ret) 153 | i <- ret == TASK_REDIRECT 154 | if (any(i)) { 155 | task2_id <- vcapply(task_ids[i], 156 | function(t) task_redirect_target(con, keys, t)) 157 | ret[i] <- unname(tasks_status(con, keys, task2_id, FALSE)) 158 | } 159 | } 160 | ret 161 | } 162 | 163 | tasks_overview <- function(con, keys, task_ids=NULL) { 164 | lvls <- c(TASK_PENDING, TASK_RUNNING, TASK_COMPLETE, TASK_ERROR) 165 | status <- tasks_status(con, keys, task_ids) 166 | lvls <- c(lvls, setdiff(unique(status), lvls)) 167 | table(factor(status, lvls)) 168 | } 169 | 170 | tasks_envir <- function(con, keys, task_ids=NULL) { 171 | from_redis_hash(con, keys$tasks_envir, task_ids) 172 | } 173 | 174 | tasks_times <- function(con, keys, task_ids=NULL, unit_elapsed="secs") { 175 | ## TODO: This could make a lot of requests; might be worth 176 | ## thinking about a little... 177 | f <- function(key) { 178 | from_redis_hash(con, key, task_ids, redis_time_to_r) 179 | } 180 | if (is.null(task_ids)) { 181 | task_ids <- tasks_list(con, keys) 182 | } 183 | ret <- data.frame(task_id = task_ids, 184 | submitted = f(keys$tasks_time_sub), 185 | started = f(keys$tasks_time_beg), 186 | finished = f(keys$tasks_time_end), 187 | stringsAsFactors=FALSE) 188 | now <- redis_time_to_r(redis_time(con)) 189 | started2 <- ret$started 190 | finished2 <- ret$finished 191 | finished2[is.na(finished2)] <- started2[is.na(started2)] <- now 192 | ret$waiting <- as.numeric(started2 - ret$submitted, unit_elapsed) 193 | ret$running <- as.numeric(finished2 - ret$started, unit_elapsed) 194 | ret$idle <- as.numeric(now - ret$finished, unit_elapsed) 195 | ret 196 | } 197 | 198 | ## Lookup functions. 
199 | ## First, find the names of extant groups: 200 | tasks_groups_list <- function(con, keys) { 201 | unique(as.character(con$HVALS(keys$tasks_group))) 202 | } 203 | 204 | ## Then, the tasks associated with a given group: 205 | tasks_in_groups <- function(con, keys, groups) { 206 | ## TODO: This should be done with HSCAN to do it "properly", but 207 | ## that should probably move into the RedisAPI before I try. 208 | groups_hash <- from_redis_hash(con, keys$tasks_group) 209 | names(groups_hash)[groups_hash %in% groups] 210 | } 211 | 212 | ## TODO: not tested anywhere yet. 213 | tasks_lookup_group <- function(con, keys, task_ids=NULL) { 214 | if (is.null(task_ids)) { 215 | task_ids <- tasks_list(con, keys) 216 | } 217 | ## TODO: this is not how we usually do this... 218 | ## TODO: there are a lot of possible edge cases here that should be 219 | ## tested, especially missing values 220 | groups <- from_redis_hash(con, keys$tasks_group) 221 | setNames(groups[task_ids], task_ids) 222 | } 223 | 224 | tasks_set_group <- function(con, keys, task_ids, group, 225 | exists_action="stop") { 226 | ## Alternatively we could recycle? 227 | if (!is.null(group)) { 228 | assert_scalar_character(group) 229 | } 230 | exists_action <- match_value(exists_action, 231 | c("stop", "warn", "pass", "overwrite")) 232 | ## should check that the ids are valid I think. 233 | ## This is pretty nasty: 234 | if (!is.null(group) && exists_action != "overwrite") { 235 | cur <- from_redis_hash(con, keys$tasks_group, task_ids) 236 | ok <- is.na(cur) | cur == group 237 | if (!all(ok)) { 238 | if (exists_action != "pass") { 239 | msg <- paste0("Groups already exist for tasks: ", 240 | paste(task_ids[!ok], collapse=", ")) 241 | if (exists_action == "stop") { 242 | stop(msg) 243 | } else { 244 | warning(msg, immediate.=TRUE) 245 | } 246 | } 247 | task_ids <- task_ids[ok] 248 | } 249 | } 250 | if (length(task_ids) > 0L) { 251 | if (is.null(group)) { 252 | con$HDEL(keys$tasks_group, task_ids) 253 | } else { 254 | con$HMSET(keys$tasks_group, task_ids, group) 255 | } 256 | } 257 | invisible(NULL) 258 | } 259 | 260 | task_wait <- function(con, keys, task_id, timeout, every=0.05) { 261 | t0 <- Sys.time() 262 | timeout <- as.difftime(timeout, units="secs") 263 | repeat { 264 | res <- task_result(con, keys, task_id, sanitise=TRUE) 265 | if (!inherits(res, "UnfetchableTask")) { 266 | return(res) 267 | } else if (Sys.time() - t0 < timeout) { 268 | Sys.sleep(every) 269 | } else { 270 | stop("task not returned in time") 271 | } 272 | } 273 | } -------------------------------------------------------------------------------- /R/task_bundle.R: -------------------------------------------------------------------------------- 1 | ##' Create a task bundle. Generally these are not created manually, 2 | ##' but this page serves to document what task bundles are and the 3 | ##' methods that they have. 4 | ##' 5 | ##' A task bundle exists to group together tasks that are related. It 6 | ##' is possible for a task to belong to multiple bundles. 7 | ##' 8 | ##' @template task_bundle_methods 9 | ##' 10 | ##' @title Create a task bundle 11 | ##' @param obj An observer or queue object 12 | ##' @param tasks A list of tasks 13 | ##' @param groups Optional vector of groups. If given, then additional 14 | ##' tasks can be added to the bundle if they share the same group names. 15 | ##' @param names Optional vector of names to label output with.
16 | ##' @export 17 | task_bundle <- function(obj, tasks, groups=NULL, names=NULL) { 18 | ## TODO: What is groups used for here? Seems no longer needed? 19 | .R6_task_bundle$new(obj, tasks, groups, names) 20 | } 21 | 22 | ## TODO: Next, make an automatically updating version. 23 | .R6_task_bundle <- R6::R6Class( 24 | "task_bundle", 25 | 26 | public= 27 | list( 28 | obj=NULL, 29 | tasks=NULL, 30 | key_complete=NULL, 31 | groups=NULL, 32 | names=NULL, 33 | con=NULL, 34 | keys=NULL, 35 | 36 | initialize=function(obj, tasks, groups, names) { 37 | self$con <- obj$con 38 | self$keys <- obj$keys 39 | self$obj <- obj 40 | self$tasks <- setNames(tasks, vcapply(tasks, "[[", "id")) 41 | 42 | self$key_complete <- unique(vcapply(tasks, "[[", "key_complete")) 43 | self$groups <- groups 44 | 45 | if (!is.null(names) && length(names) != length(tasks)) { 46 | stop("Incorrect length names") 47 | } 48 | self$names <- names 49 | }, 50 | 51 | ids=function() { 52 | names(self$tasks) 53 | }, 54 | 55 | update_groups=function() { 56 | task_ids <- setdiff(tasks_in_groups(self$con, self$keys, self$groups), 57 | self$ids()) 58 | if (length(task_ids)) { 59 | tasks <- setNames(lapply(task_ids, self$obj$task_get), task_ids) 60 | self$tasks <- c(self$tasks, tasks) 61 | self$key_complete <- union(self$key_complete, 62 | unique(vcapply(tasks, "[[", "key_complete"))) 63 | ## Can't deal with this for now :( 64 | self$names <- NULL 65 | } 66 | invisible(task_ids) 67 | }, 68 | 69 | overview=function() { 70 | tasks_overview(self$con, self$keys, self$ids()) 71 | }, 72 | status=function(follow_redirect=FALSE) { 73 | self$obj$tasks_status(self$ids(), follow_redirect=follow_redirect) 74 | }, 75 | results=function(follow_redirect=FALSE) { 76 | self$wait(0, 0, FALSE, follow_redirect) 77 | }, 78 | wait=function(timeout=60, time_poll=1, progress_bar=TRUE, follow_redirect=FALSE) { 79 | task_bundle_wait(self, timeout, time_poll, progress_bar, follow_redirect) 80 | }, 81 | wait1=function(timeout=60, time_poll=1, follow_redirect=FALSE) { 82 | task_bundle_wait1(self, timeout, time_poll, follow_redirect) 83 | }, 84 | times=function(unit_elapsed="secs") { 85 | tasks_times(self$con, self$keys, self$ids(), unit_elapsed) 86 | }, 87 | 88 | 89 | delete_tasks=function() { 90 | invisible(self$obj$tasks_drop(self$ids())) 91 | })) 92 | 93 | 94 | ## There are a bunch of ways of getting appropriate things here: 95 | task_bundle_get <- function(obj, groups=NULL, task_ids=NULL) { 96 | if (!xor(is.null(task_ids), is.null(groups))) { 97 | stop("Exactly one of task_ids or groups must be given") 98 | } 99 | if (is.null(groups)) { 100 | groups <- obj$tasks_lookup_group(task_ids) 101 | } else { 102 | task_ids <- obj$tasks_in_groups(groups) 103 | } 104 | 105 | tasks <- lapply(task_ids, obj$task_get) 106 | names(tasks) <- task_ids 107 | task_bundle(obj, tasks, groups) 108 | } 109 | 110 | 111 | task_bundle_wait <- function(obj, timeout, time_poll, progress_bar, follow_redirect) { 112 | assert_integer_like(time_poll) 113 | task_ids <- obj$ids() 114 | status <- obj$status() 115 | done <- !(status == TASK_PENDING | status == TASK_RUNNING | 116 | status == TASK_ORPHAN) 117 | 118 | ## Immediately collect all results: 119 | results <- named_list(task_ids) 120 | if (any(done)) { 121 | results[done] <- lapply(obj$tasks[done], 122 | function(t) t$result(follow_redirect)) 123 | } 124 | 125 | cleanup <- function(results, names) { 126 | if (!is.null(names)) { 127 | names(results) <- names 128 | } 129 | results 130 | } 131 | if (all(done)) { 132 | return(cleanup(results, 
obj$names)) 133 | } else if (timeout == 0) { 134 | stop("Tasks not yet completed; can't be immediately returned") 135 | } 136 | 137 | p <- progress(total=length(obj$tasks), show=progress_bar) 138 | t0 <- Sys.time() 139 | timeout <- as.difftime(timeout, units="secs") 140 | 141 | p(sum(done)) 142 | i <- 1L 143 | while (!all(done)) { 144 | if (Sys.time() - t0 > timeout) { 145 | stop(sprintf("Exceeded maximum time (%d / %d tasks pending)", 146 | sum(!done), length(done))) 147 | } 148 | res <- task_bundle_fetch1(obj, time_poll, follow_redirect) 149 | if (is.null(res)) { 150 | p(0) 151 | } else { 152 | p(1) 153 | task_id <- res[[1]] 154 | result <- res[[2]] 155 | done[[task_id]] <- TRUE 156 | ## NOTE: This conditional is needed to avoid deleting the 157 | ## element in results if we get a NULL result. 158 | if (!is.null(result)) { 159 | results[[task_id]] <- result 160 | } 161 | } 162 | 163 | } 164 | cleanup(results, obj$names) 165 | } 166 | 167 | task_bundle_wait1 <- function(obj, timeout, time_poll, follow_redirect) { 168 | status <- obj$status(follow_redirect) 169 | done <- !(status == TASK_PENDING | status == TASK_RUNNING | 170 | status == TASK_ORPHAN) 171 | if (all(done)) { 172 | done <- vnapply(obj$key_complete, obj$obj$con$LLEN) == 0 173 | } 174 | if (all(done)) { 175 | return(NULL) 176 | } 177 | times_up <- time_checker(timeout) 178 | repeat { 179 | if (times_up()) { 180 | return(NULL) 181 | } 182 | res <- task_bundle_fetch1(obj, time_poll, follow_redirect) 183 | if (!is.null(res)) { 184 | return(res) 185 | } 186 | } 187 | } 188 | 189 | task_bundle_fetch1 <- function(bundle, timeout, follow_redirect) { 190 | if (as.integer(timeout) > 0) { 191 | res <- bundle$con$BLPOP(bundle$key_complete, timeout) 192 | } else { 193 | res <- lpop_mult(bundle$con, bundle$key_complete) 194 | } 195 | if (!is.null(res)) { 196 | id <- res[[2]] 197 | list(id=id, 198 | result=bundle$obj$task_result(id, follow_redirect=follow_redirect)) 199 | 200 | } else { 201 | NULL 202 | } 203 | } 204 | -------------------------------------------------------------------------------- /R/utils.R: -------------------------------------------------------------------------------- 1 | ##' @importFrom digest digest 2 | hash_string <- function(x) { 3 | digest::digest(x, serialize=FALSE) 4 | } 5 | 6 | hash_file <- function(x) { 7 | digest::digest(file=x) 8 | } 9 | 10 | hash_files <- function(x) { 11 | setNames(vcapply(x, hash_file), x) 12 | } 13 | 14 | compare_hash <- function(x) { 15 | if (length(x) == 0L) { 16 | TRUE 17 | } else { 18 | files <- names(x) 19 | all(file.exists(files)) && identical(hash_files(files), x) 20 | } 21 | } 22 | 23 | is_error <- function(x) { 24 | inherits(x, "try-error") 25 | } 26 | 27 | is_directory <- function(path) { 28 | file.info(path)$isdir 29 | } 30 | 31 | is_terminal <- function() { 32 | if (!isatty(stdout())) { 33 | return(FALSE) 34 | } 35 | if (.Platform$OS.type == "windows") { 36 | return(FALSE) 37 | } 38 | if (Sys.getenv("TERM") == "dumb") { 39 | return(FALSE) 40 | } 41 | !is_error(try(system("tput colors", intern=TRUE), silent=TRUE)) 42 | } 43 | 44 | vcapply <- function(X, FUN, ...) { 45 | vapply(X, FUN, character(1), ...) 46 | } 47 | vnapply <- function(X, FUN, ...) { 48 | vapply(X, FUN, numeric(1), ...) 49 | } 50 | viapply <- function(X, FUN, ...) { 51 | vapply(X, FUN, integer(1), ...) 52 | } 53 | vlapply <- function(X, FUN, ...) { 54 | vapply(X, FUN, logical(1), ...) 55 | } 56 | 57 | docopt_parse <- function(...) 
{ 58 | oo <- options(warnPartialMatchArgs=FALSE) 59 | if (isTRUE(oo$warnPartialMatchArgs)) { 60 | on.exit(options(oo)) 61 | } 62 | docopt::docopt(...) 63 | } 64 | 65 | lstrip <- function(x) { 66 | sub("^\\s+", "", x, perl=TRUE) 67 | } 68 | rstrip <- function(x) { 69 | sub("\\s+$", "", x, perl=TRUE) 70 | } 71 | 72 | ## Source a file (using sys.source) and record all files that file 73 | ## sources (only via source and sys.source, ignoring file connections, 74 | ## assuming files don't change, etc, etc). 75 | sys_source <- function(...) { 76 | collector <- function(...) { 77 | e <- parent.frame(2) 78 | if (exists("file", e, inherits=FALSE)) { 79 | file <- get("file", e, inherits=FALSE) 80 | if (is.character(file)) { 81 | ## TODO: need to deal with the case where source(..., 82 | ## chdir=TRUE) was used and the path has changed; in that case 83 | ## we're going to need to work out where the file is relative 84 | ## to the current directory, which requires pathr to work. 85 | ## 86 | ## If we *do* do this, then the create_environment function 87 | ## needs to take care of that bookkeeping. 88 | ## 89 | ## NOTE: using hash_files(), not hash_file(), as the latter 90 | ## adds names. 91 | dat <<- c(dat, hash_files(file)) 92 | } else { 93 | warning("non-file source detected") 94 | } 95 | } else { 96 | warning("source detection failed") 97 | } 98 | dat 99 | } 100 | dat <- character(0) 101 | 102 | suppressMessages({ 103 | trace(base::source, function(...) collector(), print=FALSE) 104 | trace(base::sys.source, function(...) collector(), print=FALSE) 105 | }) 106 | on.exit({ 107 | suppressMessages({ 108 | untrace(base::source) 109 | untrace(base::sys.source) 110 | }) 111 | }) 112 | sys.source(...) 113 | dat 114 | } 115 | 116 | random_colour <- function(n=1) { 117 | rgb(runif(n), runif(n), runif(n)) 118 | } 119 | 120 | strrep <- function (str, n) { 121 | paste(rep_len(str, n), collapse = "") 122 | } 123 | 124 | Sys_kill <- function(pid, signal=NULL) { 125 | system2("kill", c(pid, signal)) 126 | } 127 | 128 | find_script <- function(name) { 129 | cmd <- Sys.which(name) 130 | if (cmd == "") { 131 | tmp <- tempfile() 132 | install_scripts(tmp) 133 | cmd <- file.path(tmp, name) 134 | } 135 | cmd 136 | } 137 | 138 | hostname <- function() { 139 | Sys.info()[["nodename"]] 140 | } 141 | process_id <- function() { 142 | Sys.getpid() 143 | } 144 | 145 | ## Potentially useful for a monitor thing: 146 | ## x <- sample(letters[1:4], 40, replace=TRUE) 147 | ## cols <- c(a="red", b="blue", c="green", d="purple") 148 | ## pretty_blocks(x, cols) 149 | pretty_blocks <- function(x, cols) { 150 | sq <- vcapply(cols, function(x) crayon::make_style(x)("\u2588")) 151 | paste(sq[x], collapse="") 152 | } 153 | 154 | ## Alternatives: 155 | ## http://stackoverflow.com/a/2685827 156 | spin_symbols <- function() { 157 | sym <- c("-", "\\", "|", "/") 158 | i <- 0L 159 | n <- length(sym) 160 | function() { 161 | sym[[i <<- if (i >= n) 1L else i + 1L]] 162 | } 163 | } 164 | 165 | ##' @importFrom progress progress_bar 166 | progress <- function(total, ..., show=TRUE, prefix="") { 167 | if (show) { 168 | fmt <- paste0(prefix, "[:bar] :percent :spin") 169 | pb <- progress::progress_bar$new(fmt, total=total) 170 | ws <- spin_symbols() 171 | function(len=1) { 172 | invisible(pb$tick(len, list(spin=ws()))) 173 | } 174 | } else { 175 | function(...) {} 176 | } 177 | } 178 | 179 | ## Short-circuit apply; returns the index of the first element of x 180 | ## for which cond(x[[i]]) holds true. 
181 | scapply <- function(x, cond, no_match=NA_integer_) { 182 | for (i in seq_along(x)) { 183 | if (isTRUE(cond(x[[i]]))) { 184 | return(i) 185 | } 186 | } 187 | no_match 188 | } 189 | 190 | invert_names <- function(x) { 191 | setNames(names(x), x) 192 | } 193 | 194 | blank <- function(n) { 195 | paste(rep_len(" ", n), collapse="") 196 | } 197 | 198 | ## Possibly could be done faster. 199 | df_to_list <- function(x) { 200 | keep <- c("names", "class", "row.names") 201 | at <- attributes(x) 202 | attributes(x) <- at[intersect(names(at), keep)] 203 | unname(lapply(split(x, seq_len(nrow(x))), as.list)) 204 | } 205 | 206 | match_value <- function(arg, choices, name=deparse(substitute(arg))) { 207 | assert_scalar_character(arg) 208 | if (!(arg %in% choices)) { 209 | stop(sprintf("%s must be one of %s", 210 | name, paste(dQuote(choices), collapse=", "))) 211 | } 212 | arg 213 | } 214 | 215 | create_group <- function(group, verbose) { 216 | if (is.null(group)) { 217 | group <- ids::adjective_animal() 218 | if (verbose) { 219 | message(sprintf("Creating group: '%s'", group)) 220 | } 221 | } 222 | group 223 | } 224 | 225 | read_file_to_string <- function(filename) { 226 | readChar(filename, file.info(filename)$size) 227 | } 228 | write_string_to_file <- function(str, filename) { 229 | writeChar(str, filename, eos=NULL) 230 | } 231 | 232 | ##' Install rrqueue scripts. This currently includes a script for 233 | ##' starting a worker process. The script is extremely simple and 234 | ##' does not need upgrading when rrqueue is upgraded. 235 | ##' @title Install rrqueue scripts 236 | ##' @param dest Directory to install scripts to, preferably in the \code{$PATH} 237 | ##' @param overwrite Overwrite any existing file? 238 | ##' @export 239 | install_scripts <- function(dest, overwrite=TRUE) { 240 | src <- system.file("scripts", package=.packageName) 241 | scripts <- dir(src) 242 | dir.create(dest, FALSE, TRUE) 243 | ok <- file.copy(file.path(src, scripts), 244 | file.path(dest, scripts), overwrite=overwrite) 245 | invisible(ok) 246 | } 247 | 248 | yaml_load <- function(string) { 249 | handlers <- list(`bool#yes` = function(x) { 250 | if (identical(toupper(x), "TRUE")) TRUE else x 251 | }, `bool#no` = function(x) { 252 | if (identical(toupper(x), "FALSE")) FALSE else x 253 | }) 254 | yaml::yaml.load(string, handlers = handlers) 255 | } 256 | yaml_read <- function(filename) { 257 | yaml_load(paste(readLines(filename), collapse="\n")) 258 | } 259 | 260 | docopt_parse <- function(doc, args, clean=TRUE) { 261 | oo <- options(warnPartialMatchArgs=FALSE) 262 | if (isTRUE(oo$warnPartialMatchArgs)) { 263 | on.exit(options(oo)) 264 | } 265 | opts <- docopt::docopt(doc, args) 266 | if (clean) { 267 | opts <- docopt_clean(opts) 268 | } 269 | opts 270 | } 271 | 272 | docopt_clean <- function(opts) { 273 | opts <- opts[!(grepl("^-", names(opts)) | grepl("^<.*>$", names(opts)))] 274 | names(opts) <- gsub("-", "_", names(opts)) 275 | opts 276 | } 277 | 278 | with_wd <- function(path, expr) { 279 | if (path != ".") { 280 | if (!file.exists(path)) { 281 | stop(sprintf("Path '%s' does not exist", path)) 282 | } 283 | if (!is_directory(path)) { 284 | stop(sprintf("Path '%s' exists, but is not a directory", path)) 285 | } 286 | owd <- setwd(path) 287 | on.exit(setwd(owd)) 288 | } 289 | force(expr) 290 | } 291 | 292 | dir_create <- function(paths) { 293 | invisible(vlapply(unique(paths), dir.create, FALSE, TRUE)) 294 | } 295 | 296 | named_list <- function(names) { 297 | setNames(vector("list", length(names)), names) 298 | } 
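## Illustrative examples for the two small container helpers here
## (named_logical is defined just below):
##   named_list(c("a", "b"))     # list(a = NULL, b = NULL)
##   named_logical(c("a", "b"))  # c(a = FALSE, b = FALSE)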
299 | named_logical <- function(names) { 300 | setNames(logical(length(names)), names) 301 | } 302 | 303 | time_checker <- function(timeout) { 304 | t0 <- Sys.time() 305 | timeout <- as.difftime(timeout, units="secs") 306 | function() { 307 | Sys.time() - t0 > timeout 308 | } 309 | } 310 | -------------------------------------------------------------------------------- /R/utils_assert.R: -------------------------------------------------------------------------------- 1 | ## Type 2 | assert_inherits <- function(x, what, name=deparse(substitute(x))) { 3 | if (!inherits(x, what)) { 4 | stop(sprintf("%s must be a %s", name, 5 | paste(what, collapse=" / ")), call.=FALSE) 6 | } 7 | } 8 | assert_character <- function(x, name=deparse(substitute(x))) { 9 | if (!is.character(x)) { 10 | stop(sprintf("%s must be character", name), call.=FALSE) 11 | } 12 | } 13 | 14 | assert_character_or_null <- function(x, name=deparse(substitute(x))) { 15 | if (!is.null(x)) { 16 | assert_character(x, name) 17 | } 18 | } 19 | 20 | ## Length 21 | assert_scalar <- function(x, name=deparse(substitute(x))) { 22 | if (length(x) != 1) { 23 | stop(sprintf("%s must be a scalar", name), call.=FALSE) 24 | } 25 | } 26 | 27 | ## Compound: 28 | assert_scalar_character <- function(x, name=deparse(substitute(x))) { 29 | assert_scalar(x, name) 30 | assert_character(x, name) 31 | } 32 | 33 | assert_integer_like <- function(x, name=deparse(substitute(x))) { 34 | if (!isTRUE(all.equal(as.integer(x), x))) { 35 | stop(sprintf("%s is not integer like", name)) 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /R/worker_spawn.R: -------------------------------------------------------------------------------- 1 | ## There are *four* ways of launching workers; that's why this is so 2 | ## complicated. 3 | ## 4 | ## 1. manually using worker(...) 5 | ## 2. from R using worker_spawn(...) 6 | ## 3. from the command line using rrqueue_worker 7 | ## 4. from the command line using rrqueue_worker_tee 8 | 9 | ## TODO: autogenerate this in the same way that we do for the args 10 | rrqueue_worker_main <- function(args=commandArgs(TRUE)) { 11 | opts <- rrqueue_worker_args(args) 12 | con <- redux::hiredis(host=opts$redis_host, 13 | port=as.integer(opts$redis_port)) 14 | worker(opts$queue_name, con, 15 | heartbeat_period=opts$heartbeat_period, 16 | heartbeat_expire=opts$heartbeat_expire, 17 | key_worker_alive=opts$key_worker_alive) 18 | } 19 | 20 | rrqueue_worker_args <- function(args=commandArgs(TRUE)) { 21 | 'Usage: 22 | rrqueue_worker [options] <queue_name> 23 | rrqueue_worker --config=FILENAME [options] [<queue_name>] 24 | rrqueue_worker -h | --help 25 | 26 | Options: 27 | --redis-host HOSTNAME Hostname for Redis 28 | --redis-port PORT Port for Redis 29 | --heartbeat-period T Heartbeat period 30 | --heartbeat-expire T Heartbeat expiry time 31 | --key-worker-alive KEY Key to write to when the worker becomes alive 32 | --config FILENAME Optional YAML configuration filename 33 | 34 | Arguments: 35 | <queue_name> Name of queue 36 | ' -> doc 37 | 38 | given <- docopt_parse(doc, args) 39 | given <- given[!vlapply(given, is.null)] 40 | 41 | defaults <- as.list(formals(worker)) 42 | nms <- names(defaults) 43 | defaults$queue_name <- NULL 44 | 45 | ## This will be an issue for docopt interface to the main queue too.
46 | if (is.null(given$config)) { 47 | ret <- modifyList(defaults, given)[nms] 48 | } else { 49 | cfg <- tmp_fix_redis_config(load_config(given$config)) 50 | ret <- modifyList(modifyList(defaults, cfg), given)[nms] 51 | ## Check that we did get a queue_name 52 | if (is.null(ret$queue_name)) { 53 | stop("queue name must be given") 54 | } 55 | } 56 | ret 57 | } 58 | 59 | ##' Spawn a worker in the background 60 | ##' 61 | ##' Spawning multiple workers: if \code{n} is greater than one, 62 | ##' multiple workers will be spawned. This happens in parallel so it 63 | ##' does not take n times longer than spawning a single worker. 64 | ##' 65 | ##' Beware that signals like Ctrl-C passed to \emph{this} R instance 66 | ##' can still propagate to the child processes and can result in them 67 | ##' dying unexpectedly. It is probably safer to start processes in a 68 | ##' standalone session. 69 | ##' 70 | ##' @title Spawn a worker 71 | ##' @param queue_name Name of the queue to connect to 72 | ##' @param logfile Name of a log file to write to (consider 73 | ##' \code{tempfile()}). If \code{n} > 1, then \code{n} log files 74 | ##' must be provided. 75 | ##' @param redis_host Host name/IP for the Redis server 76 | ##' @param redis_port Port for the Redis server 77 | ##' @param n Number of workers to spawn 78 | ##' @param timeout Time to wait for the worker to appear 79 | ##' @param time_poll Period to poll for the worker (must be in 80 | ##' seconds) 81 | ##' @param heartbeat_period Period between heartbeat pulses 82 | ##' @param heartbeat_expire Time that heartbeat pulses will persist 83 | ##' @param path Path to start the worker in. By default workers will 84 | ##' start in the current working directory, but you can start them 85 | ##' elsewhere by providing a path here. If the path does not exist, 86 | ##' an error will be thrown. If \code{n} is greater than 1, all 87 | ##' workers will start in the same working directory. The 88 | ##' \code{logfile} argument will be interpreted relative to current 89 | ##' working directory (not the worker working directory); use 90 | ##' \code{\link{normalizePath}} to convert into an absolute path 91 | ##' name to prevent this.
92 | ##' @export 93 | worker_spawn <- function(queue_name, logfile, 94 | redis_host="127.0.0.1", 95 | redis_port=6379, 96 | n=1, 97 | timeout=20, time_poll=1, 98 | heartbeat_period=NULL, 99 | heartbeat_expire=NULL, 100 | path=".") { 101 | rrqueue_worker <- find_script("rrqueue_worker") 102 | ## system2() takes a character vector of NAME=VALUE pairs here: 103 | env <- c(paste0("R_LIBS=", paste(.libPaths(), collapse=":")), 104 | 'R_TESTS=""') 105 | 106 | if (length(logfile) != n) { 107 | stop("logfile must be a vector of length n") 108 | } 109 | assert_integer_like(time_poll) 110 | 111 | con <- redux::hiredis(host=redis_host, port=redis_port) 112 | key_worker_alive <- rrqueue_key_worker_alive(queue_name) 113 | 114 | opts <- character(0) 115 | if (!is.null(heartbeat_expire)) { 116 | opts <- c(opts, "--heartbeat-expire", heartbeat_expire) 117 | } 118 | if (!is.null(heartbeat_period)) { 119 | opts <- c(opts, "--heartbeat-period", heartbeat_period) 120 | } 121 | if (!is.null(redis_host)) { 122 | opts <- c(opts, "--redis-host", redis_host) 123 | } 124 | if (!is.null(redis_port)) { 125 | opts <- c(opts, "--redis-port", redis_port) 126 | } 127 | opts <- c(opts, "--key-worker-alive", key_worker_alive, queue_name) 128 | 129 | dir_create(dirname(logfile)) 130 | logfile <- file.path(normalizePath(dirname(logfile)), basename(logfile)) 131 | 132 | code <- integer(n) 133 | with_wd(path, { 134 | for (i in seq_len(n)) { 135 | code[[i]] <- system2(rrqueue_worker, opts, 136 | env=env, wait=FALSE, 137 | stdout=logfile[[i]], stderr=logfile[[i]]) 138 | } 139 | }) 140 | if (any(code != 0L)) { 141 | warning("Error launching script: worker *probably* did not start") 142 | } 143 | 144 | ret <- rep.int(NA_character_, n) 145 | t0 <- Sys.time() 146 | timeout <- as.difftime(timeout, units="secs") 147 | 148 | i <- 1L 149 | repeat { 150 | x <- con$BLPOP(key_worker_alive, time_poll) 151 | if (is.null(x)) { 152 | message(".", appendLF=FALSE) 153 | flush.console() 154 | } else { 155 | ret[[i]] <- x[[2]] 156 | if (n > 1L) { 157 | message(sprintf("new worker: %s (%d / %d)", x[[2]], i, n)) 158 | } else { 159 | message(sprintf("new worker: %s", x[[2]])) 160 | } 161 | i <- i + 1 162 | } 163 | if (!any(is.na(ret))) { 164 | break 165 | } 166 | if (Sys.time() - t0 > timeout) { 167 | ## TODO: Better recovery here. Ideally we'd stop any workers 168 | ## that *are* running, and provide data from the log files. 169 | stop(sprintf("%d / %d workers not identified in time", 170 | sum(is.na(ret)), n)) 171 | } 172 | } 173 | 174 | ret 175 | } 176 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # rrqueue 2 | 3 | `rrqueue` is a *distributed task queue* for R, implemented on top of [Redis](http://redis.io). At the cost of a little more work it allows for more flexible parallelisation than afforded by `mclapply`. The main goal is to support non-map style operations: submit some tasks, collect the completed results, queue more even while some tasks are still running. 4 | 5 | Other features include: 6 | 7 | * Low-level task submission / retrieval has a simple API so that asynchronous task queues can be created. 8 | * Objects representing tasks, workers, queues, etc can be queried. 9 | * While blocking `mclapply`-like functions are available, the package is designed to be non-blocking so that intermediate results can be used. 10 | * Automatic fingerprinting of environments so that code run on a remote machine will correspond to the code found locally.
11 | * Works well connecting to a Redis database running on the cloud (e.g., on an AWS machine over an ssh tunnel). 12 | * Local workers can be added to a remote pool, so long as everything can talk to the same Redis server. 13 | * The worker pool can be scaled at any time (up or down). 14 | * Basic fault tolerance, supporting requeuing tasks lost on crashed workers. 15 | 16 | # Simple usage 17 | 18 | The basic workflow is: 19 | 20 | 1. Create a queue 21 | 2. Submit tasks to the queue 22 | 3. Start workers 23 | 4. Collect results 24 | 25 | The workers can be started at any time between steps 1 and 3, though they do need to be started before results can be collected. 26 | 27 | ## Create a queue 28 | 29 | Start a queue that we will submit tasks to: 30 | ``` 31 | con <- rrqueue::queue("jobs") 32 | ``` 33 | 34 | Expressions can be queued using the `enqueue` method: 35 | 36 | ``` 37 | task <- con$enqueue(sin(1)) 38 | ``` 39 | 40 | Task objects can be inspected to find out (for example) how long they have been waiting for: 41 | 42 | ``` 43 | task$times() 44 | ``` 45 | 46 | or what their status is: 47 | 48 | ``` 49 | task$status() 50 | ``` 51 | 52 | To get workers to process jobs from this queue, interactively run (in a separate R instance) 53 | 54 | ``` 55 | w <- rrqueue::worker("jobs") 56 | ``` 57 | 58 | or spawn a worker in the background with 59 | 60 | ``` 61 | logfile <- tempfile() 62 | rrqueue::worker_spawn("jobs", logfile) 63 | ``` 64 | 65 | The task will complete: 66 | 67 | ``` 68 | task$status() 69 | ``` 70 | 71 | and the value can be retrieved: 72 | 73 | ``` 74 | task$result() 75 | ``` 76 | 77 | When you are done, stop the worker by sending it a `STOP` message: 78 | 79 | ``` 80 | con$send_message("STOP") 81 | ``` 82 | 83 | In contrast with many parallel approaches in R, workers can be added at any time and will automatically start working on any remaining jobs. 84 | 85 | There's lots more in various stages of completion, including `mclapply`-like functions (`rrqlapply`) and lots of information gathering. 86 | 87 | # Installation 88 | 89 | Redis must be installed and `redis-server` must be running. If you are familiar with docker, the [redis](https://registry.hub.docker.com/_/redis/) docker image might be a good idea here. Alternatively, [download redis](http://redis.io/download), unpack it and then install it by running `make install` in a terminal window within the downloaded folder. 90 | 91 | Once installed, start `redis-server` by typing in a terminal window 92 | 93 | ``` 94 | redis-server 95 | ``` 96 | (On Linux the server will probably already be running. On Mac OS X, you might like to set it up to run as a daemon, i.e., a [background process](https://en.wikipedia.org/wiki/Daemon_(computing)), if you end up using Redis a lot, following [these instructions](http://naleid.com/blog/2011/03/05/running-redis-as-a-user-daemon-on-osx-with-launchd).) 97 | 98 | Try `redis-cli PING` to see if it is running; it should return `PONG` and not give an error.
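The same check can be run from R; this sketch assumes only the `redux` package (which rrqueue uses for its Redis connections) is installed and that Redis is on the default host and port:

```r
con <- redux::hiredis()  # defaults to 127.0.0.1:6379
con$PING()               # should return "PONG"
```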
We'll also need to install some R packages (see below), which will first require installing the [hiredis](https://github.com/redis/hiredis) C library. See the details on the [RedisHeartbeat](https://github.com/richfitz/RedisHeartbeat) page, but briefly: 100 | 101 | * Download the archive from the [hiredis release page](https://github.com/redis/hiredis/releases/latest) 102 | * install with `tar -zxvf hiredis-0.XX.Y.tar.gz && cd hiredis-0.XX.Y && make && sudo make install` 103 | * On OS X, add to your `~/.profile` 104 | 105 | ``` 106 | export DYLD_LIBRARY_PATH=/usr/local/lib 107 | ``` 108 | 109 | On Debian/Ubuntu, install the `libhiredis-dev` package with `apt-get`. 110 | 111 | You can then install the required R packages: 112 | 113 | ``` 114 | install.packages(c("RcppRedis", "R6", "digest", "docopt")) 115 | devtools::install_github(c("ropensci/RedisAPI", "richfitz/redux", "richfitz/RedisHeartbeat", "richfitz/ids")) 116 | devtools::install_github("traitecoevo/rrqueue") 117 | ``` 118 | 119 | (*Optional*) To see what is going on, run `redis-cli monitor` in a terminal; this will print all the Redis chatter, though it will impact Redis performance. 120 | 121 | # Starting workers 122 | 123 | Workers can be started from within an R process using the `rrqueue::worker_spawn` function. This takes an optional argument `n` to start more than one worker at a time, and will block until all workers have appeared. 124 | 125 | From the command line, workers can be started using the `rrqueue_worker` script. The script can be installed by running (from R) 126 | 127 | ``` 128 | rrqueue::install_scripts("~/bin") 129 | ``` 130 | 131 | replacing `"~/bin"` with a path that is in your executable search path and which is writable. 132 | 133 | ``` 134 | $ rrqueue_worker --help 135 | Usage: 136 | rrqueue_worker [options] <queue_name> 137 | rrqueue_worker --config=FILENAME [options] [<queue_name>] 138 | rrqueue_worker -h | --help 139 | 140 | Options: 141 | --redis-host HOSTNAME Hostname for Redis 142 | --redis-port PORT Port for Redis 143 | --heartbeat-period T Heartbeat period 144 | --heartbeat-expire T Heartbeat expiry time 145 | --key-worker-alive KEY Key to write to when the worker becomes alive 146 | --config FILENAME Optional YAML configuration filename 147 | 148 | Arguments: 149 | <queue_name> Name of queue 150 | ``` 151 | 152 | The arguments correspond to the arguments documented in `?worker_spawn`; the queue name is determined by position. 153 | 154 | The `config` argument is an optional path to a yml configuration file. That configuration file contains values for any of the arguments to `worker_spawn`, for example: 155 | 156 | ```yaml 157 | queue_name: tmpjobs 158 | redis_host: 127.0.0.1 159 | redis_port: 6379 160 | heartbeat_period: 30 161 | heartbeat_expire: 90 162 | ``` 163 | 164 | Arguments passed to `rrqueue_worker` in addition to the configuration will override values in the yaml. 165 | 166 | This file can also be passed to `queue` and `observer` as the `config` argument (e.g., `queue(config="config.yml")`) rather than having to pass in lots of parameters.
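For example, with the configuration above saved as `config.yml`, a queue and a couple of background workers could be brought up from R like so (a sketch; the file name and worker count are arbitrary, and Redis is assumed to be running):

```r
q <- rrqueue::queue(config = "config.yml")   # queue name comes from the file
logs <- c(tempfile(), tempfile())            # one log file per worker
rrqueue::worker_spawn("tmpjobs", logs, n = 2)
```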
167 | 168 | # Documentation 169 | 170 | Reference documentation and vignettes are available on [this website](http://traitecoevo.github.io/rrqueue/). If the vignettes are built (`make vignettes`), they will be available in the package, and this will be committed to github once things settle down. 171 | 172 | # Performance 173 | 174 | So far, I've done relatively little performance tuning. In particular, the *workers* make no effort to minimise the number of calls to Redis, and assume that the connection is fast. On the other hand, we use `rrqueue` where the controller is many hops across the internet from the workers (controlling a queue on AWS). To reduce the time involved, `rrqueue` uses [Lua scripting](https://en.wikipedia.org/wiki/Lua_(programming_language)) to reduce the number of instruction round trips. 175 | 176 | # Spurious errors 177 | 178 | You may see variants of errors like 179 | 180 | ``` 181 | Calls: -> .handleSimpleError -> h -> signalCondition 182 | Error in signalCondition(e) : 183 | no function to return from, jumping to top level 184 | ``` 185 | 186 | This is an issue somewhere within Rcpp modules (which RcppRedis uses) and seems harmless. 187 | -------------------------------------------------------------------------------- /autodoc.R: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env Rscript 2 | 3 | ## Dirty hack to compile docs in the absence of proper Roxygen R6 support. 4 | devtools::load_all(".") 5 | 6 | add_usage <- function(dat, generator) { 7 | capture_usage <- function(name) { 8 | tmp <- capture.output(args(generator$public_methods[[name]])) 9 | tmp <- strip_trailing_whitespace(paste(tmp[-length(tmp)], collapse="\n")) 10 | sub("^function\\s*", name, tmp) 11 | } 12 | 13 | valid <- names(generator$public_methods) 14 | extra <- setdiff(names(dat), valid) 15 | if (length(extra) > 0L) { 16 | warning(sprintf("In '%s', extra methods: %s", 17 | generator$classname, 18 | paste(extra, collapse=", ")), 19 | immediate.=TRUE, call.=FALSE) 20 | } 21 | 22 | for (name in names(dat)) { 23 | dat[[name]]$method_name <- name 24 | dat[[name]]$usage <- capture_usage(name) 25 | dat[[name]]$order <- names(formals(generator$public_methods[[name]])) 26 | } 27 | dat 28 | } 29 | 30 | indent <- function(str, n, pad=NULL) { 31 | if (is.null(pad)) { 32 | pad <- paste(rep(" ", n), collapse="") 33 | } 34 | p <- function(s) { 35 | paste(paste0(pad, s), collapse="\n") 36 | } 37 | vapply(strsplit(str, "\n"), p, character(1)) 38 | } 39 | 40 | format_params <- function(xp) { 41 | fmt1 <- "\\describe{\n%s\n}" 42 | fmt2 <- "\\item{\\code{%s}}{\n%s\n}\n" 43 | pars <- sprintf(fmt2, names(xp), indent(unlist(xp), 2)) 44 | sprintf(fmt1, indent(paste(pars, collapse="\n"), 2)) 45 | } 46 | 47 | format_method <- function(x) { 48 | title <- sprintf("\\item{\\code{%s}}{", x$method_name) 49 | end <- "}" 50 | 51 | p_msg <- setdiff(x$order, names(x$params)) 52 | p_extra <- setdiff(names(x$params), x$order) 53 | if (length(p_msg) > 0) { 54 | warning(sprintf("In '%s', missing parameters: %s", 55 | x$method_name, paste(p_msg, collapse=", ")), 56 | immediate.=TRUE, call.=FALSE) 57 | } 58 | if (length(p_extra) > 0) { 59 | warning(sprintf("In '%s', extra parameters: %s", 60 | x$method_name, paste(p_extra, collapse=", ")), 61 | immediate.=TRUE, call.=FALSE) 62 | } 63 | ## preserve order, though I'm pretty sure that the yaml package is 64 | ## actually preserving it.
65 | if (length(p_msg) == 0 && length(p_extra) == 0) { 66 | x$params <- x$params[x$order] 67 | } 68 | 69 | body <- sprintf("%s\n\n\\emph{Usage:}\n\\code{%s}", 70 | x$short, x$usage) 71 | if (!is.null(x$params)) { 72 | body <- paste0(body, "\n\n\\emph{Arguments:}\n", format_params(x$params)) 73 | } 74 | if (!is.null(x$details)) { 75 | body <- paste0(body, "\n\n\\emph{Details:}\n", x$details) 76 | } 77 | if (!is.null(x$value)) { 78 | body <- paste0(body, "\n\n\\emph{Value}:\n", x$value) 79 | } 80 | paste(title, indent(body, 2), end, sep="\n") 81 | } 82 | 83 | strip_trailing_whitespace <- function(x) { 84 | gsub("[ \t]+(\n|$)", "\\1", x) 85 | } 86 | 87 | format_class <- function(x) { 88 | ret <- vapply(x, format_method, character(1)) 89 | ret <- sprintf("@section Methods:\n\n\\describe{\n%s\n}", 90 | paste(ret, collapse="\n")) 91 | ret <- indent(ret, pad="##' ") 92 | strip_trailing_whitespace(ret) 93 | } 94 | 95 | process <- function(type) { 96 | e <- environment(rrqueue::queue) 97 | generator <- get(sprintf(".R6_%s", type), e) 98 | dat <- rrqueue:::yaml_read(sprintf("man-roxygen/%s.yml", type)) 99 | ret <- format_class(add_usage(dat, generator)) 100 | writeLines(ret, sprintf("man-roxygen/%s_methods.R", type)) 101 | } 102 | 103 | types <- function() { 104 | re <- "\\.yml$" 105 | sub(re, "", dir("man-roxygen", pattern=re)) 106 | } 107 | 108 | process_all <- function() { 109 | for (t in types()) { 110 | message(paste("Generating:", t)) 111 | process(t) 112 | } 113 | } 114 | 115 | if (!interactive() && identical(commandArgs(TRUE), "process")) { 116 | process_all() 117 | } 118 | -------------------------------------------------------------------------------- /doc/how.md: -------------------------------------------------------------------------------- 1 | Through the lifecycle of a task: 2 | 3 | Assuming a queue name `rrq`, so all keys begin with `rrq:` 4 | 5 | ## Controller queues a task 6 | 7 | 1. Given `expr` (some unevaluated R expression) and `envir` (an environment to locate local variables) we do some processing to create something that can be serialised by Redis (see [below](#preparing-the-expression)) 8 | 2. Increment the *task counter* (`rrq:tasks:counter`) to get the *task id* 9 | 3. Store things in hashes with the key as the task id: 10 | * serialised expression in `rrq:tasks:expr` 11 | * environment id in `rrq:tasks:envir` 12 | * task status (pending) in `rrq:tasks:status` 13 | * time submitted in `rrq:tasks:time:sub` 14 | * the name of the key to push to when the job is complete in `rrq:tasks:complete` 15 | 4. Push the task onto the task queue: `rrq:tasks:id` 16 | 17 | ## Worker accepts a task 18 | 19 | 1. Pop a task off `rrq:tasks:id` 20 | 2. Retrieve the expression 21 | 3. Store things in hashes indicating that we're working on the task: 22 | * Set the worker status as busy (`rrq:workers:status`) 23 | * Store the task id in `rrq:workers:task` 24 | * Store the worker id for this task in `rrq:tasks:worker` 25 | * Store the time the job was begun in `rrq:tasks:time:beg` 26 | * Set the task status as running in `rrq:tasks:status` 27 | 4. Get to work running the task 28 | 29 | (Note that the task can be lost between steps 1 and 3, though it is easy enough to detect this case: the task is still known to the system but is not associated with any worker.) 30 |
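At this point the state is directly visible in Redis. As an illustrative sketch (via `redux`; the task id `"1"` and worker id `"host-1"` here are made up, and only the key names come from this document):

```r
con <- redux::hiredis()
con$HGET("rrq:tasks:status", "1")       # "RUNNING"
con$HGET("rrq:tasks:worker", "1")       # "host-1"
con$HGET("rrq:workers:task", "host-1")  # "1"
```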
31 | ## Worker completes the task 32 | 33 | 1. Store things in hashes: 34 | * Write the serialized result to the hash `rrq:tasks:result` 35 | * Store task status (complete / error / etc) in `rrq:tasks:status` 36 | * Store the finished time in `rrq:tasks:time:end` 37 | * Set worker status as idle in `rrq:workers:status` 38 | 2. Push the task onto the finished queue, which we look up from `rrq:tasks:complete` 39 | 3. Return to polling `rrq:tasks:id` for new jobs 40 | 41 | # Preparing the expression 42 | 43 | 1. Look at all arguments of the expression and work out which entries are symbols 44 | 2. For each symbol 45 | * retrieve the value from the given environment 46 | * serialise it to Redis using the "object cache" object, using a key which is `:`. 47 | 3. Serialise the expression, plus the list of object names and the mapping to the mangled names. 48 | 49 | The serialisation to Redis via the object cache uses a content-addressable system: objects are stored against keys that are the *hash* of the object contents, and a pointer is stored from the mangled name to the object. There is also a counter that indicates how many things point at a given piece of data; once that counter drops to zero the object is deleted. This means that regularly referenced large pieces of data are stored only once, rather than once per task. 50 | 51 | On the receiving end, the workers only pull objects from the database if they have not pulled an object with that hash before. This avoids the bottleneck of pulling over the network and deserialising.
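The following is a minimal sketch of that content-addressable idea, illustrative only (rrqueue's real implementation lives in `R/object_cache.R`; this toy version stores objects in an environment rather than Redis):

```r
## Store each unique object once, keyed by the hash of its contents.
put <- function(store, value) {
  h <- digest::digest(value)
  if (!exists(h, envir = store, inherits = FALSE)) {
    assign(h, value, envir = store)   # first sighting: store the data
  }
  h                                   # callers keep only the hash
}

store <- new.env()
k1 <- put(store, mtcars)
k2 <- put(store, mtcars)  # same contents, so nothing new is stored
identical(k1, k2)         # TRUE
```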
-------------------------------------------------------------------------------- /docker/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM r-base 2 | 3 | RUN apt-get update \ 4 | && apt-get install -y --no-install-recommends \ 5 | ca-certificates \ 6 | curl \ 7 | git \ 8 | libcurl4-openssl-dev \ 9 | libhiredis-dev \ 10 | libssl-dev/unstable \ 11 | libxml2-dev \ 12 | pandoc \ 13 | ssh \ 14 | && apt-get clean 15 | 16 | RUN rm /usr/local/bin/install2.r && \ 17 | wget --no-check-certificate \ 18 | https://raw.githubusercontent.com/cboettig/littler/master/examples/install2.r \ 19 | -O /usr/local/bin/install2.r \ 20 | && chmod +x /usr/local/bin/install2.r 21 | 22 | RUN install2.r --error \ 23 | crayon \ 24 | devtools \ 25 | digest \ 26 | docopt \ 27 | downloader \ 28 | dplyr \ 29 | inline \ 30 | R6 \ 31 | RCurl \ 32 | rmarkdown \ 33 | stringr 34 | 35 | RUN installGithub.r \ 36 | ropensci/RedisAPI \ 37 | richfitz/RedisHeartbeat \ 38 | richfitz/storr \ 39 | richfitz/remake \ 40 | gaborcsardi/progress \ 41 | traitecoevo/dockertest \ 42 | traitecoevo/rrqueue 43 | 44 | RUN r -e 'remake:::install_remake("/usr/local/bin")' 45 | RUN r -e 'rrqueue:::install_scripts("/usr/local/bin")' 46 | 47 | WORKDIR /root 48 | 49 | CMD ["bash"] -------------------------------------------------------------------------------- /inst/scripts/rrqueue_worker: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env Rscript 2 | library(methods) 3 | w <- rrqueue:::rrqueue_worker_main() -------------------------------------------------------------------------------- /inst/scripts/rrqueue_worker_tee: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | set -e 3 | # set -x 4 | LOGDIR=$1 5 | shift 6 | mkdir -p ${LOGDIR} 7 | 8 | # log files will have the form <hostname>_XXXX -- mktemp 9 | # and rely on timestamps to work out which is most recent; this is 10 | # safe for race conditions. 11 | LOGFILE=$(mktemp "${LOGDIR}/$(hostname)_XXXX") 12 | 13 | echo "Logging to $LOGFILE" 14 | rrqueue_worker "$@" 2>&1 | tee -i "$LOGFILE" -------------------------------------------------------------------------------- /man-roxygen/observer.yml: -------------------------------------------------------------------------------- 1 | tasks_list: 2 | short: Return a vector of known task ids. 3 | value: A character vector 4 | 5 | tasks_status: 6 | short: >- 7 | Returns a named character vector indicating the task status. 8 | params: 9 | task_ids: &task_ids >- 10 | Optional vector of task identifiers. If omitted all tasks known 11 | to rrqueue will be used. 12 | follow_redirect: >- 13 | should we follow redirects to get the status of any requeued tasks? 14 | value: | 15 | A named character vector; the names will be the task ids, and the 16 | values are the status of each task. Possible status values are 17 | \describe{ 18 | \item{\code{PENDING}}{queued, but not run by a worker} 19 | \item{\code{RUNNING}}{being run on a worker, but not complete} 20 | \item{\code{COMPLETE}}{task completed successfully} 21 | \item{\code{ERROR}}{task completed with an error} 22 | \item{\code{ORPHAN}}{task orphaned due to loss of worker} 23 | \item{\code{REDIRECT}}{orphaned task has been redirected} 24 | \item{\code{MISSING}}{task not known (deleted, or never existed)} 25 | } 26 | 27 | tasks_overview: 28 | short: >- 29 | High-level overview of the tasks in the queue; the number of tasks 30 | in each status. 31 | 32 | tasks_times: 33 | short: returns a summary of times for a set of tasks 34 | params: 35 | task_ids: *task_ids 36 | unit_elapsed: >- 37 | Unit to use in computing elapsed times. The default is to use 38 | "secs". This is passed through to \code{\link{difftime}} so the 39 | units there are available and are "auto", "secs", "mins", 40 | "hours", "days", "weeks". 41 | value: | 42 | A \code{data.frame}, one row per task, with columns 43 | \describe{ 44 | \item{\code{task_id}}{The task id} 45 | \item{\code{submitted}}{Time the task was submitted} 46 | \item{\code{started}}{Time the task was started, or \code{NA} if waiting} 47 | \item{\code{finished}}{Time the task was completed, or \code{NA} 48 | if waiting or running} 49 | \item{\code{waiting}}{Elapsed time spent waiting} 50 | \item{\code{running}}{Elapsed time spent running, or \code{NA} if waiting} 51 | \item{\code{idle}}{Elapsed time since finished, or \code{NA} 52 | if waiting or running} 53 | } 54 | 55 | tasks_envir: 56 | short: returns the mapping of tasks to environment 57 | params: 58 | task_ids: *task_ids 59 | value: >- 60 | A named character vector; names are the task ids and the value is 61 | the environment id associated with that task. 62 | 63 | task_get: 64 | short: >- 65 | returns a \code{\link{task}} object associated with a given task 66 | identifier. This can be used to interrogate an individual task. 67 | See the help for \code{\link{task}} objects for more about these objects. 68 | params: 69 | task_id: &task_id A single task identifier 70 | 71 | task_result: 72 | short: >- 73 | Get the result for a single task 74 | params: 75 | task_id: *task_id 76 | follow_redirect: &follow_redirect >- 77 | should we follow redirects to get the status of any requeued task? 78 | 79 | tasks_groups_list: 80 | short: >- 81 | Returns a list of \emph{groups} known to rrqueue. Groups are 82 | assigned during task creation, or through the 83 | \code{tasks_set_group} method of \code{\link{queue}}.
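# (The `&task_ids` / `*task_ids` pairs used throughout this file are YAML
# anchors and aliases: shared parameter descriptions are written once and
# then reused by later methods, and autodoc.R expands them when generating
# the roxygen templates.)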
84 | 85 | tasks_in_groups: 86 | short: >- 87 | Returns a list of tasks belonging to any of the groups listed. 88 | params: 89 | groups: >- 90 | A character vector of one or more groups (use 91 | \code{tasks_groups_list} to get a list of valid groups). 92 | 93 | tasks_lookup_group: 94 | short: >- 95 | Look up the group for a set of tasks 96 | params: 97 | task_ids: *task_ids 98 | value: >- 99 | A named character vector; names refer to task ids and the value is 100 | the group (or \code{NA} if no group is set for that task id). 101 | 102 | task_bundle_get: 103 | short: >- 104 | Return a "bundle" of tasks that can be operated on together; see 105 | \code{\link{task_bundle}} 106 | params: 107 | groups: A vector of groups to include in the bundle 108 | task_ids: >- 109 | A vector of task ids in the bundle. Unlike all other uses of 110 | \code{task_ids} here, only one of \code{groups} or 111 | \code{task_ids} may be provided; if \code{task_ids} is 112 | \code{NULL} then \code{groups} is used to select the tasks. 113 | 114 | envirs_list: 115 | short: >- 116 | Return a vector of all known environment ids in this queue. 117 | envirs_contents: 118 | short: >- 119 | Return a vector of the environment contents 120 | params: 121 | envir_ids: &envir_ids >- 122 | Vector of environment ids. If omitted then all environments in 123 | this queue are used. 124 | value: | 125 | A list, each element of which is a list of elements 126 | \describe{ 127 | \item{\code{packages}}{a vector of packages loaded} 128 | \item{\code{sources}}{a vector of files explicitly sourced} 129 | \item{\code{source_files}}{a vector of files sourced including 130 | their hashes. This includes any files detected to be sourced 131 | by another file} 132 | } 133 | envir_workers: 134 | short: >- 135 | Determine which workers are known to be able to process tasks in a 136 | particular environment. 137 | params: 138 | envir_id: A single environment id 139 | worker_ids: &worker_ids 140 | Optional vector of worker identifiers. If omitted all workers known 141 | to rrqueue will be used (currently running workers only). 142 | value: >- 143 | A named logical vector; \code{TRUE} if a worker can use an 144 | environment, named by the worker identifiers. 145 | 146 | workers_len: 147 | short: >- 148 | Number of workers that have made themselves known to rrqueue. 149 | There are situations where this is an overestimate and that may 150 | get fixed at some point. 151 | 152 | workers_list: 153 | short: >- 154 | Returns a vector of all known worker identifiers (may include 155 | workers that have crashed). 156 | 157 | workers_list_exited: 158 | short: >- 159 | Returns a vector of workers that are known to have exited. 160 | Workers leave behind most of the interesting bits of logs, times, 161 | etc, so these identifiers are useful for asking what they worked 162 | on. 163 | 164 | workers_status: 165 | short: >- 166 | Returns a named character vector indicating the worker status. 167 | params: 168 | worker_ids: *worker_ids 169 | value: | 170 | A named character vector; the names will be the worker ids, and the 171 | values are the status of each worker.
Possible status values are 172 | \describe{ 173 | \item{\code{IDLE}}{worker is idle} 174 | \item{\code{BUSY}}{worker is running a task} 175 | \item{\code{LOST}}{worker has been identified as lost by the 176 | \code{workers_identify_lost} method of \code{\link{queue}}.} 177 | \item{\code{EXITED}}{worker has exited} 178 | \item{\code{PAUSED}}{worker is paused} 179 | } 180 | 181 | workers_task_id: 182 | short: >- 183 | Returns the tasks that workers are currently processing (or 184 | \code{NA} for workers that are not known to be working on a task) 185 | 186 | params: 187 | worker_ids: *worker_ids 188 | 189 | value: >- 190 | A named character vector. Names are the worker ids and value is 191 | the task id, or \code{NA} if no task is being worked on. 192 | 193 | workers_times: 194 | short: >- 195 | returns a summary of times for a set of workers. This only 196 | returns useful information if the workers are running a heartbeat 197 | process, which requires the \code{RedisHeartbeat} package. 198 | params: 199 | worker_ids: *worker_ids 200 | unit_elapsed: >- 201 | Unit to use in computing elapsed times. The default is to use 202 | "secs". This is passed through to \code{\link{difftime}} so the 203 | units there are available and are "auto", "secs", "mins", 204 | "hours", "days", "weeks". 205 | value: | 206 | A \code{data.frame}, one row per worker, with columns 207 | \describe{ 208 | \item{\code{worker_id}}{Worker identifier} 209 | \item{\code{expire_max}}{Maximum length of time before worker can 210 | be declared missing, in seconds} 211 | \item{\code{expire}}{Time until the worker will expire, in seconds} 212 | \item{\code{last_seen}}{Time since the worker was last seen} 213 | \item{\code{last_action}}{Time since the last worker action} 214 | } 215 | 216 | workers_log_tail: 217 | short: >- 218 | Return the last few entries in the worker logs. 219 | params: 220 | worker_ids: *worker_ids 221 | n: >- 222 | Number of log entries to return. Use \code{0} or \code{Inf} to 223 | return all entries. 224 | value: | 225 | 226 | A \code{data.frame} with columns 227 | \describe{ 228 | \item{\code{worker_id}}{the worker identifier} 229 | \item{\code{time}}{time of the event} 230 | \item{\code{command}}{the command (e.g., MESSAGE, ALIVE)} 231 | \item{\code{message}}{The message associated with the command} 232 | } 233 | 234 | workers_info: 235 | short: >- 236 | Returns a set of key/value information about workers. Includes 237 | things like hostnames, process ids, environments that can be run, 238 | etc. Note that this information is from the last time that the 239 | worker process registered an \code{INFO} command. This is 240 | registered at startup and after receiving an \code{INFO} message 241 | from a \code{\link{queue}} object. So the information may be out 242 | of date. 243 | params: 244 | worker_ids: *worker_ids 245 | value: | 246 | A list, each element of which is a \code{worker_info} 247 | 248 | worker_envir: 249 | short: >- 250 | Returns an up-to-date list of environments a worker is capable of 251 | using (in contrast to the entry in \code{workers_info}, which might 252 | be out of date). 253 | params: 254 | worker_id: Single worker identifier 255 | -------------------------------------------------------------------------------- /man-roxygen/task.yml: -------------------------------------------------------------------------------- 1 | status: 2 | short: >- 3 | Returns a scalar character indicating the task status.
4 | 5 | params: 6 | follow_redirect: &follow_redirect >- 7 | should we follow redirects to get the status of any requeued task? 8 | 9 | value: | 10 | Scalar character. Possible values are 11 | \describe{ 12 | \item{\code{PENDING}}{queued, but not run by a worker} 13 | \item{\code{RUNNING}}{being run on a worker, but not complete} 14 | \item{\code{COMPLETE}}{task completed successfully} 15 | \item{\code{ERROR}}{task completed with an error} 16 | \item{\code{ORPHAN}}{task orphaned due to loss of worker} 17 | \item{\code{REDIRECT}}{orphaned task has been redirected} 18 | \item{\code{MISSING}}{task not known (deleted, or never existed)} 19 | } 20 | 21 | result: 22 | short: >- 23 | Fetch the result of a task, so long as it has completed. 24 | 25 | params: 26 | follow_redirect: *follow_redirect 27 | 28 | wait: 29 | short: >- 30 | Like \code{result}, but will wait until the task is complete. In 31 | order to preserve the \code{key_complete} for anything that might 32 | be listening for it (and to avoid collision with anything else 33 | writing to that key), this function repeatedly polls the database. 34 | Over a slow connection you may want to increase the \code{every} 35 | parameter. 36 | 37 | params: 38 | timeout: >- 39 | Length of time, in seconds, to wait. A value of zero will not 40 | wait. Infinite times are possible and can be escaped by 41 | pressing Ctrl-C or Escape (depending on platform). 42 | 43 | every: How often, in seconds, to poll for results 44 | 45 | expr: 46 | short: returns the expression stored in the task 47 | params: 48 | locals: >- 49 | Logical, indicating if the local variables associated with the 50 | expression should also be returned. If \code{TRUE}, then local 51 | variables used in the expression will be returned in an 52 | \emph{attribute} of the expression, \code{envir}. 53 | value: >- 54 | A quoted expression (a language object). Turn this into a string 55 | with deparse. If \code{locals} was \code{TRUE} there will be an 56 | environment attribute with local variables included. 57 | 58 | envir: 59 | short: returns the environment identifier for the task 60 | 61 | times: 62 | short: returns a summary of times associated with this task. 63 | params: 64 | unit_elapsed: >- 65 | Unit to use in computing elapsed times. The default is to use 66 | "secs". This is passed through to \code{\link{difftime}} so the 67 | units there are available and are "auto", "secs", "mins", 68 | "hours", "days", "weeks". 69 | value: | 70 | A one row \code{data.frame} with columns 71 | \describe{ 72 | \item{\code{submitted}}{Time the task was submitted} 73 | \item{\code{started}}{Time the task was started, or \code{NA} if waiting} 74 | \item{\code{finished}}{Time the task was completed, or \code{NA} 75 | if waiting or running} 76 | \item{\code{waiting}}{Elapsed time spent waiting} 77 | \item{\code{running}}{Elapsed time spent running, or \code{NA} if waiting} 78 | \item{\code{idle}}{Elapsed time since finished, or \code{NA} 79 | if waiting or running} 80 | } 81 | -------------------------------------------------------------------------------- /man-roxygen/task_bundle.yml: -------------------------------------------------------------------------------- 1 | ids: 2 | short: >- 3 | Return the vector of ids for the bundle (will return 4 | \code{character(0)} if the bundle is empty). 5 | 6 | status: 7 | short: >- 8 | Fetch the current task status (see \code{\link{task}}). 9 | 10 | params: 11 | follow_redirect: &follow_redirect >- 12 | should we follow redirects to get the status of any requeued task?
13 | 14 | overview: 15 | short: >- 16 | High-level overview of the tasks in the bundle; the number of tasks 17 | in each status. 18 | 19 | results: 20 | short: >- 21 | Fetch results for all tasks, immediately (unlike \code{wait}). 22 | params: 23 | follow_redirect: *follow_redirect 24 | 25 | value: >- 26 | 27 | Always returns a list, regardless of how many tasks are in the 28 | bundle (\code{list()} for zero tasks, \code{list(x1)} for one task, 29 | \code{list(x1, x2)} for two, etc.). This list will be named with 30 | the ids (see \code{ids()}) unless the bundle has a \code{names} 31 | attribute that has not been invalidated by using 32 | \code{update_groups()} (this interface may change). 33 | 34 | wait: 35 | short: >- 36 | Like \code{results}, but block until results are returned. 37 | 38 | params: 39 | timeout: &timeout >- 40 | 41 | Total time to wait, in seconds. If the tasks have not returned 42 | by this point an error will be thrown. 43 | 44 | time_poll: &time_poll >- 45 | 46 | Interval, in seconds, to poll for new results. Increasing this 47 | time will reduce the number of Redis roundtrips but decrease the 48 | responsiveness to an interrupt. 49 | 50 | progress_bar: >- 51 | 52 | Display a progress bar while waiting? Includes a spinner for 53 | reassurance that the process has not died. 54 | 55 | follow_redirect: *follow_redirect 56 | 57 | wait1: 58 | short: >- 59 | 60 | Wait for a single task to complete, and return its result. 61 | 62 | params: 63 | 64 | timeout: *timeout 65 | time_poll: *time_poll 66 | follow_redirect: *follow_redirect 67 | 68 | value: >- 69 | 70 | If no task is available, either because all tasks are complete or 71 | because the waiting time was too short, \code{NULL} is returned. 72 | Otherwise a list of length 2 is returned, with elements \code{id} 73 | (the id of the task that completed) and \code{value} (the value of 74 | that task). This interface is inspired by Redis' \code{BLPOP} 75 | operation. 76 | 77 | times: 78 | short: >- 79 | Like times in \code{\link{observer}}, but limited to the ids in the bundle. 80 | 81 | params: 82 | unit_elapsed: >- 83 | Unit to use in computing elapsed times. The default is to use 84 | "secs". This is passed through to \code{\link{difftime}} so the 85 | units there are available and are "auto", "secs", "mins", 86 | "hours", "days", "weeks". 87 | 88 | update_groups: 89 | short: >- 90 | 91 | Check for new tasks that have been added to the groups included in 92 | this bundle and add these tasks to the bundle. 93 | 94 | value: >- 95 | 96 | Invisibly, the ids of tasks that have been added to the bundle. 97 | -------------------------------------------------------------------------------- /man-roxygen/task_bundle_methods.R: -------------------------------------------------------------------------------- 1 | ##' @section Methods: 2 | ##' 3 | ##' \describe{ 4 | ##' \item{\code{ids}}{ 5 | ##' Return the vector of ids for the bundle (will return \code{character(0)} if the bundle is empty). 6 | ##' 7 | ##' \emph{Usage:} 8 | ##' \code{ids()} 9 | ##' } 10 | ##' \item{\code{status}}{ 11 | ##' Fetch the current task status (see \code{\link{task}}). 12 | ##' 13 | ##' \emph{Usage:} 14 | ##' \code{status(follow_redirect = FALSE)} 15 | ##' 16 | ##' \emph{Arguments:} 17 | ##' \describe{ 18 | ##' \item{\code{follow_redirect}}{ 19 | ##' should we follow redirects to get the status of any requeued task?
20 | ##' } 21 | ##' } 22 | ##' } 23 | ##' \item{\code{overview}}{ 24 | ##' High-level overview of the tasks in the bundle; the number of tasks in each status. 25 | ##' 26 | ##' \emph{Usage:} 27 | ##' \code{overview()} 28 | ##' } 29 | ##' \item{\code{results}}{ 30 | ##' Fetch results for all tasks, immediately (unlike \code{wait}). 31 | ##' 32 | ##' \emph{Usage:} 33 | ##' \code{results(follow_redirect = FALSE)} 34 | ##' 35 | ##' \emph{Arguments:} 36 | ##' \describe{ 37 | ##' \item{\code{follow_redirect}}{ 38 | ##' should we follow redirects to get the status of any requeued task? 39 | ##' } 40 | ##' } 41 | ##' 42 | ##' \emph{Value}: 43 | ##' 44 | ##' Always returns a list, regardless of how many tasks are in the bundle (\code{list()} for zero tasks, \code{list(x1)} for one task, \code{list(x1, x2)} for two, etc.). This list will be named with the ids (see \code{ids()}) unless the bundle has a \code{names} attribute that has not been invalidated by using \code{update_groups()} (this interface may change). 45 | ##' } 46 | ##' \item{\code{wait}}{ 47 | ##' Like \code{results}, but block until results are returned. 48 | ##' 49 | ##' \emph{Usage:} 50 | ##' \code{wait(timeout = 60, time_poll = 1, progress_bar = TRUE, follow_redirect = FALSE)} 51 | ##' 52 | ##' \emph{Arguments:} 53 | ##' \describe{ 54 | ##' \item{\code{timeout}}{ 55 | ##' 56 | ##' Total time to wait, in seconds. If the tasks have not returned by this point an error will be thrown. 57 | ##' } 58 | ##' 59 | ##' \item{\code{time_poll}}{ 60 | ##' 61 | ##' Interval, in seconds, to poll for new results. Increasing this time will reduce the number of Redis roundtrips but decrease the responsiveness to an interrupt. 62 | ##' } 63 | ##' 64 | ##' \item{\code{progress_bar}}{ 65 | ##' 66 | ##' Display a progress bar while waiting? Includes a spinner for reassurance that the process has not died. 67 | ##' } 68 | ##' 69 | ##' \item{\code{follow_redirect}}{ 70 | ##' should we follow redirects to get the status of any requeued task? 71 | ##' } 72 | ##' } 73 | ##' } 74 | ##' \item{\code{wait1}}{ 75 | ##' 76 | ##' Wait for a single task to complete, and return its result. 77 | ##' 78 | ##' \emph{Usage:} 79 | ##' \code{wait1(timeout = 60, time_poll = 1, follow_redirect = FALSE)} 80 | ##' 81 | ##' \emph{Arguments:} 82 | ##' \describe{ 83 | ##' \item{\code{timeout}}{ 84 | ##' 85 | ##' Total time to wait, in seconds. If the tasks have not returned by this point an error will be thrown. 86 | ##' } 87 | ##' 88 | ##' \item{\code{time_poll}}{ 89 | ##' 90 | ##' Interval, in seconds, to poll for new results. Increasing this time will reduce the number of Redis roundtrips but decrease the responsiveness to an interrupt. 91 | ##' } 92 | ##' 93 | ##' \item{\code{follow_redirect}}{ 94 | ##' should we follow redirects to get the status of any requeued task? 95 | ##' } 96 | ##' } 97 | ##' 98 | ##' \emph{Value}: 99 | ##' 100 | ##' If no task is available, either because all tasks are complete or because the waiting time was too short, \code{NULL} is returned. Otherwise a list of length 2 is returned, with elements \code{id} (the id of the task that completed) and \code{value} (the value of that task). This interface is inspired by Redis' \code{BLPOP} operation. 101 | ##' } 102 | ##' \item{\code{times}}{ 103 | ##' Like times in \code{\link{observer}}, but limited to the ids in the bundle.
104 | ##'
105 | ##'   \emph{Usage:}
106 | ##'   \code{times(unit_elapsed = "secs")}
107 | ##'
108 | ##'   \emph{Arguments:}
109 | ##'   \describe{
110 | ##'     \item{\code{unit_elapsed}}{
111 | ##'       Unit to use in computing elapsed times. The default is to use "secs". This is passed through to \code{\link{difftime}} so the units available there can be used: "auto", "secs", "mins", "hours", "days", "weeks".
112 | ##'     }
113 | ##'   }
114 | ##' }
115 | ##' \item{\code{update_groups}}{
116 | ##'
117 | ##'   Check for new tasks that have been added to the groups included in this bundle and add these tasks to the bundle.
118 | ##'
119 | ##'   \emph{Usage:}
120 | ##'   \code{update_groups()}
121 | ##'
122 | ##'   \emph{Value}:
123 | ##'
124 | ##'   Invisibly, the ids of tasks that have been added to the bundle.
125 | ##' }
126 | ##' }
127 | 
-------------------------------------------------------------------------------- /man-roxygen/task_methods.R: --------------------------------------------------------------------------------
1 | ##' @section Methods:
2 | ##'
3 | ##' \describe{
4 | ##' \item{\code{status}}{
5 | ##'   Returns a scalar character indicating the task status.
6 | ##'
7 | ##'   \emph{Usage:}
8 | ##'   \code{status(follow_redirect = FALSE)}
9 | ##'
10 | ##'   \emph{Arguments:}
11 | ##'   \describe{
12 | ##'     \item{\code{follow_redirect}}{
13 | ##'       should we follow redirects to get the status of any requeued task?
14 | ##'     }
15 | ##'   }
16 | ##'
17 | ##'   \emph{Value}:
18 | ##'   Scalar character. Possible values are
19 | ##'   \describe{
20 | ##'     \item{\code{PENDING}}{queued, but not run by a worker}
21 | ##'     \item{\code{RUNNING}}{being run on a worker, but not complete}
22 | ##'     \item{\code{COMPLETE}}{task completed successfully}
23 | ##'     \item{\code{ERROR}}{task completed with an error}
24 | ##'     \item{\code{ORPHAN}}{task orphaned due to loss of worker}
25 | ##'     \item{\code{REDIRECT}}{orphaned task has been redirected}
26 | ##'     \item{\code{MISSING}}{task not known (deleted, or never existed)}
27 | ##'   }
28 | ##' }
29 | ##' \item{\code{result}}{
30 | ##'   Fetch the result of a task, so long as it has completed.
31 | ##'
32 | ##'   \emph{Usage:}
33 | ##'   \code{result(follow_redirect = FALSE)}
34 | ##'
35 | ##'   \emph{Arguments:}
36 | ##'   \describe{
37 | ##'     \item{\code{follow_redirect}}{
38 | ##'       should we follow redirects to get the status of any requeued task?
39 | ##'     }
40 | ##'   }
41 | ##' }
42 | ##' \item{\code{wait}}{
43 | ##'   Like \code{result}, but will wait until the task is complete. In order to preserve the \code{key_complete} for anything that might be listening for it (and to avoid collision with anything else writing to that key), this function repeatedly polls the database. Over a slow connection you may want to increase the \code{every} parameter.
44 | ##'
45 | ##'   \emph{Usage:}
46 | ##'   \code{wait(timeout, every = 0.05)}
47 | ##'
48 | ##'   \emph{Arguments:}
49 | ##'   \describe{
50 | ##'     \item{\code{timeout}}{
51 | ##'       Length of time, in seconds, to wait. A value of zero will not wait. Infinite times are possible and can be escaped by pressing Ctrl-C or Escape (depending on platform).
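##'
##'       A sketch of a bounded wait (assuming a hypothetical task
##'       handle \code{t} obtained from \code{task_get}); this polls
##'       every 0.1 seconds and gives up after 10 seconds:
##'       \preformatted{t$wait(timeout = 10, every = 0.1)}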
52 | ##'     }
53 | ##'
54 | ##'     \item{\code{every}}{
55 | ##'       How often, in seconds, to poll for results
56 | ##'     }
57 | ##'   }
58 | ##' }
59 | ##' \item{\code{expr}}{
60 | ##'   returns the expression stored in the task
61 | ##'
62 | ##'   \emph{Usage:}
63 | ##'   \code{expr(locals = FALSE)}
64 | ##'
65 | ##'   \emph{Arguments:}
66 | ##'   \describe{
67 | ##'     \item{\code{locals}}{
68 | ##'       Logical, indicating if the local variables associated with the expression should also be returned. If \code{TRUE}, then local variables used in the expression will be returned in an \emph{attribute} of the expression, \code{envir}.
69 | ##'     }
70 | ##'   }
71 | ##'
72 | ##'   \emph{Value}:
73 | ##'   A quoted expression (a language object). Turn this into a string with \code{deparse}. If \code{locals} was \code{TRUE} there will be an environment attribute with local variables included.
74 | ##' }
75 | ##' \item{\code{envir}}{
76 | ##'   returns the environment identifier for the task
77 | ##'
78 | ##'   \emph{Usage:}
79 | ##'   \code{envir()}
80 | ##' }
81 | ##' \item{\code{times}}{
82 | ##'   returns a summary of times associated with this task.
83 | ##'
84 | ##'   \emph{Usage:}
85 | ##'   \code{times(unit_elapsed = "secs")}
86 | ##'
87 | ##'   \emph{Arguments:}
88 | ##'   \describe{
89 | ##'     \item{\code{unit_elapsed}}{
90 | ##'       Unit to use in computing elapsed times. The default is to use "secs". This is passed through to \code{\link{difftime}} so the units available there can be used: "auto", "secs", "mins", "hours", "days", "weeks".
91 | ##'     }
92 | ##'   }
93 | ##'
94 | ##'   \emph{Value}:
95 | ##'   A one row \code{data.frame} with columns
96 | ##'   \describe{
97 | ##'     \item{\code{submitted}}{Time the task was submitted}
98 | ##'     \item{\code{started}}{Time the task was started, or \code{NA} if waiting}
99 | ##'     \item{\code{finished}}{Time the task was completed, or \code{NA}
100 | ##'       if waiting or running}
101 | ##'     \item{\code{waiting}}{Elapsed time spent waiting}
102 | ##'     \item{\code{running}}{Elapsed time spent running, or \code{NA} if waiting}
103 | ##'     \item{\code{idle}}{Elapsed time since finished, or \code{NA}
104 | ##'       if waiting or running}
105 | ##'   }
106 | ##' }
107 | ##' }
108 | 
-------------------------------------------------------------------------------- /man/enqueue_bulk.Rd: --------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/bulk.R
3 | \name{enqueue_bulk}
4 | \alias{enqueue_bulk}
5 | \alias{enqueue_bulk_submit}
6 | \title{Bulk queuing}
7 | \usage{
8 | enqueue_bulk(X, FUN, rrq, do.call = FALSE, group = NULL, timeout = Inf,
9 |   time_poll = 1, delete_tasks = FALSE, progress_bar = TRUE,
10 |   env = parent.frame())
11 | 
12 | enqueue_bulk_submit(X, FUN, rrq, do.call = FALSE, group = NULL,
13 |   progress_bar = TRUE, env = parent.frame())
14 | }
15 | \arguments{
16 | \item{X}{An object to loop over. If a list, we'll loop over the
17 | elements of the list, duplicating the behaviour of
18 | \code{\link{rrqlapply}} except for not handling dots. If a
19 | \code{data.frame} we'll loop over the \emph{rows}. Matrices are
20 | not supported.}
21 | 
22 | \item{FUN}{A function. Will be found in the same way as
23 | \code{FUN} within \code{\link{rrqlapply}}.}
24 | 
25 | \item{rrq}{An rrq object}
26 | 
27 | \item{do.call}{Behave like (but not via) \code{\link{do.call}};
28 | given an element \code{el}, rather than run \code{FUN(el)} run
29 | \code{FUN(el[[1]], el[[2]], ...)}.}
30 | 
31 | \item{group}{Name of a group for generated task ids.
If not
32 | included, an ID will be generated.}
33 | 
34 | \item{timeout}{Total length of time to wait for tasks to be
35 | completed. The default is to wait forever (like \code{lapply}).}
36 | 
37 | \item{time_poll}{Time to poll for tasks. Must be an integer.
38 | Because of how the function is implemented, R will be
39 | unresponsive for this long each iteration (unless results are
40 | returned), so the default of 1s should be reasonable.}
41 | 
42 | \item{delete_tasks}{Delete tasks on successful finish?}
43 | 
44 | \item{progress_bar}{Display a progress bar?}
45 | 
46 | \item{env}{Environment to look in}
47 | }
48 | \description{
49 | Bulk queuing. Similar in some respects to things like
50 | \code{\link{apply}}. This is an experiment to deal with the
51 | pattern where you have a big pile of parameters in a data.frame to
52 | loop over, by applying a function to each row.
53 | }
54 | \details{
55 | There are two modes here; selected with \code{do.call}. With
56 | \code{do.call=FALSE}, the default, the function behaves similarly
57 | to \code{apply(X, 1, FUN)}; that is, the function is applied to
58 | each row of the data.frame (as a list):
59 | \code{FUN(as.list(X[1,]))}, \code{FUN(as.list(X[2,]))}, and so on.
60 | The alternative mode (\code{do.call=TRUE}) is where the
61 | \code{data.frame} contains \emph{parameters} to the function
62 | \code{FUN}, and so is equivalent to \code{FUN(X[1,1], X[1,2], ...)}. This
63 | is similar to (but not implemented as) running: \code{do.call("FUN",
64 | as.list(X[1,]))}.
65 | 
66 | Be careful, this one is going to change, including the name
67 | probably. You have been warned.
68 | }
69 | 
70 | 
-------------------------------------------------------------------------------- /man/install_scripts.Rd: --------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/utils.R
3 | \name{install_scripts}
4 | \alias{install_scripts}
5 | \title{Install rrqueue scripts}
6 | \usage{
7 | install_scripts(dest, overwrite = TRUE)
8 | }
9 | \arguments{
10 | \item{dest}{Directory to install scripts to, preferably in the \code{$PATH}}
11 | 
12 | \item{overwrite}{Overwrite any existing file?}
13 | }
14 | \description{
15 | Install rrqueue scripts. This currently includes a script for
16 | starting a worker process. The script is extremely simple and
17 | does not need upgrading when rrqueue is upgraded.
18 | }
19 | 
20 | 
-------------------------------------------------------------------------------- /man/rrqlapply.Rd: --------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/rrqlapply.R
3 | \name{rrqlapply}
4 | \alias{rrqlapply}
5 | \alias{rrqlapply_submit}
6 | \title{Parallel version of lapply using Redis queuing}
7 | \usage{
8 | rrqlapply(X, FUN, rrq, ..., group = NULL, timeout = Inf, time_poll = 1,
9 |   delete_tasks = FALSE, progress_bar = TRUE, env = parent.frame())
10 | 
11 | rrqlapply_submit(X, FUN, rrq, ..., group = NULL, progress_bar = TRUE,
12 |   env = parent.frame())
13 | }
14 | \arguments{
15 | \item{X}{A vector}
16 | 
17 | \item{FUN}{The name of a function to apply to each element of the
18 | list. \emph{this will change!}.}
19 | 
20 | \item{rrq}{An rrq object}
21 | 
22 | \item{...}{Additional arguments passed to \code{FUN}}
23 | 
24 | \item{group}{Name of a group for generated task ids.
If not 25 | included, an ID will be generated.} 26 | 27 | \item{timeout}{Total length of time to wait for tasks to be 28 | completed. The default is to wait forever (like \code{lapply}).} 29 | 30 | \item{time_poll}{Time to poll for tasks. Must be an integer. 31 | Because of how the function is implemented, R will be 32 | unresponsive for this long each iteration (unless results are 33 | returned), so the default of 1s should be reasonable.} 34 | 35 | \item{delete_tasks}{Delete tasks on successful finish?} 36 | 37 | \item{progress_bar}{Display a progress bar?} 38 | 39 | \item{env}{Environment to look in.} 40 | } 41 | \description{ 42 | Parallel version of lapply using Redis queuing 43 | } 44 | 45 | -------------------------------------------------------------------------------- /man/task.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/task.R 3 | \name{task} 4 | \alias{task} 5 | \title{Create a task handle} 6 | \usage{ 7 | task(obj, task_id, key_complete = NULL) 8 | } 9 | \arguments{ 10 | \item{obj}{A \code{queue} or \code{observer} object.} 11 | 12 | \item{task_id}{Task identifier} 13 | 14 | \item{key_complete}{If known, specify the \code{key_complete}, 15 | otherwise we look it up on creation.} 16 | } 17 | \description{ 18 | Create a task handle object. This is a "pointer" to a task and 19 | can be used to retrieve information about status, running times, 20 | expression and the result of the task once complete. Generally 21 | you do not need to make a task object as they will be created for 22 | you by things like the \code{task_get} method of the 23 | \code{\link{observer}} and \code{\link{queue}} objects. 24 | } 25 | \details{ 26 | Tasks have a \emph{unique identifier}; these are unique within a 27 | queue and are implemented as an incrementing integer. However, 28 | this is an implementation detail and should not be relied on. The 29 | identifier is represented as a \emph{character string} rather than 30 | an integer in most places. 31 | 32 | Tasks exist in one of a number of \emph{statuses}. See the 33 | \code{status} method below for a list of possible statuses and 34 | their interpretation. 35 | } 36 | \section{Methods}{ 37 | 38 | 39 | \describe{ 40 | \item{\code{status}}{ 41 | Returns a scalar character indicating the task status. 42 | 43 | \emph{Usage:} 44 | \code{status(follow_redirect = FALSE)} 45 | 46 | \emph{Arguments:} 47 | \describe{ 48 | \item{\code{follow_redirect}}{ 49 | should we follow redirects to get the status of any requeued task? 50 | } 51 | } 52 | 53 | \emph{Value}: 54 | Scalar character. Possible values are 55 | \describe{ 56 | \item{\code{PENDING}}{queued, but not run by a worker} 57 | \item{\code{RUNNING}}{being run on a worker, but not complete} 58 | \item{\code{COMPLETE}}{task completed successfully} 59 | \item{\code{ERROR}}{task completed with an error} 60 | \item{\code{ORPHAN}}{task orphaned due to loss of worker} 61 | \item{\code{REDIRECT}}{orphaned task has been redirected} 62 | \item{\code{MISSING}}{task not known (deleted, or never existed)} 63 | } 64 | } 65 | \item{\code{result}}{ 66 | Fetch the result of a task, so long as it has completed. 67 | 68 | \emph{Usage:} 69 | \code{result(follow_redirect = FALSE)} 70 | 71 | \emph{Arguments:} 72 | \describe{ 73 | \item{\code{follow_redirect}}{ 74 | should we follow redirects to get the status of any requeued task? 
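
      For illustration, a sketch of the redirect case (assuming a
      hypothetical queue handle \code{obj} and a handle \code{t} to
      one of its orphaned tasks):
      \preformatted{t2 <- obj$requeue(t$id)
t$result(follow_redirect = TRUE) # fetches the result of t2}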
75 |     }
76 |   }
77 | }
78 | \item{\code{wait}}{
79 |   Like \code{result}, but will wait until the task is complete. In order to preserve the \code{key_complete} for anything that might be listening for it (and to avoid collision with anything else writing to that key), this function repeatedly polls the database. Over a slow connection you may want to increase the \code{every} parameter.
80 | 
81 |   \emph{Usage:}
82 |   \code{wait(timeout, every = 0.05)}
83 | 
84 |   \emph{Arguments:}
85 |   \describe{
86 |     \item{\code{timeout}}{
87 |       Length of time, in seconds, to wait. A value of zero will not wait. Infinite times are possible and can be escaped by pressing Ctrl-C or Escape (depending on platform).
88 |     }
89 | 
90 |     \item{\code{every}}{
91 |       How often, in seconds, to poll for results
92 |     }
93 |   }
94 | }
95 | \item{\code{expr}}{
96 |   returns the expression stored in the task
97 | 
98 |   \emph{Usage:}
99 |   \code{expr(locals = FALSE)}
100 | 
101 |   \emph{Arguments:}
102 |   \describe{
103 |     \item{\code{locals}}{
104 |       Logical, indicating if the local variables associated with the expression should also be returned. If \code{TRUE}, then local variables used in the expression will be returned in an \emph{attribute} of the expression, \code{envir}.
105 |     }
106 |   }
107 | 
108 |   \emph{Value}:
109 |   A quoted expression (a language object). Turn this into a string with \code{deparse}. If \code{locals} was \code{TRUE} there will be an environment attribute with local variables included.
110 | }
111 | \item{\code{envir}}{
112 |   returns the environment identifier for the task
113 | 
114 |   \emph{Usage:}
115 |   \code{envir()}
116 | }
117 | \item{\code{times}}{
118 |   returns a summary of times associated with this task.
119 | 
120 |   \emph{Usage:}
121 |   \code{times(unit_elapsed = "secs")}
122 | 
123 |   \emph{Arguments:}
124 |   \describe{
125 |     \item{\code{unit_elapsed}}{
126 |       Unit to use in computing elapsed times. The default is to use "secs". This is passed through to \code{\link{difftime}} so the units available there can be used: "auto", "secs", "mins", "hours", "days", "weeks".
127 |     }
128 |   }
129 | 
130 |   \emph{Value}:
131 |   A one row \code{data.frame} with columns
132 |   \describe{
133 |     \item{\code{submitted}}{Time the task was submitted}
134 |     \item{\code{started}}{Time the task was started, or \code{NA} if waiting}
135 |     \item{\code{finished}}{Time the task was completed, or \code{NA}
136 |       if waiting or running}
137 |     \item{\code{waiting}}{Elapsed time spent waiting}
138 |     \item{\code{running}}{Elapsed time spent running, or \code{NA} if waiting}
139 |     \item{\code{idle}}{Elapsed time since finished, or \code{NA}
140 |       if waiting or running}
141 |   }
142 | }
143 | }
144 | }
145 | 
146 | 
-------------------------------------------------------------------------------- /man/task_bundle.Rd: --------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/task_bundle.R
3 | \name{task_bundle}
4 | \alias{task_bundle}
5 | \title{Create a task bundle}
6 | \usage{
7 | task_bundle(obj, tasks, groups = NULL, names = NULL)
8 | }
9 | \arguments{
10 | \item{obj}{An observer or queue object}
11 | 
12 | \item{tasks}{A list of tasks}
13 | 
14 | \item{groups}{Optional vector of groups. If given, then additional
15 | tasks can be added to the bundle if they share the same group names.}
16 | 
17 | \item{names}{Optional vector of names to label output with.}
18 | }
19 | \description{
20 | Create a task bundle.
Generally these are not created manually,
21 | but this page serves to document what task bundles are and the
22 | methods that they have.
23 | }
24 | \details{
25 | A task bundle exists to group together tasks that are related. It
26 | is possible for a task to belong to multiple bundles.
27 | }
28 | \section{Methods}{
29 | 
30 | 
31 | \describe{
32 | \item{\code{ids}}{
33 |   Return the vector of ids for the bundle (will return \code{character(0)} if the bundle is empty).
34 | 
35 |   \emph{Usage:}
36 |   \code{ids()}
37 | }
38 | \item{\code{status}}{
39 |   Fetch the current task status (see \code{\link{task}}).
40 | 
41 |   \emph{Usage:}
42 |   \code{status(follow_redirect = FALSE)}
43 | 
44 |   \emph{Arguments:}
45 |   \describe{
46 |     \item{\code{follow_redirect}}{
47 |       should we follow redirects to get the status of any requeued task?
48 |     }
49 |   }
50 | }
51 | \item{\code{overview}}{
52 |   High-level overview of the tasks in the bundle; the number of tasks in each status.
53 | 
54 |   \emph{Usage:}
55 |   \code{overview()}
56 | }
57 | \item{\code{results}}{
58 |   Fetch results for all tasks, immediately (unlike \code{wait}).
59 | 
60 |   \emph{Usage:}
61 |   \code{results(follow_redirect = FALSE)}
62 | 
63 |   \emph{Arguments:}
64 |   \describe{
65 |     \item{\code{follow_redirect}}{
66 |       should we follow redirects to get the status of any requeued task?
67 |     }
68 |   }
69 | 
70 |   \emph{Value}:
71 | 
72 |   Always returns a list, regardless of how many tasks are in the bundle (\code{list()} for zero tasks, \code{list(x1)} for one task, \code{list(x1, x2)} for two, etc). This list will be named with the ids (see \code{ids()}) unless the bundle has a \code{names} attribute that has not been invalidated by using \code{update_groups()} (this interface may change).
73 | }
74 | \item{\code{wait}}{
75 |   Like \code{results}, but block until results are returned.
76 | 
77 |   \emph{Usage:}
78 |   \code{wait(timeout = 60, time_poll = 1, progress_bar = TRUE, follow_redirect = FALSE)}
79 | 
80 |   \emph{Arguments:}
81 |   \describe{
82 |     \item{\code{timeout}}{
83 | 
84 |       Total time to wait, in seconds. If the tasks have not returned by this point an error will be thrown.
85 |     }
86 | 
87 |     \item{\code{time_poll}}{
88 | 
89 |       Interval, in seconds, to poll for new results. Increasing this time will reduce the number of Redis roundtrips but decrease the responsiveness to an interrupt.
90 |     }
91 | 
92 |     \item{\code{progress_bar}}{
93 | 
94 |       Display a progress bar while waiting? Includes a spinner for reassurance that the process has not died.
95 |     }
96 | 
97 |     \item{\code{follow_redirect}}{
98 |       should we follow redirects to get the status of any requeued task?
99 |     }
100 |   }
101 | }
102 | \item{\code{wait1}}{
103 | 
104 |   Wait for a single task to complete, and return its result.
105 | 
106 |   \emph{Usage:}
107 |   \code{wait1(timeout = 60, time_poll = 1, follow_redirect = FALSE)}
108 | 
109 |   \emph{Arguments:}
110 |   \describe{
111 |     \item{\code{timeout}}{
112 | 
113 |       Total time to wait, in seconds. If the tasks have not returned by this point an error will be thrown.
114 |     }
115 | 
116 |     \item{\code{time_poll}}{
117 | 
118 |       Interval, in seconds, to poll for new results. Increasing this time will reduce the number of Redis roundtrips but decrease the responsiveness to an interrupt.
119 |     }
120 | 
121 |     \item{\code{follow_redirect}}{
122 |       should we follow redirects to get the status of any requeued task?
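
      A sketch of draining completed tasks one at a time with
      \code{wait1} (assuming a hypothetical bundle handle \code{b});
      each call returns \code{NULL} once nothing arrives within the
      timeout:
      \preformatted{while (!is.null(res <- b$wait1(timeout = 5))) {
  message("task ", res[[1]], " completed")
}}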
123 |     }
124 |   }
125 | 
126 |   \emph{Value}:
127 | 
128 |   If no task is available, either because all tasks are complete or because the waiting time was too short, \code{NULL} is returned. Otherwise a list of length 2 is returned, with elements \code{id} (the id of the task that completed) and \code{value} (the value of that task). This interface is inspired by Redis' \code{BLPOP} operation.
129 | }
130 | \item{\code{times}}{
131 |   Like times in \code{\link{observer}}, but limited to the ids in the bundle.
132 | 
133 |   \emph{Usage:}
134 |   \code{times(unit_elapsed = "secs")}
135 | 
136 |   \emph{Arguments:}
137 |   \describe{
138 |     \item{\code{unit_elapsed}}{
139 |       Unit to use in computing elapsed times. The default is to use "secs". This is passed through to \code{\link{difftime}} so the units available there can be used: "auto", "secs", "mins", "hours", "days", "weeks".
140 |     }
141 |   }
142 | }
143 | \item{\code{update_groups}}{
144 | 
145 |   Check for new tasks that have been added to the groups included in this bundle and add these tasks to the bundle.
146 | 
147 |   \emph{Usage:}
148 |   \code{update_groups()}
149 | 
150 |   \emph{Value}:
151 | 
152 |   Invisibly, the ids of tasks that have been added to the bundle.
153 | }
154 | }
155 | }
156 | 
157 | 
-------------------------------------------------------------------------------- /man/worker.Rd: --------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/worker.R
3 | \name{worker}
4 | \alias{worker}
5 | \title{Create an rrqueue worker}
6 | \usage{
7 | worker(queue_name, redis_host = "127.0.0.1", redis_port = 6379,
8 |   heartbeat_period = 30, heartbeat_expire = 90, key_worker_alive = NULL)
9 | }
10 | \arguments{
11 | \item{queue_name}{Queue name}
12 | 
13 | \item{redis_host}{Host name/IP for the Redis server}
14 | 
15 | \item{redis_port}{Port for the Redis server}
16 | 
17 | \item{heartbeat_period}{Period between heartbeat pulses}
18 | 
19 | \item{heartbeat_expire}{Time that heartbeat pulses will persist
20 | for (must be greater than \code{heartbeat_period})}
21 | 
22 | \item{key_worker_alive}{Optional key to write to when the worker
23 | becomes alive. The worker will push onto this key so that
24 | another process can monitor it and determine when a worker has
25 | come up.}
26 | }
27 | \description{
28 | Create an rrqueue worker. This blocks the main loop.
29 | }
30 | 
31 | 
-------------------------------------------------------------------------------- /man/worker_spawn.Rd: --------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/worker_spawn.R
3 | \name{worker_spawn}
4 | \alias{worker_spawn}
5 | \title{Spawn a worker}
6 | \usage{
7 | worker_spawn(queue_name, logfile, redis_host = "127.0.0.1",
8 |   redis_port = 6379, n = 1, timeout = 20, time_poll = 1,
9 |   heartbeat_period = NULL, heartbeat_expire = NULL, path = ".")
10 | }
11 | \arguments{
12 | \item{queue_name}{Name of the queue to connect to}
13 | 
14 | \item{logfile}{Name of a log file to write to (consider
15 | \code{tempfile()}).
If \code{n} > 1, then \code{n} log files
16 | must be provided.}
17 | 
18 | \item{redis_host}{Host name/IP for the Redis server}
19 | 
20 | \item{redis_port}{Port for the Redis server}
21 | 
22 | \item{n}{Number of workers to spawn}
23 | 
24 | \item{timeout}{Time to wait for the worker to appear}
25 | 
26 | \item{time_poll}{Period to poll for the worker (must be in
27 | seconds)}
28 | 
29 | \item{heartbeat_period}{Period between heartbeat pulses}
30 | 
31 | \item{heartbeat_expire}{Time that heartbeat pulses will persist}
32 | 
33 | \item{path}{Path to start the worker in. By default workers will
34 | start in the current working directory, but you can start them
35 | elsewhere by providing a path here. If the path does not exist,
36 | an error will be thrown. If \code{n} is greater than 1, all
37 | workers will start in the same working directory. The
38 | \code{logfile} argument will be interpreted relative to the current
39 | working directory (not the worker working directory); use
40 | \code{\link{normalizePath}} to convert into an absolute path
41 | name to prevent this.}
42 | }
43 | \description{
44 | Spawn a worker in the background
45 | }
46 | \details{
47 | Spawning multiple workers. If \code{n} is greater than one,
48 | multiple workers will be spawned. This happens in parallel so it
49 | does not take \code{n} times longer than spawning a single worker.
50 | 
51 | Beware that signals like Ctrl-C passed to \emph{this} R instance
52 | can still propagate to the child processes and can result in them
53 | dying unexpectedly. It is probably safer to start processes in a
54 | standalone session.
55 | }
56 | 
57 | 
-------------------------------------------------------------------------------- /man/worker_stop.Rd: --------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/worker.R
3 | \name{worker_stop}
4 | \alias{worker_stop}
5 | \title{Stop a worker}
6 | \usage{
7 | worker_stop(queue, worker_id, type = "message", host = "127.0.0.1",
8 |   port = 6379)
9 | }
10 | \arguments{
11 | \item{queue}{Name of the queue}
12 | 
13 | \item{worker_id}{Name of a single worker}
14 | 
15 | \item{type}{Either "message" to send a message or "kill" to kill the
16 | worker.}
17 | 
18 | \item{host}{Redis hostname}
19 | 
20 | \item{port}{Redis port}
21 | }
22 | \description{
23 | Try to stop a worker. You won't need to call this very often, hopefully.
24 | }
25 | 
26 | 
-------------------------------------------------------------------------------- /man/yaml_env.Rd: --------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/env.R
3 | \name{yaml_env}
4 | \alias{yaml_env}
5 | \title{Load environment variables from a yaml file}
6 | \usage{
7 | yaml_env(filename, section = NULL)
8 | }
9 | \arguments{
10 | \item{filename}{Name of the file}
11 | 
12 | \item{section}{An optional section of the file to load}
13 | }
14 | \description{
15 | Load environment variables from a yaml file. This is a hack for a
16 | project. It may change and may move to another package. \code{callr} would
17 | be a better fit probably, but \code{callr} doesn't pull in
18 | \code{yaml} yet so I don't know that it's a good fit.
19 | }
20 | \details{
21 | The yaml file must be sets of key/value pairs of simple data
22 | types.
Something like:
23 | 
24 | \preformatted{
25 |   REDIS_HOST: localhost
26 | }
27 | 
28 | Alternatively, for use with section, add an extra layer of nesting:
29 | 
30 | \preformatted{
31 | local:
32 |   REDIS_HOST: localhost
33 | remote:
34 |   REDIS_HOST: redis.marathon.mesos
35 | }
36 | }
37 | 
38 | 
-------------------------------------------------------------------------------- /tests/testthat.R: --------------------------------------------------------------------------------
1 | library(testthat)
2 | library(rrqueue)
3 | 
4 | test_check("rrqueue")
5 | 
-------------------------------------------------------------------------------- /tests/testthat/config.yml: --------------------------------------------------------------------------------
1 | queue_name: tmpjobs
2 | ## Only affect workers:
3 | heartbeat_period: 30
4 | heartbeat_expire: 90
5 | key_worker_alive: ~
6 | ## Only affect queues:
7 | packages: ~
8 | sources:
9 |   - "myfuns.R"
10 | redis:
11 |   host: 127.0.0.1
12 |   port: 6379
13 | 
-------------------------------------------------------------------------------- /tests/testthat/config2.yml: --------------------------------------------------------------------------------
1 | queue_name: tmpjobs
2 | redis:
3 |   host: redis.mesos.marathon
4 | 
-------------------------------------------------------------------------------- /tests/testthat/config3.yml: --------------------------------------------------------------------------------
1 | redis:
2 |   host: redis.mesos.marathon
3 | 
-------------------------------------------------------------------------------- /tests/testthat/helper-rrqueue.R: --------------------------------------------------------------------------------
1 | object_to_string <- RedisAPI::object_to_string
2 | string_to_object <- RedisAPI::string_to_object
3 | 
4 | ## TODO: run in db 15 or something?
5 | empty_named_list <- function() {
6 |   structure(list(), names = character(0))
7 | }
8 | empty_named_character <- function() {
9 |   structure(character(0), names = character(0))
10 | }
11 | 
12 | test_cleanup <- function() {
13 |   test_queue_clean("tmpjobs")
14 |   test_queue_clean("myqueue") # vignette
15 |   test_queue_clean("testq:heartbeat")
16 | }
17 | 
18 | test_queue_clean <- function(name) {
19 |   queue_clean(redis_connection(NULL), name, purge=TRUE, stop_workers="kill")
20 | }
21 | 
22 | ## Looks like a bug to me, relative to the docs:
23 | PSKILL_SUCCESS <- tools::pskill(Sys.getpid(), 0)
24 | pid_exists <- function(pid) {
25 |   tools::pskill(pid, 0) == PSKILL_SUCCESS
26 | }
27 | 
-------------------------------------------------------------------------------- /tests/testthat/myfuns.R: --------------------------------------------------------------------------------
1 | slowdouble <- function(x) {
2 |   Sys.sleep(x)
3 |   x * 2
4 | }
5 | 
6 | suml <- function(x) {
7 |   x[[1]] + x[[2]]
8 | }
9 | 
10 | prod2 <- function(a, b) {
11 |   a * b
12 | }
13 | 
14 | ## Simulate a function called for side effects, but without side effects...
15 | ret_null <- function(...) {
16 |   Sys.sleep(0.02)
17 |   NULL
18 | }
19 | 
20 | failure <- function(controlled) {
21 |   if (controlled) {
22 |     try(stop("an expected error"), silent=TRUE)
23 |   } else {
24 |     stop("an unexpected error")
25 |   }
26 | }
27 | 
28 | ## This is a blocking call that skips the R interrupt loop; we won't
29 | ## listen for SIGINT during this and the stop request will fail.
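## (Editorial aside: the `block` helper below deliberately skips the
## interrupt loop. For contrast, a sketch of an interruptible variant
## -- hypothetical, not used by these tests -- that issues short BLPOP
## calls so R re-enters its interrupt loop between polls. It assumes,
## as `block` does, that BLPOP returns NULL on timeout.)
block_polling <- function(n, poll = 1) {
  con <- rrqueue:::redis_connection(NULL)
  key <- ids::adjective_animal()
  for (i in seq_len(ceiling(n / poll))) {
    res <- con$BLPOP(key, poll)  # waits at most `poll` seconds
    if (!is.null(res)) {
      return(res)
    }
  }
  NULL
}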
30 | block <- function(n) {
31 |   key <- ids::adjective_animal()
32 |   rrqueue:::redis_connection(NULL)$BLPOP(key, n)
33 | }
34 | 
-------------------------------------------------------------------------------- /tests/testthat/test-files.R: --------------------------------------------------------------------------------
1 | context("files")
2 | 
3 | test_that("io", {
4 |   filename <- "myfuns.R"
5 |   str <- read_file_to_string(filename)
6 |   expect_that(hash_string(str), equals(hash_file(filename)))
7 | 
8 |   tmp <- tempfile("rrqueue_")
9 |   write_string_to_file(str, tmp)
10 |   expect_that(hash_file(tmp), equals(hash_file(filename)))
11 |   expect_that(readLines(tmp), not(gives_warning()))
12 | })
13 | 
14 | test_that("files", {
15 |   test_cleanup()
16 |   con <- redis_connection(NULL)
17 |   prefix <- "tmpjobs:files:"
18 |   cache <- file_cache(prefix, con)
19 | 
20 |   expect_that(cache$list(), equals(character(0)))
21 | 
22 |   obj <- files_pack(cache)
23 |   expect_that(obj, equals(structure(character(0), class="files_pack")))
24 | 
25 |   obj <- files_pack(cache, "myfuns.R")
26 |   expect_that(length(obj), equals(1))
27 |   expect_that(names(obj), equals("myfuns.R"))
28 |   expect_that(obj[["myfuns.R"]], equals(hash_file("myfuns.R")))
29 | 
30 |   tmp <- tempfile("rrqueue_")
31 |   files_unpack(cache, obj, tmp)
32 |   path <- file.path(tmp, "myfuns.R")
33 |   expect_that(file.exists(path), is_true())
34 |   expect_that(hash_file(path), equals(obj[["myfuns.R"]]))
35 | })
36 | 
-------------------------------------------------------------------------------- /tests/testthat/test-heartbeat.R: --------------------------------------------------------------------------------
1 | context("heartbeat")
2 | 
3 | ## This file collects all the really nasty stuff with starting and
4 | ## destroying worker processes, and detecting that with the heartbeat
5 | ## process. There is a lot of mocking up and it's slow to develop and
6 | ## test. There is a real problem with cascading test failures
7 | ## throughout, so many failures might just be caused by a single root
8 | ## cause.
9 | test_that("heartbeat with signals", {
10 |   test_cleanup()
11 | 
12 |   ## Fairly fast worker timeout:
13 |   expire <- 3
14 |   obj <- queue("tmpjobs", sources="myfuns.R")
15 |   expect_that(obj$workers_list_exited(), equals(character(0)))
16 | 
17 |   logfile <- "worker_heartbeat.log"
18 |   Sys.setenv("R_TESTS" = "")
19 |   wid <- worker_spawn(obj$queue_name, logfile,
20 |                       heartbeat_period=1, heartbeat_expire=expire)
21 |   ## worker("tmpjobs", heartbeat_expire=3, heartbeat_period=1)
22 | 
23 |   pid <- obj$workers_info()[[wid]]$pid
24 |   expect_that(pid_exists(pid), is_true())
25 | 
26 |   expect_that(obj$workers_running(wid),
27 |               equals(setNames(TRUE, wid)))
28 |   expect_that(obj$workers_status(),
29 |               equals(setNames(WORKER_IDLE, wid)))
30 | 
31 |   t <- obj$enqueue(slowdouble(100))
32 |   Sys.sleep(.5)
33 |   expect_that(obj$workers_status(wid), equals(setNames("BUSY", wid)))
34 |   expect_that(is.na(t$times()[["finished"]]), is_true())
35 | 
36 |   obj$send_signal(tools::SIGINT, wid)
37 |   Sys.sleep(.5)
38 | 
39 |   expect_that(t$status(), equals(TASK_ORPHAN))
40 |   expect_that(is.na(t$times()[["finished"]]), is_false())
41 | 
42 |   ## Machine still alive:
43 |   expect_that(pid_exists(pid), is_true())
44 | 
45 |   ## Realistically, this will trigger fairly quickly.
46 |   for (i in seq_len(10)) {
47 |     ## Another one at this point will do nothing:
48 |     obj$send_signal(tools::SIGINT, wid)
49 |     Sys.sleep(.5)
50 |     expect_that(pid_exists(pid), is_true())
51 | 
52 |     ## Check that an additional job will work OK and not get caught by
53 |     ## the *second* interrupt (this did happen to me before)
54 |     tt <- obj$enqueue(sin(1))
55 |     expect_that(res <- tt$wait(1), not(throws_error()))
56 |     expect_that(res, equals(sin(1)))
57 | 
58 |     if ("REQUEUE" %in% obj$workers_log_tail(wid, 0)$command) {
59 |       break
60 |     }
61 |   }
62 |   expect_that("REQUEUE" %in% obj$workers_log_tail(wid, 0)$command,
63 |               is_true())
64 | 
65 |   t <- obj$enqueue(slowdouble(100))
66 |   Sys.sleep(.5)
67 |   obj$send_signal(tools::SIGTERM, wid)
68 |   Sys.sleep(.5)
69 | 
70 |   expect_that(pid_exists(pid), is_false())
71 | 
72 |   expect_that(obj$workers_running(wid),
73 |               equals(setNames(TRUE, wid)))
74 |   expect_that(obj$workers_list(), equals(wid))
75 | 
76 |   Sys.sleep(expire)
77 | 
78 |   expect_that(obj$workers_running(wid),
79 |               equals(setNames(FALSE, wid)))
80 |   ## Still in the workers list, though:
81 |   expect_that(obj$workers_list(), equals(wid))
82 | 
83 |   ret <- obj$workers_identify_lost()
84 | 
85 |   expect_that(ret, equals(list(workers=wid, tasks=t$id)))
86 |   expect_that(t$status(), equals(TASK_ORPHAN))
87 |   expect_that(is.na(t$times()[["finished"]]), is_false())
88 |   expect_that(obj$workers_status(wid), equals(setNames(WORKER_LOST, wid)))
89 |   expect_that(obj$workers_list_exited(), equals(wid))
90 | 
91 |   ## We were killed in the act:
92 |   log <- obj$workers_log_tail(wid, 1)
93 |   expect_that(log$command, equals("TASK_START"))
94 | 
95 |   ## Then we can clean this mess up:
96 |   res <- obj$workers_delete_exited()
97 |   expect_that(res, equals(wid))
98 |   expect_that(obj$workers_list_exited(), equals(character(0)))
99 |   expect_that(obj$workers_delete_exited(), equals(character(0)))
100 | 
101 |   ## No worker keys, plenty of task keys:
102 |   expect_that(RedisAPI::scan_find(obj$con, "tmpjobs:tasks:*"),
103 |               not(equals(character(0))))
104 |   expect_that(RedisAPI::scan_find(obj$con, "tmpjobs:workers:*"),
105 |               equals(character(0)))
106 | })
107 | 
108 | test_that("heartbeat shutdown when running job", {
109 |   expire <- 3
110 | 
111 |   test_cleanup()
112 |   obj <- queue("tmpjobs", sources="myfuns.R")
113 |   expect_that(obj$workers_list_exited(), equals(character(0)))
114 | 
115 |   logfile <- "worker_heartbeat.log"
116 |   Sys.setenv("R_TESTS" = "")
117 |   wid <- worker_spawn(obj$queue_name, logfile,
118 |                       heartbeat_period=1, heartbeat_expire=expire)
119 |   ## worker("tmpjobs", heartbeat_expire=3, heartbeat_period=1)
120 |   ## wid <- obj$workers_list()
121 |   pid <- obj$workers_info()[[wid]]$pid
122 |   expect_that(pid_exists(pid), is_true())
123 | 
124 |   t <- obj$enqueue(slowdouble(100))
125 |   Sys.sleep(.5)
126 |   expect_that(obj$workers_status(wid), equals(setNames("BUSY", wid)))
127 | 
128 |   ## This message will be ignored:
129 |   obj$send_message("STOP")
130 |   Sys.sleep(.5)
131 |   expect_that(obj$workers_status(wid), equals(setNames("BUSY", wid)))
132 |   expect_that(pid_exists(pid), is_true())
133 |   expect_that(t$status(), equals("RUNNING"))
134 | 
135 |   ## But after an interrupt the message will be honoured:
136 |   obj$send_signal(tools::SIGINT, wid)
137 |   Sys.sleep(.5)
138 | 
139 |   ## The shutdown *is* clean:
140 |   expect_that(obj$workers_status(wid),
141 |               equals(setNames(WORKER_EXITED, wid)))
142 |   log <- obj$workers_log_tail(wid, 1)
143 |   expect_that(log$command, equals("STOP"))
144 |   expect_that(log$message, equals("OK"))
145 | 
expect_that(pid_exists(pid), is_false()) 146 | 147 | ## The task was orphaned: 148 | expect_that(t$status(), equals(TASK_ORPHAN)) 149 | }) 150 | 151 | test_that("requeue orphaned jobs", { 152 | expire <- 3 153 | 154 | test_cleanup() 155 | obj <- queue("tmpjobs", sources="myfuns.R") 156 | expect_that(obj$workers_list_exited(), equals(character(0))) 157 | 158 | logfile <- "worker_heartbeat.log" 159 | Sys.setenv("R_TESTS" = "") 160 | wid <- worker_spawn(obj$queue_name, logfile, 161 | heartbeat_period=1, heartbeat_expire=expire) 162 | ## worker("tmpjobs", heartbeat_expire=3, heartbeat_period=1) 163 | ## wid <- obj$workers_list() 164 | pid <- obj$workers_info()[[wid]]$pid 165 | expect_that(pid_exists(pid), is_true()) 166 | 167 | t_double <- 5 168 | t <- obj$enqueue(slowdouble(t_double)) 169 | Sys.sleep(.5) 170 | expect_that(obj$workers_status(wid), equals(setNames("BUSY", wid))) 171 | expect_that(is.na(t$times()[["finished"]]), is_true()) 172 | 173 | obj$send_signal(tools::SIGINT, wid) 174 | Sys.sleep(.5) 175 | 176 | expect_that(t$status(), equals(TASK_ORPHAN)) 177 | expect_that(is.na(t$times()[["finished"]]), is_false()) 178 | 179 | t2 <- obj$requeue(t$id) 180 | expect_that(t$status(), equals(TASK_REDIRECT)) 181 | Sys.sleep(0.5) 182 | expect_that(t2$status(), equals(TASK_RUNNING)) 183 | Sys.sleep(t_double) 184 | expect_that(t2$status(), equals(TASK_COMPLETE)) 185 | 186 | expect_that(t$status(follow_redirect=TRUE), equals(TASK_COMPLETE)) 187 | expect_that(t$result(follow_redirect=TRUE), equals(t_double * 2)) 188 | expect_that(t$result(), throws_error("task [0-9]+ is unfetchable")) 189 | 190 | tt <- obj$tasks_times(t2$id) 191 | expect_that(tt, is_a("data.frame")) 192 | expect_that(tt$running, is_more_than(t_double - 1)) 193 | expect_that(tt$running, is_less_than(t_double + 1)) 194 | 195 | obj$send_message("STOP") 196 | }) 197 | 198 | 199 | test_that("workers stop", { 200 | expire <- 3 201 | 202 | test_cleanup() 203 | obj <- queue("tmpjobs", sources="myfuns.R") 204 | expect_that(obj$workers_list_exited(), equals(character(0))) 205 | 206 | logfile <- "worker_heartbeat.log" 207 | Sys.setenv("R_TESTS" = "") 208 | wid <- worker_spawn(obj$queue_name, logfile, 209 | heartbeat_period=1, heartbeat_expire=expire) 210 | ## worker("tmpjobs", heartbeat_expire=3, heartbeat_period=1) 211 | ## wid <- obj$workers_list() 212 | pid <- obj$workers_info()[[wid]]$pid 213 | expect_that(pid_exists(pid), is_true()) 214 | 215 | ## TODO: worker handles could help avoid this bit of mock up: 216 | expect_that(msg <- worker_stop_message( 217 | list(con=obj$con, 218 | keys=list(queue_name=obj$queue_name), 219 | name=wid, 220 | styles=worker_styles())), 221 | shows_message()) 222 | cmp <- bquote(rrqueue::worker_stop(.(obj$queue_name), .(wid))) 223 | expect_that(parse(text=msg[[length(msg)]])[[1]], 224 | equals(cmp)) 225 | eval(cmp, .GlobalEnv) 226 | 227 | Sys.sleep(.5) 228 | expect_that(pid_exists(pid), is_false()) 229 | log <- obj$workers_log_tail(wid, 1) 230 | expect_that(log$command, equals("STOP")) 231 | expect_that(log$message, equals("OK")) 232 | }) 233 | 234 | test_that("stop_workers (simple case)", { 235 | test_cleanup() 236 | obj <- queue("tmpjobs", sources="myfuns.R") 237 | expect_that(obj$workers_list_exited(), equals(character(0))) 238 | 239 | logfile <- "worker_heartbeat.log" 240 | Sys.setenv("R_TESTS" = "") 241 | wid <- worker_spawn(obj$queue_name, logfile) 242 | 243 | ok <- obj$stop_workers(wid, wait=10) 244 | expect_that(ok, equals(wid)) 245 | 246 | expect_that(obj$workers_list(), equals(character(0))) 247 | 
expect_that(obj$workers_list_exited(), equals(wid)) 248 | log <- obj$workers_log_tail(wid, 3) 249 | expect_that(log$command, equals(c("MESSAGE", "RESPONSE", "STOP"))) 250 | expect_that(log$message, equals(c("STOP", "STOP", "OK"))) 251 | 252 | res <- obj$workers_identify_lost() 253 | expect_that(res, equals(list(workers=character(), tasks=character()))) 254 | 255 | expect_that(obj$workers_delete_exited(), equals(wid)) 256 | expect_that(obj$workers_list_exited(), equals(character(0))) 257 | expect_that(nrow(obj$workers_log_tail(wid, 1)), equals(0)) 258 | }) 259 | 260 | test_that("stop_workers (running, interrupt)", { 261 | test_cleanup() 262 | obj <- queue("tmpjobs", sources="myfuns.R") 263 | expect_that(obj$workers_list_exited(), equals(character(0))) 264 | 265 | logfile <- "worker_heartbeat.log" 266 | Sys.setenv("R_TESTS" = "") 267 | wid <- worker_spawn(obj$queue_name, logfile) 268 | 269 | len <- 2 270 | t <- obj$enqueue(slowdouble(len)) 271 | Sys.sleep(.25) 272 | expect_that(t$status(), equals(TASK_RUNNING)) 273 | ok <- obj$stop_workers(wid, interrupt=TRUE) 274 | expect_that(ok, equals(wid)) 275 | 276 | Sys.sleep(.25) 277 | 278 | expect_that(t$status(), equals(TASK_ORPHAN)) 279 | expect_that(obj$workers_list(), equals(character(0))) 280 | expect_that(obj$workers_list_exited(), equals(wid)) 281 | log <- obj$workers_log_tail(wid, 5) 282 | expect_that(log$command, equals(c("INTERRUPT", "TASK_ORPHAN", 283 | "MESSAGE", "RESPONSE", "STOP"))) 284 | expect_that(log$message, equals(c("", t$id, "STOP", "STOP", "OK"))) 285 | 286 | res <- obj$workers_identify_lost() 287 | expect_that(res, equals(list(workers=character(), tasks=character()))) 288 | 289 | expect_that(obj$workers_delete_exited(), equals(wid)) 290 | expect_that(obj$workers_list_exited(), equals(character(0))) 291 | expect_that(nrow(obj$workers_log_tail(wid, 1)), equals(0)) 292 | }) 293 | 294 | test_that("stop_workers (running, wait)", { 295 | test_cleanup() 296 | obj <- queue("tmpjobs", sources="myfuns.R") 297 | expect_that(obj$workers_list_exited(), equals(character(0))) 298 | 299 | logfile <- "worker_heartbeat.log" 300 | Sys.setenv("R_TESTS" = "") 301 | wid <- worker_spawn(obj$queue_name, logfile) 302 | 303 | len <- 2 304 | t <- obj$enqueue(slowdouble(len)) 305 | Sys.sleep(.25) 306 | expect_that(t$status(), equals(TASK_RUNNING)) 307 | ok <- obj$stop_workers(wid, interrupt=FALSE) 308 | expect_that(ok, equals(wid)) 309 | 310 | Sys.sleep(len + .25) 311 | expect_that(t$status(), equals(TASK_COMPLETE)) 312 | expect_that(obj$workers_list(), equals(character(0))) 313 | expect_that(obj$workers_list_exited(), equals(wid)) 314 | log <- obj$workers_log_tail(wid, 1) 315 | expect_that(log$command, equals("STOP")) 316 | expect_that(log$message, equals("OK")) 317 | 318 | expect_that(obj$workers_delete_exited(), equals(wid)) 319 | expect_that(obj$workers_list_exited(), equals(character(0))) 320 | expect_that(nrow(obj$workers_log_tail(wid, 1)), equals(0)) 321 | }) 322 | 323 | test_that("stop_workers (blocking)", { 324 | test_cleanup() 325 | obj <- queue("tmpjobs", sources="myfuns.R") 326 | expect_that(obj$workers_list_exited(), equals(character(0))) 327 | 328 | expire <- 3 329 | logfile <- "worker_heartbeat.log" 330 | Sys.setenv("R_TESTS" = "") 331 | wid <- worker_spawn(obj$queue_name, logfile, 332 | heartbeat_expire=expire, heartbeat_period=1) 333 | 334 | t <- obj$enqueue(block(10)) 335 | Sys.sleep(.5) 336 | 337 | ok <- obj$stop_workers(wid, wait=1) 338 | expect_that(ok, equals(wid)) 339 | expect_that(t$status(), equals(TASK_RUNNING)) 340 | 341 | 
  Sys.sleep(expire)
342 |   res <- obj$workers_identify_lost()
343 |   expect_that(res, equals(list(workers=wid, tasks=t$id)))
344 |   expect_that(t$status(), equals(TASK_ORPHAN))
345 | })
346 | 
347 | test_that("clean up multiple workers", {
348 |   test_cleanup()
349 |   obj <- queue("tmpjobs", sources="myfuns.R")
350 |   expect_that(obj$workers_list_exited(), equals(character(0)))
351 | 
352 |   expire <- 3
353 |   n <- 4
354 |   logfile <- sprintf("worker_%d.log", seq_len(n))
355 |   wid <- worker_spawn(obj$queue_name, logfile, n=n,
356 |                       heartbeat_expire=expire, heartbeat_period=1)
357 | 
358 |   expect_that(sort(obj$workers_list()), equals(sort(wid)))
359 |   for (i in seq_len(n)) {
360 |     obj$enqueue(slowdouble(1000))
361 |   }
362 |   Sys.sleep(.5)
363 |   ids <- obj$workers_task_id()
364 | 
365 |   ## Kill two savagely:
366 |   wkill <- obj$workers_list()[2:3]
367 |   pid <- viapply(obj$workers_info()[wkill], "[[", "pid")
368 |   ok <- tools::pskill(pid, tools::SIGTERM) == PSKILL_SUCCESS
369 |   Sys.sleep(expire + 1)
370 | 
371 |   expect_that(sort(obj$workers_list()), equals(sort(wid)))
372 |   x <- obj$workers_identify_lost()
373 | 
374 |   expect_that(sort(x$workers), equals(sort(wkill)))
375 |   expect_that(sort(x$tasks), equals(sort(unname(ids[wkill]))))
376 | 
377 |   expect_that(obj$tasks_status(x$tasks),
378 |               equals(setNames(c(TASK_ORPHAN, TASK_ORPHAN),
379 |                               x$tasks)))
380 |   expect_that(obj$workers_status(x$workers),
381 |               equals(setNames(c(WORKER_LOST, WORKER_LOST), wkill)))
382 | 
383 |   obj$stop_workers()
384 | })
385 | 
-------------------------------------------------------------------------------- /tests/testthat/test-match-fun.R: --------------------------------------------------------------------------------
1 | context("function matching")
2 | 
3 | ## NOTE: this process has the potential to be quite slow; I don't
4 | ## think we want to do it for everything over and over. But let's
5 | ## start here and work back from there.
6 | 
7 | test_that("has_namespace", {
8 |   expect_that(has_namespace("foo::bar"), is_true())
9 |   expect_that(has_namespace("::bar"), is_true())
10 |   expect_that(has_namespace("foo::"), is_true())
11 |   ## false positive:
12 |   expect_that(has_namespace("foo::bar::baz"), is_true())
13 |   expect_that(has_namespace("foo"), is_false())
14 |   expect_that(has_namespace(":foo:"), is_false())
15 | })
16 | 
17 | test_that("split_namespace", {
18 |   expect_that(split_namespace("foo::bar"), equals(c("foo", "bar")))
19 |   expect_that(split_namespace("::bar"), equals(c("", "bar")))
20 |   ## Odd behaviour of strsplit():
21 | 
22 |   throws <- throws_error("Not a namespace-qualified variable")
23 |   expect_that(split_namespace("foo::"), throws)
24 |   ## false positive: should this error here?
25 | expect_that(split_namespace("foo::bar::baz"), throws) 26 | expect_that(split_namespace("foo"), throws) 27 | expect_that(split_namespace(":foo:"), throws) 28 | }) 29 | 30 | test_that("match_fun_name", { 31 | e <- new.env(parent=parent.env(.GlobalEnv)) 32 | source("myfuns.R", e) 33 | 34 | expect_that(match_fun_name("rrqueue::worker", e), 35 | equals(c("rrqueue", "worker"))) 36 | expect_that(match_fun_name("rrqueuex::worker", e), 37 | throws_error("Did not find function")) 38 | expect_that(match_fun_name("rrqueue::workerx", e), 39 | throws_error("Did not find function")) 40 | 41 | cmp <- structure(c("", "slowdouble"), envir=e) 42 | expect_that(match_fun_name("slowdouble", e), equals(cmp)) 43 | expect_that(match_fun_name("slowdouble_no_such", e), 44 | throws_error("Did not find function")) 45 | }) 46 | 47 | test_that("match_fun_symbol", { 48 | e <- new.env(parent=parent.env(.GlobalEnv)) 49 | source("myfuns.R", e) 50 | expect_that(match_fun_symbol(quote(worker), e), 51 | equals(c("rrqueue", "worker"))) 52 | expect_that(match_fun_symbol(quote(workerx), e), 53 | throws_error("Did not find function")) 54 | 55 | cmp <- structure(c("", "slowdouble"), envir=e) 56 | expect_that(match_fun_symbol(quote(slowdouble), e), 57 | equals(cmp)) 58 | expect_that(match_fun_symbol(quote(slowdouble_no_such), e), 59 | throws_error("Did not find function")) 60 | }) 61 | 62 | test_that("match_fun_value", { 63 | e <- new.env(parent=parent.env(.GlobalEnv)) 64 | source("myfuns.R", e) 65 | fun <- e$slowdouble 66 | 67 | expect_that(match_fun_value(worker, e), 68 | equals(c("rrqueue", "worker"))) 69 | ww <- worker 70 | expect_that(match_fun_value(ww, e), 71 | equals(c("rrqueue", "worker"))) 72 | expect_that(match_fun_value(get("worker"), e), 73 | equals(c("rrqueue", "worker"))) 74 | 75 | cmp <- structure(c("", "slowdouble"), envir=e) 76 | expect_that(match_fun_value(fun, e), 77 | equals(cmp)) 78 | 79 | e2 <- new.env(parent=parent.env(.GlobalEnv)) 80 | source("myfuns.R", e2) 81 | expect_that(e2$slowdouble, not(is_identical_to(e$slowdouble))) 82 | 83 | expect_that(match_fun_value(worker, baseenv()), 84 | throws_error("Did not find function")) 85 | expect_that(match_fun_value(fun, e2), 86 | throws_error("Did not find function")) 87 | }) 88 | 89 | test_that("match_fun", { 90 | env <- new.env(parent=parent.env(.GlobalEnv)) 91 | source("myfuns.R", env) 92 | 93 | fun <- env$slowdouble 94 | cmp <- structure(c("", "slowdouble"), envir=env) 95 | 96 | expect_that(match_fun("slowdouble", env), equals(cmp)) 97 | expect_that(match_fun(get("slowdouble", env), env), equals(cmp)) 98 | expect_that(match_fun(fun, env), equals(cmp)) 99 | 100 | fun <- rrqueue::worker 101 | cmp <- c("rrqueue", "worker") 102 | 103 | expect_that(match_fun("rrqueue::worker", env), equals(cmp)) 104 | expect_that(match_fun("worker", env), equals(cmp)) 105 | expect_that(match_fun(rrqueue::worker, env), equals(cmp)) 106 | expect_that(match_fun(get("worker", env), env), equals(cmp)) 107 | expect_that(match_fun(fun, env), equals(cmp)) 108 | }) 109 | 110 | test_that("match_fun_rrqueue", { 111 | env1 <- new.env(parent=parent.env(.GlobalEnv)) 112 | env2 <- new.env(parent=parent.env(.GlobalEnv)) 113 | env3 <- new.env(parent=parent.env(.GlobalEnv)) 114 | source("myfuns.R", env1) 115 | env3$slowdouble <- function(x) x * 2 116 | 117 | fun <- env1$slowdouble 118 | cmp <- structure(c("", "slowdouble"), envir=env1) 119 | 120 | expect_that(match_fun_rrqueue("slowdouble", env1, env1), 121 | equals(cmp)) 122 | expect_that(match_fun_rrqueue("slowdouble", env1, env2), 123 | 
              throws_error("Function not found in rrqueue environment"))
124 |   expect_that(match_fun_rrqueue("slowdouble", env1, env3),
125 |               throws_error("Function found in given and rrqueue"))
126 | 
127 |   expect_that(match_fun_rrqueue(fun, env1, env1),
128 |               equals(cmp))
129 |   expect_that(match_fun_rrqueue(fun, env1, env2),
130 |               throws_error("Function not found in rrqueue environment"))
131 |   expect_that(match_fun_rrqueue(fun, env1, env3),
132 |               throws_error("Function found in given and rrqueue"))
133 | })
134 | 
-------------------------------------------------------------------------------- /tests/testthat/test-rrqlapply.R: --------------------------------------------------------------------------------
1 | context("rrqlapply")
2 | 
3 | ## The first "high" level thing; a really basic mclapply-like clone.
4 | ## TODO: test passing in an unknown function
5 | test_that("Basic use", {
6 |   test_cleanup()
7 |   on.exit(test_cleanup())
8 | 
9 |   obj <- queue("tmpjobs", sources="myfuns.R")
10 |   expect_that(obj$tasks_groups_list(), equals(character(0)))
11 |   x <- sample(1:10, 20, replace=TRUE)
12 |   rrql <- rrqlapply_submit(x, "sin", obj)
13 |   expect_that(rrql, is_a("task_bundle"))
14 | 
15 |   grp <- obj$tasks_groups_list()
16 |   expect_that(length(grp), equals(1L))
17 | 
18 |   expect_that(grp, equals(rrql$groups))
19 |   expect_that(obj$tasks_in_groups(grp),
20 |               equals(rrql$ids()))
21 |   expect_that(rrql$names, equals(names(x)))
22 | 
23 |   tmp <- task_bundle_get(obj, groups=grp)
24 |   expect_that(tmp, is_a("task_bundle"))
25 |   expect_that(tmp$groups, equals(grp))
26 |   expect_that(tmp$names, equals(NULL))
27 |   expect_that(tmp$key_complete, equals(rrql$key_complete))
28 | 
29 |   ## TODO: it would be nice to filter by jobs in the bundle...
30 |   monitor_status(obj)
31 | 
32 |   task_ids <- rrql$ids()
33 |   expect_that(obj$tasks_status(task_ids),
34 |               equals(setNames(rep(TASK_PENDING, length(x)), task_ids)))
35 | 
36 |   wid <- worker_spawn(obj$queue_name, "rrqlapply.log")
37 |   ## w <- rrqueue::worker("tmpjobs", heartbeat_period=10)
38 | 
39 |   ## TODO:
40 |   Sys.sleep(0.5)
41 |   expect_that(obj$tasks_status(task_ids),
42 |               equals(setNames(rep(TASK_COMPLETE, length(x)), task_ids)))
43 | 
44 |   res <- rrql$wait()
45 |   cmp <- setNames(lapply(x, sin), rrql$ids())
46 |   expect_that(res, equals(cmp, tolerance=1e-15))
47 | 
48 |   expect_that(obj$tasks_status(task_ids),
49 |               equals(setNames(rep(TASK_COMPLETE, length(x)), task_ids)))
50 | 
51 |   rrql$delete_tasks()
52 | 
53 |   ## Cleanup has happened
54 |   expect_that(obj$tasks_status(task_ids),
55 |               equals(setNames(rep(TASK_MISSING, length(x)), task_ids)))
56 | 
57 |   res <- rrqlapply(x, "sin", obj, progress_bar=FALSE)
58 |   ## NOTE: hardcoded name here:
59 |   expect_that(res, equals(setNames(cmp, 21:40), tolerance=1e-15))
60 | 
61 |   obj$send_message("STOP")
62 | })
63 | 
64 | test_that("null return", {
65 |   test_cleanup()
66 |   on.exit(test_cleanup())
67 | 
68 |   obj <- queue("tmpjobs", sources="myfuns.R")
69 |   x <- sample(1:10, 20, replace=TRUE)
70 |   rrql <- rrqlapply_submit(x, "ret_null", obj)
71 |   monitor_status(obj)
72 | 
73 |   task_ids <- rrql$ids()
74 |   expect_that(obj$tasks_status(task_ids),
75 |               equals(setNames(rep(TASK_PENDING, length(x)), task_ids)))
76 | 
77 |   wid <- worker_spawn(obj$queue_name, "rrqlapply.log")
78 |   ## w <- rrqueue::worker("tmpjobs", heartbeat_period=10)
79 | 
80 |   ## TODO:
81 |   Sys.sleep(1.0)
82 |   expect_that(obj$tasks_status(task_ids),
83 |               equals(setNames(rep(TASK_COMPLETE, length(x)), task_ids)))
84 | 
85 |   res <- rrql$wait(progress_bar=FALSE)
86 |   cmp <- named_list(rrql$ids())
87 | expect_that(res, equals(cmp)) 88 | 89 | ## And again: 90 | expect_that(rrql$wait(progress_bar=FALSE), equals(cmp)) 91 | rrql$delete_tasks() 92 | 93 | ## Cleanup has happened 94 | expect_that(obj$tasks_status(task_ids), 95 | equals(setNames(rep(TASK_MISSING, length(x)), task_ids))) 96 | 97 | res <- rrqlapply(x, "ret_null", obj, progress_bar=FALSE) 98 | expect_that(res, equals(setNames(cmp, 21:40))) 99 | 100 | obj$send_message("STOP") 101 | }) 102 | 103 | test_that("bulk", { 104 | test_cleanup() 105 | x <- expand.grid(a=1:4, b=runif(3)) 106 | 107 | obj <- queue("tmpjobs", sources="myfuns.R") 108 | 109 | ## Serial versions: 110 | cmp_sum <- lapply(df_to_list(x), suml) 111 | cmp_prod <- lapply(df_to_list(x), function(el) prod2(el$a, el$b)) 112 | 113 | wid <- worker_spawn(obj$queue_name, "rrqlapply.log") 114 | 115 | res <- enqueue_bulk_submit(x, suml, obj) 116 | expect_that(res$groups, is_a("character")) 117 | 118 | ans <- res$wait() 119 | expect_that(ans, equals(setNames(cmp_sum, res$ids()))) 120 | 121 | ## All at once: 122 | res <- enqueue_bulk(x, prod2, obj, do.call=TRUE) 123 | expect_that(unname(res), equals(cmp_prod)) 124 | 125 | obj$send_message("STOP") 126 | test_cleanup() 127 | }) 128 | -------------------------------------------------------------------------------- /tests/testthat/test-task-bundle.R: -------------------------------------------------------------------------------- 1 | context("task_bundle") 2 | 3 | test_that("simple", { 4 | test_cleanup() 5 | on.exit(test_cleanup()) 6 | 7 | existing <- queues() 8 | ## expect_that(existing, equals(character(0))) 9 | 10 | obj <- queue("tmpjobs", sources="myfuns.R") 11 | 12 | group <- "mygroup" 13 | x <- obj$task_bundle_get(group) 14 | 15 | expect_that(x, is_a("task_bundle")) 16 | expect_that(x$ids(), equals(character(0))) 17 | expect_that(x$groups, equals(group)) 18 | expect_that(x$update_groups(), equals(character(0))) 19 | expect_that(x$results(), equals(empty_named_list())) 20 | expect_that(x$wait(), equals(empty_named_list())) 21 | expect_that(as.list(x$overview()), 22 | equals(list(PENDING=0, RUNNING=0, COMPLETE=0, ERROR=0))) 23 | 24 | ## Queue up a job: 25 | t <- obj$enqueue(sin(1), group=group) 26 | 27 | expect_that(x$update_groups(), equals(t$id)) 28 | expect_that(x$update_groups(), equals(character(0))) 29 | 30 | ids <- t$id 31 | for (i in 1:3) { 32 | t <- obj$enqueue(sin(1), group=group) 33 | ids <- c(ids, t$id) 34 | } 35 | 36 | expect_that(x$update_groups(), equals(ids[-1])) 37 | expect_that(x$update_groups(), equals(character(0))) 38 | 39 | expect_that(x$ids(), equals(ids)) 40 | 41 | expect_that(x$status(), 42 | equals(setNames(rep(TASK_PENDING, length(ids)), ids))) 43 | 44 | expect_that(x$results(), throws_error("Tasks not yet completed")) 45 | expect_that(x$wait(0), throws_error("Tasks not yet completed")) 46 | expect_that(x$wait(1), throws_error("Exceeded maximum time")) 47 | 48 | expect_that(x$status(), 49 | equals(setNames(rep(TASK_PENDING, length(ids)), ids))) 50 | 51 | ## Start a worker: 52 | logfile <- "worker.log" 53 | Sys.setenv("R_TESTS" = "") 54 | wid <- worker_spawn(obj$queue_name, logfile) 55 | 56 | Sys.sleep(.5) 57 | 58 | expect_that(x$status(), 59 | equals(setNames(rep(TASK_COMPLETE, length(ids)), ids))) 60 | 61 | cmp <- setNames(rep(list(sin(1)), length(ids)), ids) 62 | expect_that(x$results(), equals(cmp)) 63 | 64 | ## Set some names and watch them come out too: 65 | x$names <- letters[1:4] 66 | x$results() 67 | cmp <- setNames(rep(list(sin(1)), length(ids)), letters[1:4]) 68 | expect_that(x$results(), 
equals(cmp))
69 | 
70 | ## Add a new task:
71 | t <- obj$enqueue(sin(1), group=group)
72 | Sys.sleep(.5)
73 | 
74 | ## Nothing has changed in the bundle:
75 | expect_that(x$results(), equals(cmp))
76 | 
77 | id <- x$update_groups()
78 | expect_that(id, equals(t$id))
79 | ids <- c(ids, id)
80 | 
81 | ## Names have been removed:
82 | expect_that(x$names, equals(NULL))
83 | 
84 | cmp <- setNames(rep(list(sin(1)), length(ids)), ids)
85 | expect_that(x$results(), equals(cmp))
86 | 
87 | t <- obj$enqueue(slowdouble(2), group=group)
88 | x$update_groups()
89 | st <- setNames(rep(c(TASK_COMPLETE, TASK_RUNNING), c(length(ids), 1)),
90 | c(ids, t$id))
91 | Sys.sleep(.2)
92 | expect_that(x$status(), equals(st))
93 | r <- x$wait(3)
94 | 
95 | cmp <- c(cmp, setNames(list(4), t$id))
96 | expect_that(r, equals(cmp))
97 | expect_that(x$status(),
98 | equals(setNames(rep(TASK_COMPLETE, length(r)), names(r))))
99 | 
100 | expect_that(x$wait1(1), equals(NULL))
101 | expect_that(x$wait1(1), takes_less_than(1))
102 | 
103 | t1 <- obj$enqueue(slowdouble(1), group=group)
104 | t2 <- obj$enqueue(slowdouble(2), group=group)
105 | x$update_groups()
106 | res <- x$wait1(60)
107 | expect_that(res[[1]], equals(t1$id))
108 | expect_that(res[[2]], equals(2))
109 | res <- x$wait1(60)
110 | expect_that(res[[1]], equals(t2$id))
111 | expect_that(res[[2]], equals(4))
112 | expect_that(x$wait1(), is_null())
113 | 
114 | t3 <- obj$enqueue(slowdouble(3), group=group)
115 | x$update_groups()
116 | res0 <- x$wait1(1)
117 | expect_that(res0, is_null())
118 | res <- x$wait1(10)
119 | expect_that(res, equals(list(id=t3$id, result=t3$result())))
120 | 
121 | expect_that(as.list(x$overview()),
122 | equals(list(PENDING=0, RUNNING=0, COMPLETE=9, ERROR=0)))
123 | 
124 | ## Yeah, this is not going to work.
125 | xt <- x$times()
126 | expect_that(xt, is_a("data.frame"))
127 | cols <- c("task_id", "submitted", "started", "finished",
128 | "waiting", "running", "idle")
129 | expect_that(names(xt), equals(cols))
130 | cols <- setdiff(cols, "idle")
131 | expect_that(xt[cols], equals(obj$tasks_times(x$ids())[cols]))
132 | 
133 | ## Get the bundle again:
134 | y <- obj$task_bundle_get(group)
135 | expect_that(y$ids(), equals(x$ids()))
136 | expect_that(y$status(), equals(x$status()))
137 | expect_that(y$results(), equals(x$results()))
138 | 
139 | obj$stop_workers(wid)
140 | })
141 | 
--------------------------------------------------------------------------------
/tests/testthat/test-worker.R:
--------------------------------------------------------------------------------
1 | context("worker")
2 | 
3 | test_that("config", {
4 | expect_that(rrqueue_worker_args(character(0)),
5 | throws_error("usage: rrqueue_worker"))
6 | 
7 | queue_name <- "tmpjobs"
8 | 
9 | opts <- rrqueue_worker_args(queue_name)
10 | expect_that(opts$queue_name, equals(queue_name))
11 | expect_that(opts$redis_host, equals("127.0.0.1"))
12 | expect_that(opts$redis_port, equals(6379))
13 | expect_that(opts$heartbeat_period, equals(30))
14 | expect_that(opts$heartbeat_expire, equals(90))
15 | expect_that(opts$key_worker_alive, is_null())
16 | 
17 | opts <- rrqueue_worker_args(c("--config", "config.yml"))
18 | dat <- yaml_read("config.yml")
19 | expect_that(opts$queue_name, equals(dat$queue_name))
20 | expect_that(opts$redis_host, equals(dat$redis$host))
21 | expect_that(opts$redis_port, equals(dat$redis$port))
22 | expect_that(opts$heartbeat_period, equals(dat$heartbeat_period))
23 | expect_that(opts$heartbeat_expire, equals(dat$heartbeat_expire))
24 | expect_that(opts$key_worker_alive, is_null())
25 | 
26 | ## override some opts:
27 | opts <- rrqueue_worker_args(c("--config", "config.yml",
28 | "--key-worker-alive", "mykey"))
29 | expect_that(opts$key_worker_alive, equals("mykey"))
30 | 
31 | opts <- rrqueue_worker_args(c("--config", "config.yml",
32 | "--redis-port", "9999"))
33 | expect_that(opts$redis_port, equals("9999"))
34 | 
35 | opts <- rrqueue_worker_args(c("--config", "config.yml", queue_name))
36 | expect_that(opts$queue_name, equals(queue_name))
37 | 
38 | ## And again with a configuration that loads very little:
39 | opts <- rrqueue_worker_args(c("--config", "config2.yml",
40 | "--key-worker-alive", "mykey"))
41 | expect_that(opts$key_worker_alive, equals("mykey"))
42 | expect_that(opts$redis_host, equals(yaml_read("config2.yml")$redis$host))
43 | expect_that(opts$redis_port, equals(6379))
44 | 
45 | expect_that(rrqueue_worker_args(c("--config", "config3.yml")),
46 | throws_error("queue name must be given"))
47 | opts <- rrqueue_worker_args(c("--config", "config3.yml", queue_name))
48 | expect_that(opts$queue_name, equals(queue_name))
49 | })
50 | 
51 | test_that("workers_times - nonexistent worker", {
52 | obs <- observer("tmpjobs")
53 | name <- "no such worker"
54 | t <- obs$workers_times(name)
55 | expect_that(t, is_a("data.frame"))
56 | expect_that(nrow(t), equals(1))
57 | expect_that(t, equals(data.frame(worker_id=name,
58 | expire_max=NA_real_,
59 | expire=-2.0,
60 | last_seen=NA_real_,
61 | last_action=NA_real_,
62 | stringsAsFactors=FALSE)))
63 | })
64 | 
65 | test_that("workers_times - no workers", {
66 | obs <- observer("tmpjobs")
67 | t <- obs$workers_times()
68 | expect_that(t, is_a("data.frame"))
69 | expect_that(nrow(t), equals(0))
70 | expect_that(t, equals(data.frame(worker_id=character(0),
71 |
expire_max=numeric(0), 72 | expire=numeric(0), 73 | last_seen=numeric(0), 74 | last_action=numeric(0), 75 | stringsAsFactors=FALSE))) 76 | 77 | expect_that(obs$workers_list(), equals(character(0))) 78 | expect_that(obs$workers_status(), equals(empty_named_character())) 79 | 80 | log <- obs$workers_log_tail() 81 | expect_that(log, equals(data.frame(worker_id=character(0), 82 | time=character(0), 83 | command=character(0), 84 | message=character(0), 85 | stringsAsFactors=FALSE))) 86 | test_cleanup() 87 | }) 88 | -------------------------------------------------------------------------------- /update_web.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | set -e 3 | 4 | DOCS_DIR=inst/web 5 | VERSION=$(git rev-parse --short HEAD) 6 | REMOTE_URL=$(git config --get remote.origin.url) 7 | 8 | rm -rf ${DOCS_DIR}/.git 9 | git init ${DOCS_DIR} 10 | git -C ${DOCS_DIR} checkout --orphan gh-pages 11 | git -C ${DOCS_DIR} add . 12 | git -C ${DOCS_DIR} commit --no-verify -m "Update docs for version ${VERSION}" 13 | git -C ${DOCS_DIR} remote add origin -m "gh-pages" ${REMOTE_URL} 14 | git -C ${DOCS_DIR} push --force -u origin gh-pages 15 | -------------------------------------------------------------------------------- /vignettes/introduction.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Introduction to rrqueue" 3 | author: "Rich FitzJohn" 4 | date: "`r Sys.Date()`" 5 | output: rmarkdown::html_vignette 6 | vignette: > 7 | %\VignetteIndexEntry{Introduction to rrqueue} 8 | %\VignetteEngine{knitr::rmarkdown} 9 | %\VignetteEncoding{UTF-8} 10 | --- 11 | 12 | # Overview 13 | 14 | `rrqueue` is a *distributed task queue* for R, implemented on top of [Redis](http://redis.io). 15 | At the cost of a little more work it allows for more flexible parallelisation than afforded by `mclapply`. 16 | The main goal is to support non-map style operations: submit some tasks, collect the completed results, 17 | queue more even while some tasks are still running. 18 | 19 | Other features include: 20 | 21 | * Low-level task submission / retrieval has a simple API so that asynchronous task queues can be created. 22 | * Objects representing tasks, workers, queues, etc can be queried. 23 | * While blocking `mclapply`-like functions are available, the package is designed to be non-blocking so that intermediate results can be used. 24 | * Automatic fingerprinting of environments so that code run on a remote machine will correspond to the code found locally. 25 | * Works well connecting to a Redis database running on the cloud (e.g., on an AWS machine over an ssh tunnel). 26 | * Local workers can be added to a remote pool, so long as everything can talk to the same Redis server. 27 | * The worker pool can be scaled at any time (up or down). 28 | * Basic fault tolerance, supporting re-queuing tasks lost on crashed workers. 29 | 30 | The basic workflow is: 31 | 32 | 1. Create a queue 33 | 2. Submit tasks to the queue 34 | 3. Start workers 35 | 4. Collect results 36 | 37 | The workers can be started at any time between 1-3, though they do need to be started before results can be collected. 38 | 39 | Documenting things that work asynchronously is difficult. This 40 | document gives a tutorial-style overview of working with rrqueue. 41 | 42 | # Getting started 43 | 44 | The queue and workers can be started in any order, but it's easiest 45 | to explain starting the queue first. 
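To fix ideas, this is the whole round trip in four calls, using only functions that are walked through in detail below (a sketch, not run here):

``` r
obj <- rrqueue::queue("myqueue", sources="myfuns.R")  # 1. create a queue
t <- obj$enqueue(slowdouble(1))                       # 2. submit a task
wid <- rrqueue::worker_spawn("myqueue", tempfile())   # 3. start a worker
t$result()  # 4. collect the result (once the task has had time to finish)
```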
46 | 
47 | Suppose we have some simulation code; it needs to be in a file that
48 | the queue can see. For now, I'll use the file `myfuns.R` which is
49 | the test code. It has a function in it called `slowdouble` that
50 | takes a number, sleeps for that many seconds, and then returns
51 | twice the number. It's useful for testing.
52 | 
53 | ``` {r echo=FALSE, results="hide"}
54 | rrqueue:::queue_clean(redux::hiredis(), "myqueue",
55 | purge=TRUE, stop_workers="kill")
56 | lang_output <- function(x, lang) {
57 | cat(c(sprintf("```%s", lang), x, "```"), sep="\n")
58 | }
59 | cpp_output <- function(x) lang_output(x, "c++")
60 | r_output <- function(x) lang_output(x, "r")
61 | yaml_output <- function(x) lang_output(x, "yaml")
62 | plain_output <- function(x) lang_output(x, "plain")
63 | ```
64 | 
65 | You'll also need a running Redis server. I have one operating with
66 | the default parameters, so this works:
67 | ``` {r }
68 | redux::hiredis()$PING()
69 | ```
70 | 
71 | Create a queue called "myqueue" and tell it to load the source file
72 | "myfuns.R". If it needed to load packages, then passing
73 | `packages=c("package1", "package2")` would indicate that workers
74 | would need to load those packages, too.
75 | ``` {r }
76 | obj <- rrqueue::queue("myqueue", sources="myfuns.R")
77 | ```
78 | 
79 | The message "creating new queue" here indicates that `rrqueue` did
80 | not find any previous queues in place. Queues are designed to be
81 | re-attachable, so we can immediately just do that:
82 | ``` {r }
83 | obj <- rrqueue::queue("myqueue", sources="myfuns.R")
84 | ```
85 | 
86 | The message also notes that we have no workers available, so no
87 | work is going to get done. But we can still queue some tasks.
88 | 
89 | # Queuing tasks
90 | 
91 | The simplest sort of task queuing is to pass an expression into `enqueue`:
92 | ``` {r }
93 | t <- obj$enqueue(1 + 1)
94 | ```
95 | 
96 | The expression is not evaluated but stored, and will be evaluated on
97 | the worker. Saving the result of this gives a `task` object which
98 | can be inspected.
99 | ``` {r }
100 | t
101 | ```
102 | 
103 | The expression stored in the task:
104 | ``` {r }
105 | t$expr()
106 | ```
107 | 
108 | The status of the task:
109 | ``` {r }
110 | t$status()
111 | ```
112 | 
113 | The result of the task, which will throw an error if we try to fetch it before the task has run:
114 | ``` {r error=TRUE}
115 | t$result()
116 | ```
117 | 
118 | And how long the task has been waiting:
119 | ``` {r }
120 | t$times()
121 | ```
122 | 
123 | Tasks can use local variables, too:
124 | ``` {r }
125 | x <- 10
126 | t2 <- obj$enqueue(x * 2)
127 | t2$expr()
128 | ```
129 | 
130 | And because using unevaluated expressions can be problematic,
131 | `rrqueue` has a standard-evaluation version (`enqueue_`) which takes
132 | either strings representing expressions or quoted expressions:
133 | ``` {r }
134 | obj$enqueue_(quote(x / 2))
135 | ```
136 | 
137 | Now we have three tasks:
138 | ``` {r }
139 | obj$tasks_list()
140 | ```
141 | 
142 | All the tasks are waiting to be run:
143 | ``` {r }
144 | obj$tasks_status()
145 | ```
146 | 
147 | We can get an overview of the tasks:
148 | ``` {r }
149 | obj$tasks_overview()
150 | ```
151 | 
152 | # Starting workers
153 | 
154 | `rrqueue` includes a script `rrqueue_worker` for starting workers
155 | from the command line (install with `rrqueue::install_scripts()`); for example:
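(These invocations are a sketch: the flags are the ones exercised in `tests/testthat/test-worker.R`, and `config.yml` stands in for your own configuration file.)

``` sh
# Start a worker for "myqueue" against the default Redis (127.0.0.1:6379):
rrqueue_worker myqueue

# Read options from a configuration file, overriding single options on
# the command line:
rrqueue_worker --config config.yml --redis-port 9999 myqueue
```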
156 | Workers can also be started from within R using the `worker_spawn`
157 | function:
158 | ``` {r }
159 | logfile <- tempfile()
160 | wid <- rrqueue::worker_spawn("myqueue", logfile)
161 | ```
162 | ``` {r echo=FALSE}
163 | Sys.sleep(.5)
164 | ```
165 | 
166 | This function returns the *worker identifier*, which is also
167 | printed to the screen.
168 | 
169 | It's probably informative at this point to read the logfile of the
170 | worker to see what it did on startup:
171 | 
172 | ``` {r results="asis", echo=TRUE}
173 | plain_output(readLines(logfile))
174 | ```
175 | 
176 | The worker first prints a lot of diagnostic information to the
177 | screen (or log file) indicating the name of the worker, the version
178 | of rrqueue, machine information, and special keys in the database
179 | where important information is stored.
180 | 
181 | Then, after broadcasting that it is awake (`ALIVE`), it detects that
182 | there is a controller on the queue and attempts to construct
183 | the environment that the controller wants (`r paste("ENVIR", obj$envir_id)`).
184 | 
185 | After that, there are a series of `TASK_START`, `EXPR`, and
186 | `TASK_COMPLETE` lines as each of the three tasks is processed.
187 | ``` {r }
188 | obj$tasks_status()
189 | ```
190 | 
191 | The times here give an indication of the rrqueue overhead; the
192 | running time of these simple expressions should be close to zero.
193 | ``` {r }
194 | obj$tasks_times()
195 | ```
196 | 
197 | The task handle created before can now give a result:
198 | ``` {r }
199 | t$result()
200 | ```
201 | 
202 | Similarly, results can be retrieved from the queue directly:
203 | ``` {r }
204 | obj$task_result(1)
205 | obj$task_result(2)
206 | obj$task_result(3)
207 | ```
208 | 
209 | The worker that we created can be seen here:
210 | ``` {r }
211 | obj$workers_list()
212 | ```
213 | 
214 | Queue a slower task, this time using the `slowdouble` function. This
215 | will take 1s:
216 | ``` {r echo=TRUE}
217 | t <- obj$enqueue(slowdouble(1))
218 | t$status()
219 | Sys.sleep(.3)
220 | t$status()
221 | Sys.sleep(1)
222 | t$status()
223 | t$result()
224 | ```
225 | 
226 | Again, times are available:
227 | ``` {r }
228 | t$times()
229 | ```
230 | 
231 | # Finishing up
232 | ``` {r }
233 | obj$stop_workers()
234 | ```
235 | 
236 | ``` {r echo=FALSE}
237 | Sys.sleep(.5)
238 | ```
239 | 
240 | ``` {r results="asis", echo=FALSE}
241 | plain_output(readLines(logfile))
242 | ```
243 | 
244 | The worker is now in the exited list:
245 | ``` {r }
246 | obj$workers_list_exited()
247 | ```
248 | 
249 | The full log from our worker (dropping the first column, which is
250 | the worker id and takes up valuable space here):
251 | ``` {r }
252 | obj$workers_log_tail(wid, Inf)[-1]
253 | ```
254 | 
255 | ``` {r echo=FALSE, results="hide"}
256 | rrqueue:::queue_clean(obj$con, obj$queue_name, purge=TRUE)
257 | ```
258 | --------------------------------------------------------------------------------
/vignettes/messages.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "rrqueue messages"
3 | author: "Rich FitzJohn"
4 | date: "`r Sys.Date()`"
5 | output: rmarkdown::html_vignette
6 | vignette: >
7 | %\VignetteIndexEntry{rrqueue messages}
8 | %\VignetteEngine{knitr::rmarkdown}
9 | %\VignetteEncoding{UTF-8}
10 | ---
11 | 
12 | ``` {r echo=FALSE, results="hide"}
13 | rrqueue:::queue_clean(redux::hiredis(), "myqueue",
14 | purge=TRUE, stop_workers="kill")
15 | lang_output <- function(x, lang) {
16 | cat(c(sprintf("```%s", lang), x, "```"), sep="\n")
17 | }
18 | make_reader <- function(filename, lang="plain") {
19 | n <- 0
20 | force(filename)
21 | force(lang)
22 | function() {
23 | txt <- readLines(filename)
24 | if (n > 0) {
25 | txt <- txt[-seq_len(n)]
26 | }
27 | n <<- n + length(txt)
28 | lang_output(txt, lang)
29 | }
30 | }
31 | ```
32 | 
33 | In addition to passing tasks (and results) between a controller and
34 | workers, the controller can also send "messages" to workers. This
35 | vignette shows what the possible messages do.
36 | 
37 | In order to do this, we're going to need a queue and a worker:
38 | ``` {r }
39 | obj <- rrqueue::queue("myqueue", sources="myfuns.R")
40 | logfile <- tempfile()
41 | worker_id <- rrqueue::worker_spawn("myqueue", logfile)
42 | ```
43 | 
44 | On startup the worker log contains:
45 | ``` {r results="asis", echo=FALSE}
46 | reader <- make_reader(logfile)
47 | reader()
48 | ```
49 | 
50 | Because one of the main effects of messages is to print to the
51 | worker logfile, we'll print this fairly often.
52 | 
53 | ## Messages and responses
54 | 
55 | 1. The queue sends a message for one or more workers to process.
56 | The message has an *identifier* that is derived from the current
57 | time. Messages are written to a first-in-first-out queue, *per
58 | worker*, and are processed independently by workers who do not
59 | look to see if other workers have messages or are processing
60 | them.
61 | 
62 | 2. As soon as a worker has finished processing any current job it
63 | will process the message (it must wait to finish a current job
64 | but will not start any further jobs).
65 | 
66 | 3. Once the message has been processed (see below) a response will
67 | be written to a response list with the same identifier as the
68 | message.
69 | 
70 | ## `PING`
71 | 
72 | The `PING` message simply asks the worker to return `PONG`. It's
73 | useful for diagnosing communication issues because it does so
74 | little:
75 | ``` {r }
76 | message_id <- obj$send_message("PING")
77 | ```
78 | 
79 | The message id is going to be useful for getting responses:
80 | ``` {r }
81 | message_id
82 | ```
83 | 
84 | (this is derived from the current time, according to Redis, which is
85 | the central reference point of time for the whole system).
86 | 
87 | Wait a little while:
88 | ``` {r }
89 | Sys.sleep(.5)
90 | ```
91 | 
92 | ``` {r results="asis", echo=TRUE}
93 | reader()
94 | ```
95 | 
96 | The logfile prints:
97 | 
98 | 1. the request for the `PING` (`MESSAGE PING`)
99 | 2. the value `PONG`, printed to the R message stream
100 | 3. logging a response (`RESPONSE PONG`), which means that something is written to the response stream.
101 | 
102 | We can access the same bits of information in the worker log:
103 | ``` {r }
104 | obj$workers_log_tail(n=Inf)[-1]
105 | ```
106 | 
107 | This includes the `ALIVE` and `ENVIR` bits as the worker comes up.
108 | 
109 | Inspecting the logs is fine for interactive use, but it's often going to
110 | be more useful to poll for a response.
111 | 
112 | We already know that our worker has a response, but we can ask anyway:
113 | ``` {r }
114 | obj$has_responses(message_id)
115 | ```
116 | 
117 | Or inversely we can ask what messages a given worker has responses for:
118 | ``` {r }
119 | obj$response_ids(worker_id)
120 | ```
121 | 
122 | To fetch the responses from all workers it was sent to (always
123 | returning a named list):
124 | ``` {r }
125 | obj$get_responses(message_id)
126 | ```
127 | 
128 | or to fetch the response from a given worker:
129 | ``` {r }
130 | obj$get_response(message_id, worker_id)
131 | ```
132 | 
133 | The response can be deleted by passing `delete=TRUE` to this method:
134 | ``` {r }
135 | obj$get_response(message_id, worker_id, delete=TRUE)
136 | ```
137 | 
138 | after which requesting the response again will throw an error:
139 | ``` {r error=TRUE}
140 | obj$get_response(message_id, worker_id, delete=TRUE)
141 | ```
142 | 
143 | There is also a `wait` argument that lets you wait until a response
144 | is ready. The `slowdouble` command will take a few seconds, so to
145 | demonstrate:
146 | ``` {r }
147 | obj$enqueue(slowdouble(2))
148 | message_id <- obj$send_message("PING")
149 | obj$get_response(message_id, worker_id, delete=TRUE, wait=10)
150 | ```
151 | 
152 | Looking at the log will show what went on here:
153 | ``` {r }
154 | obj$workers_log_tail(n=4)[-1]
155 | ```
156 | 
157 | 1. A task is received
158 | 2. 2s later the task is completed
159 | 3. Then the message is received
160 | 4. Then, basically instantaneously, the message is responded to
161 | 
162 | However, because the message is only processed after the task is
163 | completed, the response takes a while to come back. Equivalently,
164 | from the worker log:
165 | 
166 | ``` {r results="asis", echo=FALSE}
167 | reader()
168 | ```
169 | 
170 | ## `ECHO`
171 | 
172 | This is basically like `PING` and not very interesting; it prints
173 | an arbitrary string to the log. It always returns `"OK"` as a
174 | response.
175 | 
176 | ``` {r }
177 | message_id <- obj$send_message("ECHO", "hello world!")
178 | obj$get_response(message_id, worker_id, delete=TRUE, wait=10)
179 | ```
180 | 
181 | ``` {r results="asis", echo=FALSE}
182 | reader()
183 | ```
184 | 
185 | ## `INFO`
186 | 
187 | The `INFO` command refreshes and returns the worker information.
188 | 
189 | We already have a copy of the worker info; it was created when the
190 | worker started up:
191 | ``` {r }
192 | obj$workers_info()[[worker_id]]
193 | ```
194 | 
195 | Note that the `envir` field is currently empty (`{}`) because when
196 | the worker started it did not know about any environments.
197 | 
198 | ``` {r }
199 | message_id <- obj$send_message("INFO")
200 | ```
201 | 
202 | Here's the new worker information, complete with an updated `envir`
203 | field:
204 | ``` {r }
205 | obj$get_response(message_id, worker_id, delete=TRUE, wait=10)
206 | ```
207 | 
208 | This has been updated on the database copy too:
209 | ``` {r }
210 | obj$workers_info()[[worker_id]]$envir
211 | ```
212 | 
213 | and the same information is printed to the worker log:
214 | ``` {r results="asis", echo=FALSE}
215 | reader()
216 | ```
217 | 
218 | ## `DIR`
219 | 
220 | This is useful for listing directory contents, similar to the `dir`
221 | function in R. However, because file *contents* are usually more
222 | interesting (e.g., when working out why something is not running on the
223 | remote machine), this is basically the result of passing the
224 | results of `dir` to `tools::md5sum` in order to get the md5sum of
225 | each file.
226 | ``` {r }
227 | message_id <- obj$send_message("DIR")
228 | obj$get_response(message_id, worker_id, delete=TRUE, wait=10)
229 | ```
230 | 
231 | Additional arguments to `dir` can be passed through:
232 | ``` {r }
233 | message_id <- obj$send_message("DIR", list(pattern="\\.R$"))
234 | obj$get_response(message_id, worker_id, delete=TRUE, wait=10)
235 | ```
236 | 
237 | If you pass invalid arguments to `dir`, then a reasonably
238 | helpful message should be generated:
239 | ``` {r }
240 | message_id <- obj$send_message("DIR", list(foo="bar"))
241 | obj$get_response(message_id, worker_id, delete=TRUE, wait=10)
242 | ```
243 | 
244 | (note that this does not generate an error locally, but you can
245 | test to see if it did throw an error by checking the class of the
246 | returned value).
247 | 
248 | and the same information is printed to the worker log:
249 | ``` {r results="asis", echo=FALSE}
250 | reader()
251 | ```
252 | 
253 | ## `PUSH`
254 | 
255 | The commands `PUSH` and `PULL` move files to and from the worker.
256 | The command is interpreted as an instruction to the worker, so
257 | `PUSH` pushes files from the worker into the database while `PULL`
258 | pulls files from the database into the worker. There are (or will be)
259 | preferable higher-level ways of dealing with this.
260 | 
261 | Things to be aware of here: Redis is an in-memory store and rrqueue
262 | is not at all aggressive about deleting objects. If you push a 1GB
263 | file into Redis things *will* go badly. There are no checks for
264 | this at present!
265 | 
266 | `PUSH` takes a vector of filenames as an argument. The response is
267 | not the file itself (how could it do that?) but instead the *hash*
268 | of that file. By the time the response is received the file
269 | contents are stored in the database and can be returned.
270 | ``` {r }
271 | message_id <- obj$send_message("PUSH", "myfuns.R")
272 | res <- obj$get_response(message_id, worker_id, delete=TRUE, wait=10)
273 | res
274 | ```
275 | 
276 | We can save the file into a temporary directory on the filesystem
277 | using the `files_unpack` method of `queue`:
278 | ``` {r }
279 | path <- obj$files_unpack(res)
280 | dir(path)
281 | ```
282 | 
283 | And the files have the expected hash:
284 | ``` {r }
285 | tools::md5sum(file.path(path, names(res)))
286 | ```
287 | 
288 | ``` {r results="asis", echo=FALSE}
289 | reader()
290 | ```
291 | 
292 | ## `PULL`
293 | 
294 | This is the inverse of `PUSH`: it takes files from the machine the
295 | queue is running on and copies them into the worker (from the view
296 | of the worker, the files in question are already in the database
297 | and it will "pull" them down locally).
298 | 
299 | First, we need to save files into the database. Let's rename the
300 | temporary file above and save that:
301 | ``` {r }
302 | file.rename(file.path(path, "myfuns.R"),
303 | "brandnewcode.R")
304 | res <- obj$files_pack("brandnewcode.R")
305 | res
306 | ```
307 | 
308 | Note that the hash here is the same as above: `rrqueue` can tell
309 | this is the same file even though it now has a different filename. Note
310 | also that filenames will be interpreted relative to the working
311 | directory, because the directory layout on the worker, beyond the
312 | working directory itself, could be arbitrarily different.
313 | 
314 | Now that the files have been packed, we can run the `PULL` command
315 | 
316 | (note that the `PULL` command *always* unpacks files into the
317 | worker's working directory):
318 | ``` {r }
319 | message_id <- obj$send_message("PULL")
320 | obj$get_response(message_id, worker_id, delete=TRUE, wait=10)
321 | ```
322 | 
323 | And the new file will be present in the directory:
324 | ``` {r }
325 | message_id <- obj$send_message("DIR", list(pattern="\\.R$"))
326 | obj$get_response(message_id, worker_id, delete=TRUE, wait=10)
327 | ```
328 | 
329 | ``` {r results="asis", echo=FALSE}
330 | reader()
331 | ```
332 | 
333 | ## `EVAL`
334 | 
335 | Evaluate an arbitrary R expression, passed as a string (*not* as
336 | any sort of unevaluated or quoted expression). This expression is
337 | evaluated in the global environment, which is *not* the environment
338 | in which queued code is evaluated.
339 | 
340 | ``` {r }
341 | message_id <- obj$send_message("EVAL", "1 + 1")
342 | obj$get_response(message_id, worker_id, delete=TRUE, wait=10)
343 | ```
344 | 
345 | We can delete the file created above:
346 | ``` {r }
347 | message_id <- obj$send_message("EVAL", "file.remove('brandnewcode.R')")
348 | obj$get_response(message_id, worker_id, delete=TRUE, wait=10)
349 | ```
350 | 
351 | ``` {r results="asis", echo=FALSE}
352 | reader()
353 | ```
354 | 
355 | This could be used to evaluate code that has side effects, such as
356 | installing packages. However, due to limitations in how R loads
357 | packages, the only way to update and reload a package is
358 | to restart the worker.
359 | 
360 | ## `STOP`
361 | 
362 | Stop sends a shutdown message to the worker. Generally you should
363 | prefer the `stop_workers` method, which uses `STOP` behind the
364 | scenes.
365 | 366 | ``` {r } 367 | message_id <- obj$send_message("STOP") 368 | obj$get_response(message_id, worker_id, delete=TRUE, wait=10) 369 | ``` 370 | 371 | ``` {r results="asis", echo=FALSE} 372 | reader() 373 | ``` 374 | -------------------------------------------------------------------------------- /vignettes/myfuns.R: -------------------------------------------------------------------------------- 1 | slowdouble <- function(x) { 2 | Sys.sleep(x) 3 | x * 2 4 | } 5 | -------------------------------------------------------------------------------- /vignettes/src/introduction.R: -------------------------------------------------------------------------------- 1 | ## --- 2 | ## title: "Introduction to rrqueue" 3 | ## author: "Rich FitzJohn" 4 | ## date: "`r Sys.Date()`" 5 | ## output: rmarkdown::html_vignette 6 | ## vignette: > 7 | ## %\VignetteIndexEntry{Introduction to rrqueue} 8 | ## %\VignetteEngine{knitr::rmarkdown} 9 | ## %\VignetteEncoding{UTF-8} 10 | ## --- 11 | 12 | ## # Overview 13 | 14 | ## `rrqueue` is a *distributed task queue* for R, implemented on top of [Redis](http://redis.io). 15 | ## At the cost of a little more work it allows for more flexible parallelisation than afforded by `mclapply`. 16 | ## The main goal is to support non-map style operations: submit some tasks, collect the completed results, 17 | ## queue more even while some tasks are still running. 18 | 19 | ## Other features include: 20 | 21 | ## * Low-level task submission / retrieval has a simple API so that asynchronous task queues can be created. 22 | ## * Objects representing tasks, workers, queues, etc can be queried. 23 | ## * While blocking `mclapply`-like functions are available, the package is designed to be non-blocking so that intermediate results can be used. 24 | ## * Automatic fingerprinting of environments so that code run on a remote machine will correspond to the code found locally. 25 | ## * Works well connecting to a Redis database running on the cloud (e.g., on an AWS machine over an ssh tunnel). 26 | ## * Local workers can be added to a remote pool, so long as everything can talk to the same Redis server. 27 | ## * The worker pool can be scaled at any time (up or down). 28 | ## * Basic fault tolerance, supporting re-queuing tasks lost on crashed workers. 29 | 30 | ## The basic workflow is: 31 | 32 | ## 1. Create a queue 33 | ## 2. Submit tasks to the queue 34 | ## 3. Start workers 35 | ## 4. Collect results 36 | 37 | ## The workers can be started at any time between 1-3, though they do need to be started before results can be collected. 38 | 39 | ## Documenting things that work asynchronously is difficult. This 40 | ## document gives a tutorial-style overview of working with rrqueue. 41 | 42 | ## # Getting started 43 | 44 | ## The queue and workers can be started in any order, but it's easiest 45 | ## to explain starting the queue first. 46 | 47 | ## Suppose we have some simulation code; it needs to be in a file that 48 | ## the queue can see. For now, I'll use the file `myfuns.R` which is 49 | ## the test code. It has a function in it called `slowdouble` that 50 | ## takes a number, sleeps for that many seconds, and then returns 51 | ## twice the number. It's useful for testing. 
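## For reference, the whole of the `myfuns.R` used here (the copy in
## `vignettes/myfuns.R`) is just:
##
## ```r
## slowdouble <- function(x) {
##   Sys.sleep(x)
##   x * 2
## }
## ```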
52 | 
53 | ##+ echo=FALSE, results="hide"
54 | rrqueue:::queue_clean(redux::hiredis(), "myqueue",
55 | purge=TRUE, stop_workers="kill")
56 | lang_output <- function(x, lang) {
57 | cat(c(sprintf("```%s", lang), x, "```"), sep="\n")
58 | }
59 | cpp_output <- function(x) lang_output(x, "c++")
60 | r_output <- function(x) lang_output(x, "r")
61 | yaml_output <- function(x) lang_output(x, "yaml")
62 | plain_output <- function(x) lang_output(x, "plain")
63 | 
64 | ## You'll also need a running Redis server. I have one operating with
65 | ## the default parameters, so this works:
66 | redux::hiredis()$PING()
67 | 
68 | ## Create a queue called "myqueue" and tell it to load the source file
69 | ## "myfuns.R". If it needed to load packages, then passing
70 | ## `packages=c("package1", "package2")` would indicate that workers
71 | ## would need to load those packages, too.
72 | obj <- rrqueue::queue("myqueue", sources="myfuns.R")
73 | 
74 | ## The message "creating new queue" here indicates that `rrqueue` did
75 | ## not find any previous queues in place. Queues are designed to be
76 | ## re-attachable, so we can immediately just do that:
77 | obj <- rrqueue::queue("myqueue", sources="myfuns.R")
78 | 
79 | ## The message also notes that we have no workers available, so no
80 | ## work is going to get done. But we can still queue some tasks.
81 | 
82 | ## # Queuing tasks
83 | 
84 | ## The simplest sort of task queuing is to pass an expression into `enqueue`:
85 | t <- obj$enqueue(1 + 1)
86 | 
87 | ## The expression is not evaluated but stored, and will be evaluated on
88 | ## the worker. Saving the result of this gives a `task` object which
89 | ## can be inspected.
90 | t
91 | 
92 | ## The expression stored in the task:
93 | t$expr()
94 | 
95 | ## The status of the task:
96 | t$status()
97 | 
98 | ## The result of the task, which will throw an error if we try to fetch it before the task has run:
99 | ##+ error=TRUE
100 | t$result()
101 | 
102 | ## And how long the task has been waiting:
103 | t$times()
104 | 
105 | ## Tasks can use local variables, too:
106 | x <- 10
107 | t2 <- obj$enqueue(x * 2)
108 | t2$expr()
109 | 
110 | ## And because using unevaluated expressions can be problematic,
111 | ## `rrqueue` has a standard-evaluation version (`enqueue_`) which takes
112 | ## either strings representing expressions or quoted expressions:
113 | ### obj$enqueue_("x / 2")
114 | obj$enqueue_(quote(x / 2))
115 | 
116 | ## Now we have three tasks:
117 | obj$tasks_list()
118 | 
119 | ## All the tasks are waiting to be run:
120 | obj$tasks_status()
121 | 
122 | ## We can get an overview of the tasks:
123 | obj$tasks_overview()
124 | 
125 | ## # Starting workers
126 | 
127 | ## `rrqueue` includes a script `rrqueue_worker` for starting workers
128 | ## from the command line (install with `rrqueue::install_scripts()`).
129 | ## Workers can also be started from within R using the `worker_spawn`
130 | ## function:
131 | logfile <- tempfile()
132 | wid <- rrqueue::worker_spawn("myqueue", logfile)
133 | ##+ echo=FALSE
134 | Sys.sleep(.5)
135 | 
136 | ## This function returns the *worker identifier*, which is also
137 | ## printed to the screen.
138 | 
139 | ## It's probably informative at this point to read the logfile of the
140 | ## worker to see what it did on startup:
141 | 
142 | ### See the RcppR6 tutorial for how to do this nicely.
143 | ##+ results="asis", echo=TRUE
144 | plain_output(readLines(logfile))
145 | 
146 | ## The worker first prints a lot of diagnostic information to the
147 | ## screen (or log file) indicating the name of the worker, the version
148 | ## of rrqueue, machine information, and special keys in the database
149 | ## where important information is stored.
150 | 
151 | ## Then, after broadcasting that it is awake (`ALIVE`), it detects that
152 | ## there is a controller on the queue and attempts to construct
153 | ## the environment that the controller wants (`r paste("ENVIR", obj$envir_id)`).
154 | 
155 | ## After that, there are a series of `TASK_START`, `EXPR`, and
156 | ## `TASK_COMPLETE` lines as each of the three tasks is processed.
157 | obj$tasks_status()
158 | 
159 | ## The times here give an indication of the rrqueue overhead; the
160 | ## running time of these simple expressions should be close to zero.
161 | obj$tasks_times()
162 | 
163 | ## The task handle created before can now give a result:
164 | t$result()
165 | 
166 | ## Similarly, results can be retrieved from the queue directly:
167 | obj$task_result(1)
168 | obj$task_result(2)
169 | obj$task_result(3)
170 | 
171 | ## The worker that we created can be seen here:
172 | obj$workers_list()
173 | 
174 | ## Queue a slower task, this time using the `slowdouble` function. This
175 | ## will take 1s:
176 | ##+ echo=TRUE
177 | t <- obj$enqueue(slowdouble(1))
178 | t$status()
179 | Sys.sleep(.3)
180 | t$status()
181 | Sys.sleep(1)
182 | t$status()
183 | t$result()
184 | 
185 | ## Again, times are available:
186 | t$times()
187 | 
188 | ## # Finishing up
189 | obj$stop_workers()
190 | 
191 | ##+ echo=FALSE
192 | Sys.sleep(.5)
193 | 
194 | ##+ results="asis", echo=FALSE
195 | plain_output(readLines(logfile))
196 | 
197 | ## The worker is now in the exited list:
198 | obj$workers_list_exited()
199 | 
200 | ## The full log from our worker (dropping the first column, which is
201 | ## the worker id and takes up valuable space here):
202 | obj$workers_log_tail(wid, Inf)[-1]
203 | 
204 | ### This is unfortunate, but I really need things cleaned up nicely so
205 | ### that I can get tests to pass in R CMD check. Will try to expose
206 | ### this function soon.
207 | ##+ echo=FALSE, results="hide"
208 | rrqueue:::queue_clean(obj$con, obj$queue_name, purge=TRUE)
209 | --------------------------------------------------------------------------------
/vignettes/src/messages.R:
--------------------------------------------------------------------------------
1 | ## ---
2 | ## title: "rrqueue messages"
3 | ## author: "Rich FitzJohn"
4 | ## date: "`r Sys.Date()`"
5 | ## output: rmarkdown::html_vignette
6 | ## vignette: >
7 | ## %\VignetteIndexEntry{rrqueue messages}
8 | ## %\VignetteEngine{knitr::rmarkdown}
9 | ## %\VignetteEncoding{UTF-8}
10 | ## ---
11 | 
12 | ##+ echo=FALSE, results="hide"
13 | rrqueue:::queue_clean(redux::hiredis(), "myqueue",
14 | purge=TRUE, stop_workers="kill")
15 | lang_output <- function(x, lang) {
16 | cat(c(sprintf("```%s", lang), x, "```"), sep="\n")
17 | }
18 | make_reader <- function(filename, lang="plain") {
19 | n <- 0
20 | force(filename)
21 | force(lang)
22 | function() {
23 | txt <- readLines(filename)
24 | if (n > 0) {
25 | txt <- txt[-seq_len(n)]
26 | }
27 | n <<- n + length(txt)
28 | lang_output(txt, lang)
29 | }
30 | }
31 | 
32 | ## In addition to passing tasks (and results) between a controller and
33 | ## workers, the controller can also send "messages" to workers. This
34 | ## vignette shows what the possible messages do.
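## Every message below follows the same round trip; as a minimal sketch
## (using only the two methods demonstrated throughout this vignette, with
## `obj` and `worker_id` created in the next chunk):
##
## ```r
## message_id <- obj$send_message("PING")           # queue a message for the worker
## obj$get_response(message_id, worker_id, wait=10) # collect its response
## ```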
35 | 
36 | ## In order to do this, we're going to need a queue and a worker:
37 | obj <- rrqueue::queue("myqueue", sources="myfuns.R")
38 | logfile <- tempfile()
39 | worker_id <- rrqueue::worker_spawn("myqueue", logfile)
40 | 
41 | ## On startup the worker log contains:
42 | ##+ results="asis", echo=FALSE
43 | reader <- make_reader(logfile)
44 | reader()
45 | 
46 | ## Because one of the main effects of messages is to print to the
47 | ## worker logfile, we'll print this fairly often.
48 | 
49 | ## ## Messages and responses
50 | 
51 | ## 1. The queue sends a message for one or more workers to process.
52 | ## The message has an *identifier* that is derived from the current
53 | ## time. Messages are written to a first-in-first-out queue, *per
54 | ## worker*, and are processed independently by workers who do not
55 | ## look to see if other workers have messages or are processing
56 | ## them.
57 | ## 
58 | ## 2. As soon as a worker has finished processing any current job it
59 | ## will process the message (it must wait to finish a current job
60 | ## but will not start any further jobs).
61 | ## 
62 | ## 3. Once the message has been processed (see below) a response will
63 | ## be written to a response list with the same identifier as the
64 | ## message.
65 | 
66 | ## ## `PING`
67 | 
68 | ## The `PING` message simply asks the worker to return `PONG`. It's
69 | ## useful for diagnosing communication issues because it does so
70 | ## little:
71 | message_id <- obj$send_message("PING")
72 | 
73 | ## The message id is going to be useful for getting responses:
74 | message_id
75 | 
76 | ## (this is derived from the current time, according to Redis, which is
77 | ## the central reference point of time for the whole system).
78 | 
79 | ## Wait a little while:
80 | Sys.sleep(.5)
81 | 
82 | ##+ results="asis", echo=TRUE
83 | reader()
84 | 
85 | ## The logfile prints:
86 | 
87 | ## 1. the request for the `PING` (`MESSAGE PING`)
88 | ## 2. the value `PONG`, printed to the R message stream
89 | ## 3. logging a response (`RESPONSE PONG`), which means that something is written to the response stream.
90 | 
91 | ## We can access the same bits of information in the worker log:
92 | obj$workers_log_tail(n=Inf)[-1]
93 | 
94 | ## This includes the `ALIVE` and `ENVIR` bits as the worker comes up.
95 | 
96 | ## Inspecting the logs is fine for interactive use, but it's often
97 | ## going to be more useful to poll for a response.
98 | 
99 | ## We already know that our worker has a response, but we can ask anyway:
100 | obj$has_responses(message_id)
101 | 
102 | ## Or inversely we can ask what messages a given worker has responses for:
103 | obj$response_ids(worker_id)
104 | 
105 | ## To fetch the responses from all workers it was sent to (always
106 | ## returning a named list):
107 | obj$get_responses(message_id)
108 | 
109 | ## or to fetch the response from a given worker:
110 | obj$get_response(message_id, worker_id)
111 | 
112 | ## The response can be deleted by passing `delete=TRUE` to this method:
113 | obj$get_response(message_id, worker_id, delete=TRUE)
114 | 
115 | ## after which requesting the response again will throw an error:
116 | ##+ error=TRUE
117 | obj$get_response(message_id, worker_id, delete=TRUE)
118 | 
119 | ## There is also a `wait` argument that lets you wait until a response
120 | ## is ready.
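## From the user's point of view, `wait` does roughly the polling shown
## below for you (a hand-rolled sketch only, not the actual
## implementation; it assumes `has_responses` returns a logical per
## worker):
##
## ```r
## while (!any(obj$has_responses(message_id))) {
##   Sys.sleep(0.1)  # poll until a worker has written its response
## }
## obj$get_response(message_id, worker_id)
## ```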
## The `slowdouble` command will take a few seconds, so to
121 | ## demonstrate:
122 | obj$enqueue(slowdouble(2))
123 | message_id <- obj$send_message("PING")
124 | obj$get_response(message_id, worker_id, delete=TRUE, wait=10)
125 | 
126 | ## Looking at the log will show what went on here:
127 | obj$workers_log_tail(n=4)[-1]
128 | 
129 | ## 1. A task is received
130 | ## 2. 2s later the task is completed
131 | ## 3. Then the message is received
132 | ## 4. Then, basically instantaneously, the message is responded to
133 | 
134 | ## However, because the message is only processed after the task is
135 | ## completed, the response takes a while to come back. Equivalently,
136 | ## from the worker log:
137 | 
138 | ##+ results="asis", echo=FALSE
139 | reader()
140 | 
141 | ## ## `ECHO`
142 | 
143 | ## This is basically like `PING` and not very interesting; it prints
144 | ## an arbitrary string to the log. It always returns `"OK"` as a
145 | ## response.
146 | 
147 | message_id <- obj$send_message("ECHO", "hello world!")
148 | obj$get_response(message_id, worker_id, delete=TRUE, wait=10)
149 | 
150 | ##+ results="asis", echo=FALSE
151 | reader()
152 | 
153 | ## ## `INFO`
154 | 
155 | ## The `INFO` command refreshes and returns the worker information.
156 | 
157 | ## We already have a copy of the worker info; it was created when the
158 | ## worker started up:
159 | obj$workers_info()[[worker_id]]
160 | 
161 | ## Note that the `envir` field is currently empty (`{}`) because when
162 | ## the worker started it did not know about any environments.
163 | 
164 | message_id <- obj$send_message("INFO")
165 | 
166 | ## Here's the new worker information, complete with an updated `envir`
167 | ## field:
168 | obj$get_response(message_id, worker_id, delete=TRUE, wait=10)
169 | 
170 | ## This has been updated on the database copy too:
171 | obj$workers_info()[[worker_id]]$envir
172 | 
173 | ## and the same information is printed to the worker log:
174 | ##+ results="asis", echo=FALSE
175 | reader()
176 | 
177 | ## ## `DIR`
178 | 
179 | ## This is useful for listing directory contents, similar to the `dir`
180 | ## function in R. However, because file *contents* are usually more
181 | ## interesting (e.g., when working out why something is not running on the
182 | ## remote machine), this is basically the result of passing the
183 | ## results of `dir` to `tools::md5sum` in order to get the md5sum of
184 | ## each file.
185 | message_id <- obj$send_message("DIR")
186 | obj$get_response(message_id, worker_id, delete=TRUE, wait=10)
187 | 
188 | ## Additional arguments to `dir` can be passed through:
189 | message_id <- obj$send_message("DIR", list(pattern="\\.R$"))
190 | obj$get_response(message_id, worker_id, delete=TRUE, wait=10)
191 | 
192 | ## If you pass invalid arguments to `dir`, then a reasonably
193 | ## helpful message should be generated:
194 | message_id <- obj$send_message("DIR", list(foo="bar"))
195 | obj$get_response(message_id, worker_id, delete=TRUE, wait=10)
196 | 
197 | ## (note that this does not generate an error locally, but you can
198 | ## test to see if it did throw an error by checking the class of the
199 | ## returned value).
200 | 
201 | ## and the same information is printed to the worker log:
202 | ##+ results="asis", echo=FALSE
203 | reader()
204 | 
205 | ## ## `PUSH`
206 | 
207 | ## The commands `PUSH` and `PULL` move files to and from the worker.
208 | ## The command is interpreted as an instruction to the worker, so
209 | ## `PUSH` pushes files from the worker into the database while `PULL`
210 | ## pulls files from the database into the worker. There are (or will be)
211 | ## preferable higher-level ways of dealing with this.
212 | 
213 | ## Things to be aware of here: Redis is an in-memory store and rrqueue
214 | ## is not at all aggressive about deleting objects. If you push a 1GB
215 | ## file into Redis things *will* go badly. There are no checks for
216 | ## this at present!
217 | 
218 | ## `PUSH` takes a vector of filenames as an argument. The response is
219 | ## not the file itself (how could it do that?) but instead the *hash*
220 | ## of that file. By the time the response is received the file
221 | ## contents are stored in the database and can be returned.
222 | message_id <- obj$send_message("PUSH", "myfuns.R")
223 | res <- obj$get_response(message_id, worker_id, delete=TRUE, wait=10)
224 | res
225 | 
226 | ## We can save the file into a temporary directory on the filesystem
227 | ## using the `files_unpack` method of `queue`:
228 | path <- obj$files_unpack(res)
229 | dir(path)
230 | 
231 | ## And the files have the expected hash:
232 | tools::md5sum(file.path(path, names(res)))
233 | 
234 | ##+ results="asis", echo=FALSE
235 | reader()
236 | 
237 | ## ## `PULL`
238 | 
239 | ## This is the inverse of `PUSH`: it takes files from the machine the
240 | ## queue is running on and copies them into the worker (from the view
241 | ## of the worker, the files in question are already in the database
242 | ## and it will "pull" them down locally).
243 | 
244 | ## First, we need to save files into the database. Let's rename the
245 | ## temporary file above and save that:
246 | file.rename(file.path(path, "myfuns.R"),
247 | "brandnewcode.R")
248 | res <- obj$files_pack("brandnewcode.R")
249 | res
250 | 
251 | ## Note that the hash here is the same as above: `rrqueue` can tell
252 | ## this is the same file even though it now has a different filename. Note
253 | ## also that filenames will be interpreted relative to the working
254 | ## directory, because the directory layout on the worker, beyond the
255 | ## working directory itself, could be arbitrarily different.
256 | 
257 | ## Now that the files have been packed, we can run the `PULL` command
258 | 
259 | ## (note that the `PULL` command *always* unpacks files into the
260 | ## worker's working directory):
261 | message_id <- obj$send_message("PULL")
262 | obj$get_response(message_id, worker_id, delete=TRUE, wait=10)
263 | 
264 | ## And the new file will be present in the directory:
265 | message_id <- obj$send_message("DIR", list(pattern="\\.R$"))
266 | obj$get_response(message_id, worker_id, delete=TRUE, wait=10)
267 | 
268 | ##+ results="asis", echo=FALSE
269 | reader()
270 | 
271 | ## ## `EVAL`
272 | 
273 | ## Evaluate an arbitrary R expression, passed as a string (*not* as
274 | ## any sort of unevaluated or quoted expression). This expression is
275 | ## evaluated in the global environment, which is *not* the environment
276 | ## in which queued code is evaluated.
277 | 278 | message_id <- obj$send_message("EVAL", "1 + 1") 279 | obj$get_response(message_id, worker_id, delete=TRUE, wait=10) 280 | 281 | ## We can delete the file created above: 282 | message_id <- obj$send_message("EVAL", "file.remove('brandnewcode.R')") 283 | obj$get_response(message_id, worker_id, delete=TRUE, wait=10) 284 | 285 | ##+ results="asis", echo=FALSE 286 | reader() 287 | 288 | ## This could be used to evaluate code that has side effects, such as 289 | ## installing packages. However, due to limitations with how R loads 290 | ## packages the only way to update and reload a package is going to be 291 | ## to restart the worker. 292 | 293 | ## ## `STOP` 294 | 295 | ## Stop sends a shutdown message to the worker. Generally you should 296 | ## prefer the `stop_workers` method, which uses `STOP` behind the 297 | ## scenes. 298 | 299 | message_id <- obj$send_message("STOP") 300 | obj$get_response(message_id, worker_id, delete=TRUE, wait=10) 301 | 302 | ##+ results="asis", echo=FALSE 303 | reader() 304 | -------------------------------------------------------------------------------- /vignettes/src/myfuns.R: -------------------------------------------------------------------------------- 1 | ../myfuns.R --------------------------------------------------------------------------------