├── .Rbuildignore ├── .gitignore ├── .travis.yml ├── DESCRIPTION ├── LICENSE ├── Makefile ├── NAMESPACE ├── R ├── bulk.R ├── common.R ├── config.R ├── env.R ├── environment.R ├── exceptions.R ├── files.R ├── heartbeat.R ├── match_fun.R ├── monitor.R ├── object_cache.R ├── observer.R ├── queue.R ├── redis.R ├── rrqlapply.R ├── status.R ├── task.R ├── task_bundle.R ├── utils.R ├── utils_assert.R ├── worker.R └── worker_spawn.R ├── README.md ├── autodoc.R ├── doc └── how.md ├── docker └── Dockerfile ├── inst └── scripts │ ├── rrqueue_worker │ └── rrqueue_worker_tee ├── man-roxygen ├── observer.yml ├── observer_methods.R ├── queue.yml ├── queue_methods.R ├── task.yml ├── task_bundle.yml ├── task_bundle_methods.R └── task_methods.R ├── man ├── enqueue_bulk.Rd ├── install_scripts.Rd ├── observer.Rd ├── queue.Rd ├── rrqlapply.Rd ├── task.Rd ├── task_bundle.Rd ├── worker.Rd ├── worker_spawn.Rd ├── worker_stop.Rd └── yaml_env.Rd ├── tests ├── testthat.R └── testthat │ ├── config.yml │ ├── config2.yml │ ├── config3.yml │ ├── helper-rrqueue.R │ ├── myfuns.R │ ├── test-files.R │ ├── test-heartbeat.R │ ├── test-match-fun.R │ ├── test-queue.R │ ├── test-rrqlapply.R │ ├── test-task-bundle.R │ └── test-worker.R ├── update_web.sh └── vignettes ├── introduction.Rmd ├── messages.Rmd ├── myfuns.R └── src ├── introduction.R ├── messages.R └── myfuns.R /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^notes\.md$ 2 | ^tmp\.R$ 3 | ^Makefile$ 4 | ^\.travis\.yml$ 5 | ^doc$ 6 | ^ignore$ 7 | ^docker$ 8 | ^man-roxygen$ 9 | ^autodoc.R$ 10 | ^vignettes/src$ 11 | ^inst/staticdocs$ 12 | ^inst/web$ 13 | ^update_web\.sh$ 14 | ^rrqueue_.+\.tar\.gz$ 15 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .Rproj.user 2 | .Rhistory 3 | .RData 4 | notes.md 5 | tmp.R 6 | *.log 7 | ignore 8 | inst/doc 9 | inst/staticdocs 10 | inst/web 11 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | # Sample .travis.yml for R projects from https://github.com/craigcitro/r-travis 2 | 3 | language: c 4 | 5 | sudo: required 6 | 7 | services: 8 | - redis-server 9 | 10 | before_install: 11 | - curl -OL http://raw.github.com/craigcitro/r-travis/master/scripts/travis-tool.sh 12 | - chmod 755 ./travis-tool.sh 13 | - ./travis-tool.sh bootstrap 14 | 15 | # NOTE: libhiredis-dev is for RedisHeartbeat until I get linking working 16 | install: 17 | - ./travis-tool.sh install_deps 18 | - ./travis-tool.sh install_aptget libhiredis-dev 19 | - ./travis-tool.sh install_aptget libcurl4-openssl-dev 20 | - ./travis-tool.sh github_package gaborcsardi/progress 21 | - ./travis-tool.sh github_package ropensci/RedisAPI 22 | - ./travis-tool.sh github_package richfitz/redux 23 | - ./travis-tool.sh github_package richfitz/RedisHeartbeat 24 | - ./travis-tool.sh github_package richfitz/storr 25 | - ./travis-tool.sh github_package richfitz/ids 26 | 27 | script: ./travis-tool.sh run_tests 28 | 29 | after_failure: 30 | - ./travis-tool.sh dump_logs 31 | 32 | notifications: 33 | email: 34 | on_success: change 35 | on_failure: change 36 | slack: bdkd:hY2eBCaH3bbsvNwlWwOfFdfr 37 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: rrqueue 2 
| Title: Scalable Job Queues for R with 'Redis' 3 | Version: 0.2.1 4 | Author: "Rich FitzJohn [aut, cre]" 5 | Maintainer: Richard G. FitzJohn 6 | Description: Scalable queuing system for R, using 'Redis'. 7 | Depends: 8 | R (>= 3.1.0) 9 | License: BSD_2_clause + file LICENSE 10 | LazyData: true 11 | SystemRequirements: Redis 12 | Imports: 13 | R6, 14 | RedisAPI (>= 0.3.0), 15 | RedisHeartbeat (>= 0.3.0), 16 | crayon, 17 | digest, 18 | docopt, 19 | ids (>= 0.0.2), 20 | progress, 21 | redux (>= 0.3.0), 22 | storr (>= 1.0.0) 23 | Suggests: 24 | testthat, 25 | tools, 26 | knitr, 27 | rmarkdown, 28 | yaml 29 | VignetteBuilder: knitr 30 | RoxygenNote: 5.0.1 31 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | YEAR: 2015 2 | COPYRIGHT HOLDER: Richard G. FitzJohn 3 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | PACKAGE := $(shell grep '^Package:' DESCRIPTION | sed -E 's/^Package:[[:space:]]+//') 2 | RSCRIPT = Rscript --no-init-file 3 | 4 | all: install 5 | 6 | test: 7 | ${RSCRIPT} -e 'library(methods); devtools::test()' 8 | 9 | test_all: 10 | REMAKE_TEST_INSTALL_PACKAGES=true make test 11 | 12 | autodoc: 13 | ${RSCRIPT} autodoc.R process 14 | 15 | roxygen: 16 | @mkdir -p man 17 | ${RSCRIPT} -e "library(methods); devtools::document()" 18 | 19 | staticdocs: 20 | @mkdir -p inst/staticdocs 21 | Rscript -e "library(methods); staticdocs::build_site()" 22 | rm -f vignettes/*.html 23 | 24 | website: staticdocs 25 | ./update_web.sh 26 | 27 | install: 28 | R CMD INSTALL . 29 | 30 | build: 31 | R CMD build . 32 | 33 | check: build 34 | _R_CHECK_CRAN_INCOMING_=FALSE R CMD check --as-cran --no-manual `ls -1tr ${PACKAGE}*gz | tail -n1` 35 | @rm -f `ls -1tr ${PACKAGE}*gz | tail -n1` 36 | @rm -rf ${PACKAGE}.Rcheck 37 | 38 | check_all: 39 | REMAKE_TEST_INSTALL_PACKAGES=true make check 40 | 41 | README.md: README.Rmd 42 | Rscript -e 'library(methods); devtools::load_all(); knitr::knit("README.Rmd")' 43 | sed -i.bak 's/[[:space:]]*$$//' README.md 44 | rm -f $@.bak 45 | 46 | vignettes/introduction.Rmd: vignettes/src/introduction.R 47 | ${RSCRIPT} -e 'library(sowsear); sowsear("$<", output="$@")' 48 | vignettes/messages.Rmd: vignettes/src/messages.R 49 | ${RSCRIPT} -e 'library(sowsear); sowsear("$<", output="$@")' 50 | vignettes: vignettes/introduction.Rmd vignettes/messages.Rmd 51 | ${RSCRIPT} -e 'library(methods); devtools::build_vignettes()' 52 | 53 | .PHONY: all test document install vignettes 54 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | S3method(print,worker_info) 4 | export(enqueue_bulk) 5 | export(enqueue_bulk_submit) 6 | export(install_scripts) 7 | export(observer) 8 | export(queue) 9 | export(rrqlapply) 10 | export(rrqlapply_submit) 11 | export(task) 12 | export(task_bundle) 13 | export(worker) 14 | export(worker_spawn) 15 | export(worker_stop) 16 | export(yaml_env) 17 | import(RedisAPI) 18 | importFrom(R6,R6Class) 19 | importFrom(crayon,make_style) 20 | importFrom(digest,digest) 21 | importFrom(progress,progress_bar) 22 | importFrom(storr,storr) 23 | -------------------------------------------------------------------------------- /R/bulk.R: 
-------------------------------------------------------------------------------- 1 | ##' Bulk queuing. Similar in some respects to things like 2 | ##' \code{\link{apply}}. This is an experiment to deal with the 3 | ##' pattern where you have a big pile of parameters in a data.frame to 4 | ##' loop over, by applying a function to each row. 5 | ##' 6 | ##' There are two modes here; selected with \code{do.call}. With 7 | ##' \code{do.call=FALSE}, the default, the function behaves similarly 8 | ##' to \code{apply(X, 1, FUN)}; that is, the function is applied to 9 | ##' each row of the data.frame (as a list): 10 | ##' \code{FUN(as.list(X[1,]))}, \code{FUN(as.list(X[2,]))}, and so on. 11 | ##' The alternative mode (\code{do.call=TRUE}) is where the 12 | ##' \code{data.frame} contains \emph{parameters} to the function 13 | ##' \code{FUN} so is equivalent to \code{FUN(X[1,1], X[1,2], ...)}. This 14 | ##' is similar to (but not implemented as) running: \code{do.call("FUN", 15 | ##' as.list(X[1,]))}. 16 | ##' 17 | ##' Be careful, this one is going to change, including the name 18 | ##' probably. You have been warned. 19 | ##' 20 | ##' @title Bulk queuing 21 | ##' @param X An object to loop over. If a list, we'll loop over the 22 | ##' elements of the list, duplicating the behaviour of 23 | ##' \code{\link{rrqlapply}} except for not handling dots. If a 24 | ##' \code{data.frame} we'll loop over the \emph{rows}. Matrices are 25 | ##' not supported. 26 | ##' 27 | ##' @param FUN A function. Will be found in the same way as 28 | ##' \code{FUN} within \code{\link{rrqlapply}}. 29 | ##' 30 | ##' @param rrq An rrq object 31 | ##' 32 | ##' @param do.call Behave like (but not via) \code{\link{do.call}}; 33 | ##' given an element \code{el}, rather than run \code{FUN(el)} run 34 | ##' \code{FUN(el[[1]], el[[2]], ...)}. 35 | ##' 36 | ##' @param group Name of a group for generated task ids. If not 37 | ##' included, an ID will be generated. 38 | ##' 39 | ##' @param timeout Total length of time to wait for tasks to be 40 | ##' completed. The default is to wait forever (like \code{lapply}). 41 | ##' 42 | ##' @param time_poll Time to poll for tasks. Must be an integer. 43 | ##' Because of how the function is implemented, R will be 44 | ##' unresponsive for this long each iteration (unless results are 45 | ##' returned), so the default of 1s should be reasonable. 46 | ##' 47 | ##' @param delete_tasks Delete tasks on successful finish? 48 | ##' 49 | ##' @param progress_bar Display a progress bar? 50 | ##' 51 | ##' @param env Environment to look in 52 | ##' 53 | ##' @export 54 | enqueue_bulk <- function(X, FUN, rrq, 55 | do.call=FALSE, group=NULL, 56 | timeout=Inf, time_poll=1, delete_tasks=FALSE, 57 | progress_bar=TRUE, env=parent.frame()) { 58 | obj <- enqueue_bulk_submit(X, FUN, rrq, do.call, group, progress_bar, env) 59 | tryCatch(obj$wait(timeout, time_poll, progress_bar), 60 | interrupt=function(e) obj) 61 | } 62 | 63 | ## There's going to be a lot of overlap here with rrqlapply but that's 64 | ## OK for now; we'll work through and remove it shortly. The biggest 65 | ## issue is how to deal with dots. In general I'd rather not have 66 | ## that bit of complexity here. I guess with dots we'd have: 67 | ## 68 | ## f(el, ...) 69 | ## f(el[[1]], el[[2]], ...)
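## A minimal usage sketch of the two modes, with hypothetical names:
## `obj` would be a queue handle (e.g. from queue("myq")) and f, g
## functions available to the workers; assumes a running Redis server
## and at least one worker:
##
## X <- data.frame(a=1:3, b=c(0.1, 0.2, 0.3))
## enqueue_bulk(X, f, obj) # runs f(list(a=1, b=0.1)), etc.
## enqueue_bulk(X, g, obj, do.call=TRUE) # runs g(a=1, b=0.1), etc.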
70 | 71 | ##' @export 72 | ##' @rdname enqueue_bulk 73 | enqueue_bulk_submit <- function(X, FUN, rrq, 74 | do.call=FALSE, group=NULL, 75 | progress_bar=TRUE, env=parent.frame()) { 76 | if (is.data.frame(X)) { 77 | X <- df_to_list(X) 78 | } else if (!is.list(X)) { 79 | stop("X must be a data.frame or list") 80 | } 81 | 82 | fun <- find_fun(FUN, env, rrq) 83 | n <- length(X) 84 | 85 | ## See rrqlapply_submit for treatment of key_complete. The rest of 86 | ## this is a bit more complicated than rrqlapply because we allow 87 | ## switching between f(x) and f(**x). 88 | tasks <- vector("list", length(X)) 89 | e <- environment() 90 | key_complete <- NULL 91 | group <- create_group(group, progress_bar) 92 | p <- progress(total=n, show=progress_bar, prefix="submitting: ") 93 | for (i in seq_len(n)) { 94 | if (do.call) { 95 | expr <- as.call(c(list(fun), X[[i]])) 96 | } else { 97 | expr <- as.call(list(fun, X[[i]])) 98 | } 99 | tasks[[i]] <- rrq$enqueue_(expr, e, key_complete=key_complete, group=group) 100 | if (is.null(key_complete)) { 101 | key_complete <- tasks[[i]]$key_complete 102 | } 103 | p() 104 | } 105 | 106 | task_bundle(rrq, tasks, group, names(X)) 107 | } -------------------------------------------------------------------------------- /R/common.R: -------------------------------------------------------------------------------- 1 | rrqueue_keys <- function(queue_name=NULL, worker_name=NULL) { 2 | if (is.null(queue_name)) { 3 | rrqueue_keys_global() 4 | } else if (is.null(worker_name)) { 5 | c(rrqueue_keys_global(), 6 | rrqueue_keys_queue(queue_name)) 7 | } else { 8 | c(rrqueue_keys_global(), 9 | rrqueue_keys_queue(queue_name), 10 | rrqueue_keys_worker(queue_name, worker_name)) 11 | } 12 | } 13 | 14 | rrqueue_protocol <- function() { 15 | packageVersion("rrqueue")[1, c(1:2)] 16 | } 17 | 18 | rrqueue_keys_global <- function() { 19 | list(rrqueue_queues = "rrqueue:queues") 20 | } 21 | 22 | rrqueue_keys_queue <- function(queue) { 23 | list(queue_name = queue, 24 | 25 | workers_name = sprintf("%s:workers:name", queue), 26 | workers_status = sprintf("%s:workers:status", queue), 27 | workers_task = sprintf("%s:workers:task", queue), 28 | workers_info = sprintf("%s:workers:info", queue), 29 | 30 | tasks_counter = sprintf("%s:tasks:counter", queue), 31 | tasks_expr = sprintf("%s:tasks:expr", queue), 32 | tasks_status = sprintf("%s:tasks:status", queue), 33 | tasks_time_sub = sprintf("%s:tasks:time:sub", queue), 34 | tasks_time_beg = sprintf("%s:tasks:time:beg", queue), 35 | tasks_time_end = sprintf("%s:tasks:time:end", queue), 36 | tasks_worker = sprintf("%s:tasks:worker", queue), 37 | tasks_result = sprintf("%s:tasks:result", queue), 38 | tasks_envir = sprintf("%s:tasks:envir", queue), 39 | tasks_complete = sprintf("%s:tasks:complete", queue), 40 | tasks_redirect = sprintf("%s:tasks:redirect", queue), 41 | tasks_group = sprintf("%s:tasks:group", queue), 42 | 43 | envirs_contents = sprintf("%s:envirs:contents", queue), 44 | envirs_files = sprintf("%s:envirs:files", queue), 45 | 46 | files = sprintf("%s:files", queue), 47 | objects = sprintf("%s:objects", queue)) 48 | } 49 | 50 | ## NOTE: Or alternatively, key_tasks?
51 | rrqueue_key_queue <- function(queue, envir) { 52 | sprintf("%s:tasks:%s:id", queue, envir) 53 | } 54 | 55 | rrqueue_keys_worker <- function(queue, worker) { 56 | list(message = rrqueue_key_worker_message(queue, worker), 57 | response = rrqueue_key_worker_response(queue, worker), 58 | log = rrqueue_key_worker_log(queue, worker), 59 | heartbeat = rrqueue_key_worker_heartbeat(queue, worker), 60 | envir = rrqueue_key_worker_envir(queue, worker)) 61 | } 62 | 63 | ## Special key for worker-specific commands to be published to. 64 | rrqueue_key_worker_message <- function(queue, worker) { 65 | sprintf("%s:workers:%s:message", queue, worker) 66 | } 67 | rrqueue_key_worker_response <- function(queue, worker) { 68 | sprintf("%s:workers:%s:response", queue, worker) 69 | } 70 | rrqueue_key_worker_log <- function(queue, worker) { 71 | sprintf("%s:workers:%s:log", queue, worker) 72 | } 73 | rrqueue_key_worker_heartbeat <- function(queue, worker) { 74 | sprintf("%s:workers:%s:heartbeat", queue, worker) 75 | } 76 | rrqueue_key_worker_envir <- function(queue, worker) { 77 | sprintf("%s:workers:%s:envir", queue, worker) 78 | } 79 | rrqueue_key_task_complete <- function(queue, task_id) { 80 | sprintf("%s:tasks:%s:complete", queue, task_id) 81 | } 82 | rrqueue_key_worker_alive <- function(queue) { 83 | sprintf("%s:workers:alive:%s", queue, ids::adjective_animal()) 84 | } 85 | 86 | ## TODO: come up with a way of scheduling object deletion. Things 87 | ## that are created here should be deleted immediately after the 88 | ## function ends (perhaps on exit). *Objects* should only be deleted 89 | ## if they have no more dangling pointers. 90 | ## 91 | ## So we'll register "groups" and schedule prefix deletion once the 92 | ## group is done. But for now, don't do any of that. 
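## For illustration, with a hypothetical queue "myq" and worker "w1",
## the key helpers above produce keys like:
##
## rrqueue_keys_queue("myq")$tasks_status # "myq:tasks:status"
## rrqueue_key_worker_message("myq", "w1") # "myq:workers:w1:message"
## rrqueue_key_worker_heartbeat("myq", "w1") # "myq:workers:w1:heartbeat"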
93 | prepare_expression <- function(expr) { 94 | fun <- expr[[1]] 95 | args <- expr[-1] 96 | 97 | is_call <- vlapply(args, is.call) 98 | if (any(is_call)) { 99 | stop("complex expressions not yet supported") 100 | } 101 | is_symbol <- vlapply(args, is.symbol) 102 | if (any(is_symbol)) { 103 | object_names <- vcapply(args[is_symbol], as.character) 104 | } else { 105 | object_names <- NULL 106 | } 107 | 108 | list(expr=expr, object_names=object_names) 109 | } 110 | 111 | save_expression <- function(dat, task_id, envir, object_cache) { 112 | object_names <- dat$object_names 113 | if (!is.null(object_names)) { 114 | if (!all(ok <- exists(object_names, envir, inherits=FALSE))) { 115 | stop("not all objects found: ", 116 | paste(object_names[!ok], collapse=", ")) 117 | } 118 | names(object_names) <- paste0(task_object_prefix(task_id), object_names) 119 | dat$object_names <- object_names 120 | 121 | object_cache$import(envir, object_names) 122 | } 123 | 124 | object_to_string(dat) 125 | } 126 | 127 | restore_expression <- function(dat, envir, object_cache) { 128 | dat <- string_to_object(dat) 129 | if (!is.null(object_cache) && length(dat$object_names) > 0L) { 130 | object_cache$export(envir, invert_names(dat$object_names)) 131 | } 132 | dat$expr 133 | } 134 | 135 | parse_worker_name <- function(str) { 136 | res <- strsplit(str, "::", fixed=TRUE) 137 | if (any(viapply(res, length) != 2)) { 138 | stop("parse error") 139 | } 140 | list(host=vcapply(res, "[[", 1), 141 | pid=vcapply(res, "[[", 2)) 142 | } 143 | 144 | parse_worker_log <- function(log) { 145 | re <- "^([0-9]+) ([^ ]+) ?(.*)$" 146 | ok <- grepl(re, log) 147 | if (!all(ok)) { 148 | stop("Corrupt log") 149 | } 150 | time <- as.integer(sub(re, "\\1", log)) 151 | command <- sub(re, "\\2", log) 152 | message <- lstrip(sub(re, "\\3", log)) 153 | data.frame(time, command, message, stringsAsFactors=FALSE) 154 | } 155 | 156 | task_object_prefix <- function(task_id) { 157 | sprintf(".%s:", task_id) 158 | } 159 | 160 | version_info <- function(package=.packageName) { 161 | descr <- packageDescription(package) 162 | version <- package_version(descr$Version) 163 | repository <- descr$Repository 164 | sha <- descr$RemoteSha 165 | list(package=package, 166 | version=version, 167 | repository=repository, 168 | sha=sha) 169 | } 170 | 171 | version_string <- function() { 172 | data <- version_info() 173 | if (!is.null(data$repository)) { 174 | qual <- data$repository 175 | } else if (!is.null(data$sha)) { 176 | qual <- data$sha 177 | } else { 178 | qual <- "LOCAL" 179 | } 180 | sprintf("%s [%s]", data$version, qual) 181 | } 182 | 183 | rrqueue_scripts <- function(con) { 184 | set_hashes <- 'local id = ARGV[table.getn(ARGV)] 185 | for i, k in ipairs(KEYS) do 186 | redis.call("HSET", k, id, ARGV[i]) 187 | end' 188 | 189 | ## Assume that ARGV[1] is the task id and KEYS[1] is the queue. Then 190 | ## ARGV[2..] and KEYS[2..] 
are the key / value pairs 191 | job_submit <- ' 192 | local task_id = ARGV[1] 193 | for i, k in ipairs(KEYS) do 194 | if i > 1 then 195 | redis.call("HSET", k, task_id, ARGV[i]) 196 | end 197 | end 198 | redis.call("RPUSH", KEYS[1], task_id)' 199 | 200 | job_incr <- 'return {redis.call("INCR", KEYS[1]), redis.call("TIME")}' 201 | RedisAPI::redis_scripts(con, 202 | scripts=list(set_hashes=set_hashes, 203 | job_incr=job_incr, 204 | job_submit=job_submit)) 205 | } 206 | 207 | message_prepare <- function(id, command, args) { 208 | object_to_string(list(id=id, command=command, args=args)) 209 | } 210 | response_prepare <- function(id, command, result) { 211 | object_to_string(list(id=id, command=command, result=result)) 212 | } 213 | -------------------------------------------------------------------------------- /R/config.R: -------------------------------------------------------------------------------- 1 | load_config <- function(filename) { 2 | keys_common <- c("queue_name") 3 | keys_worker <- c("heartbeat_period", "heartbeat_expire", "key_worker_alive") 4 | keys_queue <- c("packages", "sources") 5 | 6 | ## Note that this is *not* the defaults to the underlying worker 7 | ## functions; I'll rework the docopt to read those soon. 8 | defaults_worker <- as.list(formals(worker))[c(keys_common, keys_worker)] 9 | defaults_queue <- as.list(formals(queue))[c(keys_common, keys_queue)] 10 | if (!isTRUE(all.equal(defaults_worker[keys_common], 11 | defaults_queue[keys_common]))) { 12 | stop("This is a bug.") 13 | } 14 | defaults <- c(defaults_worker, defaults_queue[keys_queue]) 15 | defaults$queue_name <- NULL 16 | defaults$redis <- list() 17 | 18 | config <- yaml_read(filename) 19 | 20 | extra <- setdiff(names(config), 21 | c(keys_common, keys_worker, keys_queue, "redis")) 22 | if (length(extra) > 0L) { 23 | warning(sprintf("Unknown keys in %s: %s", 24 | filename, paste(extra, collapse=", "))) 25 | } 26 | 27 | ## Some validation: 28 | assert_character_or_null(config$packages) 29 | assert_character_or_null(config$sources) 30 | 31 | ret <- modifyList(defaults, config, keep.null=TRUE) 32 | } 33 | 34 | ## This needs fixing in a few places though I don't think all are 35 | ## tested. Eventually we move away from host/port pairs and go with 36 | ## redis_config. Once that happens the interface here is heaps easier 37 | ## and basically can go away. 38 | ## 39 | ## Testing like in test-worker will be needed for observer/queue which 40 | ## also use this. 41 | tmp_fix_redis_config <- function(cfg) { 42 | if (!is.null(cfg$redis)) { 43 | cfg[paste0("redis_", names(cfg$redis))] <- cfg$redis 44 | cfg$redis <- NULL 45 | } 46 | cfg 47 | } 48 | -------------------------------------------------------------------------------- /R/env.R: -------------------------------------------------------------------------------- 1 | ##' Load environment variables from a yaml file. This is a hack for a 2 | ##' project. It may change and may move package. \code{callr} would 3 | ##' be a better fit probably, but \code{callr} doesn't pull in 4 | ##' \code{yaml} yet so I don't know that it's a good fit. 5 | ##' 6 | ##' The yaml file must be sets of key/value pairs of simple data 7 | ##' types. 
Something like: 8 | ##' 9 | ##' \preformatted{ 10 | ##' REDIS_HOST: localhost 11 | ##' } 12 | ##' 13 | ##' Alternatively, for use with \code{section}, add an extra layer of nesting: 14 | ##' 15 | ##' \preformatted{ 16 | ##' local: 17 | ##' REDIS_HOST: localhost 18 | ##' remote: 19 | ##' REDIS_HOST: redis.marathon.mesos 20 | ##' } 21 | ##' 22 | ##' @title Load environment variables from a yaml file 23 | ##' @param filename Name of the file 24 | ##' @param section An optional section of the file to load 25 | ##' @export 26 | ##' 27 | yaml_env <- function(filename, section=NULL) { 28 | data <- yaml_read(filename) 29 | if (!is.null(section)) { 30 | if (section %in% names(data)) { 31 | data <- data[[section]] 32 | } else { 33 | stop(sprintf("section %s not found", section)) 34 | } 35 | } 36 | check <- function(x) { 37 | is.atomic(x) && length(x) == 1L 38 | } 39 | ok <- vlapply(data, check) 40 | if (!all(ok)) { 41 | stop("Unexpected type for ", paste(names(data)[!ok], collapse=", ")) 42 | } 43 | if (length(data) > 0L) { 44 | do.call("Sys.setenv", data) 45 | } 46 | invisible(names(data)) 47 | } -------------------------------------------------------------------------------- /R/environment.R: -------------------------------------------------------------------------------- 1 | create_environment <- function(packages, sources, 2 | env=new.env(parent=.GlobalEnv)) { 3 | load_packages(packages) 4 | for (file in sources) { 5 | do_source(file, env, chdir=TRUE, keep.source=FALSE) 6 | } 7 | env 8 | } 9 | 10 | ## An alternative here is to trace some of the connection code, but 11 | ## that might be harder. The issue here is that we're going to be 12 | ## missing things like csv files, configuration files, etc, that might 13 | ## determine the state of the system. I don't think that there's much 14 | ## we can do about that though. For cases where we're running locally 15 | ## it'll be fine.
16 | create_environment2 <- function(packages, sources, env, global) { 17 | load_packages(packages) 18 | source_files <- character(0) 19 | for (file in sources) { 20 | source_files <- c( 21 | source_files, 22 | do_source(file, env, chdir=TRUE, keep.source=FALSE, 23 | source_fun=sys_source)) 24 | if (global) { 25 | do_source(file, .GlobalEnv, chdir=TRUE, keep.source=FALSE, 26 | source_fun=sys_source) 27 | } 28 | } 29 | source_files 30 | } 31 | 32 | load_packages <- function(packages) { 33 | for (p in packages) { 34 | library(p, character.only=TRUE, quietly=TRUE) 35 | } 36 | } 37 | 38 | do_source <- function(file, ..., source_fun=sys.source) { 39 | catch_source <- function(e) { 40 | stop(sprintf("while sourcing %s:\n%s", file, e$message), 41 | call.=FALSE) 42 | } 43 | tryCatch(source_fun(file, ...), error=catch_source) 44 | } 45 | 46 | envirs_list <- function(con, keys) { 47 | as.character(con$HKEYS(keys$envirs_contents)) 48 | } 49 | 50 | envirs_contents <- function(con, keys, envir_ids=NULL) { 51 | dat <- from_redis_hash(con, keys$envirs_contents, envir_ids) 52 | nok <- vlapply(dat, is.na) 53 | if (any(nok)) { 54 | stop(sprintf("Environment %s not found", 55 | paste(sprintf("'%s'", envir_ids[nok]), collapse=", "))) 56 | } 57 | lapply(dat, string_to_object) 58 | } 59 | -------------------------------------------------------------------------------- /R/exceptions.R: -------------------------------------------------------------------------------- 1 | WorkerError <- function(worker, message, ..., 2 | task_id=NULL, 3 | task_status=NULL, 4 | class=character(0), 5 | call=NULL) { 6 | structure(list(worker=worker, 7 | task_id=task_id, 8 | task_status=task_status, ..., 9 | message=message, call=call), 10 | class=c(class, "WorkerError", "error", "condition")) 11 | } 12 | 13 | ## This happens with no task 14 | WorkerStop <- function(worker, message) { 15 | WorkerError(worker, message, class="WorkerStop") 16 | } 17 | 18 | ## TODO: these might log more than once? 19 | WorkerTaskMissing <- function(worker, task_id) { 20 | msg <- sprintf("Task %s/%s not found", worker$name, task_id) 21 | worker$log("TASK_MISSING", task_id) 22 | WorkerError(worker, msg, 23 | task_id=task_id, task_status=TASK_MISSING, 24 | class="WorkerTaskMissing") 25 | } 26 | 27 | WorkerTaskError <- function(e) { 28 | class(e) <- c("WorkerTaskError", "try-error", class(e)) 29 | e 30 | } 31 | 32 | UnfetchableTask <- function(task_id, task_status) { 33 | structure(list(task_id=task_id, 34 | task_status=task_status), 35 | class=c("UnfetchableTask", "error", "condition")) 36 | } 37 | -------------------------------------------------------------------------------- /R/files.R: -------------------------------------------------------------------------------- 1 | files_pack <- function(cache, ..., files=c(...)) { 2 | ## For now, assume text files only. 3 | pack1 <- function(filename) { 4 | ## NOTE: this duplicates the content addressable storage in storr, 5 | ## I think. But the label is not going to change and we want to 6 | ## manage the mapping in rrqueue. 
7 | contents <- read_file_to_string(filename) 8 | hash <- hash_string(contents) 9 | cache$set(hash, contents) 10 | setNames(hash, filename) 11 | } 12 | ret <- vcapply(files, pack1) 13 | class(ret) <- "files_pack" 14 | ret 15 | } 16 | 17 | files_unpack <- function(cache, pack, path=tempfile()) { 18 | unpack1 <- function(x) { 19 | filename <- file.path(path, x) 20 | dir.create(dirname(filename), FALSE, TRUE) 21 | contents <- cache$get(pack[[x]]) 22 | write_string_to_file(contents, filename) 23 | } 24 | lapply(names(pack), unpack1) 25 | invisible(path) 26 | } 27 | -------------------------------------------------------------------------------- /R/heartbeat.R: -------------------------------------------------------------------------------- 1 | ## Heartbeat support, but with a slightly different interface to 2 | ## RedisHeartbeat and falling back on something informative if we have 3 | ## no support. 4 | heartbeat <- function(con, key, period, expire) { 5 | RedisHeartbeat::heartbeat(key, period, 6 | expire=expire, value=expire, 7 | con$config()) 8 | } 9 | 10 | heartbeat_time <- function(obj) { 11 | status <- obj$tasks_status() 12 | task_ids <- names(status[status == TASK_RUNNING]) 13 | if (length(task_ids) > 0L) { 14 | w_running <- as.character(obj$con$HMGET(obj$keys$tasks_worker, task_ids)) 15 | key <- rrqueue_key_worker_heartbeat(obj$queue_name, w_running) 16 | d <- data.frame(worker_id=w_running, 17 | task_id=task_ids, 18 | time=vnapply(key, obj$con$PTTL), 19 | stringsAsFactors=FALSE) 20 | rownames(d) <- NULL 21 | } else { 22 | d <- data.frame(worker_id=character(0), 23 | task_id=character(0), 24 | time=numeric(0), 25 | stringsAsFactors=FALSE) 26 | } 27 | d 28 | } 29 | 30 | identify_orphan_tasks <- function(obj) { 31 | d <- heartbeat_time(obj) 32 | i <- d$time == -2 33 | task_id <- d$task_id[i] 34 | worker_id <- d$worker_id[i] 35 | 36 | con <- obj$con 37 | keys <- obj$keys 38 | time <- redis_time(obj$con) 39 | for (i in seq_along(task_id)) { 40 | con$HSET(keys$tasks_time_end, task_id[[i]], time) 41 | con$HSET(keys$tasks_status, task_id[[i]], TASK_ORPHAN) 42 | con$HSET(keys$workers_status, worker_id[[i]], WORKER_LOST) 43 | } 44 | 45 | setNames(task_id, worker_id) 46 | } 47 | -------------------------------------------------------------------------------- /R/match_fun.R: -------------------------------------------------------------------------------- 1 | ## Will be prone to false positives but worth a shot 2 | has_namespace <- function(str) { 3 | grepl("::", str, fixed=TRUE) 4 | } 5 | 6 | split_namespace <- function(str) { 7 | res <- strsplit(str, "::", fixed=TRUE)[[1]] 8 | if (length(res) != 2L) { 9 | stop("Not a namespace-qualified variable") 10 | } 11 | res 12 | } 13 | 14 | exists_function_here <- function(name, envir) { 15 | exists(name, envir, mode="function", inherits=FALSE) 16 | } 17 | exists_function_ns <- function(name, ns) { 18 | if (ns %in% .packages()) { 19 | exists_function_here(name, getNamespace(ns)) 20 | } else { 21 | FALSE 22 | } 23 | } 24 | 25 | ## This is going to search back and find the location of a function by 26 | ## descending through environments recursively. 
27 | find_function_name <- function(name, envir) { 28 | if (identical(envir, emptyenv())) { 29 | stop("Did not find function") 30 | } 31 | if (exists_function_here(name, envir)) { 32 | envir 33 | } else { 34 | find_function_name(name, parent.env(envir)) 35 | } 36 | } 37 | 38 | find_function_value <- function(fun, envir) { 39 | if (identical(envir, emptyenv())) { 40 | stop("Did not find function") 41 | } 42 | name <- find_function_in_envir(fun, envir) 43 | if (!is.null(name)) { 44 | list(name=name, envir=envir) 45 | } else { 46 | find_function_value(fun, parent.env(envir)) 47 | } 48 | } 49 | 50 | ## Determine the name of a function, given its value and an 51 | ## environment to find it in. 52 | find_function_in_envir <- function(fun, envir) { 53 | pos <- ls(envir) 54 | i <- scapply(pos, function(x) identical(fun, envir[[x]]), NULL) 55 | if (is.null(i)) i else pos[[i]] 56 | } 57 | 58 | ## TODO: consider `::` and `:::` as special names? 59 | ## NOTE: This differs from match_fun_symbol because it allows skipping 60 | ## up the search path to identify functions in specific parts of the 61 | ## search path. If a namespace-qualified value is given, we can 62 | ## ignore envir entirely. 63 | match_fun_name <- function(str, envir) { 64 | if (has_namespace(str)) { 65 | ret <- split_namespace(str) 66 | if (!exists_function_ns(ret[[2]], ret[[1]])) { 67 | stop("Did not find function in loaded namespace") 68 | } 69 | ret 70 | } else { 71 | name <- str 72 | fun_envir <- find_function_name(name, envir) 73 | match_fun_sanitise(name, fun_envir) 74 | } 75 | } 76 | 77 | match_fun_symbol <- function(sym, envir) { 78 | name <- as.character(sym) 79 | match_fun_name(name, envir) 80 | } 81 | 82 | ## This one is much harder and might take a while. 83 | ## 84 | ## TODO: Don't deal here with the case that the function is in 85 | ## anything other than the environment that its enclosure points at; 86 | ## that's going to skip memoized functions, etc. It also is going to 87 | ## miss anonymous functions for now. But start with this bit I think. 88 | ## 89 | ## TODO: This is going to miss things like extra attributes added to a 90 | ## function, but that's going in the category of "users making things 91 | ## difficult". 92 | match_fun_value <- function(fun, envir) { 93 | res <- find_function_value(fun, envir) 94 | match_fun_sanitise(res$name, res$envir) 95 | } 96 | 97 | ## TODO: might be worth also passing in 'envir' as the starting 98 | ## environment; then we can determine if we're looking at: 99 | ## namespace 100 | ## global env 101 | ## given env 102 | ## other env 103 | ## TODO: Might also return the environment here as a named list so 104 | ## that we can do some further faffing? 105 | match_fun_sanitise <- function(name, fun_envir) { 106 | ns <- environmentName(fun_envir) 107 | ## Don't treat the global environment specially here: 108 | if (identical(ns, "R_GlobalEnv")) { 109 | ns <- "" 110 | } else { 111 | ## Might be best here to treat all environments as non-namespace 112 | ## unless we get a 'package:' name? 113 | ns <- sub("^package:", "", ns) 114 | } 115 | ret <- c(ns, name) 116 | if (ns == "") { 117 | attr(ret, "envir") <- fun_envir 118 | } 119 | ret 120 | } 121 | 122 | ## TODO: throughout here we'll need to have the functions loaded,
124 | ## 125 | ## TODO: separate out the NSE from here, like this: 126 | ## match_fun <- function(fun, envir) { 127 | ## fun_sub <- substitute(fun) 128 | ## if (is.name(fun_sub)) { 129 | ## match_fun_symbol(fun_sub, envir) 130 | ## } else { 131 | ## match_fun_(fun, envir) 132 | ## } 133 | ## } 134 | match_fun <- function(fun, envir) { 135 | if (is.character(fun)) { 136 | match_fun_name(fun, envir) 137 | } else if (is.function(fun)) { 138 | match_fun_value(fun, envir) 139 | } else { 140 | stop("Invalid input") 141 | } 142 | } 143 | 144 | match_fun_rrqueue <- function(fun, envir, envir_rrqueue) { 145 | dat <- match_fun(fun, envir) 146 | if (dat[[1]] == "") { 147 | ## Now, try to find the function in rrqueue's environment: 148 | ## TODO: This might not really work; we want to look in the right 149 | ## environment here... 150 | if (exists_function_here(dat[[2]], envir_rrqueue)) { 151 | name <- dat[[2]] 152 | ok <- identical(deparse(attr(dat, "envir")[[name]]), 153 | deparse(envir_rrqueue[[name]])) 154 | if (!ok) { 155 | stop("Function found in given and rrqueue environment do not match") 156 | } 157 | } else { 158 | stop("Function not found in rrqueue environment") 159 | } 160 | } 161 | dat 162 | } 163 | 164 | ## TODO: For functions that are not found, we can try and serialise 165 | ## them I think. That's going to work best for things like 166 | ## `function(x) bar(x, a, b)` but it might be hard to pick up all the 167 | ## locals without doing some serious messing around. 168 | -------------------------------------------------------------------------------- /R/monitor.R: -------------------------------------------------------------------------------- 1 | ## TODO: things to monitor specific completeness queues. 2 | monitor_status <- function(obs) { 3 | message(monitor_status_workers(obs)) 4 | message(monitor_status_tasks(obs)) 5 | } 6 | 7 | monitor_status_workers <- function(obs) { 8 | cols <- cbind(c(WORKER_IDLE, "yellow"), 9 | c(WORKER_BUSY, "green"), 10 | c(WORKER_LOST, "red")) 11 | cols <- setNames(cols[2,], cols[1,]) 12 | status <- obs$workers_status() 13 | monitor_status_string("workers", status, cols) 14 | } 15 | 16 | monitor_status_tasks <- function(obs) { 17 | cols <- cbind(c(TASK_PENDING, "grey"), 18 | c(TASK_RUNNING, "green"), 19 | c(TASK_COMPLETE, "blue"), 20 | c(TASK_ERROR, "red"), 21 | c(TASK_ORPHAN, "hotpink"), 22 | c(TASK_REDIRECT, "orange")) 23 | cols <- setNames(cols[2,], cols[1,]) 24 | status <- obs$tasks_status() 25 | monitor_status_string("tasks ", status, cols, 1:4) 26 | } 27 | 28 | ## TODO: Some of the nice colouring code from the worker would spice 29 | ## this up nicely. 30 | monitor_status_string <- function(name, status, cols, i=NULL) { 31 | n <- table(factor(status, levels=names(cols))) 32 | if (!is.null(i)) { 33 | n <- n[i] 34 | } 35 | sprintf("%s [%s] %s", 36 | name, 37 | paste(n, collapse=" | "), 38 | pretty_blocks(status, cols)) 39 | } 40 | 41 | ## No need for a class - just run this function until it gets 42 | ## escaped. 43 | monitor <- function(..., period=10) { 44 | obs <- observer(...) 
45 | repeat { 46 | message(sprintf("[ %s ]", as.character(Sys.time()))) 47 | monitor_status(obs) 48 | Sys.sleep(period) 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /R/object_cache.R: -------------------------------------------------------------------------------- 1 | ##' @importFrom storr storr 2 | object_cache <- function(prefix, con) { 3 | dr <- storr::driver_redis_api(prefix, con) 4 | storr::storr(dr) 5 | } 6 | 7 | ## TODO: weirdly this is the same thing as object_cache, but a 8 | ## different prefix. 9 | file_cache <- function(prefix, con) { 10 | dr <- storr::driver_redis_api(prefix, con) 11 | storr::storr(dr) 12 | } 13 | -------------------------------------------------------------------------------- /R/observer.R: -------------------------------------------------------------------------------- 1 | ##' Creates an observer for an rrqueue. This is the "base class" for 2 | ##' a couple of different objects in rrqueue; notably the 3 | ##' \code{\link{queue}} object. So any method listed here also works 4 | ##' within \code{queue} objects. 5 | ##' 6 | ##' Most of the methods of the \code{observer} object are extremely 7 | ##' simple and involve fetching information from the database about 8 | ##' the state of tasks, environments and workers. 9 | ##' 10 | ##' The method and argument names try to give hints about the sort of 11 | ##' things they expect; a method asking for \code{task_id} expects a 12 | ##' single task identifier, while those asking for \code{task_ids} 13 | ##' expect a vector of task identifiers (and if they have a default 14 | ##' \code{NULL} then will default to returning information for 15 | ##' \emph{all} task identifiers). Similarly, a method starting 16 | ##' \code{task_} applies to one task while a method starting 17 | ##' \code{tasks_} applies to multiple. 18 | ##' 19 | ##' @template observer_methods 20 | ##' @title Creates an observer for an rrqueue 21 | ##' @param queue_name Name of the queue, if not given then it will 22 | ##' check with the given Redis server to see if there is just a 23 | ##' single queue known. In that case we connect to that queue. 24 | ##' Otherwise we error and list possible queues. 25 | ##' @param redis_host Redis hostname 26 | ##' @param redis_port Redis port number 27 | ##' @param config Configuration file of key/value pairs in yaml 28 | ##' format. See the package README for an example. If given, 29 | ##' additional arguments to this function override values in the 30 | ##' file which in turn override defaults of this function. 
31 | ##' @export 32 | observer <- function(queue_name=NULL, 33 | redis_host="127.0.0.1", redis_port=6379, 34 | config=NULL) { 35 | if (!is.null(config)) { 36 | given <- as.list(sys.call())[-1] # -1 is the function name 37 | cfg <- tmp_fix_redis_config(load_config(config)) 38 | dat <- modifyList(cfg, given) 39 | if (is.null(dat$queue_name)) { 40 | stop("queue_name must be given or specified in config") 41 | } 42 | observer(dat$queue_name, dat$redis_host, dat$redis_port, NULL) 43 | } else { 44 | if (is.null(queue_name)) { 45 | queue_names <- queues(redis_host, redis_port) 46 | if (length(queue_names) == 1L) { 47 | queue_name <- queue_names 48 | message("Connecting to queue: ", queue_name) 49 | } else if (length(queue_names) == 0L) { 50 | stop("No queues found") 51 | } else { 52 | stop("More than 1 queue found: specify one of ", 53 | paste(queue_names, collapse=", ")) 54 | } 55 | } 56 | .R6_observer$new(queue_name, redis_host, redis_port) 57 | } 58 | } 59 | 60 | ## I think the correct design pattern is one that is totally dense 61 | ## that takes queue_name, redis_host, redis_port and which sets up the 62 | ## connection and keys. 63 | ## 64 | ## Other things can either inherit from this or compose with it. 65 | ## 66 | ## NOTE: There are no methods here that modify the queue. 67 | .R6_observer <- R6::R6Class( 68 | "observer", 69 | public=list( 70 | queue_name=NULL, 71 | con=NULL, 72 | keys=NULL, 73 | files=NULL, 74 | objects=NULL, 75 | 76 | initialize=function(queue_name, redis_host, redis_port) { 77 | self$queue_name <- queue_name 78 | self$con <- redis_connection(redis_host, redis_port) 79 | self$keys <- rrqueue_keys(self$queue_name) 80 | self$files <- file_cache(self$keys$files, self$con) 81 | self$objects <- object_cache(self$keys$objects, self$con) 82 | }, 83 | 84 | ## 1. 
Tasks: 85 | tasks_list=function() { 86 | tasks_list(self$con, self$keys) 87 | }, 88 | tasks_status=function(task_ids=NULL, follow_redirect=FALSE) { 89 | tasks_status(self$con, self$keys, task_ids, follow_redirect) 90 | }, 91 | tasks_overview=function() { 92 | tasks_overview(self$con, self$keys) 93 | }, 94 | tasks_times=function(task_ids=NULL, unit_elapsed="secs") { 95 | tasks_times(self$con, self$keys, task_ids, unit_elapsed) 96 | }, 97 | tasks_envir=function(task_ids=NULL) { 98 | tasks_envir(self$con, self$keys, task_ids) 99 | }, 100 | task_get=function(task_id) { 101 | task(self, task_id) 102 | }, 103 | task_result=function(task_id, follow_redirect=FALSE) { 104 | task_result(self$con, self$keys, task_id, follow_redirect) 105 | }, 106 | 107 | ## (task groups) 108 | tasks_groups_list=function() { 109 | tasks_groups_list(self$con, self$keys) 110 | }, 111 | tasks_in_groups=function(groups) { 112 | tasks_in_groups(self$con, self$keys, groups) 113 | }, 114 | tasks_lookup_group=function(task_ids=NULL) { 115 | tasks_lookup_group(self$con, self$keys, task_ids) 116 | }, 117 | task_bundle_get=function(groups=NULL, task_ids=NULL) { 118 | task_bundle_get(self, groups, task_ids) 119 | }, 120 | 121 | ## 2: environments 122 | envirs_list=function() { 123 | envirs_list(self$con, self$keys) 124 | }, 125 | envirs_contents=function(envir_ids=NULL) { 126 | envirs_contents(self$con, self$keys, envir_ids) 127 | }, 128 | envir_workers=function(envir_id, worker_ids=NULL) { 129 | envir_workers(self$con, self$keys, envir_id, worker_ids) 130 | }, 131 | 132 | ## 3: workers 133 | workers_len=function() { 134 | workers_len(self$con, self$keys) 135 | }, 136 | workers_list=function() { 137 | workers_list(self$con, self$keys) 138 | }, 139 | workers_list_exited=function() { 140 | workers_list_exited(self$con, self$keys) 141 | }, 142 | workers_status=function(worker_ids=NULL) { 143 | workers_status(self$con, self$keys, worker_ids) 144 | }, 145 | workers_times=function(worker_ids=NULL, unit_elapsed="secs") { 146 | workers_times(self$con, self$keys, worker_ids, unit_elapsed) 147 | }, 148 | workers_log_tail=function(worker_ids=NULL, n=1) { 149 | workers_log_tail(self$con, self$keys, worker_ids, n) 150 | }, 151 | workers_task_id=function(worker_ids=NULL) { 152 | workers_task_id(self$con, self$keys, worker_ids) 153 | }, 154 | ## NOTE: this returns data that is not necessarily fresh: 155 | workers_info=function(worker_ids=NULL) { 156 | workers_info(self$con, self$keys, worker_ids) 157 | }, 158 | worker_envir=function(worker_id) { 159 | worker_envir(self$con, self$keys, worker_id) 160 | }, 161 | workers_running=function(worker_ids=NULL) { 162 | workers_running(self$con, self$keys, worker_ids) 163 | } 164 | )) 165 | -------------------------------------------------------------------------------- /R/redis.R: -------------------------------------------------------------------------------- 1 | ##' @import RedisAPI 2 | redis_connection <- function(con, port=6379) { 3 | if (inherits(con, "redis_api")) { 4 | con 5 | } else if (is.null(con)) { 6 | redux::hiredis(host="127.0.0.1", port=port) 7 | } else if (is.character(con)) { 8 | redux::hiredis(host=con, port=port) 9 | } else { 10 | stop("Cannot create a Redis connection from object") 11 | } 12 | } 13 | 14 | wait_until_hash_field_exists <- function(con, key, field, every=.05, 15 | timeout=as.difftime(5, units="secs")) { 16 | t0 <- Sys.time() 17 | while (Sys.time() - t0 < timeout) { 18 | if (con$HEXISTS(key, field)) { 19 | return() 20 | } 21 | Sys.sleep(every) 22 | } 23 | 
stop(sprintf("field '%s' did not appear in time", field)) 24 | } 25 | 26 | ## Similar to the above, listen on a bunch of hash fields for 27 | ## something to exist. 28 | poll_hash_keys <- function(con, keys, field, wait, every=0.05) { 29 | if (wait <= 0) { 30 | res <- lapply(keys, con$HGET, field) 31 | } else { 32 | timeout <- as.difftime(wait, units="secs") 33 | t0 <- Sys.time() 34 | ok <- logical(length(keys)) 35 | res <- vector("list", length(keys)) 36 | while (Sys.time() - t0 < timeout) { 37 | exists <- as.logical(vnapply(keys[!ok], con$HEXISTS, field)) 38 | if (any(exists)) { 39 | i <- which(!ok)[exists] 40 | res[i] <- lapply(keys[i], con$HGET, field) 41 | ok[i] <- TRUE 42 | if (all(ok)) { 43 | break 44 | } 45 | } 46 | ## This should not be called on the last way through... 47 | Sys.sleep(every) 48 | } 49 | } 50 | names(res) <- keys 51 | res 52 | } 53 | 54 | clean_pttl <- function(x) { 55 | i <- x > 0 56 | x[i] <- x[i] / 1000 57 | x 58 | } 59 | 60 | ## Way more complicated, simulation of BLPOP with no timeout on 61 | ## multiple lists. Not anything safe. 62 | lpop_mult <- function(con, keys) { 63 | for (k in keys) { 64 | res <- con$LPOP(k) 65 | if (!is.null(res)) { 66 | return(list(k, res)) 67 | } 68 | } 69 | NULL 70 | } 71 | -------------------------------------------------------------------------------- /R/rrqlapply.R: -------------------------------------------------------------------------------- 1 | ##' Parallel version of lapply using Redis queuing 2 | ##' @title Parallel version of lapply using Redis queuing 3 | ##' @param X A vector 4 | ##' @param FUN The name of a function to apply to each element of the 5 | ##' list. \emph{this will change!}. 6 | ##' @param rrq An rrq object 7 | ##' @param ... Additional arguments passed to \code{FUN} 8 | ##' @param group Name of a group for generated task ids. If not 9 | ##' included, an ID will be generated. 10 | ##' @param timeout Total length of time to wait for tasks to be 11 | ##' completed. The default is to wait forever (like \code{lapply}). 12 | ##' 13 | ##' @param time_poll Time to poll for tasks. Must be an integer. 14 | ##' Because of how the function is implemented, R will be 15 | ##' unresponsive for this long each iteration (unless results are 16 | ##' returned), so the default of 1s should be reasonable. 17 | ##' @param delete_tasks Delete tasks on successful finish? 18 | ##' @param progress_bar Display a progress bar? 19 | ##' @param env Environment to look in. 20 | ##' @export 21 | rrqlapply <- function(X, FUN, rrq, ..., group=NULL, 22 | timeout=Inf, time_poll=1, delete_tasks=FALSE, 23 | progress_bar=TRUE, env=parent.frame()) { 24 | ## TODO: I've set progress_bar to be true on both submitting and 25 | ## retrieving, but the submit phase *should* be fast enough that 26 | ## it's not necessary. That's not true if we're running Redis over 27 | ## a slow connection though (which we do with the clusterous 28 | ## approach). This adds some overhead but I think it'll do for now. 29 | obj <- rrqlapply_submit(X, FUN, rrq, ..., group=group, 30 | progress_bar=progress_bar, env=env) 31 | tryCatch(obj$wait(timeout, time_poll, progress_bar), 32 | interrupt=function(e) obj) 33 | } 34 | 35 | ##' @export 36 | ##' @rdname rrqlapply 37 | rrqlapply_submit <- function(X, FUN, rrq, ..., group=NULL, 38 | progress_bar=TRUE, env=parent.frame()) { 39 | fun <- find_fun(FUN, env, rrq) 40 | DOTS <- list(...) 
41 | 42 | n <- length(X) 43 | 44 | ## NOTE: the key_complete treatment here avoids possible race 45 | ## condition/implementation dependence by giving all tasks the same 46 | ## key_complete and making that shared with whatever the first gets 47 | ## (which is done via INCR). 48 | tasks <- vector("list", n) 49 | e <- environment() 50 | key_complete <- NULL 51 | group <- create_group(group, progress_bar) 52 | p <- progress(total=n, show=progress_bar, prefix="submitting: ") 53 | for (i in seq_len(n)) { 54 | expr <- as.call(c(list(fun, X[[i]]), DOTS)) 55 | tasks[[i]] <- rrq$enqueue_(expr, e, key_complete=key_complete, group=group) 56 | if (is.null(key_complete)) { 57 | key_complete <- tasks[[i]]$key_complete 58 | } 59 | p() 60 | } 61 | task_bundle(rrq, tasks, group, names(X)) 62 | } 63 | 64 | ## This is hopefully going to be enough: 65 | find_fun <- function(FUN, env, rrq) { 66 | dat <- match_fun_rrqueue(FUN, env, rrq$envir) 67 | if (dat[[1]] == "") { 68 | as.name(dat[[2]]) 69 | } else { 70 | call("::", as.name(dat[[1]]), as.name(dat[[2]])) 71 | } 72 | } -------------------------------------------------------------------------------- /R/status.R: -------------------------------------------------------------------------------- 1 | ## Start of bits for querying the state of the system 2 | ## 3 | ## TODO: This becomes list_queues(), for consistency with the rest of 4 | ## the system. 5 | queues <- function(redis_host=NULL, redis_port=6379) { 6 | con <- redis_connection(redis_host, redis_port) 7 | as.character(con$SMEMBERS("rrqueue:queues")) 8 | } 9 | -------------------------------------------------------------------------------- /R/task.R: -------------------------------------------------------------------------------- 1 | ##' Create a task handle object. This is a "pointer" to a task and 2 | ##' can be used to retrieve information about status, running times, 3 | ##' expression and the result of the task once complete. Generally 4 | ##' you do not need to make a task object as they will be created for 5 | ##' you by things like the \code{task_get} method of the 6 | ##' \code{\link{observer}} and \code{\link{queue}} objects. 7 | ##' 8 | ##' Tasks have a \emph{unique identifier}; these are unique within a 9 | ##' queue and are implemented as an incrementing integer. However, 10 | ##' this is an implementation detail and should not be relied on. The 11 | ##' identifier is represented as a \emph{character string} rather than 12 | ##' an integer in most places. 13 | ##' 14 | ##' Tasks exist in one of a number of \emph{statuses}. See the 15 | ##' \code{status} method below for a list of possible statuses and 16 | ##' their interpretation. 17 | ##' 18 | ##' @template task_methods 19 | ##' @title Create a task handle 20 | ##' @param obj A \code{queue} or \code{observer} object. 21 | ##' @param task_id Task identifier 22 | ##' @param key_complete If known, specify the \code{key_complete}, 23 | ##' otherwise we look it up on creation.
24 | ##' @export 25 | task <- function(obj, task_id, key_complete=NULL) { 26 | .R6_task$new(obj, task_id, key_complete) 27 | } 28 | 29 | ## First, the ideal lifecycle: 30 | ## * after submission a job is pending (time_sub) 31 | TASK_PENDING <- "PENDING" 32 | ## * after it is picked up by a worker it is running (time_beg) 33 | TASK_RUNNING <- "RUNNING" 34 | ## * after it is finished by a worker it is complete or error (time_end) 35 | TASK_COMPLETE <- "COMPLETE" 36 | TASK_ERROR <- "ERROR" 37 | 38 | ## Alternatively: 39 | ## worker node died 40 | TASK_ORPHAN <- "ORPHAN" 41 | ## orphaned task was requeued 42 | TASK_REDIRECT <- "REDIRECT" 43 | ## An unknown task 44 | TASK_MISSING <- "MISSING" 45 | 46 | .R6_task <- R6::R6Class( 47 | "task", 48 | 49 | public=list( 50 | con=NULL, 51 | id=NULL, 52 | keys=NULL, 53 | key_complete=NULL, 54 | 55 | initialize=function(obj, id, key_complete=NULL) { 56 | self$con <- obj$con 57 | self$keys <- obj$keys 58 | 59 | self$id <- as.character(id) 60 | if (is.null(key_complete)) { 61 | key_complete <- self$con$HGET(self$keys$tasks_complete, id) 62 | } 63 | self$key_complete <- key_complete 64 | }, 65 | 66 | ## TODO: new methods: 67 | ## - drop / cancel / kill [hmm] 68 | 69 | ## TODO: These could be active bindings, but that might require a 70 | ## new R6 to CRAN to work nicely. 71 | status=function(follow_redirect=FALSE) { 72 | unname(tasks_status(self$con, self$keys, self$id, follow_redirect)) 73 | }, 74 | 75 | result=function(follow_redirect=FALSE) { 76 | task_result(self$con, self$keys, self$id, follow_redirect) 77 | }, 78 | 79 | expr=function(locals=FALSE) { 80 | task_expr(self$con, self$keys, self$id, 81 | if (locals) object_cache(self$keys$objects, self$con)) 82 | }, 83 | 84 | envir=function() { 85 | unname(tasks_envir(self$con, self$keys, self$id)) 86 | }, 87 | 88 | times=function(unit_elapsed="secs") { 89 | tasks_times(self$con, self$keys, self$id, unit_elapsed) 90 | }, 91 | 92 | wait=function(timeout, every=0.05) { 93 | task_wait(self$con, self$keys, self$id, timeout, every) 94 | } 95 | )) 96 | 97 | ## TODO: This is going to hit status too many times. Don't worry 98 | ## about this for now, but if speed becomes important this is a 99 | ## reasonable place to look. 100 | ## 101 | ## TODO: Scripting this is a little tricky because of the redirect 102 | ## loop; to do that in Lua requires implementing most of the work in 103 | ## tasks_status in Lua; that's going to be more work than is worth it 104 | ## right now, IMO.
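## For illustration, assuming `obj` is an existing observer or queue
## object and "7" is the id of a submitted task, a handle behaves like:
##
## t <- task(obj, "7")
## t$status() # "PENDING", then "RUNNING", then "COMPLETE" or "ERROR"
## t$times() # submitted/started/finished times plus waiting/running
## t$wait(10) # block for up to 10 seconds for the result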
105 | task_result <- function(con, keys, task_id, 106 | follow_redirect=FALSE, sanitise=FALSE) { 107 | status <- tasks_status(con, keys, task_id, follow_redirect=FALSE) 108 | 109 | if (follow_redirect && status == TASK_REDIRECT) { 110 | task_id <- task_redirect_target(con, keys, task_id) 111 | status <- tasks_status(con, keys, task_id, follow_redirect=FALSE) 112 | } 113 | 114 | if (status == TASK_COMPLETE || status == TASK_ERROR) { 115 | string_to_object(con$HGET(keys$tasks_result, task_id)) 116 | } else if (sanitise) { 117 | UnfetchableTask(task_id, status) 118 | } else { 119 | stop(sprintf("task %s is unfetchable: %s", task_id, status)) 120 | } 121 | } 122 | 123 | task_redirect_target <- function(con, keys, task_id) { 124 | to <- con$HGET(keys$tasks_redirect, task_id) 125 | if (is.null(to)) { 126 | task_id 127 | } else { 128 | task_redirect_target(con, keys, to) 129 | } 130 | } 131 | 132 | task_expr <- function(con, keys, task_id, object_cache=NULL) { 133 | task_expr <- con$HGET(keys$tasks_expr, task_id) 134 | if (!is.null(object_cache)) { 135 | e <- new.env(parent=baseenv()) 136 | expr <- restore_expression(task_expr, e, object_cache) 137 | attr(expr, "envir") <- e 138 | expr 139 | } else { 140 | restore_expression(task_expr, NULL, NULL) 141 | } 142 | } 143 | 144 | tasks_list <- function(con, keys) { 145 | as.character(con$HKEYS(keys$tasks_status)) 146 | } 147 | 148 | tasks_status <- function(con, keys, task_ids=NULL, follow_redirect=FALSE) { 149 | ret <- from_redis_hash(con, keys$tasks_status, task_ids, 150 | as.character, TASK_MISSING) 151 | if (follow_redirect) { 152 | task_ids <- names(ret) 153 | i <- ret == TASK_REDIRECT 154 | if (any(i)) { 155 | task2_id <- vcapply(task_ids[i], 156 | function(t) task_redirect_target(con, keys, t)) 157 | ret[i] <- unname(tasks_status(con, keys, task2_id, FALSE)) 158 | } 159 | } 160 | ret 161 | } 162 | 163 | tasks_overview <- function(con, keys, task_ids=NULL) { 164 | lvls <- c(TASK_PENDING, TASK_RUNNING, TASK_COMPLETE, TASK_ERROR) 165 | status <- tasks_status(con, keys, task_ids) 166 | lvls <- c(lvls, setdiff(unique(status), lvls)) 167 | table(factor(status, lvls)) 168 | } 169 | 170 | tasks_envir <- function(con, keys, task_ids=NULL) { 171 | from_redis_hash(con, keys$tasks_envir, task_ids) 172 | } 173 | 174 | tasks_times <- function(con, keys, task_ids=NULL, unit_elapsed="secs") { 175 | ## TODO: This could make a lot of requests; might be worth 176 | ## thinking about a little... 177 | f <- function(key) { 178 | from_redis_hash(con, key, task_ids, redis_time_to_r) 179 | } 180 | if (is.null(task_ids)) { 181 | task_ids <- tasks_list(con, keys) 182 | } 183 | ret <- data.frame(task_id = task_ids, 184 | submitted = f(keys$tasks_time_sub), 185 | started = f(keys$tasks_time_beg), 186 | finished = f(keys$tasks_time_end), 187 | stringsAsFactors=FALSE) 188 | now <- redis_time_to_r(redis_time(con)) 189 | started2 <- ret$started 190 | finished2 <- ret$finished 191 | finished2[is.na(finished2)] <- started2[is.na(started2)] <- now 192 | ret$waiting <- as.numeric(started2 - ret$submitted, unit_elapsed) 193 | ret$running <- as.numeric(finished2 - ret$started, unit_elapsed) 194 | ret$idle <- as.numeric(now - ret$finished, unit_elapsed) 195 | ret 196 | } 197 | 198 | ## Lookup functions. 
199 | ## First, find the names of extant groups: 200 | tasks_groups_list <- function(con, keys) { 201 | unique(as.character(con$HVALS(keys$tasks_group))) 202 | } 203 | 204 | ## Then, the tasks associated with a given group: 205 | tasks_in_groups <- function(con, keys, groups) { 206 | ## TODO: This should be done with HSCAN to do it "properly", but 207 | ## that should probably move into the RedisAPI before I try. 208 | groups_hash <- from_redis_hash(con, keys$tasks_group) 209 | names(groups_hash)[groups_hash %in% groups] 210 | } 211 | 212 | ## TODO: not tested anywhere yet. 213 | tasks_lookup_group <- function(con, keys, task_ids=NULL) { 214 | if (is.null(task_ids)) { 215 | task_ids <- tasks_list(con, keys) 216 | } 217 | ## TODO: this is not how we usually do this... 218 | ## TODO: there are a lot of possible edge cases here that should be 219 | ## tested, especially missing values 220 | groups <- from_redis_hash(con, keys$tasks_group) 221 | setNames(groups[task_ids], task_ids) 222 | } 223 | 224 | tasks_set_group <- function(con, keys, task_ids, group, 225 | exists_action="stop") { 226 | ## Alternatively we could recycle? 227 | if (!is.null(group)) { 228 | assert_scalar_character(group) 229 | } 230 | exists_action <- match_value(exists_action, 231 | c("stop", "warn", "pass", "overwrite")) 232 | ## should check that the ids are valid I think. 233 | ## This is pretty nasty: 234 | if (!is.null(group) && exists_action != "overwrite") { 235 | cur <- from_redis_hash(con, keys$tasks_group, task_ids) 236 | ok <- is.na(cur) | cur == group 237 | if (!all(ok)) { 238 | if (exists_action != "pass") { 239 | msg <- paste0("Groups already exist for tasks: ", 240 | paste(task_ids[!ok], collapse=", ")) 241 | if (exists_action == "stop") { 242 | stop(msg) 243 | } else { 244 | warning(msg, immediate.=TRUE) 245 | } 246 | } 247 | task_ids <- task_ids[ok] 248 | } 249 | } 250 | if (length(task_ids) > 0L) { 251 | if (is.null(group)) { 252 | con$HDEL(keys$tasks_group, task_ids) 253 | } else { 254 | con$HMSET(keys$tasks_group, task_ids, group) 255 | } 256 | } 257 | invisible(NULL) 258 | } 259 | 260 | task_wait <- function(con, keys, task_id, timeout, every=0.05) { 261 | t0 <- Sys.time() 262 | timeout <- as.difftime(timeout, units="secs") 263 | repeat { 264 | res <- task_result(con, keys, task_id, sanitise=TRUE) 265 | if (!inherits(res, "UnfetchableTask")) { 266 | return(res) 267 | } else if (Sys.time() - t0 < timeout) { 268 | Sys.sleep(every) 269 | } else { 270 | stop("task not returned in time") 271 | } 272 | } 273 | } -------------------------------------------------------------------------------- /R/task_bundle.R: -------------------------------------------------------------------------------- 1 | ##' Create a task bundle. Generally these are not created manually, 2 | ##' but this page serves to document what task bundles are and the 3 | ##' methods that they have. 4 | ##' 5 | ##' A task bundle exists to group together tasks that are related. It 6 | ##' is possible for a task to belong to multiple bundles. 7 | ##' 8 | ##' @template task_bundle_methods 9 | ##' 10 | ##' @title Create a task bundle 11 | ##' @param obj An observer or queue object 12 | ##' @param tasks A list of tasks 13 | ##' @param groups Optional vector of groups. If given, then additional 14 | ##' tasks can be added to the bundle if they share the same group names. 15 | ##' @param names Optional vector of names to label output with.
16 | ##' @export 17 | task_bundle <- function(obj, tasks, groups=NULL, names=NULL) { 18 | ## TODO: What is groups used for here? Seems no longer needed? 19 | .R6_task_bundle$new(obj, tasks, groups, names) 20 | } 21 | 22 | ## TODO: Next, make an automatically updating version. 23 | .R6_task_bundle <- R6::R6Class( 24 | "task_bundle", 25 | 26 | public= 27 | list( 28 | obj=NULL, 29 | tasks=NULL, 30 | key_complete=NULL, 31 | groups=NULL, 32 | names=NULL, 33 | con=NULL, 34 | keys=NULL, 35 | 36 | initialize=function(obj, tasks, groups, names) { 37 | self$con <- obj$con 38 | self$keys <- obj$keys 39 | self$obj <- obj 40 | self$tasks <- setNames(tasks, vcapply(tasks, "[[", "id")) 41 | 42 | self$key_complete <- unique(vcapply(tasks, "[[", "key_complete")) 43 | self$groups <- groups 44 | 45 | if (!is.null(names) && length(names) != length(tasks)) { 46 | stop("Incorrect length names") 47 | } 48 | self$names <- names 49 | }, 50 | 51 | ids=function() { 52 | names(self$tasks) 53 | }, 54 | 55 | update_groups=function() { 56 | task_ids <- setdiff(tasks_in_groups(self$con, self$keys, self$groups), 57 | self$ids()) 58 | if (length(task_ids)) { 59 | tasks <- setNames(lapply(task_ids, self$obj$task_get), task_ids) 60 | self$tasks <- c(self$tasks, tasks) 61 | self$key_complete <- union(self$key_complete, 62 | unique(vcapply(tasks, "[[", "key_complete"))) 63 | ## Can't deal with this for now :( 64 | self$names <- NULL 65 | } 66 | invisible(task_ids) 67 | }, 68 | 69 | overview=function() { 70 | tasks_overview(self$con, self$keys, self$ids()) 71 | }, 72 | status=function(follow_redirect=FALSE) { 73 | self$obj$tasks_status(self$ids(), follow_redirect=follow_redirect) 74 | }, 75 | results=function(follow_redirect=FALSE) { 76 | self$wait(0, 0, FALSE, follow_redirect) 77 | }, 78 | wait=function(timeout=60, time_poll=1, progress_bar=TRUE, follow_redirect=FALSE) { 79 | task_bundle_wait(self, timeout, time_poll, progress_bar, follow_redirect) 80 | }, 81 | wait1=function(timeout=60, time_poll=1, follow_redirect=FALSE) { 82 | task_bundle_wait1(self, timeout, time_poll, follow_redirect) 83 | }, 84 | times=function(unit_elapsed="secs") { 85 | tasks_times(self$con, self$keys, self$ids(), unit_elapsed) 86 | }, 87 | 88 | 89 | delete_tasks=function() { 90 | invisible(self$obj$tasks_drop(self$ids())) 91 | })) 92 | 93 | 94 | ## There are a bunch of ways of getting appropriate things here: 95 | task_bundle_get <- function(obj, groups=NULL, task_ids=NULL) { 96 | if (!xor(is.null(task_ids), is.null(groups))) { 97 | stop("Exactly one of task_ids or groups must be given") 98 | } 99 | if (is.null(groups)) { 100 | groups <- obj$tasks_lookup_group(task_ids) 101 | } else { 102 | task_ids <- obj$tasks_in_groups(groups) 103 | } 104 | 105 | tasks <- lapply(task_ids, obj$task_get) 106 | names(tasks) <- task_ids 107 | task_bundle(obj, tasks, groups) 108 | } 109 | 110 | 111 | task_bundle_wait <- function(obj, timeout, time_poll, progress_bar, follow_redirect) { 112 | assert_integer_like(time_poll) 113 | task_ids <- obj$ids() 114 | status <- obj$status() 115 | done <- !(status == TASK_PENDING | status == TASK_RUNNING | 116 | status == TASK_ORPHAN) 117 | 118 | ## Immediately collect all results: 119 | results <- named_list(task_ids) 120 | if (any(done)) { 121 | results[done] <- lapply(obj$tasks[done], 122 | function(t) t$result(follow_redirect)) 123 | } 124 | 125 | cleanup <- function(results, names) { 126 | if (!is.null(names)) { 127 | names(results) <- names 128 | } 129 | results 130 | } 131 | if (all(done)) { 132 | return(cleanup(results, 
obj$names)) 133 | } else if (timeout == 0) { 134 | stop("Tasks not yet completed; can't be immediately returned") 135 | } 136 | 137 | p <- progress(total=length(obj$tasks), show=progress_bar) 138 | t0 <- Sys.time() 139 | timeout <- as.difftime(timeout, units="secs") 140 | 141 | p(sum(done)) 142 | i <- 1L 143 | while (!all(done)) { 144 | if (Sys.time() - t0 > timeout) { 145 | stop(sprintf("Exceeded maximum time (%d / %d tasks pending)", 146 | sum(!done), length(done))) 147 | } 148 | res <- task_bundle_fetch1(obj, time_poll, follow_redirect) 149 | if (is.null(res)) { 150 | p(0) 151 | } else { 152 | p(1) 153 | task_id <- res[[1]] 154 | result <- res[[2]] 155 | done[[task_id]] <- TRUE 156 | ## NOTE: This conditional is needed to avoid deleting the 157 | ## element in results if we get a NULL result. 158 | if (!is.null(result)) { 159 | results[[task_id]] <- result 160 | } 161 | } 162 | 163 | } 164 | cleanup(results, obj$names) 165 | } 166 | 167 | task_bundle_wait1 <- function(obj, timeout, time_poll, follow_redirect) { 168 | status <- obj$status(follow_redirect) 169 | done <- !(status == TASK_PENDING | status == TASK_RUNNING | 170 | status == TASK_ORPHAN) 171 | if (all(done)) { 172 | done <- vnapply(obj$key_complete, obj$obj$con$LLEN) == 0 173 | } 174 | if (all(done)) { 175 | return(NULL) 176 | } 177 | times_up <- time_checker(timeout) 178 | repeat { 179 | if (times_up()) { 180 | return(NULL) 181 | } 182 | res <- task_bundle_fetch1(obj, time_poll, follow_redirect) 183 | if (!is.null(res)) { 184 | return(res) 185 | } 186 | } 187 | } 188 | 189 | task_bundle_fetch1 <- function(bundle, timeout, follow_redirect) { 190 | if (as.integer(timeout) > 0) { 191 | res <- bundle$con$BLPOP(bundle$key_complete, timeout) 192 | } else { 193 | res <- lpop_mult(bundle$con, bundle$key_complete) 194 | } 195 | if (!is.null(res)) { 196 | id <- res[[2]] 197 | list(id=id, 198 | result=bundle$obj$task_result(id, follow_redirect=follow_redirect)) 199 | 200 | } else { 201 | NULL 202 | } 203 | } 204 | -------------------------------------------------------------------------------- /R/utils.R: -------------------------------------------------------------------------------- 1 | ##' @importFrom digest digest 2 | hash_string <- function(x) { 3 | digest::digest(x, serialize=FALSE) 4 | } 5 | 6 | hash_file <- function(x) { 7 | digest::digest(file=x) 8 | } 9 | 10 | hash_files <- function(x) { 11 | setNames(vcapply(x, hash_file), x) 12 | } 13 | 14 | compare_hash <- function(x) { 15 | if (length(x) == 0L) { 16 | TRUE 17 | } else { 18 | files <- names(x) 19 | all(file.exists(files)) && identical(hash_files(files), x) 20 | } 21 | } 22 | 23 | is_error <- function(x) { 24 | inherits(x, "try-error") 25 | } 26 | 27 | is_directory <- function(path) { 28 | file.info(path)$isdir 29 | } 30 | 31 | is_terminal <- function() { 32 | if (!isatty(stdout())) { 33 | return(FALSE) 34 | } 35 | if (.Platform$OS.type == "windows") { 36 | return(FALSE) 37 | } 38 | if (Sys.getenv("TERM") == "dumb") { 39 | return(FALSE) 40 | } 41 | !is_error(try(system("tput colors", intern=TRUE), silent=TRUE)) 42 | } 43 | 44 | vcapply <- function(X, FUN, ...) { 45 | vapply(X, FUN, character(1), ...) 46 | } 47 | vnapply <- function(X, FUN, ...) { 48 | vapply(X, FUN, numeric(1), ...) 49 | } 50 | viapply <- function(X, FUN, ...) { 51 | vapply(X, FUN, integer(1), ...) 52 | } 53 | vlapply <- function(X, FUN, ...) { 54 | vapply(X, FUN, logical(1), ...) 55 | } 56 | 57 | docopt_parse <- function(...) 
{ 58 | oo <- options(warnPartialMatchArgs=FALSE) 59 | if (isTRUE(oo$warnPartialMatchArgs)) { 60 | on.exit(options(oo)) 61 | } 62 | docopt::docopt(...) 63 | } 64 | 65 | lstrip <- function(x) { 66 | sub("^\\s+", "", x, perl=TRUE) 67 | } 68 | rstrip <- function(x) { 69 | sub("\\s+$", "", x, perl=TRUE) 70 | } 71 | 72 | ## Source a file (using sys.source) and record all files that file 73 | ## sources (only via source and sys.source, ignoring file connections, 74 | ## assuming files don't change, etc, etc). 75 | sys_source <- function(...) { 76 | collector <- function(...) { 77 | e <- parent.frame(2) 78 | if (exists("file", e, inherits=FALSE)) { 79 | file <- get("file", e, inherits=FALSE) 80 | if (is.character(file)) { 81 | ## TODO: need to deal with the case where source(..., 82 | ## chdir=TRUE) was used and the path has changed; in that case 83 | ## we're going to need to work out where the file is relative 84 | ## to the current directory, which requires pathr to work. 85 | ## 86 | ## If we *do* do this, then the create_environment function 87 | ## needs to take care of that bookkeeping. 88 | ## 89 | ## NOTE: using hash_files(), not hash_file(), as the latter 90 | ## adds names. 91 | dat <<- c(dat, hash_files(file)) 92 | } else { 93 | warning("non-file source detected") 94 | } 95 | } else { 96 | warning("source detection failed") 97 | } 98 | dat 99 | } 100 | dat <- character(0) 101 | 102 | suppressMessages({ 103 | trace(base::source, function(...) collector(), print=FALSE) 104 | trace(base::sys.source, function(...) collector(), print=FALSE) 105 | }) 106 | on.exit({ 107 | suppressMessages({ 108 | untrace(base::source) 109 | untrace(base::sys.source) 110 | }) 111 | }) 112 | sys.source(...) 113 | dat 114 | } 115 | 116 | random_colour <- function(n=1) { 117 | rgb(runif(n), runif(n), runif(n)) 118 | } 119 | 120 | strrep <- function (str, n) { 121 | paste(rep_len(str, n), collapse = "") 122 | } 123 | 124 | Sys_kill <- function(pid, signal=NULL) { 125 | system2("kill", c(pid, signal)) 126 | } 127 | 128 | find_script <- function(name) { 129 | cmd <- Sys.which(name) 130 | if (cmd == "") { 131 | tmp <- tempfile() 132 | install_scripts(tmp) 133 | cmd <- file.path(tmp, name) 134 | } 135 | cmd 136 | } 137 | 138 | hostname <- function() { 139 | Sys.info()[["nodename"]] 140 | } 141 | process_id <- function() { 142 | Sys.getpid() 143 | } 144 | 145 | ## Potentially useful for a monitor thing: 146 | ## x <- sample(letters[1:4], 40, replace=TRUE) 147 | ## cols <- c(a="red", b="blue", c="green", d="purple") 148 | ## pretty_blocks(x, cols) 149 | pretty_blocks <- function(x, cols) { 150 | sq <- vcapply(cols, function(x) crayon::make_style(x)("\u2588")) 151 | paste(sq[x], collapse="") 152 | } 153 | 154 | ## Alternatives: 155 | ## http://stackoverflow.com/a/2685827 156 | spin_symbols <- function() { 157 | sym <- c("-", "\\", "|", "/") 158 | i <- 0L 159 | n <- length(sym) 160 | function() { 161 | sym[[i <<- if (i >= n) 1L else i + 1L]] 162 | } 163 | } 164 | 165 | ##' @importFrom progress progress_bar 166 | progress <- function(total, ..., show=TRUE, prefix="") { 167 | if (show) { 168 | fmt <- paste0(prefix, "[:bar] :percent :spin") 169 | pb <- progress::progress_bar$new(fmt, total=total) 170 | ws <- spin_symbols() 171 | function(len=1) { 172 | invisible(pb$tick(len, list(spin=ws()))) 173 | } 174 | } else { 175 | function(...) {} 176 | } 177 | } 178 | 179 | ## Short-circuit apply; returns the index of the first element of x 180 | ## for which cond(x[[i]]) holds true. 
181 | scapply <- function(x, cond, no_match=NA_integer_) { 182 | for (i in seq_along(x)) { 183 | if (isTRUE(cond(x[[i]]))) { 184 | return(i) 185 | } 186 | } 187 | no_match 188 | } 189 | 190 | invert_names <- function(x) { 191 | setNames(names(x), x) 192 | } 193 | 194 | blank <- function(n) { 195 | paste(rep_len(" ", n), collapse="") 196 | } 197 | 198 | ## Possibly could be done faster. 199 | df_to_list <- function(x) { 200 | keep <- c("names", "class", "row.names") 201 | at <- attributes(x) 202 | attributes(x) <- at[intersect(names(at), keep)] 203 | unname(lapply(split(x, seq_len(nrow(x))), as.list)) 204 | } 205 | 206 | match_value <- function(arg, choices, name=deparse(substitute(arg))) { 207 | assert_scalar_character(arg) 208 | if (!(arg %in% choices)) { 209 | stop(sprintf("%s must be one of %s", 210 | name, paste(dQuote(choices), collapse=", "))) 211 | } 212 | arg 213 | } 214 | 215 | create_group <- function(group, verbose) { 216 | if (is.null(group)) { 217 | group <- ids::adjective_animal() 218 | if (verbose) { 219 | message(sprintf("Creating group: '%s'", group)) 220 | } 221 | } 222 | group 223 | } 224 | 225 | read_file_to_string <- function(filename) { 226 | readChar(filename, file.info(filename)$size) 227 | } 228 | write_string_to_file <- function(str, filename) { 229 | writeChar(str, filename, eos=NULL) 230 | } 231 | 232 | ##' Install rrqueue scripts. This currently includes a script for 233 | ##' starting a worker process. The script is extremely simple and 234 | ##' does not need upgrading when rrqueue is upgraded. 235 | ##' @title Install rrqueue scripts 236 | ##' @param dest Directory to install scripts to, preferably in the \code{$PATH} 237 | ##' @param overwrite Overwrite any existing file? 238 | ##' @export 239 | install_scripts <- function(dest, overwrite=TRUE) { 240 | src <- system.file("scripts", package=.packageName) 241 | scripts <- dir(src) 242 | dir.create(dest, FALSE, TRUE) 243 | ok <- file.copy(file.path(src, scripts), 244 | file.path(dest, scripts), overwrite=overwrite) 245 | invisible(ok) 246 | } 247 | 248 | yaml_load <- function(string) { 249 | handlers <- list(`bool#yes` = function(x) { 250 | if (identical(toupper(x), "TRUE")) TRUE else x 251 | }, `bool#no` = function(x) { 252 | if (identical(toupper(x), "FALSE")) FALSE else x 253 | }) 254 | yaml::yaml.load(string, handlers = handlers) 255 | } 256 | yaml_read <- function(filename) { 257 | yaml_load(paste(readLines(filename), collapse="\n")) 258 | } 259 | 260 | docopt_parse <- function(doc, args, clean=TRUE) { 261 | oo <- options(warnPartialMatchArgs=FALSE) 262 | if (isTRUE(oo$warnPartialMatchArgs)) { 263 | on.exit(options(oo)) 264 | } 265 | opts <- docopt::docopt(doc, args) 266 | if (clean) { 267 | opts <- docopt_clean(opts) 268 | } 269 | opts 270 | } 271 | 272 | docopt_clean <- function(opts) { 273 | opts <- opts[!(grepl("^-", names(opts)) | grepl("^<.*>$", names(opts)))] 274 | names(opts) <- gsub("-", "_", names(opts)) 275 | opts 276 | } 277 | 278 | with_wd <- function(path, expr) { 279 | if (path != ".") { 280 | if (!file.exists(path)) { 281 | stop(sprintf("Path '%s' does not exist", path)) 282 | } 283 | if (!is_directory(path)) { 284 | stop(sprintf("Path '%s' exists, but is not a directory", path)) 285 | } 286 | owd <- setwd(path) 287 | on.exit(setwd(owd)) 288 | } 289 | force(expr) 290 | } 291 | 292 | dir_create <- function(paths) { 293 | invisible(vlapply(unique(paths), dir.create, FALSE, TRUE)) 294 | } 295 | 296 | named_list <- function(names) { 297 | setNames(vector("list", length(names)), names) 298 | } 
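## Illustrative examples for the two small container helpers here
## (named_logical is defined just below):
##   named_list(c("a", "b"))     # list(a = NULL, b = NULL)
##   named_logical(c("a", "b"))  # c(a = FALSE, b = FALSE)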
299 | named_logical <- function(names) { 300 | setNames(logical(length(names)), names) 301 | } 302 | 303 | time_checker <- function(timeout) { 304 | t0 <- Sys.time() 305 | timeout <- as.difftime(timeout, units="secs") 306 | function() { 307 | Sys.time() - t0 > timeout 308 | } 309 | } 310 | -------------------------------------------------------------------------------- /R/utils_assert.R: -------------------------------------------------------------------------------- 1 | ## Type 2 | assert_inherits <- function(x, what, name=deparse(substitute(x))) { 3 | if (!inherits(x, what)) { 4 | stop(sprintf("%s must be a %s", name, 5 | paste(what, collapse=" / ")), call.=FALSE) 6 | } 7 | } 8 | assert_character <- function(x, name=deparse(substitute(x))) { 9 | if (!is.character(x)) { 10 | stop(sprintf("%s must be character", name), call.=FALSE) 11 | } 12 | } 13 | 14 | assert_character_or_null <- function(x, name=deparse(substitute(x))) { 15 | if (!is.null(x)) { 16 | assert_character(x, name) 17 | } 18 | } 19 | 20 | ## Length 21 | assert_scalar <- function(x, name=deparse(substitute(x))) { 22 | if (length(x) != 1) { 23 | stop(sprintf("%s must be a scalar", name), call.=FALSE) 24 | } 25 | } 26 | 27 | ## Compound: 28 | assert_scalar_character <- function(x, name=deparse(substitute(x))) { 29 | assert_scalar(x, name) 30 | assert_character(x, name) 31 | } 32 | 33 | assert_integer_like <- function(x, name=deparse(substitute(x))) { 34 | if (!isTRUE(all.equal(as.integer(x), x))) { 35 | stop(sprintf("%s is not integer like", name)) 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /R/worker_spawn.R: -------------------------------------------------------------------------------- 1 | ## There are *four* ways of launching workers; that's why this is so 2 | ## complicated. 3 | ## 4 | ## 1. manually using worker(...) 5 | ## 2. from R using worker_spawn(...) 6 | ## 3. from the command line using rrqueue_worker 7 | ## 4. from the command line using rrqueue_worker_tee 8 | 9 | ## TODO: autogenerate this in the same way that we do for the args 10 | rrqueue_worker_main <- function(args=commandArgs(TRUE)) { 11 | opts <- rrqueue_worker_args(args) 12 | con <- redux::hiredis(host=opts$redis_host, 13 | port=as.integer(opts$redis_port)) 14 | worker(opts$queue_name, con, 15 | heartbeat_period=opts$heartbeat_period, 16 | heartbeat_expire=opts$heartbeat_expire, 17 | key_worker_alive=opts$key_worker_alive) 18 | } 19 | 20 | rrqueue_worker_args <- function(args=commandArgs(TRUE)) { 21 | 'Usage: 22 | rrqueue_worker [options] <queue_name> 23 | rrqueue_worker --config=FILENAME [options] [<queue_name>] 24 | rrqueue_worker -h | --help 25 | 26 | Options: 27 | --redis-host HOSTNAME Hostname for Redis 28 | --redis-port PORT Port for Redis 29 | --heartbeat-period T Heartbeat period 30 | --heartbeat-expire T Heartbeat expiry time 31 | --key-worker-alive KEY Key to write to when the worker becomes alive 32 | --config FILENAME Optional YAML configuration filename 33 | 34 | Arguments: 35 | <queue_name> Name of queue 36 | ' -> doc 37 | 38 | given <- docopt_parse(doc, args) 39 | given <- given[!vlapply(given, is.null)] 40 | 41 | defaults <- as.list(formals(worker)) 42 | nms <- names(defaults) 43 | defaults$queue_name <- NULL 44 | 45 | ## This will be an issue for docopt interface to the main queue too.
46 | if (is.null(given$config)) { 47 | ret <- modifyList(defaults, given)[nms] 48 | } else { 49 | cfg <- tmp_fix_redis_config(load_config(given$config)) 50 | ret <- modifyList(modifyList(defaults, cfg), given)[nms] 51 | ## Check that we did get a queue_name 52 | if (is.null(ret$queue_name)) { 53 | stop("queue name must be given") 54 | } 55 | } 56 | ret 57 | } 58 | 59 | ##' Spawn a worker in the background 60 | ##' 61 | ##' Spawning multiple workers: if \code{n} is greater than one, 62 | ##' multiple workers will be spawned. This happens in parallel so it 63 | ##' does not take n times longer than spawning a single worker. 64 | ##' 65 | ##' Beware that signals like Ctrl-C passed to \emph{this} R instance 66 | ##' can still propagate to the child processes and can result in them 67 | ##' dying unexpectedly. It is probably safer to start processes in a 68 | ##' standalone session. 69 | ##' 70 | ##' @title Spawn a worker 71 | ##' @param queue_name Name of the queue to connect to 72 | ##' @param logfile Name of a log file to write to (consider 73 | ##' \code{tempfile()}). If \code{n} > 1, then \code{n} log files 74 | ##' must be provided. 75 | ##' @param redis_host Host name/IP for the Redis server 76 | ##' @param redis_port Port for the Redis server 77 | ##' @param n Number of workers to spawn 78 | ##' @param timeout Time to wait for the worker to appear 79 | ##' @param time_poll Period to poll for the worker (must be in 80 | ##' seconds) 81 | ##' @param heartbeat_period Period between heartbeat pulses 82 | ##' @param heartbeat_expire Time that heartbeat pulses will persist 83 | ##' @param path Path to start the worker in. By default workers will 84 | ##' start in the current working directory, but you can start them 85 | ##' elsewhere by providing a path here. If the path does not exist, 86 | ##' an error will be thrown. If \code{n} is greater than 1, all 87 | ##' workers will start in the same working directory. The 88 | ##' \code{logfile} argument will be interpreted relative to current 89 | ##' working directory (not the worker working directory); use 90 | ##' \code{\link{normalizePath}} to convert into an absolute path 91 | ##' name to prevent this.
92 | ##' @export 93 | worker_spawn <- function(queue_name, logfile, 94 | redis_host="127.0.0.1", 95 | redis_port=6379, 96 | n=1, 97 | timeout=20, time_poll=1, 98 | heartbeat_period=NULL, 99 | heartbeat_expire=NULL, 100 | path=".") { 101 | rrqueue_worker <- find_script("rrqueue_worker") 102 | ## system2() takes a character vector of NAME=VALUE pairs here: 103 | env <- c(paste0("R_LIBS=", paste(.libPaths(), collapse=":")), 104 | 'R_TESTS=""') 105 | 106 | if (length(logfile) != n) { 107 | stop("logfile must be a vector of length n") 108 | } 109 | assert_integer_like(time_poll) 110 | 111 | con <- redux::hiredis(host=redis_host, port=redis_port) 112 | key_worker_alive <- rrqueue_key_worker_alive(queue_name) 113 | 114 | opts <- character(0) 115 | if (!is.null(heartbeat_expire)) { 116 | opts <- c(opts, "--heartbeat-expire", heartbeat_expire) 117 | } 118 | if (!is.null(heartbeat_period)) { 119 | opts <- c(opts, "--heartbeat-period", heartbeat_period) 120 | } 121 | if (!is.null(redis_host)) { 122 | opts <- c(opts, "--redis-host", redis_host) 123 | } 124 | if (!is.null(redis_port)) { 125 | opts <- c(opts, "--redis-port", redis_port) 126 | } 127 | opts <- c(opts, "--key-worker-alive", key_worker_alive, queue_name) 128 | 129 | dir_create(dirname(logfile)) 130 | logfile <- file.path(normalizePath(dirname(logfile)), basename(logfile)) 131 | 132 | code <- integer(n) 133 | with_wd(path, { 134 | for (i in seq_len(n)) { 135 | code[[i]] <- system2(rrqueue_worker, opts, 136 | env=env, wait=FALSE, 137 | stdout=logfile[[i]], stderr=logfile[[i]]) 138 | } 139 | }) 140 | if (any(code != 0L)) { 141 | warning("Error launching script: worker *probably* did not start") 142 | } 143 | 144 | ret <- rep.int(NA_character_, n) 145 | t0 <- Sys.time() 146 | timeout <- as.difftime(timeout, units="secs") 147 | 148 | i <- 1L 149 | repeat { 150 | x <- con$BLPOP(key_worker_alive, time_poll) 151 | if (is.null(x)) { 152 | message(".", appendLF=FALSE) 153 | flush.console() 154 | } else { 155 | ret[[i]] <- x[[2]] 156 | if (n > 1L) { 157 | message(sprintf("new worker: %s (%d / %d)", x[[2]], i, n)) 158 | } else { 159 | message(sprintf("new worker: %s", x[[2]])) 160 | } 161 | i <- i + 1 162 | } 163 | if (!any(is.na(ret))) { 164 | break 165 | } 166 | if (Sys.time() - t0 > timeout) { 167 | ## TODO: Better recovery here. Ideally we'd stop any workers 168 | ## that *are* running, and provide data from the log files. 169 | stop(sprintf("%d / %d workers not identified in time", 170 | sum(is.na(ret)), n)) 171 | } 172 | } 173 | 174 | ret 175 | } 176 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # rrqueue 2 | 3 | `rrqueue` is a *distributed task queue* for R, implemented on top of [Redis](http://redis.io). At the cost of a little more work it allows for more flexible parallelisation than afforded by `mclapply`. The main goal is to support non-map style operations: submit some tasks, collect the completed results, queue more even while some tasks are still running. 4 | 5 | Other features include: 6 | 7 | * Low-level task submission / retrieval has a simple API so that asynchronous task queues can be created. 8 | * Objects representing tasks, workers, queues, etc can be queried. 9 | * While blocking `mclapply`-like functions are available, the package is designed to be non-blocking so that intermediate results can be used. 10 | * Automatic fingerprinting of environments so that code run on a remote machine will correspond to the code found locally.
11 | * Works well connecting to a Redis database running on the cloud (e.g., on an AWS machine over an ssh tunnel). 12 | * Local workers can be added to a remote pool, so long as everything can talk to the same Redis server. 13 | * The worker pool can be scaled at any time (up or down). 14 | * Basic fault tolerance, supporting requeuing tasks lost on crashed workers. 15 | 16 | # Simple usage 17 | 18 | The basic workflow is: 19 | 20 | 1. Create a queue 21 | 2. Submit tasks to the queue 22 | 3. Start workers 23 | 4. Collect results 24 | 25 | The workers can be started at any time between steps 1 and 3, though they do need to be started before results can be collected. 26 | 27 | ## Create a queue 28 | 29 | Start a queue that we will submit tasks to: 30 | ``` 31 | con <- rrqueue::queue("jobs") 32 | ``` 33 | 34 | Expressions can be queued using the `enqueue` method: 35 | 36 | ``` 37 | task <- con$enqueue(sin(1)) 38 | ``` 39 | 40 | Task objects can be inspected to find out (for example) how long they have been waiting for: 41 | 42 | ``` 43 | task$times() 44 | ``` 45 | 46 | or what their status is: 47 | 48 | ``` 49 | task$status() 50 | ``` 51 | 52 | To get workers to process jobs from this queue, interactively run (in a separate R instance) 53 | 54 | ``` 55 | w <- rrqueue::worker("jobs") 56 | ``` 57 | 58 | or spawn a worker in the background with 59 | 60 | ``` 61 | logfile <- tempfile() 62 | rrqueue::worker_spawn("jobs", logfile) 63 | ``` 64 | 65 | The task will complete: 66 | 67 | ``` 68 | task$status() 69 | ``` 70 | 71 | and the value can be retrieved: 72 | 73 | ``` 74 | task$result() 75 | ``` 76 | 77 | When you are done, stop the worker by sending it a `STOP` message: 78 | 79 | ``` 80 | con$send_message("STOP") 81 | ``` 82 | 83 | In contrast with many parallel approaches in R, workers can be added at any time and will automatically start working on any remaining jobs. 84 | 85 | There's lots more in various stages of completion, including `mclapply`-like functions (`rrqlapply`) and lots of information gathering. 86 | 87 | # Installation 88 | 89 | Redis must be installed and `redis-server` must be running. If you are familiar with docker, the [redis](https://registry.hub.docker.com/_/redis/) docker image might be a good idea here. Alternatively, [download redis](http://redis.io/download), unpack it and then install it by running `make install` in a terminal window within the downloaded folder. 90 | 91 | Once installed, start `redis-server` by typing in a terminal window 92 | 93 | ``` 94 | redis-server 95 | ``` 96 | (On Linux the server will probably already be running. On Mac OS X, you might like to set it up to run as a daemon, i.e., a [background process](https://en.wikipedia.org/wiki/Daemon_(computing)), if you end up using Redis a lot, following [these instructions](http://naleid.com/blog/2011/03/05/running-redis-as-a-user-daemon-on-osx-with-launchd).) 97 | 98 | Try `redis-cli PING` to see if it is running; it should return `PONG` and not give an error.
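The same check can be run from R; this sketch assumes only the `redux` package (which rrqueue uses for its Redis connections) is installed and that Redis is on the default host and port:

```r
con <- redux::hiredis()  # defaults to 127.0.0.1:6379
con$PING()               # should return "PONG"
```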
We'll also need to install some R packages (see below), which will first require installing the [hiredis](https://github.com/redis/hiredis) C library. See the details on the [RedisHeartbeat](https://github.com/richfitz/RedisHeartbeat) page, but briefly: 100 | 101 | * Download the archive from the [hiredis release page](https://github.com/redis/hiredis/releases/latest) 102 | * install with `tar -zxvf hiredis-0.XX.Y.tar.gz && cd hiredis-0.XX.Y && make && sudo make install` 103 | * On OS X, add to your `~/.profile` 104 | 105 | ``` 106 | export DYLD_LIBRARY_PATH=/usr/local/lib 107 | ``` 108 | 109 | On Debian/Ubuntu, install the `libhiredis-dev` package with `apt-get`. 110 | 111 | You can then install the required R packages: 112 | 113 | ``` 114 | install.packages(c("RcppRedis", "R6", "digest", "docopt")) 115 | devtools::install_github(c("ropensci/RedisAPI", "richfitz/redux", "richfitz/RedisHeartbeat", "richfitz/ids")) 116 | devtools::install_github("traitecoevo/rrqueue") 117 | ``` 118 | 119 | (*Optional*) To see what is going on, run `redis-cli monitor` in a terminal; this will print all the Redis chatter, though it will impact Redis performance. 120 | 121 | # Starting workers 122 | 123 | Workers can be started from within an R process using the `rrqueue::worker_spawn` function. This takes an optional argument `n` to start more than one worker at a time, and will block until all workers have appeared. 124 | 125 | From the command line, workers can be started using the `rrqueue_worker` script. The script can be installed by running (from R) 126 | 127 | ``` 128 | rrqueue::install_scripts("~/bin") 129 | ``` 130 | 131 | replacing `"~/bin"` with a path that is in your executable search path and which is writable. 132 | 133 | ``` 134 | $ rrqueue_worker --help 135 | Usage: 136 | rrqueue_worker [options] <queue_name> 137 | rrqueue_worker --config=FILENAME [options] [<queue_name>] 138 | rrqueue_worker -h | --help 139 | 140 | Options: 141 | --redis-host HOSTNAME Hostname for Redis 142 | --redis-port PORT Port for Redis 143 | --heartbeat-period T Heartbeat period 144 | --heartbeat-expire T Heartbeat expiry time 145 | --key-worker-alive KEY Key to write to when the worker becomes alive 146 | --config FILENAME Optional YAML configuration filename 147 | 148 | Arguments: 149 | <queue_name> Name of queue 150 | ``` 151 | 152 | The arguments correspond to the arguments documented in `?worker_spawn`; the queue name is determined by position. 153 | 154 | The `config` argument is an optional path to a yml configuration file. That configuration file contains values for any of the arguments to `worker_spawn`, for example: 155 | 156 | ```yaml 157 | queue_name: tmpjobs 158 | redis_host: 127.0.0.1 159 | redis_port: 6379 160 | heartbeat_period: 30 161 | heartbeat_expire: 90 162 | ``` 163 | 164 | Arguments passed to `rrqueue_worker` in addition to the configuration will override values in the yaml. 165 | 166 | This file can also be passed to `queue` and `observer` as the `config` argument (e.g., `queue(config="config.yml")`) rather than having to pass in lots of parameters.
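For example, with the configuration above saved as `config.yml`, a queue and a couple of background workers could be brought up from R like so (a sketch; the file name and worker count are arbitrary, and Redis is assumed to be running):

```r
q <- rrqueue::queue(config = "config.yml")   # queue name comes from the file
logs <- c(tempfile(), tempfile())            # one log file per worker
rrqueue::worker_spawn("tmpjobs", logs, n = 2)
```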
167 | 168 | # Documentation 169 | 170 | Reference documentation and vignettes are available on [this website](http://traitecoevo.github.io/rrqueue/). If the vignettes are built (`make vignettes`), they will be available in the package, and this will be committed to github once things settle down. 171 | 172 | # Performance 173 | 174 | So far, I've done relatively little performance tuning. In particular, the *workers* make no effort to minimise the number of calls to Redis, and assume that the connection is fast. On the other hand, we use `rrqueue` where the controller is many hops across the internet from the workers (controlling a queue on AWS). To reduce the time involved, `rrqueue` uses [Lua scripting](https://en.wikipedia.org/wiki/Lua_(programming_language)) to reduce the number of instruction round trips. 175 | 176 | # Spurious errors 177 | 178 | You may see variants of errors like 179 | 180 | ``` 181 | Calls: -> .handleSimpleError -> h -> signalCondition 182 | Error in signalCondition(e) : 183 | no function to return from, jumping to top level 184 | ``` 185 | 186 | This is an issue somewhere within Rcpp modules (which RcppRedis uses) and seems harmless. 187 | -------------------------------------------------------------------------------- /autodoc.R: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env Rscript 2 | 3 | ## Dirty hack to compile docs in the absence of proper Roxygen R6 support. 4 | devtools::load_all(".") 5 | 6 | add_usage <- function(dat, generator) { 7 | capture_usage <- function(name) { 8 | tmp <- capture.output(args(generator$public_methods[[name]])) 9 | tmp <- strip_trailing_whitespace(paste(tmp[-length(tmp)], collapse="\n")) 10 | sub("^function\\s*", name, tmp) 11 | } 12 | 13 | valid <- names(generator$public_methods) 14 | extra <- setdiff(names(dat), valid) 15 | if (length(extra) > 0L) { 16 | warning(sprintf("In '%s', extra methods: %s", 17 | generator$classname, 18 | paste(extra, collapse=", ")), 19 | immediate.=TRUE, call.=FALSE) 20 | } 21 | 22 | for (name in names(dat)) { 23 | dat[[name]]$method_name <- name 24 | dat[[name]]$usage <- capture_usage(name) 25 | dat[[name]]$order <- names(formals(generator$public_methods[[name]])) 26 | } 27 | dat 28 | } 29 | 30 | indent <- function(str, n, pad=NULL) { 31 | if (is.null(pad)) { 32 | pad <- paste(rep(" ", n), collapse="") 33 | } 34 | p <- function(s) { 35 | paste(paste0(pad, s), collapse="\n") 36 | } 37 | vapply(strsplit(str, "\n"), p, character(1)) 38 | } 39 | 40 | format_params <- function(xp) { 41 | fmt1 <- "\\describe{\n%s\n}" 42 | fmt2 <- "\\item{\\code{%s}}{\n%s\n}\n" 43 | pars <- sprintf(fmt2, names(xp), indent(unlist(xp), 2)) 44 | sprintf(fmt1, indent(paste(pars, collapse="\n"), 2)) 45 | } 46 | 47 | format_method <- function(x) { 48 | title <- sprintf("\\item{\\code{%s}}{", x$method_name) 49 | end <- "}" 50 | 51 | p_msg <- setdiff(x$order, names(x$params)) 52 | p_extra <- setdiff(names(x$params), x$order) 53 | if (length(p_msg) > 0) { 54 | warning(sprintf("In '%s', missing parameters: %s", 55 | x$method_name, paste(p_msg, collapse=", ")), 56 | immediate.=TRUE, call.=FALSE) 57 | } 58 | if (length(p_extra) > 0) { 59 | warning(sprintf("In '%s', extra parameters: %s", 60 | x$method_name, paste(p_extra, collapse=", ")), 61 | immediate.=TRUE, call.=FALSE) 62 | } 63 | ## preserve order, though I'm pretty sure that the yaml package is 64 | ## actually preserving it.
65 | if (length(p_msg) == 0 && length(p_extra) == 0) { 66 | x$params <- x$params[x$order] 67 | } 68 | 69 | body <- sprintf("%s\n\n\\emph{Usage:}\n\\code{%s}", 70 | x$short, x$usage) 71 | if (!is.null(x$params)) { 72 | body <- paste0(body, "\n\n\\emph{Arguments:}\n", format_params(x$params)) 73 | } 74 | if (!is.null(x$details)) { 75 | body <- paste0(body, "\n\n\\emph{Details:}\n", x$details) 76 | } 77 | if (!is.null(x$value)) { 78 | body <- paste0(body, "\n\n\\emph{Value}:\n", x$value) 79 | } 80 | paste(title, indent(body, 2), end, sep="\n") 81 | } 82 | 83 | strip_trailing_whitespace <- function(x) { 84 | gsub("[ \t]+(\n|$)", "\\1", x) 85 | } 86 | 87 | format_class <- function(x) { 88 | ret <- vapply(x, format_method, character(1)) 89 | ret <- sprintf("@section Methods:\n\n\\describe{\n%s\n}", 90 | paste(ret, collapse="\n")) 91 | ret <- indent(ret, pad="##' ") 92 | strip_trailing_whitespace(ret) 93 | } 94 | 95 | process <- function(type) { 96 | e <- environment(rrqueue::queue) 97 | generator <- get(sprintf(".R6_%s", type), e) 98 | dat <- rrqueue:::yaml_read(sprintf("man-roxygen/%s.yml", type)) 99 | ret <- format_class(add_usage(dat, generator)) 100 | writeLines(ret, sprintf("man-roxygen/%s_methods.R", type)) 101 | } 102 | 103 | types <- function() { 104 | re <- "\\.yml$" 105 | sub(re, "", dir("man-roxygen", pattern=re)) 106 | } 107 | 108 | process_all <- function() { 109 | for (t in types()) { 110 | message(paste("Generating:", t)) 111 | process(t) 112 | } 113 | } 114 | 115 | if (!interactive() && identical(commandArgs(TRUE), "process")) { 116 | process_all() 117 | } 118 | -------------------------------------------------------------------------------- /doc/how.md: -------------------------------------------------------------------------------- 1 | Through the lifecycle of a task: 2 | 3 | Assuming a queue name `rrq`, so all keys begin with `rrq:` 4 | 5 | ## Controller queues a task 6 | 7 | 1. Given `expr` (some unevaluated R expression) and `envir` (an environment to locate local variables) we do some processing to create something that can be serialised by Redis (see [below](#preparing-the-expression)) 8 | 2. Increment the *task counter* (`rrq:tasks:counter`) to get the *task id* 9 | 3. Store things in hashes with the key as the task id: 10 | * serialised expression in `rrq:tasks:expr` 11 | * environment id in `rrq:tasks:envir` 12 | * task status (pending) in `rrq:tasks:status` 13 | * time submitted in `rrq:tasks:time:sub` 14 | * the name of the key to push to when the job is complete in `rrq:tasks:complete` 15 | 4. Push the task onto the task queue: `rrq:tasks:id` 16 | 17 | ## Worker accepts a task 18 | 19 | 1. Pop a task off `rrq:tasks:id` 20 | 2. Retrieve the expression 21 | 3. Store things in hashes indicating that we're working on the task: 22 | * Set the worker status as busy (`rrq:workers:status`) 23 | * Store the task id in `rrq:workers:task` 24 | * Store the worker id for this task in `rrq:tasks:worker` 25 | * Store the time the job was begun in `rrq:tasks:time:beg` 26 | * Set the task status as running in `rrq:tasks:status` 27 | 4. Get to work running the task 28 | 29 | (Note that the task can be lost between steps 1 and 3, though it is easy enough to detect this case: the task is still known to the system but is not associated with any worker.) 30 |
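At this point the state is directly visible in Redis. As an illustrative sketch (via `redux`; the task id `"1"` and worker id `"host-1"` here are made up, and only the key names come from this document):

```r
con <- redux::hiredis()
con$HGET("rrq:tasks:status", "1")       # "RUNNING"
con$HGET("rrq:tasks:worker", "1")       # "host-1"
con$HGET("rrq:workers:task", "host-1")  # "1"
```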
31 | ## Worker completes the task 32 | 33 | 1. Store things in hashes: 34 | * Write the serialized result to the hash `rrq:tasks:result` 35 | * Store task status (complete / error / etc) in `rrq:tasks:status` 36 | * Store the finished time in `rrq:tasks:time:end` 37 | * Set worker status as idle in `rrq:workers:status` 38 | 2. Push the task onto the finished queue, which we look up from `rrq:tasks:complete` 39 | 3. Return to polling `rrq:tasks:id` for new jobs 40 | 41 | # Preparing the expression 42 | 43 | 1. Look at all arguments of the expression and work out which entries are symbols 44 | 2. For each symbol 45 | * retrieve the value from the given environment 46 | * serialise it to Redis using the "object cache" object, using a key which is `:`. 47 | 3. Serialise the expression, plus the list of object names and the mapping to the mangled names. 48 | 49 | The serialisation to Redis via the object cache uses a content-addressable system: objects are stored against keys that are the *hash* of the object contents, and a pointer is stored from the mangled name to the object. There is also a counter that indicates how many things point at a given piece of data; once that counter drops to zero the object is deleted. This means that regularly referenced large pieces of data are stored only once, rather than once per task. 50 | 51 | On the receiving end, the workers only pull objects from the database if they have not pulled an object with that hash before. This avoids the bottleneck of pulling over the network and deserialising.
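The following is a minimal sketch of that content-addressable idea, illustrative only (rrqueue's real implementation lives in `R/object_cache.R`; this toy version stores objects in an environment rather than Redis):

```r
## Store each unique object once, keyed by the hash of its contents.
put <- function(store, value) {
  h <- digest::digest(value)
  if (!exists(h, envir = store, inherits = FALSE)) {
    assign(h, value, envir = store)   # first sighting: store the data
  }
  h                                   # callers keep only the hash
}

store <- new.env()
k1 <- put(store, mtcars)
k2 <- put(store, mtcars)  # same contents, so nothing new is stored
identical(k1, k2)         # TRUE
```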
-------------------------------------------------------------------------------- /docker/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM r-base 2 | 3 | RUN apt-get update \ 4 | && apt-get install -y --no-install-recommends \ 5 | ca-certificates \ 6 | curl \ 7 | git \ 8 | libcurl4-openssl-dev \ 9 | libhiredis-dev \ 10 | libssl-dev/unstable \ 11 | libxml2-dev \ 12 | pandoc \ 13 | ssh \ 14 | && apt-get clean 15 | 16 | RUN rm /usr/local/bin/install2.r && \ 17 | wget --no-check-certificate \ 18 | https://raw.githubusercontent.com/cboettig/littler/master/examples/install2.r \ 19 | -O /usr/local/bin/install2.r \ 20 | && chmod +x /usr/local/bin/install2.r 21 | 22 | RUN install2.r --error \ 23 | crayon \ 24 | devtools \ 25 | digest \ 26 | docopt \ 27 | downloader \ 28 | dplyr \ 29 | inline \ 30 | R6 \ 31 | RCurl \ 32 | rmarkdown \ 33 | stringr 34 | 35 | RUN installGithub.r \ 36 | ropensci/RedisAPI \ 37 | richfitz/RedisHeartbeat \ 38 | richfitz/storr \ 39 | richfitz/remake \ 40 | gaborcsardi/progress \ 41 | traitecoevo/dockertest \ 42 | traitecoevo/rrqueue 43 | 44 | RUN r -e 'remake:::install_remake("/usr/local/bin")' 45 | RUN r -e 'rrqueue:::install_scripts("/usr/local/bin")' 46 | 47 | WORKDIR /root 48 | 49 | CMD ["bash"] -------------------------------------------------------------------------------- /inst/scripts/rrqueue_worker: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env Rscript 2 | library(methods) 3 | w <- rrqueue:::rrqueue_worker_main() -------------------------------------------------------------------------------- /inst/scripts/rrqueue_worker_tee: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | set -e 3 | # set -x 4 | LOGDIR=$1 5 | shift 6 | mkdir -p ${LOGDIR} 7 | 8 | # log files will have the form <hostname>_XXXX -- mktemp 9 | # and rely on timestamps to work out which is most recent; this is 10 | # safe for race conditions. 11 | LOGFILE=$(mktemp "${LOGDIR}/$(hostname)_XXXX") 12 | 13 | echo "Logging to $LOGFILE" 14 | rrqueue_worker "$@" 2>&1 | tee -i "$LOGFILE" -------------------------------------------------------------------------------- /man-roxygen/observer.yml: -------------------------------------------------------------------------------- 1 | tasks_list: 2 | short: Return a vector of known task ids. 3 | value: A character vector 4 | 5 | tasks_status: 6 | short: >- 7 | Returns a named character vector indicating the task status. 8 | params: 9 | task_ids: &task_ids >- 10 | Optional vector of task identifiers. If omitted all tasks known 11 | to rrqueue will be used. 12 | follow_redirect: >- 13 | should we follow redirects to get the status of any requeued tasks? 14 | value: | 15 | A named character vector; the names will be the task ids, and the 16 | values are the status of each task. Possible status values are 17 | \describe{ 18 | \item{\code{PENDING}}{queued, but not run by a worker} 19 | \item{\code{RUNNING}}{being run on a worker, but not complete} 20 | \item{\code{COMPLETE}}{task completed successfully} 21 | \item{\code{ERROR}}{task completed with an error} 22 | \item{\code{ORPHAN}}{task orphaned due to loss of worker} 23 | \item{\code{REDIRECT}}{orphaned task has been redirected} 24 | \item{\code{MISSING}}{task not known (deleted, or never existed)} 25 | } 26 | 27 | tasks_overview: 28 | short: >- 29 | High-level overview of the tasks in the queue; the number of tasks 30 | in each status. 31 | 32 | tasks_times: 33 | short: returns a summary of times for a set of tasks 34 | params: 35 | task_ids: *task_ids 36 | unit_elapsed: >- 37 | Unit to use in computing elapsed times. The default is to use 38 | "secs". This is passed through to \code{\link{difftime}} so the 39 | units there are available and are "auto", "secs", "mins", 40 | "hours", "days", "weeks". 41 | value: | 42 | A \code{data.frame}, one row per task, with columns 43 | \describe{ 44 | \item{\code{task_id}}{The task id} 45 | \item{\code{submitted}}{Time the task was submitted} 46 | \item{\code{started}}{Time the task was started, or \code{NA} if waiting} 47 | \item{\code{finished}}{Time the task was completed, or \code{NA} 48 | if waiting or running} 49 | \item{\code{waiting}}{Elapsed time spent waiting} 50 | \item{\code{running}}{Elapsed time spent running, or \code{NA} if waiting} 51 | \item{\code{idle}}{Elapsed time since finished, or \code{NA} 52 | if waiting or running} 53 | } 54 | 55 | tasks_envir: 56 | short: returns the mapping of tasks to environment 57 | params: 58 | task_ids: *task_ids 59 | value: >- 60 | A named character vector; names are the task ids and the value is 61 | the environment id associated with that task. 62 | 63 | task_get: 64 | short: >- 65 | returns a \code{\link{task}} object associated with a given task 66 | identifier. This can be used to interrogate an individual task. 67 | See the help for \code{\link{task}} objects for more about these objects. 68 | params: 69 | task_id: &task_id A single task identifier 70 | 71 | task_result: 72 | short: >- 73 | Get the result for a single task 74 | params: 75 | task_id: *task_id 76 | follow_redirect: &follow_redirect >- 77 | should we follow redirects to get the status of any requeued task? 78 | 79 | tasks_groups_list: 80 | short: >- 81 | Returns a list of \emph{groups} known to rrqueue. Groups are 82 | assigned during task creation, or through the 83 | \code{tasks_set_group} method of \code{\link{queue}}.
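# (The `&task_ids` / `*task_ids` pairs used throughout this file are YAML
# anchors and aliases: shared parameter descriptions are written once and
# then reused by later methods, and autodoc.R expands them when generating
# the roxygen templates.)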
84 | 85 | tasks_in_groups: 86 | short: >- 87 | Returns a list of tasks belonging to any of the groups listed. 88 | params: 89 | groups: >- 90 | A character vector of one or more groups (use 91 | \code{tasks_groups_list} to get a list of valid groups). 92 | 93 | tasks_lookup_group: 94 | short: >- 95 | Look up the group for a set of tasks 96 | params: 97 | task_ids: *task_ids 98 | value: >- 99 | A named character vector; names refer to task ids and the value is 100 | the group (or \code{NA} if no group is set for that task id). 101 | 102 | task_bundle_get: 103 | short: >- 104 | Return a "bundle" of tasks that can be operated on together; see 105 | \code{\link{task_bundle}} 106 | params: 107 | groups: A vector of groups to include in the bundle 108 | task_ids: >- 109 | A vector of task ids in the bundle. Unlike all other uses of 110 | \code{task_ids} here, only one of \code{groups} or 111 | \code{task_ids} may be provided; if \code{task_ids} is 112 | \code{NULL} then \code{groups} is used to select the tasks. 113 | 114 | envirs_list: 115 | short: >- 116 | Return a vector of all known environment ids in this queue. 117 | envirs_contents: 118 | short: >- 119 | Return a vector of the environment contents 120 | params: 121 | envir_ids: &envir_ids >- 122 | Vector of environment ids. If omitted then all environments in 123 | this queue are used. 124 | value: | 125 | A list, each element of which is a list of elements 126 | \describe{ 127 | \item{\code{packages}}{a vector of packages loaded} 128 | \item{\code{sources}}{a vector of files explicitly sourced} 129 | \item{\code{source_files}}{a vector of files sourced including 130 | their hashes. This includes any files detected to be sourced 131 | by another file} 132 | } 133 | envir_workers: 134 | short: >- 135 | Determine which workers are known to be able to process tasks in a 136 | particular environment. 137 | params: 138 | envir_id: A single environment id 139 | worker_ids: &worker_ids 140 | Optional vector of worker identifiers. If omitted all workers known 141 | to rrqueue will be used (currently running workers only). 142 | value: >- 143 | A named logical vector; \code{TRUE} if a worker can use an 144 | environment, named by the worker identifiers. 145 | 146 | workers_len: 147 | short: >- 148 | Number of workers that have made themselves known to rrqueue. 149 | There are situations where this is an overestimate and that may 150 | get fixed at some point. 151 | 152 | workers_list: 153 | short: >- 154 | Returns a vector of all known worker identifiers (may include 155 | workers that have crashed). 156 | 157 | workers_list_exited: 158 | short: >- 159 | Returns a vector of workers that are known to have exited. 160 | Workers leave behind most of the interesting bits of logs, times, 161 | etc, so these identifiers are useful for asking what they worked 162 | on. 163 | 164 | workers_status: 165 | short: >- 166 | Returns a named character vector indicating the worker status. 167 | params: 168 | worker_ids: *worker_ids 169 | value: | 170 | A named character vector; the names will be the worker ids, and the 171 | values are the status of each worker.
Possible status values are 172 | \describe{ 173 | \item{\code{IDLE}}{worker is idle} 174 | \item{\code{BUSY}}{worker is running a task} 175 | \item{\code{LOST}}{worker has been identified as lost by the 176 | \code{workers_identify_lost} method of \code{\link{queue}}.} 177 | \item{\code{EXITED}}{worker has exited} 178 | \item{\code{PAUSED}}{worker is paused} 179 | } 180 | 181 | workers_task_id: 182 | short: >- 183 | Returns the tasks that workers are currently processing (or 184 | \code{NA} for workers that are not known to be working on a task) 185 | 186 | params: 187 | worker_ids: *worker_ids 188 | 189 | value: >- 190 | A named character vector. Names are the worker ids and value is 191 | the task id, or \code{NA} if no task is being worked on. 192 | 193 | workers_times: 194 | short: >- 195 | returns a summary of times for a set of workers. This only 196 | returns useful information if the workers are running a heartbeat 197 | process, which requires the \code{RedisHeartbeat} package. 198 | params: 199 | worker_ids: *worker_ids 200 | unit_elapsed: >- 201 | Unit to use in computing elapsed times. The default is to use 202 | "secs". This is passed through to \code{\link{difftime}} so the 203 | units there are available and are "auto", "secs", "mins", 204 | "hours", "days", "weeks". 205 | value: | 206 | A \code{data.frame}, one row per worker, with columns 207 | \describe{ 208 | \item{\code{worker_id}}{Worker identifier} 209 | \item{\code{expire_max}}{Maximum length of time before worker can 210 | be declared missing, in seconds} 211 | \item{\code{expire}}{Time until the worker will expire, in seconds} 212 | \item{\code{last_seen}}{Time since the worker was last seen} 213 | \item{\code{last_action}}{Time since the last worker action} 214 | } 215 | 216 | workers_log_tail: 217 | short: >- 218 | Return the last few entries in the worker logs. 219 | params: 220 | worker_ids: *worker_ids 221 | n: >- 222 | Number of log entries to return. Use \code{0} or \code{Inf} to 223 | return all entries. 224 | value: | 225 | 226 | A \code{data.frame} with columns 227 | \describe{ 228 | \item{\code{worker_id}}{the worker identifier} 229 | \item{\code{time}}{time of the event} 230 | \item{\code{command}}{the command (e.g., MESSAGE, ALIVE)} 231 | \item{\code{message}}{The message associated with the command} 232 | } 233 | 234 | workers_info: 235 | short: >- 236 | Returns a set of key/value information about workers. Includes 237 | things like hostnames, process ids, environments that can be run, 238 | etc. Note that this information is from the last time that the 239 | worker process registered an \code{INFO} command. This is 240 | registered at startup and after receiving an \code{INFO} message 241 | from a \code{\link{queue}} object. So the information may be out 242 | of date. 243 | params: 244 | worker_ids: *worker_ids 245 | value: | 246 | A list, each element of which is a \code{worker_info} 247 | 248 | worker_envir: 249 | short: >- 250 | Returns an up-to-date list of environments a worker is capable of 251 | using (in contrast to the entry in \code{workers_info}, which might 252 | be out of date). 253 | params: 254 | worker_id: Single worker identifier 255 | -------------------------------------------------------------------------------- /man-roxygen/task.yml: -------------------------------------------------------------------------------- 1 | status: 2 | short: >- 3 | Returns a scalar character indicating the task status.
4 | 5 | params: 6 | follow_redirect: &follow_redirect >- 7 | should we follow redirects to get the status of any requeued task? 8 | 9 | value: | 10 | Scalar character. Possible values are 11 | \describe{ 12 | \item{\code{PENDING}}{queued, but not run by a worker} 13 | \item{\code{RUNNING}}{being run on a worker, but not complete} 14 | \item{\code{COMPLETE}}{task completed successfully} 15 | \item{\code{ERROR}}{task completed with an error} 16 | \item{\code{ORPHAN}}{task orphaned due to loss of worker} 17 | \item{\code{REDIRECT}}{orphaned task has been redirected} 18 | \item{\code{MISSING}}{task not known (deleted, or never existed)} 19 | } 20 | 21 | result: 22 | short: >- 23 | Fetch the result of a task, so long as it has completed. 24 | 25 | params: 26 | follow_redirect: *follow_redirect 27 | 28 | wait: 29 | short: >- 30 | Like \code{result}, but will wait until the task is complete. In 31 | order to preserve the \code{key_complete} for anything that might 32 | be listening for it (and to avoid collision with anything else 33 | writing to that key), this function repeatedly polls the database. 34 | Over a slow connection you may want to increase the \code{every} 35 | parameter. 36 | 37 | params: 38 | timeout: >- 39 | Length of time, in seconds, to wait. A value of zero will not 40 | wait. Infinite times are possible and can be escaped by 41 | pressing Ctrl-C or Escape (depending on platform). 42 | 43 | every: How often, in seconds, to poll for results 44 | 45 | expr: 46 | short: returns the expression stored in the task 47 | params: 48 | locals: >- 49 | Logical, indicating if the local variables associated with the 50 | expression should also be returned. If \code{TRUE}, then local 51 | variables used in the expression will be returned in an 52 | \emph{attribute} of the expression, \code{envir}. 53 | value: >- 54 | A quoted expression (a language object). Turn this into a string 55 | with deparse. If \code{locals} was \code{TRUE} there will be an 56 | environment attribute with local variables included. 57 | 58 | envir: 59 | short: returns the environment identifier for the task 60 | 61 | times: 62 | short: returns a summary of times associated with this task. 63 | params: 64 | unit_elapsed: >- 65 | Unit to use in computing elapsed times. The default is to use 66 | "secs". This is passed through to \code{\link{difftime}} so the 67 | units there are available and are "auto", "secs", "mins", 68 | "hours", "days", "weeks". 69 | value: | 70 | A one row \code{data.frame} with columns 71 | \describe{ 72 | \item{\code{submitted}}{Time the task was submitted} 73 | \item{\code{started}}{Time the task was started, or \code{NA} if waiting} 74 | \item{\code{finished}}{Time the task was completed, or \code{NA} 75 | if waiting or running} 76 | \item{\code{waiting}}{Elapsed time spent waiting} 77 | \item{\code{running}}{Elapsed time spent running, or \code{NA} if waiting} 78 | \item{\code{idle}}{Elapsed time since finished, or \code{NA} 79 | if waiting or running} 80 | } 81 | -------------------------------------------------------------------------------- /man-roxygen/task_bundle.yml: -------------------------------------------------------------------------------- 1 | ids: 2 | short: >- 3 | Return the vector of ids for the bundle (will return 4 | \code{character(0)} if the bundle is empty). 5 | 6 | status: 7 | short: >- 8 | Fetch the current task status (see \code{\link{task}}). 9 | 10 | params: 11 | follow_redirect: &follow_redirect >- 12 | should we follow redirects to get the status of any requeued task?
13 | 14 | overview: 15 | short: >- 16 | High-level overview of the tasks in the bundle; the number of tasks 17 | in each status. 18 | 19 | results: 20 | short: >- 21 | Fetch results for all tasks, immediately (unlike \code{wait}). 22 | params: 23 | follow_redirect: *follow_redirect 24 | 25 | value: >- 26 | 27 | Always returns a list, regardless of how many tasks are in the 28 | bundle (\code{list()} for zero tasks, \code{list(x1)} for one task, 29 | \code{list(x1, x2)} for two, etc.). This list will be named with 30 | the ids (see \code{ids()}) unless the bundle has a \code{names} 31 | attribute that has not been invalidated by using 32 | \code{update_groups()} (this interface may change). 33 | 34 | wait: 35 | short: >- 36 | Like \code{results}, but block until results are returned. 37 | 38 | params: 39 | timeout: &timeout >- 40 | 41 | Total time to wait, in seconds. If the tasks have not returned 42 | by this point an error will be thrown. 43 | 44 | time_poll: &time_poll >- 45 | 46 | Interval, in seconds, to poll for new results. Increasing this 47 | time will reduce the number of Redis roundtrips but decrease the 48 | responsiveness to an interrupt. 49 | 50 | progress_bar: >- 51 | 52 | Display a progress bar while waiting? Includes a spinner for 53 | reassurance that the process has not died. 54 | 55 | follow_redirect: *follow_redirect 56 | 57 | wait1: 58 | short: >- 59 | 60 | Wait for a single task to complete, and return its result. 61 | 62 | params: 63 | 64 | timeout: *timeout 65 | time_poll: *time_poll 66 | follow_redirect: *follow_redirect 67 | 68 | value: >- 69 | 70 | If no task is available, either because all tasks are complete or 71 | because the waiting time was too short, \code{NULL} is returned. 72 | Otherwise a list of length 2 is returned, with elements \code{id} 73 | (the id of the task that completed) and \code{value} (the value of 74 | that task). This interface is inspired by Redis' \code{BLPOP} 75 | operation. 76 | 77 | times: 78 | short: >- 79 | Like times in \code{\link{observer}}, but limited to the ids in the bundle. 80 | 81 | params: 82 | unit_elapsed: >- 83 | Unit to use in computing elapsed times. The default is to use 84 | "secs". This is passed through to \code{\link{difftime}} so the 85 | units there are available and are "auto", "secs", "mins", 86 | "hours", "days", "weeks". 87 | 88 | update_groups: 89 | short: >- 90 | 91 | Check for new tasks that have been added to the groups included in 92 | this bundle and add these tasks to the bundle. 93 | 94 | value: >- 95 | 96 | Invisibly, the ids of tasks that have been added to the bundle. 97 | -------------------------------------------------------------------------------- /man-roxygen/task_bundle_methods.R: -------------------------------------------------------------------------------- 1 | ##' @section Methods: 2 | ##' 3 | ##' \describe{ 4 | ##' \item{\code{ids}}{ 5 | ##' Return the vector of ids for the bundle (will return \code{character(0)} if the bundle is empty). 6 | ##' 7 | ##' \emph{Usage:} 8 | ##' \code{ids()} 9 | ##' } 10 | ##' \item{\code{status}}{ 11 | ##' Fetch the current task status (see \code{\link{task}}). 12 | ##' 13 | ##' \emph{Usage:} 14 | ##' \code{status(follow_redirect = FALSE)} 15 | ##' 16 | ##' \emph{Arguments:} 17 | ##' \describe{ 18 | ##' \item{\code{follow_redirect}}{ 19 | ##' should we follow redirects to get the status of any requeued task?
20 | ##' } 21 | ##' } 22 | ##' } 23 | ##' \item{\code{overview}}{ 24 | ##' High-level overview of the tasks in the bundle; the number of tasks in each status. 25 | ##' 26 | ##' \emph{Usage:} 27 | ##' \code{overview()} 28 | ##' } 29 | ##' \item{\code{results}}{ 30 | ##' Fetch results for all tasks, immediately (unlike \code{wait}). 31 | ##' 32 | ##' \emph{Usage:} 33 | ##' \code{results(follow_redirect = FALSE)} 34 | ##' 35 | ##' \emph{Arguments:} 36 | ##' \describe{ 37 | ##' \item{\code{follow_redirect}}{ 38 | ##' should we follow redirects to get the status of any requeued task? 39 | ##' } 40 | ##' } 41 | ##' 42 | ##' \emph{Value}: 43 | ##' 44 | ##' Always returns a list, regardless of how many tasks are in the bundle (\code{list()} for zero tasks, \code{list(x1)} for one task, \code{list(x1, x2)} for two, etc.). This list will be named with the ids (see \code{ids()}) unless the bundle has a \code{names} attribute that has not been invalidated by using \code{update_groups()} (this interface may change). 45 | ##' } 46 | ##' \item{\code{wait}}{ 47 | ##' Like \code{results}, but block until results are returned. 48 | ##' 49 | ##' \emph{Usage:} 50 | ##' \code{wait(timeout = 60, time_poll = 1, progress_bar = TRUE, follow_redirect = FALSE)} 51 | ##' 52 | ##' \emph{Arguments:} 53 | ##' \describe{ 54 | ##' \item{\code{timeout}}{ 55 | ##' 56 | ##' Total time to wait, in seconds. If the tasks have not returned by this point an error will be thrown. 57 | ##' } 58 | ##' 59 | ##' \item{\code{time_poll}}{ 60 | ##' 61 | ##' Interval, in seconds, to poll for new results. Increasing this time will reduce the number of Redis roundtrips but decrease the responsiveness to an interrupt. 62 | ##' } 63 | ##' 64 | ##' \item{\code{progress_bar}}{ 65 | ##' 66 | ##' Display a progress bar while waiting? Includes a spinner for reassurance that the process has not died. 67 | ##' } 68 | ##' 69 | ##' \item{\code{follow_redirect}}{ 70 | ##' should we follow redirects to get the status of any requeued task? 71 | ##' } 72 | ##' } 73 | ##' } 74 | ##' \item{\code{wait1}}{ 75 | ##' 76 | ##' Wait for a single task to complete, and return its result. 77 | ##' 78 | ##' \emph{Usage:} 79 | ##' \code{wait1(timeout = 60, time_poll = 1, follow_redirect = FALSE)} 80 | ##' 81 | ##' \emph{Arguments:} 82 | ##' \describe{ 83 | ##' \item{\code{timeout}}{ 84 | ##' 85 | ##' Total time to wait, in seconds. If the tasks have not returned by this point an error will be thrown. 86 | ##' } 87 | ##' 88 | ##' \item{\code{time_poll}}{ 89 | ##' 90 | ##' Interval, in seconds, to poll for new results. Increasing this time will reduce the number of Redis roundtrips but decrease the responsiveness to an interrupt. 91 | ##' } 92 | ##' 93 | ##' \item{\code{follow_redirect}}{ 94 | ##' should we follow redirects to get the status of any requeued task? 95 | ##' } 96 | ##' } 97 | ##' 98 | ##' \emph{Value}: 99 | ##' 100 | ##' If no task is available, either because all tasks are complete or because the waiting time was too short, \code{NULL} is returned. Otherwise a list of length 2 is returned, with elements \code{id} (the id of the task that completed) and \code{value} (the value of that task). This interface is inspired by Redis' \code{BLPOP} operation. 101 | ##' } 102 | ##' \item{\code{times}}{ 103 | ##' Like times in \code{\link{observer}}, but limited to the ids in the bundle.
104 | ##'
105 | ##'   \emph{Usage:}
106 | ##'   \code{times(unit_elapsed = "secs")}
107 | ##'
108 | ##'   \emph{Arguments:}
109 | ##'   \describe{
110 | ##'     \item{\code{unit_elapsed}}{
111 | ##'       Unit to use in computing elapsed times. The default is to use "secs". This is passed through to \code{\link{difftime}} so the units available there can be used: "auto", "secs", "mins", "hours", "days", "weeks".
112 | ##'     }
113 | ##'   }
114 | ##' }
115 | ##' \item{\code{update_groups}}{
116 | ##'
117 | ##'   Check for new tasks that have been added to the groups included in this bundle and add these tasks to the bundle.
118 | ##'
119 | ##'   \emph{Usage:}
120 | ##'   \code{update_groups()}
121 | ##'
122 | ##'   \emph{Value}:
123 | ##'
124 | ##'   Invisibly, the ids of tasks that have been added to the bundle.
125 | ##' }
126 | ##' }
127 | 
-------------------------------------------------------------------------------- /man-roxygen/task_methods.R: --------------------------------------------------------------------------------
1 | ##' @section Methods:
2 | ##'
3 | ##' \describe{
4 | ##' \item{\code{status}}{
5 | ##'   Returns a scalar character indicating the task status.
6 | ##'
7 | ##'   \emph{Usage:}
8 | ##'   \code{status(follow_redirect = FALSE)}
9 | ##'
10 | ##'   \emph{Arguments:}
11 | ##'   \describe{
12 | ##'     \item{\code{follow_redirect}}{
13 | ##'       should we follow redirects to get the status of any requeued task?
14 | ##'     }
15 | ##'   }
16 | ##'
17 | ##'   \emph{Value}:
18 | ##'   Scalar character. Possible values are
19 | ##'   \describe{
20 | ##'     \item{\code{PENDING}}{queued, but not run by a worker}
21 | ##'     \item{\code{RUNNING}}{being run on a worker, but not complete}
22 | ##'     \item{\code{COMPLETE}}{task completed successfully}
23 | ##'     \item{\code{ERROR}}{task completed with an error}
24 | ##'     \item{\code{ORPHAN}}{task orphaned due to loss of worker}
25 | ##'     \item{\code{REDIRECT}}{orphaned task has been redirected}
26 | ##'     \item{\code{MISSING}}{task not known (deleted, or never existed)}
27 | ##'   }
28 | ##' }
29 | ##' \item{\code{result}}{
30 | ##'   Fetch the result of a task, so long as it has completed.
31 | ##'
32 | ##'   \emph{Usage:}
33 | ##'   \code{result(follow_redirect = FALSE)}
34 | ##'
35 | ##'   \emph{Arguments:}
36 | ##'   \describe{
37 | ##'     \item{\code{follow_redirect}}{
38 | ##'       should we follow redirects to get the status of any requeued task?
39 | ##'     }
40 | ##'   }
41 | ##' }
42 | ##' \item{\code{wait}}{
43 | ##'   Like \code{result}, but will wait until the task is complete. In order to preserve the \code{key_complete} for anything that might be listening for it (and to avoid collision with anything else writing to that key), this function repeatedly polls the database. Over a slow connection you may want to increase the \code{every} parameter.
44 | ##'
45 | ##'   \emph{Usage:}
46 | ##'   \code{wait(timeout, every = 0.05)}
47 | ##'
48 | ##'   \emph{Arguments:}
49 | ##'   \describe{
50 | ##'     \item{\code{timeout}}{
51 | ##'       Length of time, in seconds, to wait. A value of zero will not wait. Infinite times are possible and can be escaped by pressing Ctrl-C or Escape (depending on platform).
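##'
##'       A sketch of a bounded wait (assuming a hypothetical task
##'       handle \code{t} obtained from \code{task_get}); this polls
##'       every 0.1 seconds and gives up after 10 seconds:
##'       \preformatted{t$wait(timeout = 10, every = 0.1)}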
52 | ##'     }
53 | ##'
54 | ##'     \item{\code{every}}{
55 | ##'       How often, in seconds, to poll for results
56 | ##'     }
57 | ##'   }
58 | ##' }
59 | ##' \item{\code{expr}}{
60 | ##'   returns the expression stored in the task
61 | ##'
62 | ##'   \emph{Usage:}
63 | ##'   \code{expr(locals = FALSE)}
64 | ##'
65 | ##'   \emph{Arguments:}
66 | ##'   \describe{
67 | ##'     \item{\code{locals}}{
68 | ##'       Logical, indicating if the local variables associated with the expression should also be returned. If \code{TRUE}, then local variables used in the expression will be returned in an \emph{attribute} of the expression, \code{envir}.
69 | ##'     }
70 | ##'   }
71 | ##'
72 | ##'   \emph{Value}:
73 | ##'   A quoted expression (a language object). Turn this into a string with \code{deparse}. If \code{locals} was \code{TRUE} there will be an environment attribute with local variables included.
74 | ##' }
75 | ##' \item{\code{envir}}{
76 | ##'   returns the environment identifier for the task
77 | ##'
78 | ##'   \emph{Usage:}
79 | ##'   \code{envir()}
80 | ##' }
81 | ##' \item{\code{times}}{
82 | ##'   returns a summary of times associated with this task.
83 | ##'
84 | ##'   \emph{Usage:}
85 | ##'   \code{times(unit_elapsed = "secs")}
86 | ##'
87 | ##'   \emph{Arguments:}
88 | ##'   \describe{
89 | ##'     \item{\code{unit_elapsed}}{
90 | ##'       Unit to use in computing elapsed times. The default is to use "secs". This is passed through to \code{\link{difftime}} so the units available there can be used: "auto", "secs", "mins", "hours", "days", "weeks".
91 | ##'     }
92 | ##'   }
93 | ##'
94 | ##'   \emph{Value}:
95 | ##'   A one row \code{data.frame} with columns
96 | ##'   \describe{
97 | ##'     \item{\code{submitted}}{Time the task was submitted}
98 | ##'     \item{\code{started}}{Time the task was started, or \code{NA} if waiting}
99 | ##'     \item{\code{finished}}{Time the task was completed, or \code{NA}
100 | ##'       if waiting or running}
101 | ##'     \item{\code{waiting}}{Elapsed time spent waiting}
102 | ##'     \item{\code{running}}{Elapsed time spent running, or \code{NA} if waiting}
103 | ##'     \item{\code{idle}}{Elapsed time since finished, or \code{NA}
104 | ##'       if waiting or running}
105 | ##'   }
106 | ##' }
107 | ##' }
108 | 
-------------------------------------------------------------------------------- /man/enqueue_bulk.Rd: --------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/bulk.R
3 | \name{enqueue_bulk}
4 | \alias{enqueue_bulk}
5 | \alias{enqueue_bulk_submit}
6 | \title{Bulk queuing}
7 | \usage{
8 | enqueue_bulk(X, FUN, rrq, do.call = FALSE, group = NULL, timeout = Inf,
9 |   time_poll = 1, delete_tasks = FALSE, progress_bar = TRUE,
10 |   env = parent.frame())
11 | 
12 | enqueue_bulk_submit(X, FUN, rrq, do.call = FALSE, group = NULL,
13 |   progress_bar = TRUE, env = parent.frame())
14 | }
15 | \arguments{
16 | \item{X}{An object to loop over. If a list, we'll loop over the
17 | elements of the list, duplicating the behaviour of
18 | \code{\link{rrqlapply}} except for not handling dots. If a
19 | \code{data.frame} we'll loop over the \emph{rows}. Matrices are
20 | not supported.}
21 | 
22 | \item{FUN}{A function. Will be found in the same way as
23 | \code{FUN} within \code{\link{rrqlapply}}.}
24 | 
25 | \item{rrq}{An rrq object}
26 | 
27 | \item{do.call}{Behave like (but not via) \code{\link{do.call}};
28 | given an element \code{el}, rather than run \code{FUN(el)} run
29 | \code{FUN(el[[1]], el[[2]], ...)}.}
30 | 
31 | \item{group}{Name of a group for generated task ids.
If not
32 | included, an ID will be generated.}
33 | 
34 | \item{timeout}{Total length of time to wait for tasks to be
35 | completed. The default is to wait forever (like \code{lapply}).}
36 | 
37 | \item{time_poll}{Time to poll for tasks. Must be an integer.
38 | Because of how the function is implemented, R will be
39 | unresponsive for this long each iteration (unless results are
40 | returned), so the default of 1s should be reasonable.}
41 | 
42 | \item{delete_tasks}{Delete tasks on successful finish?}
43 | 
44 | \item{progress_bar}{Display a progress bar?}
45 | 
46 | \item{env}{Environment to look in}
47 | }
48 | \description{
49 | Bulk queuing. Similar in some respects to things like
50 | \code{\link{apply}}. This is an experiment to deal with the
51 | pattern where you have a big pile of parameters in a data.frame to
52 | loop over, by applying a function to each row.
53 | }
54 | \details{
55 | There are two modes here; selected with \code{do.call}. With
56 | \code{do.call=FALSE}, the default, the function behaves similarly
57 | to \code{apply(X, 1, FUN)}; that is, the function is applied to
58 | each row of the data.frame (as a list):
59 | \code{FUN(as.list(X[1,]))}, \code{FUN(as.list(X[2,]))}, and so on.
60 | The alternative mode (\code{do.call=TRUE}) is where the
61 | \code{data.frame} contains \emph{parameters} to the function
62 | \code{FUN}, and so is equivalent to \code{FUN(X[1,1], X[1,2], ...)}. This
63 | is similar to (but not implemented as) running: \code{do.call("FUN",
64 | as.list(X[1,]))}.
65 | 
66 | Be careful, this one is going to change, including the name
67 | probably. You have been warned.
68 | }
69 | 
70 | 
-------------------------------------------------------------------------------- /man/install_scripts.Rd: --------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/utils.R
3 | \name{install_scripts}
4 | \alias{install_scripts}
5 | \title{Install rrqueue scripts}
6 | \usage{
7 | install_scripts(dest, overwrite = TRUE)
8 | }
9 | \arguments{
10 | \item{dest}{Directory to install scripts to, preferably in the \code{$PATH}}
11 | 
12 | \item{overwrite}{Overwrite any existing file?}
13 | }
14 | \description{
15 | Install rrqueue scripts. This currently includes a script for
16 | starting a worker process. The script is extremely simple and
17 | does not need upgrading when rrqueue is upgraded.
18 | }
19 | 
20 | 
-------------------------------------------------------------------------------- /man/rrqlapply.Rd: --------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/rrqlapply.R
3 | \name{rrqlapply}
4 | \alias{rrqlapply}
5 | \alias{rrqlapply_submit}
6 | \title{Parallel version of lapply using Redis queuing}
7 | \usage{
8 | rrqlapply(X, FUN, rrq, ..., group = NULL, timeout = Inf, time_poll = 1,
9 |   delete_tasks = FALSE, progress_bar = TRUE, env = parent.frame())
10 | 
11 | rrqlapply_submit(X, FUN, rrq, ..., group = NULL, progress_bar = TRUE,
12 |   env = parent.frame())
13 | }
14 | \arguments{
15 | \item{X}{A vector}
16 | 
17 | \item{FUN}{The name of a function to apply to each element of the
18 | list. \emph{this will change!}.}
19 | 
20 | \item{rrq}{An rrq object}
21 | 
22 | \item{...}{Additional arguments passed to \code{FUN}}
23 | 
24 | \item{group}{Name of a group for generated task ids.
If not 25 | included, an ID will be generated.} 26 | 27 | \item{timeout}{Total length of time to wait for tasks to be 28 | completed. The default is to wait forever (like \code{lapply}).} 29 | 30 | \item{time_poll}{Time to poll for tasks. Must be an integer. 31 | Because of how the function is implemented, R will be 32 | unresponsive for this long each iteration (unless results are 33 | returned), so the default of 1s should be reasonable.} 34 | 35 | \item{delete_tasks}{Delete tasks on successful finish?} 36 | 37 | \item{progress_bar}{Display a progress bar?} 38 | 39 | \item{env}{Environment to look in.} 40 | } 41 | \description{ 42 | Parallel version of lapply using Redis queuing 43 | } 44 | 45 | -------------------------------------------------------------------------------- /man/task.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/task.R 3 | \name{task} 4 | \alias{task} 5 | \title{Create a task handle} 6 | \usage{ 7 | task(obj, task_id, key_complete = NULL) 8 | } 9 | \arguments{ 10 | \item{obj}{A \code{queue} or \code{observer} object.} 11 | 12 | \item{task_id}{Task identifier} 13 | 14 | \item{key_complete}{If known, specify the \code{key_complete}, 15 | otherwise we look it up on creation.} 16 | } 17 | \description{ 18 | Create a task handle object. This is a "pointer" to a task and 19 | can be used to retrieve information about status, running times, 20 | expression and the result of the task once complete. Generally 21 | you do not need to make a task object as they will be created for 22 | you by things like the \code{task_get} method of the 23 | \code{\link{observer}} and \code{\link{queue}} objects. 24 | } 25 | \details{ 26 | Tasks have a \emph{unique identifier}; these are unique within a 27 | queue and are implemented as an incrementing integer. However, 28 | this is an implementation detail and should not be relied on. The 29 | identifier is represented as a \emph{character string} rather than 30 | an integer in most places. 31 | 32 | Tasks exist in one of a number of \emph{statuses}. See the 33 | \code{status} method below for a list of possible statuses and 34 | their interpretation. 35 | } 36 | \section{Methods}{ 37 | 38 | 39 | \describe{ 40 | \item{\code{status}}{ 41 | Returns a scalar character indicating the task status. 42 | 43 | \emph{Usage:} 44 | \code{status(follow_redirect = FALSE)} 45 | 46 | \emph{Arguments:} 47 | \describe{ 48 | \item{\code{follow_redirect}}{ 49 | should we follow redirects to get the status of any requeued task? 50 | } 51 | } 52 | 53 | \emph{Value}: 54 | Scalar character. Possible values are 55 | \describe{ 56 | \item{\code{PENDING}}{queued, but not run by a worker} 57 | \item{\code{RUNNING}}{being run on a worker, but not complete} 58 | \item{\code{COMPLETE}}{task completed successfully} 59 | \item{\code{ERROR}}{task completed with an error} 60 | \item{\code{ORPHAN}}{task orphaned due to loss of worker} 61 | \item{\code{REDIRECT}}{orphaned task has been redirected} 62 | \item{\code{MISSING}}{task not known (deleted, or never existed)} 63 | } 64 | } 65 | \item{\code{result}}{ 66 | Fetch the result of a task, so long as it has completed. 67 | 68 | \emph{Usage:} 69 | \code{result(follow_redirect = FALSE)} 70 | 71 | \emph{Arguments:} 72 | \describe{ 73 | \item{\code{follow_redirect}}{ 74 | should we follow redirects to get the status of any requeued task? 
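
      For illustration, a sketch of the redirect case (assuming a
      hypothetical queue handle \code{obj} and a handle \code{t} to
      one of its orphaned tasks):
      \preformatted{t2 <- obj$requeue(t$id)
t$result(follow_redirect = TRUE) # fetches the result of t2}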
75 |     }
76 |   }
77 | }
78 | \item{\code{wait}}{
79 |   Like \code{result}, but will wait until the task is complete. In order to preserve the \code{key_complete} for anything that might be listening for it (and to avoid collision with anything else writing to that key), this function repeatedly polls the database. Over a slow connection you may want to increase the \code{every} parameter.
80 | 
81 |   \emph{Usage:}
82 |   \code{wait(timeout, every = 0.05)}
83 | 
84 |   \emph{Arguments:}
85 |   \describe{
86 |     \item{\code{timeout}}{
87 |       Length of time, in seconds, to wait. A value of zero will not wait. Infinite times are possible and can be escaped by pressing Ctrl-C or Escape (depending on platform).
88 |     }
89 | 
90 |     \item{\code{every}}{
91 |       How often, in seconds, to poll for results
92 |     }
93 |   }
94 | }
95 | \item{\code{expr}}{
96 |   returns the expression stored in the task
97 | 
98 |   \emph{Usage:}
99 |   \code{expr(locals = FALSE)}
100 | 
101 |   \emph{Arguments:}
102 |   \describe{
103 |     \item{\code{locals}}{
104 |       Logical, indicating if the local variables associated with the expression should also be returned. If \code{TRUE}, then local variables used in the expression will be returned in an \emph{attribute} of the expression, \code{envir}.
105 |     }
106 |   }
107 | 
108 |   \emph{Value}:
109 |   A quoted expression (a language object). Turn this into a string with \code{deparse}. If \code{locals} was \code{TRUE} there will be an environment attribute with local variables included.
110 | }
111 | \item{\code{envir}}{
112 |   returns the environment identifier for the task
113 | 
114 |   \emph{Usage:}
115 |   \code{envir()}
116 | }
117 | \item{\code{times}}{
118 |   returns a summary of times associated with this task.
119 | 
120 |   \emph{Usage:}
121 |   \code{times(unit_elapsed = "secs")}
122 | 
123 |   \emph{Arguments:}
124 |   \describe{
125 |     \item{\code{unit_elapsed}}{
126 |       Unit to use in computing elapsed times. The default is to use "secs". This is passed through to \code{\link{difftime}} so the units available there can be used: "auto", "secs", "mins", "hours", "days", "weeks".
127 |     }
128 |   }
129 | 
130 |   \emph{Value}:
131 |   A one row \code{data.frame} with columns
132 |   \describe{
133 |     \item{\code{submitted}}{Time the task was submitted}
134 |     \item{\code{started}}{Time the task was started, or \code{NA} if waiting}
135 |     \item{\code{finished}}{Time the task was completed, or \code{NA}
136 |       if waiting or running}
137 |     \item{\code{waiting}}{Elapsed time spent waiting}
138 |     \item{\code{running}}{Elapsed time spent running, or \code{NA} if waiting}
139 |     \item{\code{idle}}{Elapsed time since finished, or \code{NA}
140 |       if waiting or running}
141 |   }
142 | }
143 | }
144 | }
145 | 
146 | 
-------------------------------------------------------------------------------- /man/task_bundle.Rd: --------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/task_bundle.R
3 | \name{task_bundle}
4 | \alias{task_bundle}
5 | \title{Create a task bundle}
6 | \usage{
7 | task_bundle(obj, tasks, groups = NULL, names = NULL)
8 | }
9 | \arguments{
10 | \item{obj}{An observer or queue object}
11 | 
12 | \item{tasks}{A list of tasks}
13 | 
14 | \item{groups}{Optional vector of groups. If given, then additional
15 | tasks can be added to the bundle if they share the same group names.}
16 | 
17 | \item{names}{Optional vector of names to label output with.}
18 | }
19 | \description{
20 | Create a task bundle.
Generally these are not created manually,
21 | but this page serves to document what task bundles are and the
22 | methods that they have.
23 | }
24 | \details{
25 | A task bundle exists to group together tasks that are related. It
26 | is possible for a task to belong to multiple bundles.
27 | }
28 | \section{Methods}{
29 | 
30 | 
31 | \describe{
32 | \item{\code{ids}}{
33 |   Return the vector of ids for the bundle (will return \code{character(0)} if the bundle is empty).
34 | 
35 |   \emph{Usage:}
36 |   \code{ids()}
37 | }
38 | \item{\code{status}}{
39 |   Fetch the current task status (see \code{\link{task}}).
40 | 
41 |   \emph{Usage:}
42 |   \code{status(follow_redirect = FALSE)}
43 | 
44 |   \emph{Arguments:}
45 |   \describe{
46 |     \item{\code{follow_redirect}}{
47 |       should we follow redirects to get the status of any requeued task?
48 |     }
49 |   }
50 | }
51 | \item{\code{overview}}{
52 |   High-level overview of the tasks in the bundle; the number of tasks in each status.
53 | 
54 |   \emph{Usage:}
55 |   \code{overview()}
56 | }
57 | \item{\code{results}}{
58 |   Fetch results for all tasks, immediately (unlike \code{wait}).
59 | 
60 |   \emph{Usage:}
61 |   \code{results(follow_redirect = FALSE)}
62 | 
63 |   \emph{Arguments:}
64 |   \describe{
65 |     \item{\code{follow_redirect}}{
66 |       should we follow redirects to get the status of any requeued task?
67 |     }
68 |   }
69 | 
70 |   \emph{Value}:
71 | 
72 |   Always returns a list, regardless of how many tasks are in the bundle (\code{list()} for zero tasks, \code{list(x1)} for one task, \code{list(x1, x2)} for two, etc). This list will be named with the ids (see \code{ids()}) unless the bundle has a \code{names} attribute that has not been invalidated by using \code{update_groups()} (this interface may change).
73 | }
74 | \item{\code{wait}}{
75 |   Like \code{results}, but block until results are returned.
76 | 
77 |   \emph{Usage:}
78 |   \code{wait(timeout = 60, time_poll = 1, progress_bar = TRUE, follow_redirect = FALSE)}
79 | 
80 |   \emph{Arguments:}
81 |   \describe{
82 |     \item{\code{timeout}}{
83 | 
84 |       Total time to wait, in seconds. If the tasks have not returned by this point an error will be thrown.
85 |     }
86 | 
87 |     \item{\code{time_poll}}{
88 | 
89 |       Interval, in seconds, to poll for new results. Increasing this time will reduce the number of Redis roundtrips but decrease the responsiveness to an interrupt.
90 |     }
91 | 
92 |     \item{\code{progress_bar}}{
93 | 
94 |       Display a progress bar while waiting? Includes a spinner for reassurance that the process has not died.
95 |     }
96 | 
97 |     \item{\code{follow_redirect}}{
98 |       should we follow redirects to get the status of any requeued task?
99 |     }
100 |   }
101 | }
102 | \item{\code{wait1}}{
103 | 
104 |   Wait for a single task to complete, and return its result.
105 | 
106 |   \emph{Usage:}
107 |   \code{wait1(timeout = 60, time_poll = 1, follow_redirect = FALSE)}
108 | 
109 |   \emph{Arguments:}
110 |   \describe{
111 |     \item{\code{timeout}}{
112 | 
113 |       Total time to wait, in seconds. If the tasks have not returned by this point an error will be thrown.
114 |     }
115 | 
116 |     \item{\code{time_poll}}{
117 | 
118 |       Interval, in seconds, to poll for new results. Increasing this time will reduce the number of Redis roundtrips but decrease the responsiveness to an interrupt.
119 |     }
120 | 
121 |     \item{\code{follow_redirect}}{
122 |       should we follow redirects to get the status of any requeued task?
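
      A sketch of draining completed tasks one at a time with
      \code{wait1} (assuming a hypothetical bundle handle \code{b});
      each call returns \code{NULL} once nothing arrives within the
      timeout:
      \preformatted{while (!is.null(res <- b$wait1(timeout = 5))) {
  message("task ", res[[1]], " completed")
}}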
123 |     }
124 |   }
125 | 
126 |   \emph{Value}:
127 | 
128 |   If no task is available, either because all tasks are complete or because the waiting time was too short, \code{NULL} is returned. Otherwise a list of length 2 is returned, with elements \code{id} (the id of the task that completed) and \code{value} (the value of that task). This interface is inspired by Redis' \code{BLPOP} operation.
129 | }
130 | \item{\code{times}}{
131 |   Like times in \code{\link{observer}}, but limited to the ids in the bundle.
132 | 
133 |   \emph{Usage:}
134 |   \code{times(unit_elapsed = "secs")}
135 | 
136 |   \emph{Arguments:}
137 |   \describe{
138 |     \item{\code{unit_elapsed}}{
139 |       Unit to use in computing elapsed times. The default is to use "secs". This is passed through to \code{\link{difftime}} so the units available there can be used: "auto", "secs", "mins", "hours", "days", "weeks".
140 |     }
141 |   }
142 | }
143 | \item{\code{update_groups}}{
144 | 
145 |   Check for new tasks that have been added to the groups included in this bundle and add these tasks to the bundle.
146 | 
147 |   \emph{Usage:}
148 |   \code{update_groups()}
149 | 
150 |   \emph{Value}:
151 | 
152 |   Invisibly, the ids of tasks that have been added to the bundle.
153 | }
154 | }
155 | }
156 | 
157 | 
-------------------------------------------------------------------------------- /man/worker.Rd: --------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/worker.R
3 | \name{worker}
4 | \alias{worker}
5 | \title{Create an rrqueue worker}
6 | \usage{
7 | worker(queue_name, redis_host = "127.0.0.1", redis_port = 6379,
8 |   heartbeat_period = 30, heartbeat_expire = 90, key_worker_alive = NULL)
9 | }
10 | \arguments{
11 | \item{queue_name}{Queue name}
12 | 
13 | \item{redis_host}{Host name/IP for the Redis server}
14 | 
15 | \item{redis_port}{Port for the Redis server}
16 | 
17 | \item{heartbeat_period}{Period between heartbeat pulses}
18 | 
19 | \item{heartbeat_expire}{Time that heartbeat pulses will persist
20 | for (must be greater than \code{heartbeat_period})}
21 | 
22 | \item{key_worker_alive}{Optional key to write to when the worker
23 | becomes alive. The worker will push onto this key so that
24 | another process can monitor it and determine when a worker has
25 | come up.}
26 | }
27 | \description{
28 | Create an rrqueue worker. This blocks the main loop.
29 | }
30 | 
31 | 
-------------------------------------------------------------------------------- /man/worker_spawn.Rd: --------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/worker_spawn.R
3 | \name{worker_spawn}
4 | \alias{worker_spawn}
5 | \title{Spawn a worker}
6 | \usage{
7 | worker_spawn(queue_name, logfile, redis_host = "127.0.0.1",
8 |   redis_port = 6379, n = 1, timeout = 20, time_poll = 1,
9 |   heartbeat_period = NULL, heartbeat_expire = NULL, path = ".")
10 | }
11 | \arguments{
12 | \item{queue_name}{Name of the queue to connect to}
13 | 
14 | \item{logfile}{Name of a log file to write to (consider
15 | \code{tempfile()}).
If \code{n} > 1, then \code{n} log files
16 | must be provided.}
17 | 
18 | \item{redis_host}{Host name/IP for the Redis server}
19 | 
20 | \item{redis_port}{Port for the Redis server}
21 | 
22 | \item{n}{Number of workers to spawn}
23 | 
24 | \item{timeout}{Time to wait for the worker to appear}
25 | 
26 | \item{time_poll}{Period to poll for the worker (must be in
27 | seconds)}
28 | 
29 | \item{heartbeat_period}{Period between heartbeat pulses}
30 | 
31 | \item{heartbeat_expire}{Time that heartbeat pulses will persist}
32 | 
33 | \item{path}{Path to start the worker in. By default workers will
34 | start in the current working directory, but you can start them
35 | elsewhere by providing a path here. If the path does not exist,
36 | an error will be thrown. If \code{n} is greater than 1, all
37 | workers will start in the same working directory. The
38 | \code{logfile} argument will be interpreted relative to the current
39 | working directory (not the worker working directory); use
40 | \code{\link{normalizePath}} to convert into an absolute path
41 | name to prevent this.}
42 | }
43 | \description{
44 | Spawn a worker in the background
45 | }
46 | \details{
47 | Spawning multiple workers. If \code{n} is greater than one,
48 | multiple workers will be spawned. This happens in parallel so it
49 | does not take \code{n} times longer than spawning a single worker.
50 | 
51 | Beware that signals like Ctrl-C passed to \emph{this} R instance
52 | can still propagate to the child processes and can result in them
53 | dying unexpectedly. It is probably safer to start processes in a
54 | standalone session.
55 | }
56 | 
57 | 
-------------------------------------------------------------------------------- /man/worker_stop.Rd: --------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/worker.R
3 | \name{worker_stop}
4 | \alias{worker_stop}
5 | \title{Stop a worker}
6 | \usage{
7 | worker_stop(queue, worker_id, type = "message", host = "127.0.0.1",
8 |   port = 6379)
9 | }
10 | \arguments{
11 | \item{queue}{Name of the queue}
12 | 
13 | \item{worker_id}{Name of a single worker}
14 | 
15 | \item{type}{Either "message" to send a message or "kill" to kill the
16 | worker.}
17 | 
18 | \item{host}{Redis hostname}
19 | 
20 | \item{port}{Redis port}
21 | }
22 | \description{
23 | Try to stop a worker. You won't need to call this very often, hopefully.
24 | }
25 | 
26 | 
-------------------------------------------------------------------------------- /man/yaml_env.Rd: --------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/env.R
3 | \name{yaml_env}
4 | \alias{yaml_env}
5 | \title{Load environment variables from a yaml file}
6 | \usage{
7 | yaml_env(filename, section = NULL)
8 | }
9 | \arguments{
10 | \item{filename}{Name of the file}
11 | 
12 | \item{section}{An optional section of the file to load}
13 | }
14 | \description{
15 | Load environment variables from a yaml file. This is a hack for a
16 | project. It may change and may move to another package. \code{callr} would
17 | be a better fit probably, but \code{callr} doesn't pull in
18 | \code{yaml} yet so I don't know that it's a good fit.
19 | }
20 | \details{
21 | The yaml file must be sets of key/value pairs of simple data
22 | types.
Something like:
23 | 
24 | \preformatted{
25 |   REDIS_HOST: localhost
26 | }
27 | 
28 | Alternatively, for use with section, add an extra layer of nesting:
29 | 
30 | \preformatted{
31 | local:
32 |   REDIS_HOST: localhost
33 | remote:
34 |   REDIS_HOST: redis.marathon.mesos
35 | }
36 | }
37 | 
38 | 
-------------------------------------------------------------------------------- /tests/testthat.R: --------------------------------------------------------------------------------
1 | library(testthat)
2 | library(rrqueue)
3 | 
4 | test_check("rrqueue")
5 | 
-------------------------------------------------------------------------------- /tests/testthat/config.yml: --------------------------------------------------------------------------------
1 | queue_name: tmpjobs
2 | ## Only affect workers:
3 | heartbeat_period: 30
4 | heartbeat_expire: 90
5 | key_worker_alive: ~
6 | ## Only affect queues:
7 | packages: ~
8 | sources:
9 |   - "myfuns.R"
10 | redis:
11 |   host: 127.0.0.1
12 |   port: 6379
13 | 
-------------------------------------------------------------------------------- /tests/testthat/config2.yml: --------------------------------------------------------------------------------
1 | queue_name: tmpjobs
2 | redis:
3 |   host: redis.mesos.marathon
4 | 
-------------------------------------------------------------------------------- /tests/testthat/config3.yml: --------------------------------------------------------------------------------
1 | redis:
2 |   host: redis.mesos.marathon
3 | 
-------------------------------------------------------------------------------- /tests/testthat/helper-rrqueue.R: --------------------------------------------------------------------------------
1 | object_to_string <- RedisAPI::object_to_string
2 | string_to_object <- RedisAPI::string_to_object
3 | 
4 | ## TODO: run in db 15 or something?
5 | empty_named_list <- function() {
6 |   structure(list(), names = character(0))
7 | }
8 | empty_named_character <- function() {
9 |   structure(character(0), names = character(0))
10 | }
11 | 
12 | test_cleanup <- function() {
13 |   test_queue_clean("tmpjobs")
14 |   test_queue_clean("myqueue") # vignette
15 |   test_queue_clean("testq:heartbeat")
16 | }
17 | 
18 | test_queue_clean <- function(name) {
19 |   queue_clean(redis_connection(NULL), name, purge=TRUE, stop_workers="kill")
20 | }
21 | 
22 | ## Looks like a bug to me, relative to the docs:
23 | PSKILL_SUCCESS <- tools::pskill(Sys.getpid(), 0)
24 | pid_exists <- function(pid) {
25 |   tools::pskill(pid, 0) == PSKILL_SUCCESS
26 | }
27 | 
-------------------------------------------------------------------------------- /tests/testthat/myfuns.R: --------------------------------------------------------------------------------
1 | slowdouble <- function(x) {
2 |   Sys.sleep(x)
3 |   x * 2
4 | }
5 | 
6 | suml <- function(x) {
7 |   x[[1]] + x[[2]]
8 | }
9 | 
10 | prod2 <- function(a, b) {
11 |   a * b
12 | }
13 | 
14 | ## Simulate a function called for side effects, but without side effects...
15 | ret_null <- function(...) {
16 |   Sys.sleep(0.02)
17 |   NULL
18 | }
19 | 
20 | failure <- function(controlled) {
21 |   if (controlled) {
22 |     try(stop("an expected error"), silent=TRUE)
23 |   } else {
24 |     stop("an unexpected error")
25 |   }
26 | }
27 | 
28 | ## This is a blocking call that skips the R interrupt loop; we won't
29 | ## listen for SIGINT during this and the stop request will fail.
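## (Editorial aside: the `block` helper below deliberately skips the
## interrupt loop. For contrast, a sketch of an interruptible variant
## -- hypothetical, not used by these tests -- that issues short BLPOP
## calls so R re-enters its interrupt loop between polls. It assumes,
## as `block` does, that BLPOP returns NULL on timeout.)
block_polling <- function(n, poll = 1) {
  con <- rrqueue:::redis_connection(NULL)
  key <- ids::adjective_animal()
  for (i in seq_len(ceiling(n / poll))) {
    res <- con$BLPOP(key, poll)  # waits at most `poll` seconds
    if (!is.null(res)) {
      return(res)
    }
  }
  NULL
}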
30 | block <- function(n) {
31 |   key <- ids::adjective_animal()
32 |   rrqueue:::redis_connection(NULL)$BLPOP(key, n)
33 | }
34 | 
-------------------------------------------------------------------------------- /tests/testthat/test-files.R: --------------------------------------------------------------------------------
1 | context("files")
2 | 
3 | test_that("io", {
4 |   filename <- "myfuns.R"
5 |   str <- read_file_to_string(filename)
6 |   expect_that(hash_string(str), equals(hash_file(filename)))
7 | 
8 |   tmp <- tempfile("rrqueue_")
9 |   write_string_to_file(str, tmp)
10 |   expect_that(hash_file(tmp), equals(hash_file(filename)))
11 |   expect_that(readLines(tmp), not(gives_warning()))
12 | })
13 | 
14 | test_that("files", {
15 |   test_cleanup()
16 |   con <- redis_connection(NULL)
17 |   prefix <- "tmpjobs:files:"
18 |   cache <- file_cache(prefix, con)
19 | 
20 |   expect_that(cache$list(), equals(character(0)))
21 | 
22 |   obj <- files_pack(cache)
23 |   expect_that(obj, equals(structure(character(0), class="files_pack")))
24 | 
25 |   obj <- files_pack(cache, "myfuns.R")
26 |   expect_that(length(obj), equals(1))
27 |   expect_that(names(obj), equals("myfuns.R"))
28 |   expect_that(obj[["myfuns.R"]], equals(hash_file("myfuns.R")))
29 | 
30 |   tmp <- tempfile("rrqueue_")
31 |   files_unpack(cache, obj, tmp)
32 |   path <- file.path(tmp, "myfuns.R")
33 |   expect_that(file.exists(path), is_true())
34 |   expect_that(hash_file(path), equals(obj[["myfuns.R"]]))
35 | })
36 | 
-------------------------------------------------------------------------------- /tests/testthat/test-heartbeat.R: --------------------------------------------------------------------------------
1 | context("heartbeat")
2 | 
3 | ## This file collects all the really nasty stuff with starting and
4 | ## destroying worker processes, and detecting that with the heartbeat
5 | ## process. There is a lot of mocking up and it's slow to develop and
6 | ## test. There is a real problem with cascading test failures
7 | ## throughout, so many failures might just be caused by a single root
8 | ## cause.
9 | test_that("heartbeat with signals", {
10 |   test_cleanup()
11 | 
12 |   ## Fairly fast worker timeout:
13 |   expire <- 3
14 |   obj <- queue("tmpjobs", sources="myfuns.R")
15 |   expect_that(obj$workers_list_exited(), equals(character(0)))
16 | 
17 |   logfile <- "worker_heartbeat.log"
18 |   Sys.setenv("R_TESTS" = "")
19 |   wid <- worker_spawn(obj$queue_name, logfile,
20 |                       heartbeat_period=1, heartbeat_expire=expire)
21 |   ## worker("tmpjobs", heartbeat_expire=3, heartbeat_period=1)
22 | 
23 |   pid <- obj$workers_info()[[wid]]$pid
24 |   expect_that(pid_exists(pid), is_true())
25 | 
26 |   expect_that(obj$workers_running(wid),
27 |               equals(setNames(TRUE, wid)))
28 |   expect_that(obj$workers_status(),
29 |               equals(setNames(WORKER_IDLE, wid)))
30 | 
31 |   t <- obj$enqueue(slowdouble(100))
32 |   Sys.sleep(.5)
33 |   expect_that(obj$workers_status(wid), equals(setNames("BUSY", wid)))
34 |   expect_that(is.na(t$times()[["finished"]]), is_true())
35 | 
36 |   obj$send_signal(tools::SIGINT, wid)
37 |   Sys.sleep(.5)
38 | 
39 |   expect_that(t$status(), equals(TASK_ORPHAN))
40 |   expect_that(is.na(t$times()[["finished"]]), is_false())
41 | 
42 |   ## Machine still alive:
43 |   expect_that(pid_exists(pid), is_true())
44 | 
45 |   ## Realistically, this will trigger fairly quickly.
46 |   for (i in seq_len(10)) {
47 |     ## Another one at this point will do nothing:
48 |     obj$send_signal(tools::SIGINT, wid)
49 |     Sys.sleep(.5)
50 |     expect_that(pid_exists(pid), is_true())
51 | 
52 |     ## Check that an additional job will work OK and not get caught by
53 |     ## the *second* interrupt (this did happen to me before)
54 |     tt <- obj$enqueue(sin(1))
55 |     expect_that(res <- tt$wait(1), not(throws_error()))
56 |     expect_that(res, equals(sin(1)))
57 | 
58 |     if ("REQUEUE" %in% obj$workers_log_tail(wid, 0)$command) {
59 |       break
60 |     }
61 |   }
62 |   expect_that("REQUEUE" %in% obj$workers_log_tail(wid, 0)$command,
63 |               is_true())
64 | 
65 |   t <- obj$enqueue(slowdouble(100))
66 |   Sys.sleep(.5)
67 |   obj$send_signal(tools::SIGTERM, wid)
68 |   Sys.sleep(.5)
69 | 
70 |   expect_that(pid_exists(pid), is_false())
71 | 
72 |   expect_that(obj$workers_running(wid),
73 |               equals(setNames(TRUE, wid)))
74 |   expect_that(obj$workers_list(), equals(wid))
75 | 
76 |   Sys.sleep(expire)
77 | 
78 |   expect_that(obj$workers_running(wid),
79 |               equals(setNames(FALSE, wid)))
80 |   ## Still in the workers list, though:
81 |   expect_that(obj$workers_list(), equals(wid))
82 | 
83 |   ret <- obj$workers_identify_lost()
84 | 
85 |   expect_that(ret, equals(list(workers=wid, tasks=t$id)))
86 |   expect_that(t$status(), equals(TASK_ORPHAN))
87 |   expect_that(is.na(t$times()[["finished"]]), is_false())
88 |   expect_that(obj$workers_status(wid), equals(setNames(WORKER_LOST, wid)))
89 |   expect_that(obj$workers_list_exited(), equals(wid))
90 | 
91 |   ## We were killed in the act:
92 |   log <- obj$workers_log_tail(wid, 1)
93 |   expect_that(log$command, equals("TASK_START"))
94 | 
95 |   ## Then we can clean this mess up:
96 |   res <- obj$workers_delete_exited()
97 |   expect_that(res, equals(wid))
98 |   expect_that(obj$workers_list_exited(), equals(character(0)))
99 |   expect_that(obj$workers_delete_exited(), equals(character(0)))
100 | 
101 |   ## No worker keys, plenty of task keys:
102 |   expect_that(RedisAPI::scan_find(obj$con, "tmpjobs:tasks:*"),
103 |               not(equals(character(0))))
104 |   expect_that(RedisAPI::scan_find(obj$con, "tmpjobs:workers:*"),
105 |               equals(character(0)))
106 | })
107 | 
108 | test_that("heartbeat shutdown when running job", {
109 |   expire <- 3
110 | 
111 |   test_cleanup()
112 |   obj <- queue("tmpjobs", sources="myfuns.R")
113 |   expect_that(obj$workers_list_exited(), equals(character(0)))
114 | 
115 |   logfile <- "worker_heartbeat.log"
116 |   Sys.setenv("R_TESTS" = "")
117 |   wid <- worker_spawn(obj$queue_name, logfile,
118 |                       heartbeat_period=1, heartbeat_expire=expire)
119 |   ## worker("tmpjobs", heartbeat_expire=3, heartbeat_period=1)
120 |   ## wid <- obj$workers_list()
121 |   pid <- obj$workers_info()[[wid]]$pid
122 |   expect_that(pid_exists(pid), is_true())
123 | 
124 |   t <- obj$enqueue(slowdouble(100))
125 |   Sys.sleep(.5)
126 |   expect_that(obj$workers_status(wid), equals(setNames("BUSY", wid)))
127 | 
128 |   ## This message will be ignored:
129 |   obj$send_message("STOP")
130 |   Sys.sleep(.5)
131 |   expect_that(obj$workers_status(wid), equals(setNames("BUSY", wid)))
132 |   expect_that(pid_exists(pid), is_true())
133 |   expect_that(t$status(), equals("RUNNING"))
134 | 
135 |   ## But after an interrupt the message will be honoured:
136 |   obj$send_signal(tools::SIGINT, wid)
137 |   Sys.sleep(.5)
138 | 
139 |   ## The shutdown *is* clean:
140 |   expect_that(obj$workers_status(wid),
141 |               equals(setNames(WORKER_EXITED, wid)))
142 |   log <- obj$workers_log_tail(wid, 1)
143 |   expect_that(log$command, equals("STOP"))
144 |   expect_that(log$message, equals("OK"))
145 | 
expect_that(pid_exists(pid), is_false()) 146 | 147 | ## The task was orphaned: 148 | expect_that(t$status(), equals(TASK_ORPHAN)) 149 | }) 150 | 151 | test_that("requeue orphaned jobs", { 152 | expire <- 3 153 | 154 | test_cleanup() 155 | obj <- queue("tmpjobs", sources="myfuns.R") 156 | expect_that(obj$workers_list_exited(), equals(character(0))) 157 | 158 | logfile <- "worker_heartbeat.log" 159 | Sys.setenv("R_TESTS" = "") 160 | wid <- worker_spawn(obj$queue_name, logfile, 161 | heartbeat_period=1, heartbeat_expire=expire) 162 | ## worker("tmpjobs", heartbeat_expire=3, heartbeat_period=1) 163 | ## wid <- obj$workers_list() 164 | pid <- obj$workers_info()[[wid]]$pid 165 | expect_that(pid_exists(pid), is_true()) 166 | 167 | t_double <- 5 168 | t <- obj$enqueue(slowdouble(t_double)) 169 | Sys.sleep(.5) 170 | expect_that(obj$workers_status(wid), equals(setNames("BUSY", wid))) 171 | expect_that(is.na(t$times()[["finished"]]), is_true()) 172 | 173 | obj$send_signal(tools::SIGINT, wid) 174 | Sys.sleep(.5) 175 | 176 | expect_that(t$status(), equals(TASK_ORPHAN)) 177 | expect_that(is.na(t$times()[["finished"]]), is_false()) 178 | 179 | t2 <- obj$requeue(t$id) 180 | expect_that(t$status(), equals(TASK_REDIRECT)) 181 | Sys.sleep(0.5) 182 | expect_that(t2$status(), equals(TASK_RUNNING)) 183 | Sys.sleep(t_double) 184 | expect_that(t2$status(), equals(TASK_COMPLETE)) 185 | 186 | expect_that(t$status(follow_redirect=TRUE), equals(TASK_COMPLETE)) 187 | expect_that(t$result(follow_redirect=TRUE), equals(t_double * 2)) 188 | expect_that(t$result(), throws_error("task [0-9]+ is unfetchable")) 189 | 190 | tt <- obj$tasks_times(t2$id) 191 | expect_that(tt, is_a("data.frame")) 192 | expect_that(tt$running, is_more_than(t_double - 1)) 193 | expect_that(tt$running, is_less_than(t_double + 1)) 194 | 195 | obj$send_message("STOP") 196 | }) 197 | 198 | 199 | test_that("workers stop", { 200 | expire <- 3 201 | 202 | test_cleanup() 203 | obj <- queue("tmpjobs", sources="myfuns.R") 204 | expect_that(obj$workers_list_exited(), equals(character(0))) 205 | 206 | logfile <- "worker_heartbeat.log" 207 | Sys.setenv("R_TESTS" = "") 208 | wid <- worker_spawn(obj$queue_name, logfile, 209 | heartbeat_period=1, heartbeat_expire=expire) 210 | ## worker("tmpjobs", heartbeat_expire=3, heartbeat_period=1) 211 | ## wid <- obj$workers_list() 212 | pid <- obj$workers_info()[[wid]]$pid 213 | expect_that(pid_exists(pid), is_true()) 214 | 215 | ## TODO: worker handles could help avoid this bit of mock up: 216 | expect_that(msg <- worker_stop_message( 217 | list(con=obj$con, 218 | keys=list(queue_name=obj$queue_name), 219 | name=wid, 220 | styles=worker_styles())), 221 | shows_message()) 222 | cmp <- bquote(rrqueue::worker_stop(.(obj$queue_name), .(wid))) 223 | expect_that(parse(text=msg[[length(msg)]])[[1]], 224 | equals(cmp)) 225 | eval(cmp, .GlobalEnv) 226 | 227 | Sys.sleep(.5) 228 | expect_that(pid_exists(pid), is_false()) 229 | log <- obj$workers_log_tail(wid, 1) 230 | expect_that(log$command, equals("STOP")) 231 | expect_that(log$message, equals("OK")) 232 | }) 233 | 234 | test_that("stop_workers (simple case)", { 235 | test_cleanup() 236 | obj <- queue("tmpjobs", sources="myfuns.R") 237 | expect_that(obj$workers_list_exited(), equals(character(0))) 238 | 239 | logfile <- "worker_heartbeat.log" 240 | Sys.setenv("R_TESTS" = "") 241 | wid <- worker_spawn(obj$queue_name, logfile) 242 | 243 | ok <- obj$stop_workers(wid, wait=10) 244 | expect_that(ok, equals(wid)) 245 | 246 | expect_that(obj$workers_list(), equals(character(0))) 247 | 
expect_that(obj$workers_list_exited(), equals(wid)) 248 | log <- obj$workers_log_tail(wid, 3) 249 | expect_that(log$command, equals(c("MESSAGE", "RESPONSE", "STOP"))) 250 | expect_that(log$message, equals(c("STOP", "STOP", "OK"))) 251 | 252 | res <- obj$workers_identify_lost() 253 | expect_that(res, equals(list(workers=character(), tasks=character()))) 254 | 255 | expect_that(obj$workers_delete_exited(), equals(wid)) 256 | expect_that(obj$workers_list_exited(), equals(character(0))) 257 | expect_that(nrow(obj$workers_log_tail(wid, 1)), equals(0)) 258 | }) 259 | 260 | test_that("stop_workers (running, interrupt)", { 261 | test_cleanup() 262 | obj <- queue("tmpjobs", sources="myfuns.R") 263 | expect_that(obj$workers_list_exited(), equals(character(0))) 264 | 265 | logfile <- "worker_heartbeat.log" 266 | Sys.setenv("R_TESTS" = "") 267 | wid <- worker_spawn(obj$queue_name, logfile) 268 | 269 | len <- 2 270 | t <- obj$enqueue(slowdouble(len)) 271 | Sys.sleep(.25) 272 | expect_that(t$status(), equals(TASK_RUNNING)) 273 | ok <- obj$stop_workers(wid, interrupt=TRUE) 274 | expect_that(ok, equals(wid)) 275 | 276 | Sys.sleep(.25) 277 | 278 | expect_that(t$status(), equals(TASK_ORPHAN)) 279 | expect_that(obj$workers_list(), equals(character(0))) 280 | expect_that(obj$workers_list_exited(), equals(wid)) 281 | log <- obj$workers_log_tail(wid, 5) 282 | expect_that(log$command, equals(c("INTERRUPT", "TASK_ORPHAN", 283 | "MESSAGE", "RESPONSE", "STOP"))) 284 | expect_that(log$message, equals(c("", t$id, "STOP", "STOP", "OK"))) 285 | 286 | res <- obj$workers_identify_lost() 287 | expect_that(res, equals(list(workers=character(), tasks=character()))) 288 | 289 | expect_that(obj$workers_delete_exited(), equals(wid)) 290 | expect_that(obj$workers_list_exited(), equals(character(0))) 291 | expect_that(nrow(obj$workers_log_tail(wid, 1)), equals(0)) 292 | }) 293 | 294 | test_that("stop_workers (running, wait)", { 295 | test_cleanup() 296 | obj <- queue("tmpjobs", sources="myfuns.R") 297 | expect_that(obj$workers_list_exited(), equals(character(0))) 298 | 299 | logfile <- "worker_heartbeat.log" 300 | Sys.setenv("R_TESTS" = "") 301 | wid <- worker_spawn(obj$queue_name, logfile) 302 | 303 | len <- 2 304 | t <- obj$enqueue(slowdouble(len)) 305 | Sys.sleep(.25) 306 | expect_that(t$status(), equals(TASK_RUNNING)) 307 | ok <- obj$stop_workers(wid, interrupt=FALSE) 308 | expect_that(ok, equals(wid)) 309 | 310 | Sys.sleep(len + .25) 311 | expect_that(t$status(), equals(TASK_COMPLETE)) 312 | expect_that(obj$workers_list(), equals(character(0))) 313 | expect_that(obj$workers_list_exited(), equals(wid)) 314 | log <- obj$workers_log_tail(wid, 1) 315 | expect_that(log$command, equals("STOP")) 316 | expect_that(log$message, equals("OK")) 317 | 318 | expect_that(obj$workers_delete_exited(), equals(wid)) 319 | expect_that(obj$workers_list_exited(), equals(character(0))) 320 | expect_that(nrow(obj$workers_log_tail(wid, 1)), equals(0)) 321 | }) 322 | 323 | test_that("stop_workers (blocking)", { 324 | test_cleanup() 325 | obj <- queue("tmpjobs", sources="myfuns.R") 326 | expect_that(obj$workers_list_exited(), equals(character(0))) 327 | 328 | expire <- 3 329 | logfile <- "worker_heartbeat.log" 330 | Sys.setenv("R_TESTS" = "") 331 | wid <- worker_spawn(obj$queue_name, logfile, 332 | heartbeat_expire=expire, heartbeat_period=1) 333 | 334 | t <- obj$enqueue(block(10)) 335 | Sys.sleep(.5) 336 | 337 | ok <- obj$stop_workers(wid, wait=1) 338 | expect_that(ok, equals(wid)) 339 | expect_that(t$status(), equals(TASK_RUNNING)) 340 | 341 | 
  Sys.sleep(expire)
342 |   res <- obj$workers_identify_lost()
343 |   expect_that(res, equals(list(workers=wid, tasks=t$id)))
344 |   expect_that(t$status(), equals(TASK_ORPHAN))
345 | })
346 | 
347 | test_that("clean up multiple workers", {
348 |   test_cleanup()
349 |   obj <- queue("tmpjobs", sources="myfuns.R")
350 |   expect_that(obj$workers_list_exited(), equals(character(0)))
351 | 
352 |   expire <- 3
353 |   n <- 4
354 |   logfile <- sprintf("worker_%d.log", seq_len(n))
355 |   wid <- worker_spawn(obj$queue_name, logfile, n=n,
356 |                       heartbeat_expire=expire, heartbeat_period=1)
357 | 
358 |   expect_that(sort(obj$workers_list()), equals(sort(wid)))
359 |   for (i in seq_len(n)) {
360 |     obj$enqueue(slowdouble(1000))
361 |   }
362 |   Sys.sleep(.5)
363 |   ids <- obj$workers_task_id()
364 | 
365 |   ## Kill two savagely:
366 |   wkill <- obj$workers_list()[2:3]
367 |   pid <- viapply(obj$workers_info()[wkill], "[[", "pid")
368 |   ok <- tools::pskill(pid, tools::SIGTERM) == PSKILL_SUCCESS
369 |   Sys.sleep(expire + 1)
370 | 
371 |   expect_that(sort(obj$workers_list()), equals(sort(wid)))
372 |   x <- obj$workers_identify_lost()
373 | 
374 |   expect_that(sort(x$workers), equals(sort(wkill)))
375 |   expect_that(sort(x$tasks), equals(sort(unname(ids[wkill]))))
376 | 
377 |   expect_that(obj$tasks_status(x$tasks),
378 |               equals(setNames(c(TASK_ORPHAN, TASK_ORPHAN),
379 |                               x$tasks)))
380 |   expect_that(obj$workers_status(x$workers),
381 |               equals(setNames(c(WORKER_LOST, WORKER_LOST), wkill)))
382 | 
383 |   obj$stop_workers()
384 | })
385 | 
-------------------------------------------------------------------------------- /tests/testthat/test-match-fun.R: --------------------------------------------------------------------------------
1 | context("function matching")
2 | 
3 | ## NOTE: this process has the potential to be quite slow; I don't
4 | ## think we want to do it for everything over and over. But let's
5 | ## start here and work back from there.
6 | 
7 | test_that("has_namespace", {
8 |   expect_that(has_namespace("foo::bar"), is_true())
9 |   expect_that(has_namespace("::bar"), is_true())
10 |   expect_that(has_namespace("foo::"), is_true())
11 |   ## false positive:
12 |   expect_that(has_namespace("foo::bar::baz"), is_true())
13 |   expect_that(has_namespace("foo"), is_false())
14 |   expect_that(has_namespace(":foo:"), is_false())
15 | })
16 | 
17 | test_that("split_namespace", {
18 |   expect_that(split_namespace("foo::bar"), equals(c("foo", "bar")))
19 |   expect_that(split_namespace("::bar"), equals(c("", "bar")))
20 |   ## Odd behaviour of strsplit():
21 | 
22 |   throws <- throws_error("Not a namespace-qualified variable")
23 |   expect_that(split_namespace("foo::"), throws)
24 |   ## false positive: should this error here?
25 | expect_that(split_namespace("foo::bar::baz"), throws) 26 | expect_that(split_namespace("foo"), throws) 27 | expect_that(split_namespace(":foo:"), throws) 28 | }) 29 | 30 | test_that("match_fun_name", { 31 | e <- new.env(parent=parent.env(.GlobalEnv)) 32 | source("myfuns.R", e) 33 | 34 | expect_that(match_fun_name("rrqueue::worker", e), 35 | equals(c("rrqueue", "worker"))) 36 | expect_that(match_fun_name("rrqueuex::worker", e), 37 | throws_error("Did not find function")) 38 | expect_that(match_fun_name("rrqueue::workerx", e), 39 | throws_error("Did not find function")) 40 | 41 | cmp <- structure(c("", "slowdouble"), envir=e) 42 | expect_that(match_fun_name("slowdouble", e), equals(cmp)) 43 | expect_that(match_fun_name("slowdouble_no_such", e), 44 | throws_error("Did not find function")) 45 | }) 46 | 47 | test_that("match_fun_symbol", { 48 | e <- new.env(parent=parent.env(.GlobalEnv)) 49 | source("myfuns.R", e) 50 | expect_that(match_fun_symbol(quote(worker), e), 51 | equals(c("rrqueue", "worker"))) 52 | expect_that(match_fun_symbol(quote(workerx), e), 53 | throws_error("Did not find function")) 54 | 55 | cmp <- structure(c("", "slowdouble"), envir=e) 56 | expect_that(match_fun_symbol(quote(slowdouble), e), 57 | equals(cmp)) 58 | expect_that(match_fun_symbol(quote(slowdouble_no_such), e), 59 | throws_error("Did not find function")) 60 | }) 61 | 62 | test_that("match_fun_value", { 63 | e <- new.env(parent=parent.env(.GlobalEnv)) 64 | source("myfuns.R", e) 65 | fun <- e$slowdouble 66 | 67 | expect_that(match_fun_value(worker, e), 68 | equals(c("rrqueue", "worker"))) 69 | ww <- worker 70 | expect_that(match_fun_value(ww, e), 71 | equals(c("rrqueue", "worker"))) 72 | expect_that(match_fun_value(get("worker"), e), 73 | equals(c("rrqueue", "worker"))) 74 | 75 | cmp <- structure(c("", "slowdouble"), envir=e) 76 | expect_that(match_fun_value(fun, e), 77 | equals(cmp)) 78 | 79 | e2 <- new.env(parent=parent.env(.GlobalEnv)) 80 | source("myfuns.R", e2) 81 | expect_that(e2$slowdouble, not(is_identical_to(e$slowdouble))) 82 | 83 | expect_that(match_fun_value(worker, baseenv()), 84 | throws_error("Did not find function")) 85 | expect_that(match_fun_value(fun, e2), 86 | throws_error("Did not find function")) 87 | }) 88 | 89 | test_that("match_fun", { 90 | env <- new.env(parent=parent.env(.GlobalEnv)) 91 | source("myfuns.R", env) 92 | 93 | fun <- env$slowdouble 94 | cmp <- structure(c("", "slowdouble"), envir=env) 95 | 96 | expect_that(match_fun("slowdouble", env), equals(cmp)) 97 | expect_that(match_fun(get("slowdouble", env), env), equals(cmp)) 98 | expect_that(match_fun(fun, env), equals(cmp)) 99 | 100 | fun <- rrqueue::worker 101 | cmp <- c("rrqueue", "worker") 102 | 103 | expect_that(match_fun("rrqueue::worker", env), equals(cmp)) 104 | expect_that(match_fun("worker", env), equals(cmp)) 105 | expect_that(match_fun(rrqueue::worker, env), equals(cmp)) 106 | expect_that(match_fun(get("worker", env), env), equals(cmp)) 107 | expect_that(match_fun(fun, env), equals(cmp)) 108 | }) 109 | 110 | test_that("match_fun_rrqueue", { 111 | env1 <- new.env(parent=parent.env(.GlobalEnv)) 112 | env2 <- new.env(parent=parent.env(.GlobalEnv)) 113 | env3 <- new.env(parent=parent.env(.GlobalEnv)) 114 | source("myfuns.R", env1) 115 | env3$slowdouble <- function(x) x * 2 116 | 117 | fun <- env1$slowdouble 118 | cmp <- structure(c("", "slowdouble"), envir=env1) 119 | 120 | expect_that(match_fun_rrqueue("slowdouble", env1, env1), 121 | equals(cmp)) 122 | expect_that(match_fun_rrqueue("slowdouble", env1, env2), 123 | 
              throws_error("Function not found in rrqueue environment"))
124 |   expect_that(match_fun_rrqueue("slowdouble", env1, env3),
125 |               throws_error("Function found in given and rrqueue"))
126 | 
127 |   expect_that(match_fun_rrqueue(fun, env1, env1),
128 |               equals(cmp))
129 |   expect_that(match_fun_rrqueue(fun, env1, env2),
130 |               throws_error("Function not found in rrqueue environment"))
131 |   expect_that(match_fun_rrqueue(fun, env1, env3),
132 |               throws_error("Function found in given and rrqueue"))
133 | })
134 | 
-------------------------------------------------------------------------------- /tests/testthat/test-rrqlapply.R: --------------------------------------------------------------------------------
1 | context("rrqlapply")
2 | 
3 | ## The first "high" level thing; a really basic mclapply-like clone.
4 | ## TODO: test passing in an unknown function
5 | test_that("Basic use", {
6 |   test_cleanup()
7 |   on.exit(test_cleanup())
8 | 
9 |   obj <- queue("tmpjobs", sources="myfuns.R")
10 |   expect_that(obj$tasks_groups_list(), equals(character(0)))
11 |   x <- sample(1:10, 20, replace=TRUE)
12 |   rrql <- rrqlapply_submit(x, "sin", obj)
13 |   expect_that(rrql, is_a("task_bundle"))
14 | 
15 |   grp <- obj$tasks_groups_list()
16 |   expect_that(length(grp), equals(1L))
17 | 
18 |   expect_that(grp, equals(rrql$groups))
19 |   expect_that(obj$tasks_in_groups(grp),
20 |               equals(rrql$ids()))
21 |   expect_that(rrql$names, equals(names(x)))
22 | 
23 |   tmp <- task_bundle_get(obj, groups=grp)
24 |   expect_that(tmp, is_a("task_bundle"))
25 |   expect_that(tmp$groups, equals(grp))
26 |   expect_that(tmp$names, equals(NULL))
27 |   expect_that(tmp$key_complete, equals(rrql$key_complete))
28 | 
29 |   ## TODO: it would be nice to filter by jobs in the bundle...
30 |   monitor_status(obj)
31 | 
32 |   task_ids <- rrql$ids()
33 |   expect_that(obj$tasks_status(task_ids),
34 |               equals(setNames(rep(TASK_PENDING, length(x)), task_ids)))
35 | 
36 |   wid <- worker_spawn(obj$queue_name, "rrqlapply.log")
37 |   ## w <- rrqueue::worker("tmpjobs", heartbeat_period=10)
38 | 
39 |   ## TODO:
40 |   Sys.sleep(0.5)
41 |   expect_that(obj$tasks_status(task_ids),
42 |               equals(setNames(rep(TASK_COMPLETE, length(x)), task_ids)))
43 | 
44 |   res <- rrql$wait()
45 |   cmp <- setNames(lapply(x, sin), rrql$ids())
46 |   expect_that(res, equals(cmp, tolerance=1e-15))
47 | 
48 |   expect_that(obj$tasks_status(task_ids),
49 |               equals(setNames(rep(TASK_COMPLETE, length(x)), task_ids)))
50 | 
51 |   rrql$delete_tasks()
52 | 
53 |   ## Cleanup has happened
54 |   expect_that(obj$tasks_status(task_ids),
55 |               equals(setNames(rep(TASK_MISSING, length(x)), task_ids)))
56 | 
57 |   res <- rrqlapply(x, "sin", obj, progress_bar=FALSE)
58 |   ## NOTE: hardcoded name here:
59 |   expect_that(res, equals(setNames(cmp, 21:40), tolerance=1e-15))
60 | 
61 |   obj$send_message("STOP")
62 | })
63 | 
64 | test_that("null return", {
65 |   test_cleanup()
66 |   on.exit(test_cleanup())
67 | 
68 |   obj <- queue("tmpjobs", sources="myfuns.R")
69 |   x <- sample(1:10, 20, replace=TRUE)
70 |   rrql <- rrqlapply_submit(x, "ret_null", obj)
71 |   monitor_status(obj)
72 | 
73 |   task_ids <- rrql$ids()
74 |   expect_that(obj$tasks_status(task_ids),
75 |               equals(setNames(rep(TASK_PENDING, length(x)), task_ids)))
76 | 
77 |   wid <- worker_spawn(obj$queue_name, "rrqlapply.log")
78 |   ## w <- rrqueue::worker("tmpjobs", heartbeat_period=10)
79 | 
80 |   ## TODO:
81 |   Sys.sleep(1.0)
82 |   expect_that(obj$tasks_status(task_ids),
83 |               equals(setNames(rep(TASK_COMPLETE, length(x)), task_ids)))
84 | 
85 |   res <- rrql$wait(progress_bar=FALSE)
86 |   cmp <- named_list(rrql$ids())
87 | expect_that(res, equals(cmp)) 88 | 89 | ## And again: 90 | expect_that(rrql$wait(progress_bar=FALSE), equals(cmp)) 91 | rrql$delete_tasks() 92 | 93 | ## Cleanup has happened 94 | expect_that(obj$tasks_status(task_ids), 95 | equals(setNames(rep(TASK_MISSING, length(x)), task_ids))) 96 | 97 | res <- rrqlapply(x, "ret_null", obj, progress_bar=FALSE) 98 | expect_that(res, equals(setNames(cmp, 21:40))) 99 | 100 | obj$send_message("STOP") 101 | }) 102 | 103 | test_that("bulk", { 104 | test_cleanup() 105 | x <- expand.grid(a=1:4, b=runif(3)) 106 | 107 | obj <- queue("tmpjobs", sources="myfuns.R") 108 | 109 | ## Serial versions: 110 | cmp_sum <- lapply(df_to_list(x), suml) 111 | cmp_prod <- lapply(df_to_list(x), function(el) prod2(el$a, el$b)) 112 | 113 | wid <- worker_spawn(obj$queue_name, "rrqlapply.log") 114 | 115 | res <- enqueue_bulk_submit(x, suml, obj) 116 | expect_that(res$groups, is_a("character")) 117 | 118 | ans <- res$wait() 119 | expect_that(ans, equals(setNames(cmp_sum, res$ids()))) 120 | 121 | ## All at once: 122 | res <- enqueue_bulk(x, prod2, obj, do.call=TRUE) 123 | expect_that(unname(res), equals(cmp_prod)) 124 | 125 | obj$send_message("STOP") 126 | test_cleanup() 127 | }) 128 | -------------------------------------------------------------------------------- /tests/testthat/test-task-bundle.R: -------------------------------------------------------------------------------- 1 | context("task_bundle") 2 | 3 | test_that("simple", { 4 | test_cleanup() 5 | on.exit(test_cleanup()) 6 | 7 | existing <- queues() 8 | ## expect_that(existing, equals(character(0))) 9 | 10 | obj <- queue("tmpjobs", sources="myfuns.R") 11 | 12 | group <- "mygroup" 13 | x <- obj$task_bundle_get(group) 14 | 15 | expect_that(x, is_a("task_bundle")) 16 | expect_that(x$ids(), equals(character(0))) 17 | expect_that(x$groups, equals(group)) 18 | expect_that(x$update_groups(), equals(character(0))) 19 | expect_that(x$results(), equals(empty_named_list())) 20 | expect_that(x$wait(), equals(empty_named_list())) 21 | expect_that(as.list(x$overview()), 22 | equals(list(PENDING=0, RUNNING=0, COMPLETE=0, ERROR=0))) 23 | 24 | ## Queue up a job: 25 | t <- obj$enqueue(sin(1), group=group) 26 | 27 | expect_that(x$update_groups(), equals(t$id)) 28 | expect_that(x$update_groups(), equals(character(0))) 29 | 30 | ids <- t$id 31 | for (i in 1:3) { 32 | t <- obj$enqueue(sin(1), group=group) 33 | ids <- c(ids, t$id) 34 | } 35 | 36 | expect_that(x$update_groups(), equals(ids[-1])) 37 | expect_that(x$update_groups(), equals(character(0))) 38 | 39 | expect_that(x$ids(), equals(ids)) 40 | 41 | expect_that(x$status(), 42 | equals(setNames(rep(TASK_PENDING, length(ids)), ids))) 43 | 44 | expect_that(x$results(), throws_error("Tasks not yet completed")) 45 | expect_that(x$wait(0), throws_error("Tasks not yet completed")) 46 | expect_that(x$wait(1), throws_error("Exceeded maximum time")) 47 | 48 | expect_that(x$status(), 49 | equals(setNames(rep(TASK_PENDING, length(ids)), ids))) 50 | 51 | ## Start a worker: 52 | logfile <- "worker.log" 53 | Sys.setenv("R_TESTS" = "") 54 | wid <- worker_spawn(obj$queue_name, logfile) 55 | 56 | Sys.sleep(.5) 57 | 58 | expect_that(x$status(), 59 | equals(setNames(rep(TASK_COMPLETE, length(ids)), ids))) 60 | 61 | cmp <- setNames(rep(list(sin(1)), length(ids)), ids) 62 | expect_that(x$results(), equals(cmp)) 63 | 64 | ## Set some names and watch them come out too: 65 | x$names <- letters[1:4] 66 | x$results() 67 | cmp <- setNames(rep(list(sin(1)), length(ids)), letters[1:4]) 68 | expect_that(x$results(), 
equals(cmp))
69 | 
70 | ## Add a new task:
71 | t <- obj$enqueue(sin(1), group=group)
72 | Sys.sleep(.5)
73 | 
74 | ## Nothing has changed in the bundle:
75 | expect_that(x$results(), equals(cmp))
76 | 
77 | id <- x$update_groups()
78 | expect_that(id, equals(t$id))
79 | ids <- c(ids, id)
80 | 
81 | ## Names have been removed:
82 | expect_that(x$names, equals(NULL))
83 | 
84 | cmp <- setNames(rep(list(sin(1)), length(ids)), ids)
85 | expect_that(x$results(), equals(cmp))
86 | 
87 | t <- obj$enqueue(slowdouble(2), group=group)
88 | x$update_groups()
89 | st <- setNames(rep(c(TASK_COMPLETE, TASK_RUNNING), c(length(ids), 1)),
90 | c(ids, t$id))
91 | Sys.sleep(.2)
92 | expect_that(x$status(), equals(st))
93 | r <- x$wait(3)
94 | 
95 | cmp <- c(cmp, setNames(list(4), t$id))
96 | expect_that(r, equals(cmp))
97 | expect_that(x$status(),
98 | equals(setNames(rep(TASK_COMPLETE, length(r)), names(r))))
99 | 
100 | expect_that(x$wait1(1), equals(NULL))
101 | expect_that(x$wait1(1), takes_less_than(1))
102 | 
103 | t1 <- obj$enqueue(slowdouble(1), group=group)
104 | t2 <- obj$enqueue(slowdouble(2), group=group)
105 | x$update_groups()
106 | res <- x$wait1(60)
107 | expect_that(res[[1]], equals(t1$id))
108 | expect_that(res[[2]], equals(2))
109 | res <- x$wait1(60)
110 | expect_that(res[[1]], equals(t2$id))
111 | expect_that(res[[2]], equals(4))
112 | expect_that(x$wait1(), is_null())
113 | 
114 | t3 <- obj$enqueue(slowdouble(3), group=group)
115 | x$update_groups()
116 | res0 <- x$wait1(1)
117 | expect_that(res0, is_null())
118 | res <- x$wait1(10)
119 | expect_that(res, equals(list(id=t3$id, result=t3$result())))
120 | 
121 | expect_that(as.list(x$overview()),
122 | equals(list(PENDING=0, RUNNING=0, COMPLETE=9, ERROR=0)))
123 | 
124 | ## Yeah, this is not going to work.
125 | xt <- x$times()
126 | expect_that(xt, is_a("data.frame"))
127 | cols <- c("task_id", "submitted", "started", "finished",
128 | "waiting", "running", "idle")
129 | expect_that(names(xt), equals(cols))
130 | cols <- setdiff(cols, "idle")
131 | expect_that(xt[cols], equals(obj$tasks_times(x$ids())[cols]))
132 | 
133 | ## Get the bundle again:
134 | y <- obj$task_bundle_get(group)
135 | expect_that(y$ids(), equals(x$ids()))
136 | expect_that(y$status(), equals(x$status()))
137 | expect_that(y$results(), equals(x$results()))
138 | 
139 | obj$stop_workers(wid)
140 | })
141 | 
--------------------------------------------------------------------------------
/tests/testthat/test-worker.R:
--------------------------------------------------------------------------------
1 | context("worker")
2 | 
3 | test_that("config", {
4 | expect_that(rrqueue_worker_args(character(0)),
5 | throws_error("usage: rrqueue_worker"))
6 | 
7 | queue_name <- "tmpjobs"
8 | 
9 | opts <- rrqueue_worker_args(queue_name)
10 | expect_that(opts$queue_name, equals(queue_name))
11 | expect_that(opts$redis_host, equals("127.0.0.1"))
12 | expect_that(opts$redis_port, equals(6379))
13 | expect_that(opts$heartbeat_period, equals(30))
14 | expect_that(opts$heartbeat_expire, equals(90))
15 | expect_that(opts$key_worker_alive, is_null())
16 | 
17 | opts <- rrqueue_worker_args(c("--config", "config.yml"))
18 | dat <- yaml_read("config.yml")
19 | expect_that(opts$queue_name, equals(dat$queue_name))
20 | expect_that(opts$redis_host, equals(dat$redis$host))
21 | expect_that(opts$redis_port, equals(dat$redis$port))
22 | expect_that(opts$heartbeat_period, equals(dat$heartbeat_period))
23 | expect_that(opts$heartbeat_expire, equals(dat$heartbeat_expire))
24 | expect_that(opts$key_worker_alive, is_null())
25 | 
26 | ## override some opts:
27 | opts <- rrqueue_worker_args(c("--config", "config.yml",
28 | "--key-worker-alive", "mykey"))
29 | expect_that(opts$key_worker_alive, equals("mykey"))
30 | 
31 | opts <- rrqueue_worker_args(c("--config", "config.yml",
32 | "--redis-port", "9999"))
33 | expect_that(opts$redis_port, equals("9999"))
34 | 
35 | opts <- rrqueue_worker_args(c("--config", "config.yml", queue_name))
36 | expect_that(opts$queue_name, equals(queue_name))
37 | 
38 | ## And again with a configuration that loads very little:
39 | opts <- rrqueue_worker_args(c("--config", "config2.yml",
40 | "--key-worker-alive", "mykey"))
41 | expect_that(opts$key_worker_alive, equals("mykey"))
42 | expect_that(opts$redis_host, equals(yaml_read("config2.yml")$redis$host))
43 | expect_that(opts$redis_port, equals(6379))
44 | 
45 | expect_that(rrqueue_worker_args(c("--config", "config3.yml")),
46 | throws_error("queue name must be given"))
47 | opts <- rrqueue_worker_args(c("--config", "config3.yml", queue_name))
48 | expect_that(opts$queue_name, equals(queue_name))
49 | })
50 | 
51 | test_that("workers_times - nonexistent worker", {
52 | obs <- observer("tmpjobs")
53 | name <- "no such worker"
54 | t <- obs$workers_times(name)
55 | expect_that(t, is_a("data.frame"))
56 | expect_that(nrow(t), equals(1))
57 | expect_that(t, equals(data.frame(worker_id=name,
58 | expire_max=NA_real_,
59 | expire=-2.0,
60 | last_seen=NA_real_,
61 | last_action=NA_real_,
62 | stringsAsFactors=FALSE)))
63 | })
64 | 
65 | test_that("workers_times - no workers", {
66 | obs <- observer("tmpjobs")
67 | t <- obs$workers_times()
68 | expect_that(t, is_a("data.frame"))
69 | expect_that(nrow(t), equals(0))
70 | expect_that(t, equals(data.frame(worker_id=character(0),
71 |
expire_max=numeric(0), 72 | expire=numeric(0), 73 | last_seen=numeric(0), 74 | last_action=numeric(0), 75 | stringsAsFactors=FALSE))) 76 | 77 | expect_that(obs$workers_list(), equals(character(0))) 78 | expect_that(obs$workers_status(), equals(empty_named_character())) 79 | 80 | log <- obs$workers_log_tail() 81 | expect_that(log, equals(data.frame(worker_id=character(0), 82 | time=character(0), 83 | command=character(0), 84 | message=character(0), 85 | stringsAsFactors=FALSE))) 86 | test_cleanup() 87 | }) 88 | -------------------------------------------------------------------------------- /update_web.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | set -e 3 | 4 | DOCS_DIR=inst/web 5 | VERSION=$(git rev-parse --short HEAD) 6 | REMOTE_URL=$(git config --get remote.origin.url) 7 | 8 | rm -rf ${DOCS_DIR}/.git 9 | git init ${DOCS_DIR} 10 | git -C ${DOCS_DIR} checkout --orphan gh-pages 11 | git -C ${DOCS_DIR} add . 12 | git -C ${DOCS_DIR} commit --no-verify -m "Update docs for version ${VERSION}" 13 | git -C ${DOCS_DIR} remote add origin -m "gh-pages" ${REMOTE_URL} 14 | git -C ${DOCS_DIR} push --force -u origin gh-pages 15 | -------------------------------------------------------------------------------- /vignettes/introduction.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Introduction to rrqueue" 3 | author: "Rich FitzJohn" 4 | date: "`r Sys.Date()`" 5 | output: rmarkdown::html_vignette 6 | vignette: > 7 | %\VignetteIndexEntry{Introduction to rrqueue} 8 | %\VignetteEngine{knitr::rmarkdown} 9 | %\VignetteEncoding{UTF-8} 10 | --- 11 | 12 | # Overview 13 | 14 | `rrqueue` is a *distributed task queue* for R, implemented on top of [Redis](http://redis.io). 15 | At the cost of a little more work it allows for more flexible parallelisation than afforded by `mclapply`. 16 | The main goal is to support non-map style operations: submit some tasks, collect the completed results, 17 | queue more even while some tasks are still running. 18 | 19 | Other features include: 20 | 21 | * Low-level task submission / retrieval has a simple API so that asynchronous task queues can be created. 22 | * Objects representing tasks, workers, queues, etc can be queried. 23 | * While blocking `mclapply`-like functions are available, the package is designed to be non-blocking so that intermediate results can be used. 24 | * Automatic fingerprinting of environments so that code run on a remote machine will correspond to the code found locally. 25 | * Works well connecting to a Redis database running on the cloud (e.g., on an AWS machine over an ssh tunnel). 26 | * Local workers can be added to a remote pool, so long as everything can talk to the same Redis server. 27 | * The worker pool can be scaled at any time (up or down). 28 | * Basic fault tolerance, supporting re-queuing tasks lost on crashed workers. 29 | 30 | The basic workflow is: 31 | 32 | 1. Create a queue 33 | 2. Submit tasks to the queue 34 | 3. Start workers 35 | 4. Collect results 36 | 37 | The workers can be started at any time between 1-3, though they do need to be started before results can be collected. 38 | 39 | Documenting things that work asynchronously is difficult. This 40 | document gives a tutorial-style overview of working with rrqueue. 41 | 42 | # Getting started 43 | 44 | The queue and workers can be started in any order, but it's easiest 45 | to explain starting the queue first. 
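To fix ideas, this is the whole round trip in four calls, using only functions that are walked through in detail below (a sketch, not run here):

``` r
obj <- rrqueue::queue("myqueue", sources="myfuns.R")  # 1. create a queue
t <- obj$enqueue(slowdouble(1))                       # 2. submit a task
wid <- rrqueue::worker_spawn("myqueue", tempfile())   # 3. start a worker
t$result()  # 4. collect the result (once the task has had time to finish)
```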
46 | 
47 | Suppose we have some simulation code; it needs to be in a file that
48 | the queue can see. For now, I'll use the file `myfuns.R` which is
49 | the test code. It has a function in it called `slowdouble` that
50 | takes a number, sleeps for that many seconds, and then returns
51 | twice the number. It's useful for testing.
52 | 
53 | ``` {r echo=FALSE, results="hide"}
54 | rrqueue:::queue_clean(redux::hiredis(), "myqueue",
55 | purge=TRUE, stop_workers="kill")
56 | lang_output <- function(x, lang) {
57 | cat(c(sprintf("```%s", lang), x, "```"), sep="\n")
58 | }
59 | cpp_output <- function(x) lang_output(x, "c++")
60 | r_output <- function(x) lang_output(x, "r")
61 | yaml_output <- function(x) lang_output(x, "yaml")
62 | plain_output <- function(x) lang_output(x, "plain")
63 | ```
64 | 
65 | You'll also need a running Redis server. I have one operating with
66 | the default parameters, so this works:
67 | ``` {r }
68 | redux::hiredis()$PING()
69 | ```
70 | 
71 | Create a queue called "myqueue" and tell it to load the source file
72 | "myfuns.R". If it needed to load packages, then passing
73 | `packages=c("package1", "package2")` would indicate that workers
74 | would need to load those packages, too.
75 | ``` {r }
76 | obj <- rrqueue::queue("myqueue", sources="myfuns.R")
77 | ```
78 | 
79 | The message "creating new queue" here indicates that `rrqueue` did
80 | not find any previous queues in place. Queues are designed to be
81 | re-attachable, so we can immediately just do that:
82 | ``` {r }
83 | obj <- rrqueue::queue("myqueue", sources="myfuns.R")
84 | ```
85 | 
86 | The message also notes that we have no workers available, so no
87 | work is going to get done. But we can still queue some tasks.
88 | 
89 | # Queuing tasks
90 | 
91 | The simplest sort of task queuing is to pass an expression into `enqueue`:
92 | ``` {r }
93 | t <- obj$enqueue(1 + 1)
94 | ```
95 | 
96 | The expression is not evaluated but stored, and will be evaluated on
97 | the worker. Saving the result of this gives a `task` object which
98 | can be inspected.
99 | ``` {r }
100 | t
101 | ```
102 | 
103 | The expression stored in the task:
104 | ``` {r }
105 | t$expr()
106 | ```
107 | 
108 | The status of the task:
109 | ``` {r }
110 | t$status()
111 | ```
112 | 
113 | The result of the task, which will throw an error if we try to fetch it before the task has run:
114 | ``` {r error=TRUE}
115 | t$result()
116 | ```
117 | 
118 | And how long the task has been waiting:
119 | ``` {r }
120 | t$times()
121 | ```
122 | 
123 | Tasks can use local variables, too:
124 | ``` {r }
125 | x <- 10
126 | t2 <- obj$enqueue(x * 2)
127 | t2$expr()
128 | ```
129 | 
130 | And because using unevaluated expressions can be problematic,
131 | `rrqueue` has a standard-evaluation version (`enqueue_`) which takes
132 | either strings representing expressions or quoted expressions:
133 | ``` {r }
134 | obj$enqueue_(quote(x / 2))
135 | ```
136 | 
137 | Now we have three tasks:
138 | ``` {r }
139 | obj$tasks_list()
140 | ```
141 | 
142 | All the tasks are waiting to be run:
143 | ``` {r }
144 | obj$tasks_status()
145 | ```
146 | 
147 | We can get an overview of the tasks:
148 | ``` {r }
149 | obj$tasks_overview()
150 | ```
151 | 
152 | # Starting workers
153 | 
154 | `rrqueue` includes a script `rrqueue_worker` for starting workers
155 | from the command line (install with `rrqueue::install_scripts()`); for example:
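(These invocations are a sketch: the flags are the ones exercised in `tests/testthat/test-worker.R`, and `config.yml` stands in for your own configuration file.)

``` sh
# Start a worker for "myqueue" against the default Redis (127.0.0.1:6379):
rrqueue_worker myqueue

# Read options from a configuration file, overriding single options on
# the command line:
rrqueue_worker --config config.yml --redis-port 9999 myqueue
```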
156 | Workers can also be started from within R using the `worker_spawn`
157 | function:
158 | ``` {r }
159 | logfile <- tempfile()
160 | wid <- rrqueue::worker_spawn("myqueue", logfile)
161 | ```
162 | ``` {r echo=FALSE}
163 | Sys.sleep(.5)
164 | ```
165 | 
166 | This function returns the *worker identifier*, which is also
167 | printed to the screen.
168 | 
169 | It's probably informative at this point to read the logfile of the
170 | worker to see what it did on startup:
171 | 
172 | ``` {r results="asis", echo=TRUE}
173 | plain_output(readLines(logfile))
174 | ```
175 | 
176 | The worker first prints a lot of diagnostic information to the
177 | screen (or log file) indicating the name of the worker, the version
178 | of rrqueue, machine information, and special keys in the database
179 | where important information is stored.
180 | 
181 | Then, after broadcasting that it is awake (`ALIVE`), it detects that
182 | there is a controller on the queue and attempts to construct
183 | the environment that the controller wants (`r paste("ENVIR", obj$envir_id)`).
184 | 
185 | After that, there are a series of `TASK_START`, `EXPR`, and
186 | `TASK_COMPLETE` lines as each of the three tasks is processed.
187 | ``` {r }
188 | obj$tasks_status()
189 | ```
190 | 
191 | The times here give an indication of the rrqueue overhead; the
192 | running time of these simple expressions should be close to zero.
193 | ``` {r }
194 | obj$tasks_times()
195 | ```
196 | 
197 | The task handle created before can now give a result:
198 | ``` {r }
199 | t$result()
200 | ```
201 | 
202 | Similarly, results can be retrieved from the queue directly:
203 | ``` {r }
204 | obj$task_result(1)
205 | obj$task_result(2)
206 | obj$task_result(3)
207 | ```
208 | 
209 | The worker that we created can be seen here:
210 | ``` {r }
211 | obj$workers_list()
212 | ```
213 | 
214 | Queue a slower task, this time using the `slowdouble` function. This
215 | will take 1s:
216 | ``` {r echo=TRUE}
217 | t <- obj$enqueue(slowdouble(1))
218 | t$status()
219 | Sys.sleep(.3)
220 | t$status()
221 | Sys.sleep(1)
222 | t$status()
223 | t$result()
224 | ```
225 | 
226 | Again, times are available:
227 | ``` {r }
228 | t$times()
229 | ```
230 | 
231 | # Finishing up
232 | ``` {r }
233 | obj$stop_workers()
234 | ```
235 | 
236 | ``` {r echo=FALSE}
237 | Sys.sleep(.5)
238 | ```
239 | 
240 | ``` {r results="asis", echo=FALSE}
241 | plain_output(readLines(logfile))
242 | ```
243 | 
244 | The worker is now in the exited list:
245 | ``` {r }
246 | obj$workers_list_exited()
247 | ```
248 | 
249 | The full log from our worker (dropping the first column, which is
250 | the worker id and takes up valuable space here):
251 | ``` {r }
252 | obj$workers_log_tail(wid, Inf)[-1]
253 | ```
254 | 
255 | ``` {r echo=FALSE, results="hide"}
256 | rrqueue:::queue_clean(obj$con, obj$queue_name, purge=TRUE)
257 | ```
258 | --------------------------------------------------------------------------------
/vignettes/messages.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "rrqueue messages"
3 | author: "Rich FitzJohn"
4 | date: "`r Sys.Date()`"
5 | output: rmarkdown::html_vignette
6 | vignette: >
7 | %\VignetteIndexEntry{rrqueue messages}
8 | %\VignetteEngine{knitr::rmarkdown}
9 | %\VignetteEncoding{UTF-8}
10 | ---
11 | 
12 | ``` {r echo=FALSE, results="hide"}
13 | rrqueue:::queue_clean(redux::hiredis(), "myqueue",
14 | purge=TRUE, stop_workers="kill")
15 | lang_output <- function(x, lang) {
16 | cat(c(sprintf("```%s", lang), x, "```"), sep="\n")
17 | }
18 | make_reader <- function(filename, lang="plain") {
19 | n <- 0
20 | force(filename)
21 | force(lang)
22 | function() {
23 | txt <- readLines(filename)
24 | if (n > 0) {
25 | txt <- txt[-seq_len(n)]
26 | }
27 | n <<- n + length(txt)
28 | lang_output(txt, lang)
29 | }
30 | }
31 | ```
32 | 
33 | In addition to passing tasks (and results) between a controller and
34 | workers, the controller can also send "messages" to workers. This
35 | vignette shows what the possible messages do.
36 | 
37 | In order to do this, we're going to need a queue and a worker:
38 | ``` {r }
39 | obj <- rrqueue::queue("myqueue", sources="myfuns.R")
40 | logfile <- tempfile()
41 | worker_id <- rrqueue::worker_spawn("myqueue", logfile)
42 | ```
43 | 
44 | On startup the worker log contains:
45 | ``` {r results="asis", echo=FALSE}
46 | reader <- make_reader(logfile)
47 | reader()
48 | ```
49 | 
50 | Because one of the main effects of messages is to print to the
51 | worker logfile, we'll print this fairly often.
52 | 
53 | ## Messages and responses
54 | 
55 | 1. The queue sends a message for one or more workers to process.
56 | The message has an *identifier* that is derived from the current
57 | time. Messages are written to a first-in-first-out queue, *per
58 | worker*, and are processed independently by workers who do not
59 | look to see if other workers have messages or are processing
60 | them.
61 | 
62 | 2. As soon as a worker has finished processing any current job it
63 | will process the message (it must wait to finish a current job
64 | but will not start any further jobs).
65 | 
66 | 3. Once the message has been processed (see below) a response will
67 | be written to a response list with the same identifier as the
68 | message.
69 | 
70 | ## `PING`
71 | 
72 | The `PING` message simply asks the worker to return `PONG`. It's
73 | useful for diagnosing communication issues because it does so
74 | little:
75 | ``` {r }
76 | message_id <- obj$send_message("PING")
77 | ```
78 | 
79 | The message id is going to be useful for getting responses:
80 | ``` {r }
81 | message_id
82 | ```
83 | 
84 | (this is derived from the current time, according to Redis, which is
85 | the central reference point of time for the whole system).
86 | 
87 | Wait a little while:
88 | ``` {r }
89 | Sys.sleep(.5)
90 | ```
91 | 
92 | ``` {r results="asis", echo=TRUE}
93 | reader()
94 | ```
95 | 
96 | The logfile prints:
97 | 
98 | 1. the request for the `PING` (`MESSAGE PING`)
99 | 2. the value `PONG`, printed to the R message stream
100 | 3. logging a response (`RESPONSE PONG`), which means that something is written to the response stream.
101 | 
102 | We can access the same bits of information in the worker log:
103 | ``` {r }
104 | obj$workers_log_tail(n=Inf)[-1]
105 | ```
106 | 
107 | This includes the `ALIVE` and `ENVIR` bits as the worker comes up.
108 | 
109 | Inspecting the logs is fine for interactive use, but it's often going to
110 | be more useful to poll for a response.
111 | 
112 | We already know that our worker has a response, but we can ask anyway:
113 | ``` {r }
114 | obj$has_responses(message_id)
115 | ```
116 | 
117 | Or inversely we can ask what messages a given worker has responses for:
118 | ``` {r }
119 | obj$response_ids(worker_id)
120 | ```
121 | 
122 | To fetch the responses from all workers it was sent to (always
123 | returning a named list):
124 | ``` {r }
125 | obj$get_responses(message_id)
126 | ```
127 | 
128 | or to fetch the response from a given worker:
129 | ``` {r }
130 | obj$get_response(message_id, worker_id)
131 | ```
132 | 
133 | The response can be deleted by passing `delete=TRUE` to this method:
134 | ``` {r }
135 | obj$get_response(message_id, worker_id, delete=TRUE)
136 | ```
137 | 
138 | after which requesting the response again will throw an error:
139 | ``` {r error=TRUE}
140 | obj$get_response(message_id, worker_id, delete=TRUE)
141 | ```
142 | 
143 | There is also a `wait` argument that lets you wait until a response
144 | is ready. The `slowdouble` command will take a few seconds, so to
145 | demonstrate:
146 | ``` {r }
147 | obj$enqueue(slowdouble(2))
148 | message_id <- obj$send_message("PING")
149 | obj$get_response(message_id, worker_id, delete=TRUE, wait=10)
150 | ```
151 | 
152 | Looking at the log will show what went on here:
153 | ``` {r }
154 | obj$workers_log_tail(n=4)[-1]
155 | ```
156 | 
157 | 1. A task is received
158 | 2. 2s later the task is completed
159 | 3. Then the message is received
160 | 4. Then, basically instantaneously, the message is responded to
161 | 
162 | However, because the message is only processed after the task is
163 | completed, the response takes a while to come back. Equivalently,
164 | from the worker log:
165 | 
166 | ``` {r results="asis", echo=FALSE}
167 | reader()
168 | ```
169 | 
170 | ## `ECHO`
171 | 
172 | This is basically like `PING` and not very interesting; it prints
173 | an arbitrary string to the log. It always returns `"OK"` as a
174 | response.
175 | 
176 | ``` {r }
177 | message_id <- obj$send_message("ECHO", "hello world!")
178 | obj$get_response(message_id, worker_id, delete=TRUE, wait=10)
179 | ```
180 | 
181 | ``` {r results="asis", echo=FALSE}
182 | reader()
183 | ```
184 | 
185 | ## `INFO`
186 | 
187 | The `INFO` command refreshes and returns the worker information.
188 | 
189 | We already have a copy of the worker info; it was created when the
190 | worker started up:
191 | ``` {r }
192 | obj$workers_info()[[worker_id]]
193 | ```
194 | 
195 | Note that the `envir` field is currently empty (`{}`) because when
196 | the worker started it did not know about any environments.
197 | 
198 | ``` {r }
199 | message_id <- obj$send_message("INFO")
200 | ```
201 | 
202 | Here's the new worker information, complete with an updated `envir`
203 | field:
204 | ``` {r }
205 | obj$get_response(message_id, worker_id, delete=TRUE, wait=10)
206 | ```
207 | 
208 | This has been updated on the database copy too:
209 | ``` {r }
210 | obj$workers_info()[[worker_id]]$envir
211 | ```
212 | 
213 | and the same information is printed to the worker log:
214 | ``` {r results="asis", echo=FALSE}
215 | reader()
216 | ```
217 | 
218 | ## `DIR`
219 | 
220 | This is useful for listing directory contents, similar to the `dir`
221 | function in R. However, because file *contents* are usually more
222 | interesting (e.g., when working out why something is not running on the
223 | remote machine), this is basically the result of passing the
224 | results of `dir` to `tools::md5sum` in order to get the md5sum of
225 | each file.
226 | ``` {r }
227 | message_id <- obj$send_message("DIR")
228 | obj$get_response(message_id, worker_id, delete=TRUE, wait=10)
229 | ```
230 | 
231 | Additional arguments to `dir` can be passed through:
232 | ``` {r }
233 | message_id <- obj$send_message("DIR", list(pattern="\\.R$"))
234 | obj$get_response(message_id, worker_id, delete=TRUE, wait=10)
235 | ```
236 | 
237 | If you pass invalid arguments to `dir`, then a reasonably
238 | helpful message should be generated:
239 | ``` {r }
240 | message_id <- obj$send_message("DIR", list(foo="bar"))
241 | obj$get_response(message_id, worker_id, delete=TRUE, wait=10)
242 | ```
243 | 
244 | (note that this does not generate an error locally, but you can
245 | test to see if it did throw an error by checking the class of the
246 | returned value).
247 | 
248 | and the same information is printed to the worker log:
249 | ``` {r results="asis", echo=FALSE}
250 | reader()
251 | ```
252 | 
253 | ## `PUSH`
254 | 
255 | The commands `PUSH` and `PULL` move files to and from the worker.
256 | The command is interpreted as an instruction to the worker, so
257 | `PUSH` pushes files from the worker into the database while `PULL`
258 | pulls files from the database into the worker. There are (or will be)
259 | preferable higher-level ways of dealing with this.
260 | 
261 | Things to be aware of here: Redis is an in-memory store and rrqueue
262 | is not at all aggressive about deleting objects. If you push a 1GB
263 | file into Redis things *will* go badly. There are no checks for
264 | this at present!
265 | 
266 | `PUSH` takes a vector of filenames as an argument. The response is
267 | not the file itself (how could it do that?) but instead the *hash*
268 | of that file. By the time the response is received the file
269 | contents are stored in the database and can be returned.
270 | ``` {r }
271 | message_id <- obj$send_message("PUSH", "myfuns.R")
272 | res <- obj$get_response(message_id, worker_id, delete=TRUE, wait=10)
273 | res
274 | ```
275 | 
276 | We can save the file into a temporary directory on the filesystem
277 | using the `files_unpack` method of `queue`:
278 | ``` {r }
279 | path <- obj$files_unpack(res)
280 | dir(path)
281 | ```
282 | 
283 | And the files have the expected hash:
284 | ``` {r }
285 | tools::md5sum(file.path(path, names(res)))
286 | ```
287 | 
288 | ``` {r results="asis", echo=FALSE}
289 | reader()
290 | ```
291 | 
292 | ## `PULL`
293 | 
294 | This is the inverse of `PUSH`: it takes files from the machine the
295 | queue is running on and copies them into the worker (from the view
296 | of the worker, the files in question are already in the database
297 | and it will "pull" them down locally).
298 | 
299 | First, we need to save files into the database. Let's rename the
300 | temporary file above and save that:
301 | ``` {r }
302 | file.rename(file.path(path, "myfuns.R"),
303 | "brandnewcode.R")
304 | res <- obj$files_pack("brandnewcode.R")
305 | res
306 | ```
307 | 
308 | Note that the hash here is the same as above: `rrqueue` can tell
309 | this is the same file even though it now has a different filename. Note
310 | also that filenames will be interpreted relative to the working
311 | directory, because the directory layout on the worker, beyond the
312 | working directory itself, could be arbitrarily different.
313 | 
314 | Now that the files have been packed, we can run the `PULL` command
315 | 
316 | (note that the `PULL` command *always* unpacks files into the
317 | worker's working directory):
318 | ``` {r }
319 | message_id <- obj$send_message("PULL")
320 | obj$get_response(message_id, worker_id, delete=TRUE, wait=10)
321 | ```
322 | 
323 | And the new file will be present in the directory:
324 | ``` {r }
325 | message_id <- obj$send_message("DIR", list(pattern="\\.R$"))
326 | obj$get_response(message_id, worker_id, delete=TRUE, wait=10)
327 | ```
328 | 
329 | ``` {r results="asis", echo=FALSE}
330 | reader()
331 | ```
332 | 
333 | ## `EVAL`
334 | 
335 | Evaluate an arbitrary R expression, passed as a string (*not* as
336 | any sort of unevaluated or quoted expression). This expression is
337 | evaluated in the global environment, which is *not* the environment
338 | in which queued code is evaluated.
339 | 
340 | ``` {r }
341 | message_id <- obj$send_message("EVAL", "1 + 1")
342 | obj$get_response(message_id, worker_id, delete=TRUE, wait=10)
343 | ```
344 | 
345 | We can delete the file created above:
346 | ``` {r }
347 | message_id <- obj$send_message("EVAL", "file.remove('brandnewcode.R')")
348 | obj$get_response(message_id, worker_id, delete=TRUE, wait=10)
349 | ```
350 | 
351 | ``` {r results="asis", echo=FALSE}
352 | reader()
353 | ```
354 | 
355 | This could be used to evaluate code that has side effects, such as
356 | installing packages. However, due to limitations in how R loads
357 | packages, the only way to update and reload a package is
358 | to restart the worker.
359 | 
360 | ## `STOP`
361 | 
362 | Stop sends a shutdown message to the worker. Generally you should
363 | prefer the `stop_workers` method, which uses `STOP` behind the
364 | scenes.
365 | 366 | ``` {r } 367 | message_id <- obj$send_message("STOP") 368 | obj$get_response(message_id, worker_id, delete=TRUE, wait=10) 369 | ``` 370 | 371 | ``` {r results="asis", echo=FALSE} 372 | reader() 373 | ``` 374 | -------------------------------------------------------------------------------- /vignettes/myfuns.R: -------------------------------------------------------------------------------- 1 | slowdouble <- function(x) { 2 | Sys.sleep(x) 3 | x * 2 4 | } 5 | -------------------------------------------------------------------------------- /vignettes/src/introduction.R: -------------------------------------------------------------------------------- 1 | ## --- 2 | ## title: "Introduction to rrqueue" 3 | ## author: "Rich FitzJohn" 4 | ## date: "`r Sys.Date()`" 5 | ## output: rmarkdown::html_vignette 6 | ## vignette: > 7 | ## %\VignetteIndexEntry{Introduction to rrqueue} 8 | ## %\VignetteEngine{knitr::rmarkdown} 9 | ## %\VignetteEncoding{UTF-8} 10 | ## --- 11 | 12 | ## # Overview 13 | 14 | ## `rrqueue` is a *distributed task queue* for R, implemented on top of [Redis](http://redis.io). 15 | ## At the cost of a little more work it allows for more flexible parallelisation than afforded by `mclapply`. 16 | ## The main goal is to support non-map style operations: submit some tasks, collect the completed results, 17 | ## queue more even while some tasks are still running. 18 | 19 | ## Other features include: 20 | 21 | ## * Low-level task submission / retrieval has a simple API so that asynchronous task queues can be created. 22 | ## * Objects representing tasks, workers, queues, etc can be queried. 23 | ## * While blocking `mclapply`-like functions are available, the package is designed to be non-blocking so that intermediate results can be used. 24 | ## * Automatic fingerprinting of environments so that code run on a remote machine will correspond to the code found locally. 25 | ## * Works well connecting to a Redis database running on the cloud (e.g., on an AWS machine over an ssh tunnel). 26 | ## * Local workers can be added to a remote pool, so long as everything can talk to the same Redis server. 27 | ## * The worker pool can be scaled at any time (up or down). 28 | ## * Basic fault tolerance, supporting re-queuing tasks lost on crashed workers. 29 | 30 | ## The basic workflow is: 31 | 32 | ## 1. Create a queue 33 | ## 2. Submit tasks to the queue 34 | ## 3. Start workers 35 | ## 4. Collect results 36 | 37 | ## The workers can be started at any time between 1-3, though they do need to be started before results can be collected. 38 | 39 | ## Documenting things that work asynchronously is difficult. This 40 | ## document gives a tutorial-style overview of working with rrqueue. 41 | 42 | ## # Getting started 43 | 44 | ## The queue and workers can be started in any order, but it's easiest 45 | ## to explain starting the queue first. 46 | 47 | ## Suppose we have some simulation code; it needs to be in a file that 48 | ## the queue can see. For now, I'll use the file `myfuns.R` which is 49 | ## the test code. It has a function in it called `slowdouble` that 50 | ## takes a number, sleeps for that many seconds, and then returns 51 | ## twice the number. It's useful for testing. 
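## For reference, the whole of the `myfuns.R` used here (the copy in
## `vignettes/myfuns.R`) is just:
##
## ```r
## slowdouble <- function(x) {
##   Sys.sleep(x)
##   x * 2
## }
## ```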
52 | 
53 | ##+ echo=FALSE, results="hide"
54 | rrqueue:::queue_clean(redux::hiredis(), "myqueue",
55 | purge=TRUE, stop_workers="kill")
56 | lang_output <- function(x, lang) {
57 | cat(c(sprintf("```%s", lang), x, "```"), sep="\n")
58 | }
59 | cpp_output <- function(x) lang_output(x, "c++")
60 | r_output <- function(x) lang_output(x, "r")
61 | yaml_output <- function(x) lang_output(x, "yaml")
62 | plain_output <- function(x) lang_output(x, "plain")
63 | 
64 | ## You'll also need a running Redis server. I have one operating with
65 | ## the default parameters, so this works:
66 | redux::hiredis()$PING()
67 | 
68 | ## Create a queue called "myqueue" and tell it to load the source file
69 | ## "myfuns.R". If it needed to load packages, then passing
70 | ## `packages=c("package1", "package2")` would indicate that workers
71 | ## would need to load those packages, too.
72 | obj <- rrqueue::queue("myqueue", sources="myfuns.R")
73 | 
74 | ## The message "creating new queue" here indicates that `rrqueue` did
75 | ## not find any previous queues in place. Queues are designed to be
76 | ## re-attachable, so we can immediately just do that:
77 | obj <- rrqueue::queue("myqueue", sources="myfuns.R")
78 | 
79 | ## The message also notes that we have no workers available, so no
80 | ## work is going to get done. But we can still queue some tasks.
81 | 
82 | ## # Queuing tasks
83 | 
84 | ## The simplest sort of task queuing is to pass an expression into `enqueue`:
85 | t <- obj$enqueue(1 + 1)
86 | 
87 | ## The expression is not evaluated but stored, and will be evaluated on
88 | ## the worker. Saving the result of this gives a `task` object which
89 | ## can be inspected.
90 | t
91 | 
92 | ## The expression stored in the task:
93 | t$expr()
94 | 
95 | ## The status of the task:
96 | t$status()
97 | 
98 | ## The result of the task, which will throw an error if we try to fetch it before the task has run:
99 | ##+ error=TRUE
100 | t$result()
101 | 
102 | ## And how long the task has been waiting:
103 | t$times()
104 | 
105 | ## Tasks can use local variables, too:
106 | x <- 10
107 | t2 <- obj$enqueue(x * 2)
108 | t2$expr()
109 | 
110 | ## And because using unevaluated expressions can be problematic,
111 | ## `rrqueue` has a standard-evaluation version (`enqueue_`) which takes
112 | ## either strings representing expressions or quoted expressions:
113 | ### obj$enqueue_("x / 2")
114 | obj$enqueue_(quote(x / 2))
115 | 
116 | ## Now we have three tasks:
117 | obj$tasks_list()
118 | 
119 | ## All the tasks are waiting to be run:
120 | obj$tasks_status()
121 | 
122 | ## We can get an overview of the tasks:
123 | obj$tasks_overview()
124 | 
125 | ## # Starting workers
126 | 
127 | ## `rrqueue` includes a script `rrqueue_worker` for starting workers
128 | ## from the command line (install with `rrqueue::install_scripts()`).
129 | ## Workers can also be started from within R using the `worker_spawn`
130 | ## function:
131 | logfile <- tempfile()
132 | wid <- rrqueue::worker_spawn("myqueue", logfile)
133 | ##+ echo=FALSE
134 | Sys.sleep(.5)
135 | 
136 | ## This function returns the *worker identifier*, which is also
137 | ## printed to the screen.
138 | 
139 | ## It's probably informative at this point to read the logfile of the
140 | ## worker to see what it did on startup:
141 | 
142 | ### See the RcppR6 tutorial for how to do this nicely.
143 | ##+ results="asis", echo=TRUE
144 | plain_output(readLines(logfile))
145 | 
146 | ## The worker first prints a lot of diagnostic information to the
147 | ## screen (or log file) indicating the name of the worker, the version
148 | ## of rrqueue, machine information, and special keys in the database
149 | ## where important information is stored.
150 | 
151 | ## Then, after broadcasting that it is awake (`ALIVE`), it detects that
152 | ## there is a controller on the queue and attempts to construct
153 | ## the environment that the controller wants (`r paste("ENVIR", obj$envir_id)`).
154 | 
155 | ## After that, there are a series of `TASK_START`, `EXPR`, and
156 | ## `TASK_COMPLETE` lines as each of the three tasks is processed.
157 | obj$tasks_status()
158 | 
159 | ## The times here give an indication of the rrqueue overhead; the
160 | ## running time of these simple expressions should be close to zero.
161 | obj$tasks_times()
162 | 
163 | ## The task handle created before can now give a result:
164 | t$result()
165 | 
166 | ## Similarly, results can be retrieved from the queue directly:
167 | obj$task_result(1)
168 | obj$task_result(2)
169 | obj$task_result(3)
170 | 
171 | ## The worker that we created can be seen here:
172 | obj$workers_list()
173 | 
174 | ## Queue a slower task, this time using the `slowdouble` function. This
175 | ## will take 1s:
176 | ##+ echo=TRUE
177 | t <- obj$enqueue(slowdouble(1))
178 | t$status()
179 | Sys.sleep(.3)
180 | t$status()
181 | Sys.sleep(1)
182 | t$status()
183 | t$result()
184 | 
185 | ## Again, times are available:
186 | t$times()
187 | 
188 | ## # Finishing up
189 | obj$stop_workers()
190 | 
191 | ##+ echo=FALSE
192 | Sys.sleep(.5)
193 | 
194 | ##+ results="asis", echo=FALSE
195 | plain_output(readLines(logfile))
196 | 
197 | ## The worker is now in the exited list:
198 | obj$workers_list_exited()
199 | 
200 | ## The full log from our worker (dropping the first column, which is
201 | ## the worker id and takes up valuable space here):
202 | obj$workers_log_tail(wid, Inf)[-1]
203 | 
204 | ### This is unfortunate, but I really need things cleaned up nicely so
205 | ### that I can get tests to pass in R CMD check. Will try to expose
206 | ### this function soon.
207 | ##+ echo=FALSE, results="hide"
208 | rrqueue:::queue_clean(obj$con, obj$queue_name, purge=TRUE)
209 | --------------------------------------------------------------------------------
/vignettes/src/messages.R:
--------------------------------------------------------------------------------
1 | ## ---
2 | ## title: "rrqueue messages"
3 | ## author: "Rich FitzJohn"
4 | ## date: "`r Sys.Date()`"
5 | ## output: rmarkdown::html_vignette
6 | ## vignette: >
7 | ## %\VignetteIndexEntry{rrqueue messages}
8 | ## %\VignetteEngine{knitr::rmarkdown}
9 | ## %\VignetteEncoding{UTF-8}
10 | ## ---
11 | 
12 | ##+ echo=FALSE, results="hide"
13 | rrqueue:::queue_clean(redux::hiredis(), "myqueue",
14 | purge=TRUE, stop_workers="kill")
15 | lang_output <- function(x, lang) {
16 | cat(c(sprintf("```%s", lang), x, "```"), sep="\n")
17 | }
18 | make_reader <- function(filename, lang="plain") {
19 | n <- 0
20 | force(filename)
21 | force(lang)
22 | function() {
23 | txt <- readLines(filename)
24 | if (n > 0) {
25 | txt <- txt[-seq_len(n)]
26 | }
27 | n <<- n + length(txt)
28 | lang_output(txt, lang)
29 | }
30 | }
31 | 
32 | ## In addition to passing tasks (and results) between a controller and
33 | ## workers, the controller can also send "messages" to workers. This
34 | ## vignette shows what the possible messages do.
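## Every message below follows the same round trip; as a minimal sketch
## (using only the two methods demonstrated throughout this vignette, with
## `obj` and `worker_id` created in the next chunk):
##
## ```r
## message_id <- obj$send_message("PING")           # queue a message for the worker
## obj$get_response(message_id, worker_id, wait=10) # collect its response
## ```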
35 | 
36 | ## In order to do this, we're going to need a queue and a worker:
37 | obj <- rrqueue::queue("myqueue", sources="myfuns.R")
38 | logfile <- tempfile()
39 | worker_id <- rrqueue::worker_spawn("myqueue", logfile)
40 | 
41 | ## On startup the worker log contains:
42 | ##+ results="asis", echo=FALSE
43 | reader <- make_reader(logfile)
44 | reader()
45 | 
46 | ## Because one of the main effects of messages is to print to the
47 | ## worker logfile, we'll print this fairly often.
48 | 
49 | ## ## Messages and responses
50 | 
51 | ## 1. The queue sends a message for one or more workers to process.
52 | ## The message has an *identifier* that is derived from the current
53 | ## time. Messages are written to a first-in-first-out queue, *per
54 | ## worker*, and are processed independently by workers who do not
55 | ## look to see if other workers have messages or are processing
56 | ## them.
57 | ## 
58 | ## 2. As soon as a worker has finished processing any current job it
59 | ## will process the message (it must wait to finish a current job
60 | ## but will not start any further jobs).
61 | ## 
62 | ## 3. Once the message has been processed (see below) a response will
63 | ## be written to a response list with the same identifier as the
64 | ## message.
65 | 
66 | ## ## `PING`
67 | 
68 | ## The `PING` message simply asks the worker to return `PONG`. It's
69 | ## useful for diagnosing communication issues because it does so
70 | ## little:
71 | message_id <- obj$send_message("PING")
72 | 
73 | ## The message id is going to be useful for getting responses:
74 | message_id
75 | 
76 | ## (this is derived from the current time, according to Redis, which is
77 | ## the central reference point of time for the whole system).
78 | 
79 | ## Wait a little while:
80 | Sys.sleep(.5)
81 | 
82 | ##+ results="asis", echo=TRUE
83 | reader()
84 | 
85 | ## The logfile prints:
86 | 
87 | ## 1. the request for the `PING` (`MESSAGE PING`)
88 | ## 2. the value `PONG`, printed to the R message stream
89 | ## 3. logging a response (`RESPONSE PONG`), which means that something is written to the response stream.
90 | 
91 | ## We can access the same bits of information in the worker log:
92 | obj$workers_log_tail(n=Inf)[-1]
93 | 
94 | ## This includes the `ALIVE` and `ENVIR` bits as the worker comes up.
95 | 
96 | ## Inspecting the logs is fine for interactive use, but it's often
97 | ## going to be more useful to poll for a response.
98 | 
99 | ## We already know that our worker has a response, but we can ask anyway:
100 | obj$has_responses(message_id)
101 | 
102 | ## Or inversely we can ask what messages a given worker has responses for:
103 | obj$response_ids(worker_id)
104 | 
105 | ## To fetch the responses from all workers it was sent to (always
106 | ## returning a named list):
107 | obj$get_responses(message_id)
108 | 
109 | ## or to fetch the response from a given worker:
110 | obj$get_response(message_id, worker_id)
111 | 
112 | ## The response can be deleted by passing `delete=TRUE` to this method:
113 | obj$get_response(message_id, worker_id, delete=TRUE)
114 | 
115 | ## after which requesting the response again will throw an error:
116 | ##+ error=TRUE
117 | obj$get_response(message_id, worker_id, delete=TRUE)
118 | 
119 | ## There is also a `wait` argument that lets you wait until a response
120 | ## is ready.
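## From the user's point of view, `wait` does roughly the polling shown
## below for you (a hand-rolled sketch only, not the actual
## implementation; it assumes `has_responses` returns a logical per
## worker):
##
## ```r
## while (!any(obj$has_responses(message_id))) {
##   Sys.sleep(0.1)  # poll until a worker has written its response
## }
## obj$get_response(message_id, worker_id)
## ```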
## The `slowdouble` command will take a few seconds, so to
121 | ## demonstrate:
122 | obj$enqueue(slowdouble(2))
123 | message_id <- obj$send_message("PING")
124 | obj$get_response(message_id, worker_id, delete=TRUE, wait=10)
125 | 
126 | ## Looking at the log will show what went on here:
127 | obj$workers_log_tail(n=4)[-1]
128 | 
129 | ## 1. A task is received
130 | ## 2. 2s later the task is completed
131 | ## 3. Then the message is received
132 | ## 4. Then, basically instantaneously, the message is responded to
133 | 
134 | ## However, because the message is only processed after the task is
135 | ## completed, the response takes a while to come back. Equivalently,
136 | ## from the worker log:
137 | 
138 | ##+ results="asis", echo=FALSE
139 | reader()
140 | 
141 | ## ## `ECHO`
142 | 
143 | ## This is basically like `PING` and not very interesting; it prints
144 | ## an arbitrary string to the log. It always returns `"OK"` as a
145 | ## response.
146 | 
147 | message_id <- obj$send_message("ECHO", "hello world!")
148 | obj$get_response(message_id, worker_id, delete=TRUE, wait=10)
149 | 
150 | ##+ results="asis", echo=FALSE
151 | reader()
152 | 
153 | ## ## `INFO`
154 | 
155 | ## The `INFO` command refreshes and returns the worker information.
156 | 
157 | ## We already have a copy of the worker info; it was created when the
158 | ## worker started up:
159 | obj$workers_info()[[worker_id]]
160 | 
161 | ## Note that the `envir` field is currently empty (`{}`) because when
162 | ## the worker started it did not know about any environments.
163 | 
164 | message_id <- obj$send_message("INFO")
165 | 
166 | ## Here's the new worker information, complete with an updated `envir`
167 | ## field:
168 | obj$get_response(message_id, worker_id, delete=TRUE, wait=10)
169 | 
170 | ## This has been updated on the database copy too:
171 | obj$workers_info()[[worker_id]]$envir
172 | 
173 | ## and the same information is printed to the worker log:
174 | ##+ results="asis", echo=FALSE
175 | reader()
176 | 
177 | ## ## `DIR`
178 | 
179 | ## This is useful for listing directory contents, similar to the `dir`
180 | ## function in R. However, because file *contents* are usually more
181 | ## interesting (e.g., when working out why something is not running on the
182 | ## remote machine), this is basically the result of passing the
183 | ## results of `dir` to `tools::md5sum` in order to get the md5sum of
184 | ## each file.
185 | message_id <- obj$send_message("DIR")
186 | obj$get_response(message_id, worker_id, delete=TRUE, wait=10)
187 | 
188 | ## Additional arguments to `dir` can be passed through:
189 | message_id <- obj$send_message("DIR", list(pattern="\\.R$"))
190 | obj$get_response(message_id, worker_id, delete=TRUE, wait=10)
191 | 
192 | ## If you pass invalid arguments to `dir`, then a reasonably
193 | ## helpful message should be generated:
194 | message_id <- obj$send_message("DIR", list(foo="bar"))
195 | obj$get_response(message_id, worker_id, delete=TRUE, wait=10)
196 | 
197 | ## (note that this does not generate an error locally, but you can
198 | ## test to see if it did throw an error by checking the class of the
199 | ## returned value).
200 | 
201 | ## and the same information is printed to the worker log:
202 | ##+ results="asis", echo=FALSE
203 | reader()
204 | 
205 | ## ## `PUSH`
206 | 
207 | ## The commands `PUSH` and `PULL` move files to and from the worker.
208 | ## The command is interpreted as an instruction to the worker, so
209 | ## `PUSH` pushes files from the worker into the database while `PULL`
210 | ## pulls files from the database into the worker. There are (or will be)
211 | ## preferable higher-level ways of dealing with this.
212 | 
213 | ## Things to be aware of here: Redis is an in-memory store and rrqueue
214 | ## is not at all aggressive about deleting objects. If you push a 1GB
215 | ## file into Redis things *will* go badly. There are no checks for
216 | ## this at present!
217 | 
218 | ## `PUSH` takes a vector of filenames as an argument. The response is
219 | ## not the file itself (how could it do that?) but instead the *hash*
220 | ## of that file. By the time the response is received the file
221 | ## contents are stored in the database and can be returned.
222 | message_id <- obj$send_message("PUSH", "myfuns.R")
223 | res <- obj$get_response(message_id, worker_id, delete=TRUE, wait=10)
224 | res
225 | 
226 | ## We can save the file into a temporary directory on the filesystem
227 | ## using the `files_unpack` method of `queue`:
228 | path <- obj$files_unpack(res)
229 | dir(path)
230 | 
231 | ## And the files have the expected hash:
232 | tools::md5sum(file.path(path, names(res)))
233 | 
234 | ##+ results="asis", echo=FALSE
235 | reader()
236 | 
237 | ## ## `PULL`
238 | 
239 | ## This is the inverse of `PUSH`: it takes files from the machine the
240 | ## queue is running on and copies them into the worker (from the view
241 | ## of the worker, the files in question are already in the database
242 | ## and it will "pull" them down locally).
243 | 
244 | ## First, we need to save files into the database. Let's rename the
245 | ## temporary file above and save that:
246 | file.rename(file.path(path, "myfuns.R"),
247 | "brandnewcode.R")
248 | res <- obj$files_pack("brandnewcode.R")
249 | res
250 | 
251 | ## Note that the hash here is the same as above: `rrqueue` can tell
252 | ## this is the same file even though it now has a different filename. Note
253 | ## also that filenames will be interpreted relative to the working
254 | ## directory, because the directory layout on the worker, beyond the
255 | ## working directory itself, could be arbitrarily different.
256 | 
257 | ## Now that the files have been packed, we can run the `PULL` command
258 | 
259 | ## (note that the `PULL` command *always* unpacks files into the
260 | ## worker's working directory):
261 | message_id <- obj$send_message("PULL")
262 | obj$get_response(message_id, worker_id, delete=TRUE, wait=10)
263 | 
264 | ## And the new file will be present in the directory:
265 | message_id <- obj$send_message("DIR", list(pattern="\\.R$"))
266 | obj$get_response(message_id, worker_id, delete=TRUE, wait=10)
267 | 
268 | ##+ results="asis", echo=FALSE
269 | reader()
270 | 
271 | ## ## `EVAL`
272 | 
273 | ## Evaluate an arbitrary R expression, passed as a string (*not* as
274 | ## any sort of unevaluated or quoted expression). This expression is
275 | ## evaluated in the global environment, which is *not* the environment
276 | ## in which queued code is evaluated.
277 | 278 | message_id <- obj$send_message("EVAL", "1 + 1") 279 | obj$get_response(message_id, worker_id, delete=TRUE, wait=10) 280 | 281 | ## We can delete the file created above: 282 | message_id <- obj$send_message("EVAL", "file.remove('brandnewcode.R')") 283 | obj$get_response(message_id, worker_id, delete=TRUE, wait=10) 284 | 285 | ##+ results="asis", echo=FALSE 286 | reader() 287 | 288 | ## This could be used to evaluate code that has side effects, such as 289 | ## installing packages. However, due to limitations with how R loads 290 | ## packages the only way to update and reload a package is going to be 291 | ## to restart the worker. 292 | 293 | ## ## `STOP` 294 | 295 | ## Stop sends a shutdown message to the worker. Generally you should 296 | ## prefer the `stop_workers` method, which uses `STOP` behind the 297 | ## scenes. 298 | 299 | message_id <- obj$send_message("STOP") 300 | obj$get_response(message_id, worker_id, delete=TRUE, wait=10) 301 | 302 | ##+ results="asis", echo=FALSE 303 | reader() 304 | -------------------------------------------------------------------------------- /vignettes/src/myfuns.R: -------------------------------------------------------------------------------- 1 | ../myfuns.R --------------------------------------------------------------------------------