├── .Rbuildignore ├── .github └── workflows │ └── check-standard.yaml ├── .gitignore ├── .gitmodules ├── DESCRIPTION ├── LICENSE ├── Makefile ├── NAMESPACE ├── NEWS.md ├── R ├── Q.r ├── Q_rows.r ├── RcppExports.R ├── check_args.r ├── chunk.r ├── clustermq-package.r ├── foreach.r ├── master.r ├── pool.r ├── qsys.r ├── qsys_local.r ├── qsys_lsf.r ├── qsys_multicore.r ├── qsys_multiprocess.r ├── qsys_sge.r ├── qsys_slurm.r ├── qsys_ssh.r ├── ssh_proxy.r ├── summarize_result.r ├── util.r ├── work_chunk.r ├── worker.r ├── workers.r └── zzz.r ├── README.md ├── _pkgdown.yml ├── cleanup ├── configure ├── configure.win ├── inst ├── CITATION ├── LSF.tmpl ├── PBS.tmpl ├── SGE.tmpl ├── SLURM.tmpl ├── SSH.tmpl └── TORQUE.tmpl ├── man ├── LOCAL.Rd ├── LSF.Rd ├── MULTICORE.Rd ├── MULTIPROCESS.Rd ├── Pool.Rd ├── Q.Rd ├── QSys.Rd ├── Q_rows.Rd ├── SGE.Rd ├── SLURM.Rd ├── SSH.Rd ├── check_args.Rd ├── chunk.Rd ├── clustermq-package.Rd ├── cmq_foreach.Rd ├── dot-onAttach.Rd ├── dot-onLoad.Rd ├── fill_template.Rd ├── host.Rd ├── master.Rd ├── msg_fmt.Rd ├── register_dopar_cmq.Rd ├── ssh_proxy.Rd ├── summarize_result.Rd ├── vec_lookup.Rd ├── work_chunk.Rd ├── worker.Rd ├── workers.Rd └── wrap_error.Rd ├── src ├── CMQMaster.cpp ├── CMQMaster.h ├── CMQProxy.cpp ├── CMQProxy.h ├── CMQWorker.cpp ├── CMQWorker.h ├── Makevars.in ├── Makevars.win ├── RcppExports.cpp ├── common.cpp ├── common.h ├── util.cpp └── util │ ├── build_libzmq.sh │ ├── patch_libzmq.sh │ ├── test_cpp11.cpp │ └── test_libzmq.c ├── tests ├── bin │ ├── bkill │ ├── bsub │ ├── fake_scheduler.sh │ ├── qdel │ ├── qsub │ ├── sbatch │ └── scancel ├── testthat.R └── testthat │ ├── helper-util.r │ ├── test-0-util.r │ ├── test-1-check_args.r │ ├── test-2-worker.r │ ├── test-3-work_chunk.r │ ├── test-4-pool.r │ ├── test-5-queue.r │ ├── test-6-queue_impl.r │ ├── test-7-ssh_proxy.r │ └── test-8-foreach.r ├── tools └── winlibs.R └── vignettes ├── faq.Rmd ├── technicaldocs.Rmd └── userguide.Rmd /.Rbuildignore: 
-------------------------------------------------------------------------------- 1 | ^doc$ 2 | ^configure.backup$ 3 | ^Meta$ 4 | ^doc(s)?$ 5 | \.gitignore 6 | backup 7 | ^CHANGES\.md$ 8 | ^Makefile$$ 9 | ^\.travis\.yml$ 10 | ^\.travis-ssh\.sh$ 11 | ^CITATION$ 12 | ^_pkgdown\.yml$ 13 | clustermq_[0-9.]+\.tar\.gz 14 | ^\.github$ 15 | ^src/libzmq/\. 16 | ^src/libzmq/config/.*\.m4$ 17 | ^src/libzmq/build_qnx/.*Makefile$ 18 | ^src/libzmq/builds/openwrt/Makefile$ 19 | ^src/libzmq/Makefile$ 20 | ^src/libzmq/CMakeFiles$ 21 | ^src/libzmq/external/wepoll$ 22 | ^src/libzmq/src/tweetnacl.c$ 23 | ^src/cppzmq/\. 24 | ^windows$ 25 | -------------------------------------------------------------------------------- /.github/workflows/check-standard.yaml: -------------------------------------------------------------------------------- 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples 2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help 3 | on: 4 | push: 5 | branches-ignore: gh-pages 6 | pull_request: 7 | branches-ignore: gh-pages 8 | schedule: 9 | - cron: "0 0 * * 2" 10 | 11 | name: R-check 12 | 13 | jobs: 14 | R-CMD-check: 15 | runs-on: ${{ matrix.config.os }} 16 | 17 | name: ${{ matrix.config.os }} (${{ matrix.config.r }}) 18 | 19 | strategy: 20 | fail-fast: false 21 | matrix: 22 | config: 23 | - {os: windows-latest, r: 'release'} 24 | - {os: windows-latest, r: 'devel'} 25 | - {os: macOS-latest, r: 'release'} 26 | - {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'} 27 | - {os: ubuntu-latest, r: 'release'} 28 | - {os: ubuntu-latest, r: 'oldrel-1'} 29 | 30 | env: 31 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 32 | R_KEEP_PKG_SOURCE: yes 33 | 34 | steps: 35 | - uses: actions/checkout@v3 36 | with: 37 | submodules: recursive 38 | 39 | - uses: r-lib/actions/setup-pandoc@v2 40 | 41 | - uses: r-lib/actions/setup-r@v2 42 | with: 43 | r-version: ${{ matrix.config.r }} 44 | http-user-agent: ${{ 
matrix.config.http-user-agent }} 45 | use-public-rspm: true 46 | 47 | - uses: r-lib/actions/setup-r-dependencies@v2 48 | with: 49 | extra-packages: any::rcmdcheck 50 | needs: check 51 | 52 | - name: Install system dependencies (macOS) 53 | if: runner.os == 'macOS' 54 | run: | 55 | brew update 56 | brew install coreutils automake libtool zeromq 57 | 58 | - name: Set up local key-based SSH 59 | if: runner.os != 'Windows' # GHA does not allow empty passphrase on Windows 60 | run: | 61 | ssh-keygen -t rsa -f ~/.ssh/id_rsa -N "" -q 62 | cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys 63 | ssh-keyscan -t rsa 127.0.0.1 >> ~/.ssh/known_hosts 64 | echo "Host 127.0.0.1" >> ~/.ssh/config 65 | echo " IdentityFile ~/.ssh/id_rsa" >> ~/.ssh/config 66 | echo "$(hostname) 127.0.0.1" >> ~/.hosts 67 | chmod og-rw ~ 68 | 69 | - name: Install R package and add paths 70 | if: runner.os != 'Windows' 71 | run: | 72 | R CMD INSTALL . 73 | echo '.libPaths("~/work/_temp/Library")' >> ~/.Rprofile # cmq package in R 74 | echo 'options(clustermq.scheduler="multicore")' >> ~/.Rprofile 75 | echo "$(pwd)/tests/bin" >> $GITHUB_PATH # local cmq 76 | sed -i "1iexport PATH=$(pwd)/tests/bin:\$PATH" ~/.bashrc || true # ssh cmq 77 | 78 | - name: Query capabilities 79 | if: runner.os != 'Windows' # does not recognize -e 80 | run: | 81 | set -x 82 | which R 83 | which sbatch || echo "sbatch not found" 84 | ssh 127.0.0.1 'which R; which sbatch; echo $PATH' || true 85 | ssh 127.0.0.1 'R --slave --no-restore -e ".libPaths()"' || true 86 | R --slave --no-restore -e "message(clustermq:::qsys_default)" || true 87 | ssh 127.0.0.1 'R --slave --no-restore -e "message(clustermq:::qsys_default)"' || true 88 | 89 | - name: make test 90 | if: runner.os != 'Windows' 91 | run: | 92 | timeout 300 make test 93 | 94 | - uses: r-lib/actions/check-r-package@v2 95 | with: 96 | upload-snapshots: true 97 | 98 | - name: Print logs if failure 99 | if: failure() && runner.os != 'Windows' 100 | run: | 101 | set -x 102 | cat ~/*.log 
|| true 103 | cat ~/worker.log || true 104 | cat ~/ssh_proxy.log || true 105 | cat clustermq.Rcheck/tests/* || true 106 | 107 | - name: Upload check results 108 | if: failure() 109 | uses: actions/upload-artifact@main 110 | with: 111 | name: ${{ runner.os }}-r${{ matrix.config.r }}-results 112 | path: check 113 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | Meta 2 | doc 3 | docs 4 | backup 5 | inst/doc 6 | vignettes/*.R 7 | vignettes/*.md 8 | vignettes/.build.timestamp 9 | src/*.o 10 | src/*.so 11 | *.swp 12 | *.tar.gz 13 | *.log 14 | *.html 15 | clustermq.Rcheck 16 | src/Makevars 17 | .github/*.html 18 | windows 19 | /doc/ 20 | /Meta/ 21 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "src/libzmq"] 2 | path = src/libzmq 3 | url = https://github.com/zeromq/libzmq.git 4 | [submodule "src/cppzmq"] 5 | path = src/cppzmq 6 | url = https://github.com/zeromq/cppzmq.git 7 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: clustermq 2 | Title: Evaluate Function Calls on HPC Schedulers (LSF, SGE, SLURM, PBS/Torque) 3 | Version: 0.9.9 4 | Authors@R: c( 5 | person('Michael', 'Schubert', email='mschu.dev@gmail.com', 6 | role = c('aut', 'cre', 'cph'), 7 | comment = c(ORCID='0000-0002-6862-5221')), 8 | person('ZeroMQ authors', 9 | role = c('aut', 'cph'), 10 | comment = "source files in 'src/libzmq' and 'src/cppzmq'")) 11 | Maintainer: Michael Schubert 12 | Description: Evaluate arbitrary function calls using workers on HPC schedulers 13 | in single line of code. All processing is done on the network without 14 | accessing the file system. 
Remote schedulers are supported via SSH. 15 | URL: https://mschubert.github.io/clustermq/ 16 | BugReports: https://github.com/mschubert/clustermq/issues 17 | SystemRequirements: ZeroMQ (libzmq) >= 4.3.0 (optional; otherwise bundled) 18 | Depends: 19 | R (>= 3.6.2) 20 | LinkingTo: Rcpp 21 | Imports: 22 | methods, 23 | globals, 24 | progress, 25 | R6, 26 | Rcpp, 27 | utils 28 | License: Apache License (== 2.0) | file LICENSE 29 | Encoding: UTF-8 30 | Suggests: 31 | BiocParallel, 32 | callr, 33 | devtools, 34 | foreach, 35 | iterators, 36 | knitr, 37 | parallel, 38 | rmarkdown, 39 | roxygen2 (>= 5.0.0), 40 | testthat, 41 | tools 42 | VignetteBuilder: knitr 43 | Roxygen: list(r6 = FALSE) 44 | RoxygenNote: 7.3.2 45 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 
26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. 
For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. 
If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. 
You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. 
(Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [yyyy] [name of copyright owner] 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 203 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: all 2 | all: rcpp doc vignettes 3 | 4 | R = R --no-save --no-restore -e 5 | BIN = $(abspath $(lastword $(MAKEFILE_LIST))/../tests/bin) 6 | PKGVER = $(shell grep Version: < DESCRIPTION | sed "s/Version: //") 7 | 8 | .PHONY: test 9 | test: 10 | PATH=$(BIN):$$PATH $(R) "devtools::test()" 11 | 12 | .PHONY: check 13 | check: 14 | PATH=$(BIN):$$PATH $(R) "devtools::check()" 15 | 16 | .PHONY: rcpp 17 | rcpp: 18 | $(R) "Rcpp::compileAttributes()" 19 | 20 | rmd_files=$(wildcard vignettes/*.rmd) 21 | knit_results=$(patsubst vignettes/%.rmd,inst/doc/%.md,$(rmd_files)) 22 | 23 | .PHONY: vignettes 24 | vignettes: inst/doc ${knit_results} 25 | $(R) "library(knitr); library(devtools); build_vignettes()" 26 | 27 | inst/doc: 28 | mkdir -p $@ 29 | 30 | inst/doc/%.md: vignettes/%.rmd 31 | $(R) "knitr::knit('$<', '$@')" 32 | 33 | .PHONY: 
doc 34 | doc: 35 | $(R) "devtools::document()" 36 | 37 | .PHONY: package 38 | package: rcpp doc vignettes 39 | ./src/util/patch_libzmq.sh 40 | PATH=$(BIN):$$PATH R CMD build . 41 | R CMD check --as-cran clustermq_$(PKGVER).tar.gz 42 | 43 | .PHONY: deploy 44 | deploy: 45 | $(R) "pkgdown::deploy_to_branch()" 46 | 47 | .PHONY: clean 48 | clean: 49 | ${RM} -r inst/doc 50 | ${RM} -r man 51 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | export(Q) 4 | export(Q_rows) 5 | export(register_dopar_cmq) 6 | export(workers) 7 | import(Rcpp) 8 | useDynLib(clustermq) 9 | -------------------------------------------------------------------------------- /NEWS.md: -------------------------------------------------------------------------------- 1 | # clustermq 0.9.9 2 | 3 | * The Windows binary no longer includes the disconnect monitor 4 | * Fix more CRAN warnings and test timeouts 5 | 6 | # clustermq 0.9.8 7 | 8 | * Suppress R6 clonable message 9 | * Fix CRAN warning about `cppzmq` deprecated declaration 10 | 11 | # clustermq 0.9.7 12 | 13 | * Fix a bug where `BiocGenerics` could break template filling (#337) 14 | * Remove deprecated automatic array splitting in `Q` 15 | 16 | # clustermq 0.9.6 17 | 18 | * Large common data size is now reported correctly (#336) 19 | * Template filling will no longer convert large numbers to scientific format 20 | * Common data will no longer be duplicated when sending to workers 21 | 22 | # clustermq 0.9.5 23 | 24 | * Fix a bug where an outdated system `libzmq` led to compilation errors (#327) 25 | * New option `clustermq.ports` specifies eligible port range (#328) @michaelmayer2 26 | 27 | # clustermq 0.9.4 28 | 29 | * Fix a bug where worker stats were shown as `NA` (#325) 30 | * Worker API: `env()` now visibly lists environment if called without arguments 31 | 32 | # 
clustermq 0.9.3 33 | 34 | * Fix a bug where `BiocParallel` did not export required objects (#302) 35 | * Fix a bug where already finished workers were killed (#307) 36 | * Fix a bug where worker results and stats could be garbage collected (#324) 37 | * There is now an FAQ vignette with answers to frequently asked questions 38 | * Worker API: `send()` now reports a call identifier that `current()` tracks 39 | 40 | # clustermq 0.9.2 41 | 42 | * Fix a bug where SSH proxy would not cache data properly (#320) 43 | * Fix a bug where `max_calls_worker` was not respected (#322) 44 | * Local parallelism (`multicore`, `multiprocess`) again uses local IP (#321) 45 | * Worker API: `info()` now also returns current worker and number of calls 46 | 47 | # clustermq 0.9.1 48 | 49 | * Disconnect monitor (libzmq with `-DZMQ_BUILD_DRAFT_API=1`) is now optional (#317) 50 | * Fix a bug where worker shutdown notifications can cause a crash (#306, #308, #310) 51 | * Fix a bug where template values were not filled correctly (#309) 52 | * Fix a bug where using `Rf_error` lead to improper cleanup of resources (#311) 53 | * Fix a bug where maximum worker timeout was multiplied and led to undefined behavior 54 | * Fix a bug where ZeroMQ's `-Werror` flag led to compilation issues on M1 Mac 55 | * Fix a bug where SSH tests could error with timeout on high load 56 | * Worker API: `CMQMaster` now needs to know `add_pending_workers(n)` 57 | * Worker API: status report `info()` now displays properly 58 | 59 | # clustermq 0.9.0 60 | 61 | #### Features 62 | 63 | * Reuse of common data is now supported (#154) 64 | * Jobs now error instead of stalling upon unexpected worker disconnect (#150) 65 | * Workers now error if they can not establish a connection within a time limit 66 | * Error if `n_jobs` and `max_calls_worker` provide insufficient call slots (#258) 67 | * Request 1 GB by default in SGE template (#298) @nickholway 68 | * Error and warning summary now orders by index and severity (#304) 69 | 
* A call can have multiple warnings forwarded, not only last 70 | 71 | #### Bugfix 72 | 73 | * Fix bug where max memory reporting by `gc()` may be in different column (#240) 74 | * Fix passing numerical `job_id` to `qdel` in PBS (#265) 75 | * The job port/id pool is now used properly upon binding failure (#270) @luwidmer 76 | * Common data size warning is now only displayed when exceeding limits (#287) 77 | 78 | #### Internal 79 | 80 | * Complete rewrite of the worker API 81 | * We no longer depend on the `purrr` package 82 | 83 | # clustermq 0.8.95 84 | 85 | * We are now using _ZeroMQ_ via `Rcpp` in preparation for `v0.9` (#151) 86 | * New `multiprocess` backend via `callr` instead of forking (#142, #197) 87 | * Sending data on sockets is now blocking to avoid excess memory usage (#161) 88 | * `multicore`, `multiprocess` schedulers now support logging (#169) 89 | * New option `clustermq.host` can specify host IP or network interface name (#170) 90 | * Template filling will now raise error for missing keys (#174, #198) 91 | * Workers failing with large common data is improved (fixed?) 
(#146, #179, #191) 92 | * Local connections are now routed via `127.0.0.1` instead of `localhost` (#192) 93 | * Submit messages are different between local, multicore and HPC (#196) 94 | * Functions exported by `foreach` now have their environment stripped (#200) 95 | * Deprecation of `log_worker=T/F` argument is rescinded 96 | 97 | # clustermq 0.8.9 98 | 99 | * New option `clustermq.ssh.timeout` for SSH proxy startup (#157) @brendanf 100 | * New option `clustermq.worker.timeout` for delay before worker shutdown (#188) 101 | * Fixed PBS/Torque docs, template and cleanup (#184, #186) @mstr3336 102 | * Warning if common data is very large, set by `clustermq.data.warning` (#189) 103 | 104 | # clustermq 0.8.8 105 | 106 | * `Q`, `Q_rows` have new arguments `verbose` (#111) and `pkgs` (#144) 107 | * `foreach` backend now uses its dedicated API where possible (#143, #144) 108 | * Number and size of objects common to all calls now work properly 109 | * Templates are filled internally and no longer depend on `infuser` package 110 | 111 | # clustermq 0.8.7 112 | 113 | * `Q` now has `max_calls_worker` argument to avoid walltime (#110) 114 | * Submission messages now list size of common data (drake#800) 115 | * All default templates now have an optional `cores` per job field (#123) 116 | * `foreach` now treats `.export` (#124) and `.combine` (#126) correctly 117 | * New option `clustermq.error.timeout` to not wait for clean shutdown (#134) 118 | * SSH command is now specified via a template file (#122) 119 | * SSH will now forward errors to the local process (#135) 120 | * The Wiki is deprecated, use https://mschubert.github.io/clustermq/ instead 121 | 122 | # clustermq 0.8.6 123 | 124 | * Progress bar is now shown before any workers start (#107) 125 | * Socket connections are now authenticated using a session password (#125) 126 | * Marked internal functions with `@keywords internal` 127 | * Added vignettes for the _User Guide_ and _Technical Documentation_ 128 | 129 | # 
clustermq 0.8.5 130 | 131 | * Added experimental support as parallel foreach backend (#83) 132 | * Moved templates to package `inst/` directory (#85) 133 | * Added `send_call` to worker to evaluate arbitrary expressions (drake#501; #86) 134 | * Option `clustermq.scheduler` is now respected if set after package load (#88) 135 | * System interrupts are now handled correctly (rzmq#44; #73, #93, #97) 136 | * Number of workers running/total is now shown in progress bar (#98) 137 | * Unqualified (short) host names are now resolved by default (#104) 138 | 139 | # clustermq 0.8.4 140 | 141 | * Fix error for `qsys$reusable` when using `n_jobs=0`/local processing (#75) 142 | * Scheduler-specific templates are deprecated. Use `clustermq.template` instead 143 | * Allow option `clustermq.defaults` to fill default template values (#71) 144 | * Errors in worker processing are now shut down cleanly (#67) 145 | * Progress bar now shows estimated time remaining (#66) 146 | * Progress bar now also shown when processing locally 147 | * Memory summary now adds estimated memory of R session (#69) 148 | 149 | # clustermq 0.8.3 150 | 151 | * Support `rettype` for function calls where return type is known (#59) 152 | * Reduce memory requirements by processing results when we receive them 153 | * Fix a bug where cleanup, `log_worker` flag were not working for SGE/SLURM 154 | 155 | # clustermq 0.8.2 156 | 157 | * Fix a bug where never-started jobs are not cleaned up 158 | * Fix a bug where tests leave processes if port binding fails (#60) 159 | * Multicore no longer prints worker debug messages (#61) 160 | 161 | # clustermq 0.8.1 162 | 163 | * Fix performance issues for a high number of function calls (#56) 164 | * Fix bug where multicore workers were not shut down properly (#58) 165 | * Fix default templates for SGE, LSF and SLURM (misplaced quote) 166 | 167 | # clustermq 0.8.0 168 | 169 | #### Features 170 | 171 | * Templates changed: `clustermq:::worker` now takes only master as argument 
172 | * Creating `workers` is now separated from `Q`, enabling worker reuse (#45) 173 | * Objects in the function environment must now be `export`ed explicitly (#47) 174 | * Added `multicore` qsys using the `parallel` package (#49) 175 | * New function `Q_rows` using data.frame rows as iterated arguments (#43) 176 | * Job summary will now report max memory as reported by `gc` (#18) 177 | 178 | #### Bugfix 179 | 180 | * Fix a bug where copies of `common_data` are collected by gc too slowly (#19) 181 | 182 | #### Internal 183 | 184 | * Messages on the master are now processed in threads (#42) 185 | * Jobs will now be submitted as array if possible 186 | 187 | # clustermq 0.7.0 188 | 189 | * Initial release on CRAN 190 | -------------------------------------------------------------------------------- /R/Q.r: -------------------------------------------------------------------------------- 1 | #' Queue function calls on the cluster 2 | #' 3 | #' @param fun A function to call 4 | #' @param ... Objects to be iterated in each function call 5 | #' @param const A list of constant arguments passed to each function call 6 | #' @param export List of objects to be exported to the worker 7 | #' @param pkgs Character vector of packages to load on the worker 8 | #' @param seed A seed to set for each function call 9 | #' @param memory Short for `template=list(memory=value)` 10 | #' @param template A named list of values to fill in the scheduler template 11 | #' @param n_jobs The number of jobs to submit; upper limit of jobs if job_size 12 | #' is given as well 13 | #' @param job_size The number of function calls per job 14 | #' @param rettype Return type of function call (vector type or 'list') 15 | #' @param fail_on_error If an error occurs on the workers, continue or fail? 
16 | #' @param workers Optional instance of QSys representing a worker pool 17 | #' @param log_worker Write a log file for each worker 18 | #' @param chunk_size Number of function calls to chunk together 19 | #' defaults to 100 chunks per worker or max. 10 kb per chunk 20 | #' @param timeout Maximum time in seconds to wait for worker (default: Inf) 21 | #' @param max_calls_worker Maxmimum number of chunks that will be sent to one worker 22 | #' @param verbose Print status messages and progress bar (default: TRUE) 23 | #' @return A list of whatever `fun` returned 24 | #' @export 25 | #' 26 | #' @examples 27 | #' \dontrun{ 28 | #' # Run a simple multiplication for numbers 1 to 3 on a worker node 29 | #' fx = function(x) x * 2 30 | #' Q(fx, x=1:3, n_jobs=1) 31 | #' # list(2,4,6) 32 | #' 33 | #' # Run a mutate() call in dplyr on a worker node 34 | #' iris %>% 35 | #' mutate(area = Q(`*`, e1=Sepal.Length, e2=Sepal.Width, n_jobs=1)) 36 | #' # iris with an additional column 'area' 37 | #' } 38 | Q = function(fun, ..., const=list(), export=list(), pkgs=c(), seed=128965, 39 | memory=NULL, template=list(), n_jobs=NULL, job_size=NULL, 40 | rettype="list", fail_on_error=TRUE, workers=NULL, log_worker=FALSE, 41 | chunk_size=NA, timeout=Inf, max_calls_worker=Inf, verbose=TRUE) { 42 | 43 | df = check_args(fun, list(...), const) 44 | 45 | Q_rows(fun = fun, 46 | df = df, 47 | const = const, 48 | export = export, 49 | pkgs = pkgs, 50 | seed = seed, 51 | memory = memory, 52 | template = template, 53 | n_jobs = n_jobs, 54 | job_size = job_size, 55 | rettype = rettype, 56 | fail_on_error = fail_on_error, 57 | workers = workers, 58 | log_worker = log_worker, 59 | chunk_size = chunk_size, 60 | timeout = timeout, 61 | max_calls_worker = max_calls_worker, 62 | verbose = verbose) 63 | } 64 | -------------------------------------------------------------------------------- /R/Q_rows.r: -------------------------------------------------------------------------------- 1 | #' Queue function calls 
defined by rows in a data.frame 2 | #' 3 | #' @param df data.frame with iterated arguments 4 | #' @inheritParams Q 5 | #' @export 6 | #' 7 | #' @examples 8 | #' \dontrun{ 9 | #' # Run a simple multiplication for data frame columns x and y on a worker node 10 | #' fx = function (x, y) x * y 11 | #' df = data.frame(x = 5, y = 10) 12 | #' Q_rows(df, fx, job_size = 1) 13 | #' # [1] 50 14 | #' 15 | #' # Q_rows also matches the names of a data frame with the function arguments 16 | #' fx = function (x, y) x - y 17 | #' df = data.frame(y = 5, x = 10) 18 | #' Q_rows(df, fx, job_size = 1) 19 | #' # [1] 5 20 | #' } 21 | Q_rows = function(df, fun, const=list(), export=list(), pkgs=c(), seed=128965, 22 | memory=NULL, template=list(), n_jobs=NULL, job_size=NULL, 23 | rettype="list", fail_on_error=TRUE, workers=NULL, log_worker=FALSE, 24 | chunk_size=NA, timeout=Inf, max_calls_worker=Inf, verbose=TRUE) { 25 | 26 | # check if call args make sense 27 | if (!is.null(memory)) 28 | template$memory = memory 29 | if (!is.null(template$memory) && template$memory < 50) 30 | stop("Worker needs about 23 MB overhead, set memory>=50") 31 | if (is.na(seed) || length(seed) != 1) 32 | stop("'seed' needs to be a length-1 integer") 33 | 34 | fun = match.fun(fun) 35 | df = as.data.frame(df, check.names=FALSE, stringsAsFactors=FALSE) 36 | n_calls = nrow(df) 37 | seed = as.integer(seed) 38 | check_args(fun, df, const) 39 | 40 | # set up workers if none provided 41 | if (is.null(workers)) { 42 | qsys_id = toupper(getOption("clustermq.scheduler", qsys_default)) 43 | if (!is.null(n_jobs) && n_jobs == 0) 44 | qsys_id = "LOCAL" 45 | if (qsys_id != "LOCAL" && is.null(n_jobs) && is.null(job_size)) 46 | stop("n_jobs or job_size is required") 47 | n_jobs = Reduce(min, c(ceiling(n_calls / job_size), n_jobs, n_calls)) 48 | } else { 49 | qsys_id = class(workers$workers)[1] 50 | n_jobs = Inf #todo: number of workers 51 | } 52 | if (qsys_id != "LOCAL" && n_calls > n_jobs*max_calls_worker) 53 | stop("n_jobs and 
max_calls_worker provide fewer call slots than required") 54 | if (is.null(workers)) 55 | workers = workers(n_jobs, reuse=FALSE, template=template, 56 | log_worker=log_worker, verbose=verbose) 57 | workers$env(fun=fun, rettype=rettype, common_seed=seed, const=const) 58 | workers$pkg(pkgs) 59 | objs = do.call(workers$env, export) 60 | if (!is.null(template$memory) && 2*sum(objs$size)/1024^2 > template$memory) 61 | stop("Not enough memory requested to unserialize data on workers") 62 | 63 | # heuristic for chunk size 64 | if (is.na(chunk_size)) 65 | chunk_size = round(Reduce(min, c( 66 | 500, # never more than 500 67 | n_calls / n_jobs / 100, # each worker reports back 100 times 68 | n_calls / 2000, # at most 2000 reports total 69 | 1e4 * n_calls / utils::object.size(df)[[1]] # no more than 10 kb 70 | ))) 71 | chunk_size = max(chunk_size, 1) 72 | 73 | # process calls 74 | if (inherits(workers$workers, "LOCAL")) { 75 | list2env(export, envir=environment(fun)) 76 | for (pkg in pkgs) # is it possible to attach the package to fun's env? 
77 | library(pkg, character.only=TRUE) 78 | re = work_chunk(df=df, fun=fun, const=const, rettype=rettype, 79 | common_seed=seed, progress=TRUE) 80 | summarize_result(re$result, length(re$errors), length(re$warnings), 81 | re[c("errors", "warnings")], fail_on_error=fail_on_error) 82 | } else { 83 | master(pool=workers, iter=df, rettype=rettype, 84 | fail_on_error=fail_on_error, chunk_size=chunk_size, 85 | timeout=timeout, max_calls_worker=max_calls_worker, 86 | verbose=verbose) 87 | } 88 | } 89 | -------------------------------------------------------------------------------- /R/RcppExports.R: -------------------------------------------------------------------------------- 1 | # Generated by using Rcpp::compileAttributes() -> do not edit by hand 2 | # Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 3 | 4 | has_connectivity <- function(host) { 5 | .Call('_clustermq_has_connectivity', PACKAGE = 'clustermq', host) 6 | } 7 | 8 | libzmq_has_draft <- function() { 9 | .Call('_clustermq_libzmq_has_draft', PACKAGE = 'clustermq') 10 | } 11 | 12 | -------------------------------------------------------------------------------- /R/check_args.r: -------------------------------------------------------------------------------- 1 | #' Function to check arguments with which Q() is called 2 | #' 3 | #' @param fun A function to call 4 | #' @param iter Objects to be iterated in each function call 5 | #' @param const A list of constant arguments passed to each function call 6 | #' @return Processed iterated argument list if 'iter' is a list 7 | #' @keywords internal 8 | check_args = function(fun, iter, const=list()) { 9 | if (!is.list(iter) || length(iter) == 0) 10 | stop("'iter' needs to be a list with at least one element") 11 | 12 | # check function and arguments provided 13 | funargs = formals(fun) 14 | required = names(funargs)[unlist(lapply(funargs, function(f) class(f)=='name'))] 15 | required = setdiff(required, "...") 16 | 17 | if (length(iter) == 1 && length(required) == 
1 && is.null(names(iter))) 18 | names(iter) = required 19 | 20 | provided = names(c(iter, const)) 21 | 22 | sdiff = unlist(setdiff(required, provided)) 23 | if (length(sdiff) > 1) 24 | stop(paste("If more than one argument, all must be named:", 25 | paste(sdiff, collapse=" "))) 26 | 27 | sdiff = unlist(setdiff(provided, names(funargs))) 28 | if (length(sdiff) > 0 && ! '...' %in% names(funargs)) 29 | stop(paste("Argument provided but not accepted by function:", 30 | paste(sdiff, collapse=" "))) 31 | 32 | dups = duplicated(provided) 33 | if (any(dups)) 34 | stop(paste("Argument duplicated:", paste(provided[[dups]], collapse=" "))) 35 | 36 | if (!is.data.frame(iter)) { 37 | df = data.frame(..placeholder.. = seq_along(iter[[1]])) 38 | for (field in names(iter)) 39 | df[[field]] = iter[[field]] 40 | df$..placeholder.. = NULL 41 | df 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /R/chunk.r: -------------------------------------------------------------------------------- 1 | #' Subset index chunk for processing 2 | #' 3 | #' 'attr' in `[.data.frame` takes too much CPU time 4 | #' 5 | #' @param x Index data.frame 6 | #' @param i Rows to subset 7 | #' @return x[i,] 8 | #' @keywords internal 9 | chunk = function(x, i) { 10 | re = lapply(x, `[`, i=i) 11 | re$` id ` = i 12 | re 13 | } 14 | -------------------------------------------------------------------------------- /R/clustermq-package.r: -------------------------------------------------------------------------------- 1 | #' Evaluate Function Calls on HPC Schedulers (LSF, SGE, SLURM) 2 | #' 3 | #' Provides the \code{Q} function to send arbitrary function calls to 4 | #' workers on HPC schedulers without relying on network-mounted storage. 5 | #' Allows using remote schedulers via SSH. 
6 | #' 7 | #' Under the hood, this will submit a cluster job that connects to the master 8 | #' via TCP the master will then send the function and argument chunks to the 9 | #' worker and the worker will return the results to the master until everything 10 | #' is done and you get back your result 11 | #' 12 | #' Computations are done entirely on the network and without any temporary 13 | #' files on network-mounted storage, so there is no strain on the file system 14 | #' apart from starting up R once per job. This removes the biggest bottleneck 15 | #' in distributed computing. 16 | #' 17 | #' Using this approach, we can easily do load-balancing, i.e. workers that get 18 | #' their jobs done faster will also receive more function calls to work on. This 19 | #' is especially useful if not all calls return after the same time, or one 20 | #' worker has a high load. 21 | #' 22 | #' For more detailed usage instructions, see the documentation of the \code{Q} 23 | #' function. 24 | #' 25 | #' @docType package 26 | #' @keywords internal 27 | #' @useDynLib clustermq 28 | #' @import Rcpp 29 | "_PACKAGE" 30 | -------------------------------------------------------------------------------- /R/foreach.r: -------------------------------------------------------------------------------- 1 | #' Register clustermq as `foreach` parallel handler 2 | #' 3 | #' @param ... List of arguments passed to the `Q` function, e.g. n_jobs 4 | #' @export 5 | register_dopar_cmq = function(...) { 6 | dots = list(...) 
7 | workers = NA 8 | if ("n_jobs" %in% names(dots)) 9 | workers = dots$n_jobs 10 | info = function(data, item) { 11 | switch(item, 12 | name = "clustermq", 13 | version = utils::packageVersion("clustermq"), 14 | workers = workers) 15 | } 16 | foreach::setDoPar(cmq_foreach, data=dots, info=info) 17 | } 18 | 19 | #' clustermq foreach handler 20 | #' 21 | #' @param obj Returned from foreach::foreach, containing the following variables: 22 | #' args : Arguments passed, each as a call 23 | #' argnames: character vector of arguments passed 24 | #' evalenv : Environment where to evaluate the arguments 25 | #' export : character vector of variable names to export to nodes 26 | #' packages: character vector of required packages 27 | #' verbose : whether to print status messages [logical] 28 | #' errorHandling: string of function name to call error with, e.g. "stop" 29 | #' @param expr An R expression in curly braces 30 | #' @param envir Environment where to evaluate the arguments 31 | #' @param data Common arguments passed by register_dopcar_cmq(), e.g. n_jobs 32 | #' @keywords internal 33 | cmq_foreach = function(obj, expr, envir, data) { 34 | stopifnot(inherits(obj, "foreach")) 35 | stopifnot(inherits(envir, "environment")) 36 | 37 | it = iterators::iter(obj) 38 | args_df = do.call(rbind, as.list(it)) 39 | 40 | # if we call a function by name, add it to the export list 41 | if (is.call(expr) && as.character(expr[[1]]) != "{") 42 | obj$export = c(as.character(expr[[1]]), obj$export) 43 | 44 | # wrap whatever we call in a function for use with Q(...) 45 | fun = function(...) NULL 46 | add = stats::setNames(replicate(ncol(args_df), substitute()), obj$argnames) 47 | formals(fun) = c(add, formals(fun)) 48 | body(fun) = expr 49 | 50 | # scan 'expr' for exports, eval and add objects ref'd in '.export' 51 | globs = globals::globalsOf(expr, envir=envir, mustExist=FALSE) 52 | globs = globs[! 
names(globs) %in% c(names(formals(fun)), ls(baseenv()))] 53 | data$export = utils::modifyList(as.list(data$export), globs, keep.null=TRUE) 54 | 55 | # make sure packages are loaded on the dopar target 56 | if (length(obj$packages) > 0) 57 | data$pkgs = unique(c(data$pkgs, obj$packages)) 58 | 59 | result = do.call(Q_rows, c(list(df=args_df, fun=fun), data)) 60 | 61 | accum = foreach::makeAccum(it) 62 | accum(result, tags=seq_along(result)) 63 | foreach::getResult(it) 64 | } 65 | -------------------------------------------------------------------------------- /R/master.r: -------------------------------------------------------------------------------- 1 | #' Master controlling the workers 2 | #' 3 | #' exchanging messages between the master and workers works the following way: 4 | #' * we have submitted a job where we don't know when it will start up 5 | #' * it starts, sends is a message list(id=0) indicating it is ready 6 | #' * we send it the function definition and common data 7 | #' * we also send it the first data set to work on 8 | #' * when we get any id > 0, it is a result that we store 9 | #' * and send the next data set/index to work on 10 | #' * when computatons are complete, we send id=0 to the worker 11 | #' * it responds with id=-1 (and usage stats) and shuts down 12 | #' 13 | #' @param pool Instance of Pool object 14 | #' @param iter Objects to be iterated in each function call 15 | #' @param rettype Return type of function 16 | #' @param fail_on_error If an error occurs on the workers, continue or fail? 17 | #' @param chunk_size Number of function calls to chunk together 18 | #' defaults to 100 chunks per worker or max. 
500 kb per chunk 19 | #' @param timeout Maximum time in seconds to wait for worker (default: Inf) 20 | #' @param max_calls_worker Maxmimum number of function calls that will be sent to one worker 21 | #' @param verbose Print progress messages 22 | #' @return A list of whatever `fun` returned 23 | #' @keywords internal 24 | master = function(pool, iter, rettype="list", fail_on_error=TRUE, 25 | chunk_size=NA, timeout=Inf, max_calls_worker=Inf, verbose=TRUE) { 26 | # prepare empty variables for managing results 27 | n_calls = nrow(iter) 28 | job_result = rep(vec_lookup[[rettype]], n_calls) 29 | submit_index = 1:chunk_size 30 | jobs_running = 0 31 | cond_msgs = list(warnings=list(), errors=list()) 32 | n_errors = 0 33 | n_warnings = 0 34 | shutdown = FALSE 35 | kill_workers = FALSE 36 | penv = pool$env(work_chunk=work_chunk) 37 | obj_size = structure(sum(penv$size), class="object_size") 38 | obj_size_fmt = format(obj_size, big.mark=",", units="auto") 39 | if (is.infinite(timeout)) { 40 | timeout = -1L 41 | } else { 42 | timeout = timeout * 1000 # Rcpp API uses msec 43 | } 44 | 45 | #TODO: warn before serialization, create pool+env & then submit 46 | if (obj_size/1e6 > getOption("clustermq.data.warning", 500)) 47 | warning("Common data is ", obj_size_fmt, ". 
Recommended limit is ", 48 | getOption("clustermq.data.warning", 500), 49 | " Mb (set by clustermq.data.warning option)", immediate.=TRUE) 50 | 51 | if (!pool$reusable) 52 | on.exit(pool$cleanup()) 53 | 54 | if (verbose) { 55 | message("Running ", format(n_calls, big.mark=",", scientific=FALSE), 56 | " calculations (", nrow(penv), " objs/", obj_size_fmt, 57 | " common; ", chunk_size, " calls/chunk) ...") 58 | pb = progress::progress_bar$new(total = n_calls, 59 | format = "[:bar] :percent (:wup/:wtot wrk) eta: :eta") 60 | pb$tick(0, tokens=list(wtot=pool$workers_total, wup=pool$workers_running)) 61 | } 62 | 63 | # main event loop 64 | while((!shutdown && submit_index[1] <= n_calls) || jobs_running > 0) { 65 | msg = pool$recv(timeout) 66 | if (inherits(msg, "worker_error")) 67 | stop("Worker Error: ", msg) 68 | 69 | if (verbose) 70 | pb$tick(length(msg$result), 71 | tokens=list(wtot=pool$workers_total, wup=pool$workers_running)) 72 | 73 | # process the result data if we got some 74 | if (!is.null(msg$result)) { 75 | call_id = names(msg$result) 76 | jobs_running = jobs_running - length(call_id) 77 | job_result[as.integer(call_id)] = msg$result 78 | 79 | n_warnings = n_warnings + length(msg$warnings) 80 | n_errors = n_errors + length(msg$errors) 81 | if (n_errors > 0 && fail_on_error == TRUE) 82 | shutdown = TRUE 83 | if (length(cond_msgs$warnings) < 50) 84 | cond_msgs$warnings = c(cond_msgs$warnings, msg$warnings) 85 | if (length(cond_msgs$errors) < 50) 86 | cond_msgs$errors = c(cond_msgs$errors, msg$errors) 87 | } 88 | 89 | if (shutdown || pool$current()$calls >= max_calls_worker) { 90 | pool$send_shutdown() 91 | next 92 | } 93 | 94 | if (submit_index[1] <= n_calls) { 95 | # if we have work, send it to the worker 96 | submit_index = submit_index[submit_index <= n_calls] 97 | pool$send(work_chunk(chunk, fun=fun, const=const, rettype=rettype, 98 | common_seed=common_seed), chunk=chunk(iter, submit_index)) 99 | jobs_running = jobs_running + length(submit_index) 100 | 
submit_index = submit_index + chunk_size 101 | 102 | # adapt chunk size towards end of processing 103 | cs = ceiling((n_calls - submit_index[1]) / pool$workers_running) 104 | if (cs < chunk_size) { 105 | chunk_size = max(cs, 1) 106 | submit_index = submit_index[1:chunk_size] 107 | } 108 | 109 | } else if (pool$reusable) { 110 | pool$send_wait() 111 | } else { # or else shut it down 112 | pool$send_shutdown() 113 | } 114 | } 115 | 116 | summarize_result(job_result, n_errors, n_warnings, cond_msgs, 117 | min(submit_index)-1, fail_on_error) 118 | } 119 | -------------------------------------------------------------------------------- /R/pool.r: -------------------------------------------------------------------------------- 1 | loadModule("cmq_master", TRUE) # CMQMaster C++ class 2 | 3 | #' Class for basic queuing system functions 4 | #' 5 | #' Provides the basic functions needed to communicate between machines 6 | #' This should abstract most functions of rZMQ so the scheduler 7 | #' implementations can rely on the higher level functionality 8 | #' 9 | #' @keywords internal 10 | Pool = R6::R6Class("Pool", 11 | public = list( 12 | initialize = function(addr=sample(host()), reuse=TRUE) { 13 | private$master = methods::new(CMQMaster) 14 | # ZeroMQ allows connecting by node name, but binding must be either 15 | # a numerical IP or an interfacet name. 
This is a bit of a hack to 16 | # seem to allow node-name bindings 17 | nodename = Sys.info()["nodename"] 18 | addr = sub(nodename, "*", addr, fixed=TRUE) 19 | bound = private$master$listen(addr) 20 | private$addr = sub("0.0.0.0", nodename, bound, fixed=TRUE) 21 | private$timer = proc.time() 22 | private$reuse = reuse 23 | }, 24 | 25 | print = function() { 26 | cat(sprintf(" worker pool with %i member(s)\n", self$workers$n())) 27 | }, 28 | 29 | info = function() { 30 | info = private$master$list_workers() 31 | times = do.call(rbind, info$time)[,1:3,drop=FALSE] 32 | mem = function(field) sapply(info$mem, function(m) sum(m[,field] * c(56,1))) 33 | do.call(data.frame, c(info[c("worker", "status")], 34 | current=list(info$worker==info$cur), 35 | info["calls"], as.data.frame(times), 36 | list(mem.used=mem("used"), mem.max=mem("max used")))) 37 | }, 38 | current = function() { 39 | private$master$current() 40 | }, 41 | 42 | add = function(qsys, n, ...) { 43 | self$workers = qsys$new(addr=private$addr, master=private$master, n_jobs=n, ...) 44 | }, 45 | 46 | env = function(...) { 47 | args = list(...) 48 | for (name in names(args)) 49 | private$master$add_env(name, args[[name]]) 50 | if (length(args) == 0) 51 | private$master$list_env() 52 | else 53 | invisible(private$master$list_env()) 54 | }, 55 | 56 | pkg = function(...) { 57 | args = as.list(...) 
58 | for (elm in args) 59 | private$master$add_pkg(elm) 60 | }, 61 | 62 | ### START pre-0.9 compatibility functions (deprecated) 63 | set_common_data = function(..., export=list(), pkgs=c(), token="") { 64 | .Deprecated("env") 65 | do.call(self$env, c(list(...), export)) 66 | if (length(pkgs) > 0) 67 | do.call(self$pkg, as.list(pkgs)) 68 | private$token = token 69 | }, 70 | send_common_data = function() { 71 | .Deprecated("handled implicitly") 72 | self$send() 73 | }, 74 | send_shutdown_worker = function() { 75 | .Deprecated("send_shutdown") 76 | self$send_shutdown() 77 | }, 78 | send_call = function(expr, env=list(), ref=substitute(expr)) { 79 | .Deprecated("send") 80 | pcall = quote(substitute(expr)) 81 | do.call(self$send, c(list(cmd=eval(pcall)), env)) 82 | }, 83 | receive_data = function() { 84 | .Deprecated("recv") 85 | rd = self$recv() 86 | list(result=rd, warnings=c(), errors=c(), token=private$token) 87 | }, 88 | ### END pre-0.9 compatibility functions (deprecated) 89 | 90 | send = function(cmd, ...) { 91 | pcall = quote(substitute(cmd)) 92 | cmd = as.expression(do.call(substitute, list(eval(pcall), env=list(...)))) 93 | invisible(private$master$send(cmd)) 94 | }, 95 | send_shutdown = function() { 96 | private$master$send_shutdown() 97 | }, 98 | send_wait = function(wait=50) { 99 | private$master$send(Sys.sleep(wait/1000)) 100 | }, 101 | 102 | recv = function(timeout=-1L) { 103 | private$master$recv(timeout) 104 | }, 105 | 106 | cleanup = function(timeout=5) { 107 | success = private$master$close(as.integer(timeout*1000)) 108 | success = self$workers$cleanup(success, timeout) # timeout left? 
109 | 110 | info = self$info() 111 | max_mem = max(c(info$mem.max+2e8, 0), na.rm=TRUE) # add 200 Mb 112 | max_mem_str = format(structure(max_mem, class="object_size"), units="auto") 113 | 114 | if (nrow(info) > 0) { 115 | wt = lapply(info[c("user.self", "sys.self", "elapsed")], mean, na.rm=TRUE) 116 | } else { 117 | wt = rep(NA, 3) 118 | } 119 | rt = proc.time() - private$timer 120 | rt3_fmt = difftime(as.POSIXct(rt[[3]], origin="1970-01-01"), 121 | as.POSIXct(0, origin="1970-01-01"), units="auto") 122 | rt3_str = sprintf("%.1f %s", rt3_fmt, attr(rt3_fmt, "units")) 123 | 124 | fmt = "Master: [%s %.1f%% CPU]; Worker: [avg %.1f%% CPU, max %s]" 125 | message(sprintf(fmt, rt3_str, 100*(rt[[1]]+rt[[2]])/rt[[3]], 126 | 100*(wt[[1]]+wt[[2]])/wt[[3]], max_mem_str)) 127 | 128 | invisible(success) 129 | }, 130 | 131 | workers = NULL 132 | ), 133 | 134 | active = list( 135 | workers_total = function() private$master$workers_total(), 136 | workers_running = function() private$master$workers_running(), 137 | reusable = function() private$reuse 138 | ), 139 | 140 | private = list( 141 | token = NULL, ### pre-0.9 compatibility functions (deprecated) 142 | 143 | master = NULL, 144 | addr = NULL, 145 | timer = NULL, 146 | reuse = NULL, 147 | 148 | finalize = function() { 149 | private$master$close(0L) 150 | } 151 | ), 152 | 153 | cloneable = FALSE 154 | ) 155 | -------------------------------------------------------------------------------- /R/qsys.r: -------------------------------------------------------------------------------- 1 | loadModule("cmq_master", TRUE) # CMQMaster C++ class 2 | 3 | #' Class for basic queuing system functions 4 | #' 5 | #' Provides the basic functions needed to communicate between machines 6 | #' This should abstract most functions of rZMQ so the scheduler 7 | #' implementations can rely on the higher level functionality 8 | #' 9 | #' @keywords internal 10 | QSys = R6::R6Class("QSys", 11 | public = list( 12 | # Create a class instance 13 | # 14 | # 
Initializes ZeroMQ and sets up our primary communication socket
{ 52 | values = utils::modifyList(private$defaults, list(...)) 53 | values$master = private$addr 54 | if (grepl("CMQ_AUTH", private$template)) { 55 | # note: auth will be obligatory in the future and this check will 56 | # be removed (i.e., filling will fail if no field in template) 57 | values$auth = paste(sample(letters, 5, TRUE), collapse="") 58 | } else { 59 | values$auth = NULL 60 | warning("Add 'CMQ_AUTH={{ auth }}' to template to enable socket authentication", 61 | immediate.=TRUE) 62 | } 63 | if (!"job_name" %in% names(values)) 64 | values$job_name = paste0("cmq", private$port) 65 | private$workers_total = values$n_jobs 66 | values 67 | }, 68 | 69 | template_error = function(scheduler, status, filled) { 70 | message("\nThe filled ", scheduler, " template ", sQuote(private$template_file), 71 | " was:\n", '"""', "\n", filled, '"""', "\n") 72 | message("see: https://mschubert.github.io/clustermq/articles/userguide.html#scheduler-setup\n") 73 | stop("Job submission failed with error code ", status, call.=FALSE) 74 | } 75 | ), 76 | 77 | cloneable = FALSE 78 | ) 79 | -------------------------------------------------------------------------------- /R/qsys_local.r: -------------------------------------------------------------------------------- 1 | #' Placeholder for local processing 2 | #' 3 | #' Mainly so tests pass without setting up a scheduler 4 | #' 5 | #' @keywords internal 6 | LOCAL = R6::R6Class("LOCAL", 7 | inherit = QSys, 8 | 9 | public = list( 10 | initialize = function(addr="unused", n_jobs=0, master=NULL, ..., 11 | log_worker=FALSE, log_file=NULL, verbose=TRUE) { 12 | super$initialize(addr=addr, master=master) 13 | if (verbose) 14 | message("Running sequentially ('LOCAL') ...") 15 | private$is_cleaned_up = TRUE 16 | } 17 | ), 18 | 19 | cloneable = FALSE 20 | ) 21 | -------------------------------------------------------------------------------- /R/qsys_lsf.r: -------------------------------------------------------------------------------- 1 | #' LSF 
scheduler functions 2 | #' 3 | #' Derives from QSys to provide LSF-specific functions 4 | #' 5 | #' @keywords internal 6 | LSF = R6::R6Class("LSF", 7 | inherit = QSys, 8 | 9 | public = list( 10 | initialize = function(addr, n_jobs, master, ..., template=getOption("clustermq.template", "LSF"), 11 | log_worker=FALSE, log_file=NULL, verbose=TRUE) { 12 | super$initialize(addr=addr, master=master, template=template) 13 | 14 | opts = private$fill_options(n_jobs=n_jobs, ...) 15 | private$job_id = opts$job_name 16 | if (!is.null(opts$log_file)) 17 | opts$log_file = normalizePath(opts$log_file, mustWork=FALSE) 18 | else if (log_worker) 19 | opts$log_file = paste0(private$job_id, "-%I.log") 20 | filled = fill_template(private$template, opts, 21 | required=c("master", "job_name", "n_jobs")) 22 | 23 | if (verbose) 24 | message("Submitting ", n_jobs, " worker jobs to ", class(self)[1], 25 | " as ", sQuote(private$job_id), " ...") 26 | 27 | status = system("bsub", input=filled, ignore.stdout=TRUE) 28 | if (status != 0) 29 | private$template_error("LSF", status, filled) 30 | private$master$add_pending_workers(n_jobs) 31 | private$is_cleaned_up = FALSE 32 | }, 33 | 34 | cleanup = function(success, timeout) { 35 | private$is_cleaned_up = success 36 | private$finalize() 37 | } 38 | ), 39 | 40 | private = list( 41 | job_id = NULL, 42 | 43 | finalize = function(quiet=self$workers_running == 0) { 44 | quiet = FALSE #TODO: 45 | if (!private$is_cleaned_up) { 46 | system(paste("bkill -J", private$job_id), 47 | ignore.stdout=quiet, ignore.stderr=quiet, wait=FALSE) 48 | } 49 | private$is_cleaned_up = TRUE 50 | } 51 | ), 52 | 53 | cloneable = FALSE 54 | ) 55 | -------------------------------------------------------------------------------- /R/qsys_multicore.r: -------------------------------------------------------------------------------- 1 | #' Process on multiple cores on one machine 2 | #' 3 | #' Derives from QSys to provide multicore-specific functions 4 | #' 5 | #' @keywords internal 6 
| MULTICORE = R6::R6Class("MULTICORE", 7 | inherit = QSys, 8 | 9 | public = list( 10 | initialize = function(addr, n_jobs, master, ..., log_worker=FALSE, log_file=NULL, verbose=TRUE) { 11 | addr = sub(Sys.info()["nodename"], "127.0.0.1", addr, fixed=TRUE) 12 | super$initialize(addr=addr, master=master) 13 | if (verbose) 14 | message("Starting ", n_jobs, " cores ...") 15 | if (log_worker && is.null(log_file)) 16 | log_file = sprintf("cmq%i-%%i.log", private$port) 17 | 18 | for (i in seq_len(n_jobs)) { 19 | if (is.character(log_file)) 20 | log_i = suppressWarnings(sprintf(log_file, i)) 21 | else 22 | log_i = nullfile() 23 | wrapper = function(m, logfile) { 24 | fout = file(logfile, open="wt") 25 | sink(file=fout, type="output") 26 | sink(file=fout, type="message") 27 | on.exit({ sink(type="message"); sink(type="output"); close(fout) }) 28 | clustermq:::worker(m) 29 | } 30 | p = parallel::mcparallel(quote(wrapper(private$addr, log_i))) 31 | private$children[[as.character(p$pid)]] = p 32 | } 33 | private$master$add_pending_workers(n_jobs) 34 | private$workers_total = n_jobs 35 | private$is_cleaned_up = FALSE 36 | }, 37 | 38 | cleanup = function(success, timeout=5L) { 39 | private$is_cleaned_up = success 40 | private$collect_children(wait=FALSE, timeout=timeout) 41 | private$finalize() 42 | } 43 | ), 44 | 45 | private = list( 46 | collect_children = function(...) 
{ 47 | pids = as.integer(names(private$children)) 48 | res = suppressWarnings(parallel::mccollect(pids, ...)) 49 | finished = intersect(names(private$children), names(res)) 50 | private$children[finished] = NULL 51 | }, 52 | 53 | children = list(), 54 | 55 | finalize = function(quiet=FALSE) { 56 | if (!private$is_cleaned_up) { 57 | private$collect_children(wait=FALSE, timeout=0) 58 | running = names(private$children) 59 | if (length(running) > 0) { 60 | if (!quiet) 61 | warning("Unclean shutdown for PIDs: ", 62 | paste(running, collapse=", "), 63 | immediate.=TRUE) 64 | tools::pskill(running, tools::SIGKILL) 65 | } 66 | private$children = list() 67 | } 68 | private$is_cleaned_up = TRUE 69 | } 70 | ), 71 | 72 | cloneable = FALSE 73 | ) 74 | -------------------------------------------------------------------------------- /R/qsys_multiprocess.r: -------------------------------------------------------------------------------- 1 | #' Process on multiple processes on one machine 2 | #' 3 | #' Derives from QSys to provide callr-specific functions 4 | #' 5 | #' @keywords internal 6 | MULTIPROCESS = R6::R6Class("MULTIPROCESS", 7 | inherit = QSys, 8 | 9 | public = list( 10 | initialize = function(addr, n_jobs, master, ..., log_worker=FALSE, log_file=NULL, verbose=TRUE) { 11 | if (! 
requireNamespace("callr", quietly=TRUE)) 12 | stop("The ", sQuote(callr), " package is required for ", sQuote("multiprocess")) 13 | addr = sub(Sys.info()["nodename"], "127.0.0.1", addr, fixed=TRUE) 14 | super$initialize(addr=addr, master=master) 15 | 16 | if (verbose) 17 | message("Starting ", n_jobs, " processes ...") 18 | 19 | if (log_worker && is.null(log_file)) 20 | log_file = sprintf("cmq%i-%%i.log", private$port) 21 | 22 | for (i in seq_len(n_jobs)) { 23 | if (is.character(log_file)) 24 | log_i = suppressWarnings(sprintf(log_file, i)) 25 | else 26 | log_i = nullfile() 27 | cr = callr::r_bg(function(m) clustermq:::worker(m), 28 | args=list(m=private$addr), 29 | stdout=log_i, stderr=log_i) 30 | private$callr[[as.character(cr$get_pid())]] = cr 31 | } 32 | private$master$add_pending_workers(n_jobs) 33 | private$workers_total = n_jobs 34 | private$is_cleaned_up = FALSE 35 | }, 36 | 37 | cleanup = function(success, timeout) { 38 | dead_workers = sapply(private$callr, function(x) ! x$is_alive()) 39 | if (length(dead_workers) > 0) 40 | private$callr[dead_workers] = NULL 41 | else 42 | private$is_cleaned_up = TRUE 43 | private$is_cleaned_up 44 | } 45 | ), 46 | 47 | private = list( 48 | callr = list(), 49 | 50 | finalize = function(quiet=FALSE) { 51 | if (!private$is_cleaned_up) { 52 | dead_workers = sapply(private$callr, function(x) ! 
#' SGE scheduler functions
#'
#' Derives from QSys to provide SGE-specific functions
#'
#' @keywords internal
SGE = R6::R6Class("SGE",
    inherit = QSys,

    public = list(
        # Fill the SGE template and submit the worker array via 'qsub'
        #
        # @param addr        Master address the workers connect back to
        # @param n_jobs      Number of array tasks to submit
        # @param master      The master (or proxy) object
        # @param ...         Additional values filled into the template
        # @param template    Submission template (option 'clustermq.template')
        # @param log_worker  Whether to write a log file per worker
        # @param log_file    Explicit log file path (takes precedence)
        # @param verbose     Whether to print a submission message
        initialize = function(addr, n_jobs, master, ..., template=getOption("clustermq.template", "SGE"),
                              log_worker=FALSE, log_file=NULL, verbose=TRUE) {
            super$initialize(addr=addr, master=master, template=template)

            opts = private$fill_options(n_jobs=n_jobs, ...)
            private$job_name = opts$job_name
            if (!is.null(opts$log_file))
                opts$log_file = normalizePath(opts$log_file, mustWork=FALSE)
            else if (log_worker)
                # array_idx is the scheduler's array-index shell variable
                opts$log_file = sprintf("%s-%s.log", private$job_name, private$array_idx)
            filled = fill_template(private$template, opts, required=c("master", "n_jobs"))

            if (verbose)
                # use job_name here: job_id is only assigned after submission
                # below, so quoting job_id at this point printed no name at all
                message("Submitting ", n_jobs, " worker jobs to ", class(self)[1],
                        " as ", sQuote(private$job_name), " ...")

            # keep qsub's stdout: the PBS subclass parses its job id from it
            private$qsub_stdout = system2("qsub", input=filled, stdout=TRUE)
            status = attr(private$qsub_stdout, "status")
            if (!is.null(status) && status != 0)
                private$template_error("SGE", status, filled)
            private$job_id = private$job_name
            private$master$add_pending_workers(n_jobs)
            private$is_cleaned_up = FALSE
        },

        # Record success and cancel any remaining jobs
        cleanup = function(success, timeout) {
            private$is_cleaned_up = success
            private$finalize()
        }
    ),

    private = list(
        qsub_stdout = NULL,     # raw qsub output (consumed by PBS subclass)
        job_name = NULL,        # name the array was submitted under
        job_id = NULL,          # id passed to 'qdel' (name for SGE; qsub output for PBS)
        array_idx = "$TASK_ID", # shell variable holding the array task index

        finalize = function(quiet = TRUE) { # self$workers_running == 0
            if (!private$is_cleaned_up) {
                # best-effort cancellation; do not block shutdown on qdel
                system(paste("qdel", private$job_id),
                       ignore.stdout=quiet, ignore.stderr=quiet, wait=FALSE)
            }
            private$is_cleaned_up = TRUE
        }
    ),

    cloneable = FALSE
)
#' SLURM scheduler functions
#'
#' Derives from QSys to provide SLURM-specific functions
#'
#' @keywords internal
SLURM = R6::R6Class("SLURM",
    inherit = QSys,

    public = list(
        # Fill the SLURM template and submit the worker array via 'sbatch'
        #
        # @param addr        Master address the workers connect back to
        # @param n_jobs      Number of array jobs to submit
        # @param master      The master (or proxy) object
        # @param ...         Additional values filled into the template
        # @param template    Submission template (option 'clustermq.template')
        # @param log_worker  Whether to write a log file per worker
        # @param verbose     Whether to print a submission message
        initialize = function(addr, n_jobs, master, ..., template=getOption("clustermq.template", "SLURM"),
                              log_worker=FALSE, verbose=TRUE) {
            super$initialize(addr=addr, master=master, template=template)

            opts = private$fill_options(n_jobs=n_jobs, ...)
            private$job_id = opts$job_name
            if (!is.null(opts$log_file))
                opts$log_file = normalizePath(opts$log_file, mustWork=FALSE)
            else if (log_worker)
                # "%a" is expanded by SLURM to the array task id per worker
                opts$log_file = paste0(private$job_id, "-%a.log")
            filled = fill_template(private$template, opts,
                                   required=c("master", "job_name", "n_jobs"))

            if (verbose)
                message("Submitting ", n_jobs, " worker jobs to ", class(self)[1],
                        " as ", sQuote(private$job_id), " ...")

            # the filled template is fed to sbatch via its standard input
            status = system("sbatch", input=filled, ignore.stdout=TRUE)
            if (status != 0)
                private$template_error("SLURM", status, filled)
            private$master$add_pending_workers(n_jobs)
            private$is_cleaned_up = FALSE
        },

        # Record success and cancel any remaining jobs
        cleanup = function(success, timeout) {
            private$is_cleaned_up = success
            private$finalize()
        }
    ),

    private = list(
        job_id = NULL,  # job name used for submission and for 'scancel --name'

        finalize = function(quiet = TRUE) { # self$workers_running == 0
            if (!private$is_cleaned_up) {
                # cancel all remaining jobs of this submission by name,
                # without blocking shutdown on scancel
                system(paste("scancel --name", private$job_id),
                       ignore.stdout=quiet, ignore.stderr=quiet, wait=FALSE)
            }
            private$is_cleaned_up = TRUE
        }
    ),

    cloneable = FALSE
)
        # Establish the SSH tunnel and start the remote proxy
        #
        # Builds an ssh command from the (whitespace-collapsed) template, runs
        # it to set up the reverse port forward and launch the remote R proxy,
        # then forwards the submission arguments to that proxy.
        #
        # @param addr      Master address; must use the tcp:// transport
        # @param n_jobs    Number of jobs the remote scheduler should submit
        # @param ...       Arguments forwarded to the remote qsys constructor
        # @param master    The master object (provides proxy_submit_cmd)
        # @param ssh_host  Remote host (option 'clustermq.ssh.host'), required
        # @param ssh_log   Remote log target (option 'clustermq.ssh.log')
        # @param template  SSH command template (option 'clustermq.template')
        # @param verbose   Not referenced in this method body
        initialize = function(addr, n_jobs, ..., master,
                              ssh_host = getOption("clustermq.ssh.host"),
                              ssh_log = getOption("clustermq.ssh.log"),
                              template = getOption("clustermq.template", "SSH"),
                              verbose = TRUE) {
            if (is.null(ssh_host))
                stop("Option 'clustermq.ssh.host' required for SSH but not set")
            if (!grepl("^tcp://", addr))
                stop("SSH QSys must connect via tcp:// not ", sQuote(addr))

            super$initialize(addr=addr, master=master, template=template)
            # flatten the multi-line template into a single shell command
            private$template = paste(trimws(readLines(textConnection(private$template))), collapse=" ")

            # set forward and run ssh.r (send port, master)
            opts = private$fill_options(ssh_log=ssh_log, ssh_host=ssh_host)
            ssh_cmd = fill_template(private$template, opts,
                                    required=c("local_port", "ssh.hpc_fwd_port", "ssh_host"))

            # wait for ssh to connect
            # NOTE(review): wait=TRUE assumes the templated ssh command
            # backgrounds itself once the tunnel is up -- TODO confirm template
            message(sprintf("Connecting to %s via SSH ...", sQuote(ssh_host)))
            system(ssh_cmd, wait=TRUE, ignore.stdout=TRUE, ignore.stderr=TRUE)

            master$add_pending_workers(n_jobs)
            args = c(list(...), list(n_jobs=n_jobs))
            init_timeout = getOption("clustermq.ssh.timeout", 10)
            # send submit args to the remote proxy; give a clearer error if
            # the remote R process never answers within the timeout
            tryCatch(private$master$proxy_submit_cmd(args, init_timeout*1000),
                error = function(e) {
                    if (grepl("timed out", conditionMessage(e))) {
                        stop("Remote R process did not respond after ",
                             init_timeout, " seconds. Check your SSH server log.")
                    } else stop(e)
                })

            private$workers_total = args$n_jobs
        },
        # Tear down proxy state
        #
        # The explicit PROXY_STOP shutdown message is currently disabled (kept
        # below as commented-out code); this method only flips the local flag,
        # making repeated finalization a no-op.
        finalize = function(quiet = self$workers_running == 0) {
            # if (private$ssh_proxy_running) {
            #     private$zmq$send(
            #         list(id="PROXY_STOP", finalize=!private$is_cleaned_up),
            #         "proxy"
            #     )
            # }
            private$ssh_proxy_running = FALSE
        }
#' Print a summary of errors and warnings that occurred during processing
#'
#' Messages are reported in call order (errors first, then warnings, at most
#' 50 in total). Raises an error if any call failed and `fail_on_error` is
#' set, a warning otherwise.
#'
#' @param result A list or vector of the processing result
#' @param n_errors How many errors occurred
#' @param n_warnings How many warnings occurred
#' @param cond_msgs List with `errors` and `warnings` messages per call id
#' @param at How many calls were processed up to this point
#' @param fail_on_error Stop if error(s) occurred
#' @return The processing result with names stripped
#' @keywords internal
summarize_result = function(result, n_errors, n_warnings,
        cond_msgs, at=length(result), fail_on_error=TRUE) {

    # order each message set by its integer call id, then cap at 50 shown
    by_call_id = function(msgs) msgs[order(as.integer(names(msgs)))]
    shown = utils::head(c(by_call_id(cond_msgs$errors),
                          by_call_id(cond_msgs$warnings)), 50)
    detail = paste(unlist(shown), collapse="\n")

    if (n_errors > 0) {
        msg = sprintf("%i/%i jobs failed (%i warnings)", n_errors, at, n_warnings)
        if (fail_on_error)
            stop(msg, ". Stopping.\n", detail, call.=FALSE)
        warning(msg, "\n", detail, immediate.=TRUE, call.=FALSE)
    } else if (n_warnings > 0) {
        warning(sprintf("%i warnings occurred in processing\n", n_warnings),
                detail, immediate.=TRUE, call.=FALSE)
    }
    unname(result)
}
#' Wraps an error in a condition object
#'
#' Builds a condition of class "worker_error" carrying the most recent error
#' message (from geterrmessage()) together with the supplied call.
#'
#' @param call The call to attach to the condition
#' @return A condition with classes "worker_error" and "condition"
#' @keywords internal
wrap_error = function(call) {
    payload = list(message = geterrmessage(), call = call)
    structure(payload, class = c("worker_error", "condition"))
}
#' Function to process a chunk of calls
#'
#' Each chunk comes encapsulated in a data.frame
#'
#' @param df A data.frame with call IDs as rownames and arguments as columns
#' @param fun The function to call
#' @param const Constant arguments passed to each call
#' @param rettype Return type of function
#' @param common_seed A seed offset common to all function calls
#' @param progress Logical indicating whether to display a progress bar
#' @return A list with `result` (per-call results, named by call id),
#'   `warnings` and `errors` (messages collected per call id)
#' @keywords internal
work_chunk = function(df, fun, const=list(), rettype="list",
                      common_seed=NULL, progress=FALSE) {
    # shared environment collecting warning/error messages keyed by call id
    context = new.env()
    context$warnings = list()
    context$errors = list()
    if (progress) {
        pb = progress::progress_bar$new(total = nrow(df),
                format = "[:bar] :percent eta: :eta")
        pb$tick(0)
    }

    # Wrapper around 'fun': seeds the RNG per call, records warnings/errors
    # under the call id, and turns errors into non-fatal results. The ' id '
    # and ' seed ' argument names contain spaces, presumably so they cannot
    # clash with user-supplied argument columns -- TODO confirm.
    fwrap = function(..., ` id `, ` seed `=NA) {
        chr_id = as.character(` id `)
        if (!is.na(` seed `))
            set.seed(` seed `)

        result = withCallingHandlers(
            withRestarts(
                do.call(fun, c(list(...), const)),
                # target of the error handler below: for list return types
                # the message is wrapped as an 'error' object, otherwise the
                # restart yields NULL (invisible in the unlisted result)
                muffleStop = function(e) if (rettype == "list")
                    structure(e, class="error")
            ),
            warning = function(w) {
                wmsg = paste0("(#", chr_id, ") ", conditionMessage(w))
                context$warnings[[chr_id]] = c(context$warnings[[chr_id]], wmsg)
                invokeRestart("muffleWarning")
            },
            error = function(e) {
                emsg = paste0("(Error #", chr_id, ") ", conditionMessage(e))
                context$errors[[chr_id]] = emsg
                # jump to the muffleStop restart instead of propagating
                invokeRestart("muffleStop", emsg)
            }
        )

        if (progress)
            pb$tick()
        result
    }

    # default call ids: positional index within this chunk
    if (is.null(df$` id `))
        df$` id ` = seq_along(df[[1]])

    # derive a deterministic per-call seed from the call id and common seed
    if (!is.null(common_seed))
        df$` seed ` = as.integer(df$` id ` %% .Machine$integer.max) - common_seed

    # evaluate fwrap row-wise over df; name results by call id
    re = stats::setNames(.mapply(fwrap, df, NULL), df$` id `)
    if (rettype != "list")
        re = unlist(re)
    list(result = re, warnings = context$warnings, errors = context$errors)
}
#' Creates a pool of workers
#'
#' @param n_jobs Number of jobs to submit (0 implies local processing)
#' @param data Set common data (function, constant args, seed)
#' @param reuse Whether workers are reusable or get shut down after call
#' @param template A named list of values to fill in template
#' @param log_worker Write a log file for each worker
#' @param qsys_id Character string of QSys class to use
#' @param verbose Print message about worker startup
#' @param ... Additional arguments passed to the qsys constructor
#' @return An instance of the QSys class
#' @export
workers = function(n_jobs, data=NULL, reuse=TRUE, template=list(), log_worker=FALSE,
                   qsys_id=getOption("clustermq.scheduler", qsys_default),
                   verbose=FALSE, ...) {
    # NOTE(review): 'data' is accepted but not referenced in this body

    # zero jobs means everything is processed in the local session
    if (n_jobs == 0)
        qsys_id = "LOCAL"

    # release stale zmq handles before opening new sockets
    # (see zeromq/libzmq issue #1108)
    gc()

    # look up the scheduler class (SGE, SLURM, ...) in the package namespace
    scheduler = get(toupper(qsys_id), envir=parent.env(environment()))

    pool = Pool$new(reuse=reuse)
    add_args = c(list(qsys=scheduler, n=n_jobs, log_worker=log_worker,
                      verbose=verbose), template, list(...))
    do.call(pool$add, add_args)
    pool
}
-------------------------------------------------------------------------------- 1 | ClusterMQ: send R function calls as cluster jobs 2 | ================================================ 3 | 4 | [![CRAN version](https://www.r-pkg.org/badges/version/clustermq)](https://cran.r-project.org/package=clustermq) 5 | [![Build Status](https://github.com/mschubert/clustermq/actions/workflows/check-standard.yaml/badge.svg)](https://github.com/mschubert/clustermq/actions) 6 | [![CRAN downloads](https://cranlogs.r-pkg.org/badges/clustermq)](https://cran.r-project.org/package=clustermq) 7 | [![DOI](https://zenodo.org/badge/DOI/10.1093/bioinformatics/btz284.svg)](https://doi.org/10.1093/bioinformatics/btz284) 8 | 9 | This package will allow you to send function calls as jobs on a computing 10 | cluster with a minimal interface provided by the `Q` function: 11 | 12 | ```r 13 | # install the package if you haven't done so yet 14 | install.packages('clustermq') 15 | 16 | # load the library and create a simple function 17 | library(clustermq) 18 | fx = function(x) x * 2 19 | 20 | # queue the function call on your scheduler 21 | Q(fx, x=1:3, n_jobs=1) 22 | # list(2,4,6) 23 | ``` 24 | 25 | Computations are done [entirely on the network](https://zeromq.org/) 26 | and without any temporary files on network-mounted storage, so there is no 27 | strain on the file system apart from starting up R once per job. All 28 | calculations are load-balanced, i.e. workers that get their jobs done faster 29 | will also receive more function calls to work on. This is especially useful if 30 | not all calls return after the same time, or one worker has a high load. 
31 | 32 | Browse the vignettes here: 33 | 34 | * [User Guide](https://mschubert.github.io/clustermq/articles/userguide.html) 35 | * [Technical Documentation](https://mschubert.github.io/clustermq/articles/technicaldocs.html) 36 | * [FAQ](https://mschubert.github.io/clustermq/articles/faq.html) 37 | 38 | Schedulers 39 | ---------- 40 | 41 | An HPC cluster's scheduler ensures that computing jobs are distributed to 42 | available worker nodes. Hence, this is what clustermq interfaces with in order 43 | to do computations. 44 | 45 | We currently support the [following 46 | schedulers](https://mschubert.github.io/clustermq/articles/userguide.html#configuration) 47 | (either locally or via SSH): 48 | 49 | * [Multiprocess](https://mschubert.github.io/clustermq/articles/userguide.html#local-parallelization) - 50 | *test your calls and parallelize on cores using* `options(clustermq.scheduler="multiprocess")` 51 | * [SLURM](https://mschubert.github.io/clustermq/articles/userguide.html#slurm) - *should work without setup* 52 | * [LSF](https://mschubert.github.io/clustermq/articles/userguide.html#lsf) - *should work without setup* 53 | * [SGE](https://mschubert.github.io/clustermq/articles/userguide.html#sge) - *may require configuration* 54 | * [PBS](https://mschubert.github.io/clustermq/articles/userguide.html#pbs)/[Torque](https://mschubert.github.io/clustermq/articles/userguide.html#torque) - *needs* `options(clustermq.scheduler="PBS"/"Torque")` 55 | * via [SSH](https://mschubert.github.io/clustermq/articles/userguide.html#ssh-connector) - 56 | *needs* `options(clustermq.scheduler="ssh", clustermq.ssh.host=)` 57 | 58 | > [!TIP] 59 | > Follow the links above to configure your scheduler in case it is not working 60 | > out of the box and check the 61 | > [FAQ](https://mschubert.github.io/clustermq/articles/faq.html) if 62 | > your job submission errors or gets stuck 63 | 64 | Usage 65 | ----- 66 | 67 | The most common arguments for `Q` are: 68 | 69 | * `fun` - The function 
to call. This needs to be self-sufficient (because it 70 | will not have access to the `master` environment) 71 | * `...` - All iterated arguments passed to the function. If there is more than 72 | one, all of them need to be named 73 | * `const` - A named list of non-iterated arguments passed to `fun` 74 | * `export` - A named list of objects to export to the worker environment 75 | 76 | The documentation for other arguments can be accessed by typing `?Q`. Examples 77 | of using `const` and `export` would be: 78 | 79 | ```r 80 | # adding a constant argument 81 | fx = function(x, y) x * 2 + y 82 | Q(fx, x=1:3, const=list(y=10), n_jobs=1) 83 | 84 | # exporting an object to workers 85 | fx = function(x) x * 2 + y 86 | Q(fx, x=1:3, export=list(y=10), n_jobs=1) 87 | ``` 88 | 89 | We can also use `clustermq` as a parallel backend in 90 | [`foreach`](https://cran.r-project.org/package=foreach) or 91 | [`BiocParallel`](https://bioconductor.org/packages/release/bioc/html/BiocParallel.html): 92 | 93 | ```r 94 | # using foreach 95 | library(foreach) 96 | register_dopar_cmq(n_jobs=2, memory=1024) # see `?workers` for arguments 97 | foreach(i=1:3) %dopar% sqrt(i) # this will be executed as jobs 98 | 99 | # using BiocParallel 100 | library(BiocParallel) 101 | register(DoparParam()) # after register_dopar_cmq(...) 102 | bplapply(1:3, sqrt) 103 | ``` 104 | 105 | More examples are available in [the 106 | User Guide](https://mschubert.github.io/clustermq/articles/userguide.html). 107 | 108 | Comparison to other packages 109 | ---------------------------- 110 | 111 | There are some packages that provide high-level parallelization of R function calls 112 | on a computing cluster. We compared `clustermq` to `BatchJobs` and `batchtools` for 113 | processing many short-running jobs, and found it to have approximately 1000x less 114 | overhead cost. 
115 | 116 | ![Overhead comparison](http://image.ibb.co/cRgYNR/plot.png) 117 | 118 | In short, use `clustermq` if you want: 119 | 120 | * a one-line solution to run cluster jobs with minimal setup 121 | * access cluster functions from your local Rstudio via SSH 122 | * fast processing of many function calls without network storage I/O 123 | 124 | Use [`batchtools`](https://github.com/mllg/batchtools) if you: 125 | 126 | * want to use a mature and well-tested package 127 | * don't mind that arguments to every call are written to/read from disc 128 | * don't mind there's no load-balancing at run-time 129 | 130 | Use [Snakemake](https://snakemake.readthedocs.io/en/latest/) or 131 | [`targets`](https://github.com/ropensci/targets) if: 132 | 133 | * you want to design and run a workflow on HPC 134 | 135 | Don't use [`batch`](https://cran.r-project.org/package=batch) 136 | (last updated 2013) or [`BatchJobs`](https://github.com/tudo-r/BatchJobs) 137 | (issues with SQLite on network-mounted storage). 138 | 139 | Contributing 140 | ------------ 141 | 142 | Contributions are welcome and they come in many different forms, shapes, and 143 | sizes. These include, but are not limited to: 144 | 145 | * **Questions**: Ask on the [Github 146 | Discussions](https://github.com/mschubert/clustermq/discussions) board. If 147 | you are an advanced user, please also consider answering questions there. 148 | * **Bug reports**: [File an issue](https://github.com/mschubert/clustermq/issues) 149 | if something does not work as expected. Be sure to 150 | include a self-contained [Minimal Reproducible 151 | Example](https://stackoverflow.com/help/minimal-reproducible-example) and set 152 | `log_worker=TRUE`. 153 | * **Code contributions**: Have a look at the [`good first 154 | issue`](https://github.com/mschubert/clustermq/issues?q=is%3Aissue+is%3Aopen+label%3A%22good+first+issue%22) 155 | tag. 
Please discuss anything more complicated before putting a lot of work 156 | in, I'm happy to help you get started. 157 | 158 | > [!TIP] 159 | > Check the 160 | > [User Guide](https://mschubert.github.io/clustermq/articles/userguide.html) and the 161 | > [FAQ](https://mschubert.github.io/clustermq/articles/faq.html) first, maybe 162 | > your query is already answered there 163 | 164 | Citation 165 | -------- 166 | 167 | This project is part of my academic work, for which I will be evaluated on 168 | citations. If you like me to be able to continue working on research support 169 | tools like `clustermq`, please cite the article when using it for publications: 170 | 171 | > M Schubert. clustermq enables efficient parallelisation of genomic analyses. 172 | > *Bioinformatics* (2019). 173 | > [doi:10.1093/bioinformatics/btz284](https://doi.org/10.1093/bioinformatics/btz284) 174 | -------------------------------------------------------------------------------- /_pkgdown.yml: -------------------------------------------------------------------------------- 1 | template: 2 | bootstrap: 5 3 | light-switch: true 4 | 5 | toc: 6 | depth: 2 # level 3 currently not distinguishable from 2 7 | 8 | navbar: 9 | type: default 10 | left: 11 | - icon: fa-home 12 | href: index.html 13 | - text: User Guide 14 | href: articles/userguide.html 15 | - text: Technical Documentation 16 | href: articles/technicaldocs.html 17 | - text: FAQ 18 | href: articles/faq.html 19 | - text: Reference 20 | href: reference/index.html 21 | - text: Changelog 22 | href: news/index.html 23 | right: 24 | - icon: fa-github fa-lg 25 | href: https://github.com/mschubert/clustermq 26 | 27 | reference: 28 | - title: Overview 29 | contents: 30 | - clustermq 31 | - title: Run calls on HPC 32 | contents: 33 | - Q 34 | - Q_rows 35 | - title: Manage worker pools 36 | contents: 37 | - workers 38 | - title: "`foreach` support" 39 | contents: 40 | - register_dopar_cmq 41 | 
#!/bin/sh
# R package 'cleanup' hook: remove artifacts left behind by ./configure
# (generated Makevars, object files, shared libraries, logs)
rm -f src/Makevars src/*.o src/*.so* src/*.dylib configure.log autobrew
# reset the bundled libzmq build tree if it was ever configured
if [ -f src/libzmq/Makefile ]; then
    make -C src/libzmq distclean
fi
$($CXX -o test_cpp11 src/util/test_cpp11.cpp >/dev/null 2>&1); then 28 | echo "ERROR: compiler needs full c++11 support (gcc>=5, clang>=3.3) -> check 'cc --version'" 29 | exit 1 30 | fi 31 | rm -f test_cpp11 32 | 33 | if [ "$CLUSTERMQ_USE_SYSTEM_LIBZMQ" -eq "0" ]; then 34 | PKG_CFLAGS="-DZMQ_STATIC -DZMQ_BUILD_DRAFT_API=1 -fPIC -Ilibzmq/include -Icppzmq" 35 | PKG_LIBS="libzmq/src/.libs/libzmq.a" 36 | ./src/util/build_libzmq.sh 37 | else 38 | PKG_CFLAGS="$(pkg-config --cflags libzmq) -fPIC -Icppzmq" 39 | PKG_LIBS="$(pkg-config --libs libzmq)" 40 | fi 41 | 42 | sed -e "s|@cflags@|$PKG_CFLAGS|" -e "s|@libs@|$PKG_LIBS|" src/Makevars.in > src/Makevars 43 | -------------------------------------------------------------------------------- /configure.win: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # remove code that causes R-check warnings 4 | ./src/util/patch_libzmq.sh 5 | -------------------------------------------------------------------------------- /inst/CITATION: -------------------------------------------------------------------------------- 1 | bibentry( 2 | bibtype = "Article", 3 | title = "clustermq enables efficient parallelisation of genomic analyses", 4 | author = "Michael Schubert", 5 | journal = "Bioinformatics", 6 | month = "May", 7 | year = "2019", 8 | language = "en", 9 | doi = "10.1093/bioinformatics/btz284", 10 | url = "https://github.com/mschubert/clustermq", 11 | textVersion = paste( 12 | "Schubert, M.", 13 | "clustermq enables efficient parallelisation of genomic analyses.", 14 | "Bioinformatics (2019).", 15 | "doi:10.1093/bioinformatics/btz284" 16 | ), 17 | header = "To cite clustermq in publications use:" 18 | ) 19 | -------------------------------------------------------------------------------- /inst/LSF.tmpl: -------------------------------------------------------------------------------- 1 | #BSUB-J {{ job_name }}[1-{{ n_jobs }}] 2 | #BSUB-n {{ cores | 1 }} 3 | #BSUB-o {{ log_file | 
/dev/null }} 4 | #BSUB-M {{ memory | 4096 }} 5 | #BSUB-R rusage[mem={{ memory | 4096 }}] 6 | #BSUB-R span[ptile=1] 7 | 8 | ulimit -v $(( 1024 * {{ memory | 4096 }} )) 9 | CMQ_AUTH={{ auth }} R --no-save --no-restore -e 'clustermq:::worker("{{ master }}")' 10 | -------------------------------------------------------------------------------- /inst/PBS.tmpl: -------------------------------------------------------------------------------- 1 | #PBS -N {{ job_name }} 2 | #PBS -J 1-{{ n_jobs }} 3 | #PBS -l nodes=1:ppn={{ cores | 1 }}:mem={{ memory | 4096 }}MB 4 | # ppn=P is equivalent to ncpus=P:mpiprocs=P 5 | # "New" syntax: #PBS -l select=1:ncpus={{ cores | 1 }}:mpiprocs={{ cores | 1 }}:mem={{ memory | 4096 }}MB 6 | 7 | #PBS -l walltime={{ walltime | 12:00:00 }} 8 | #PBS -o {{ log_file | /dev/null }} 9 | #PBS -j oe 10 | 11 | # Uncomment if R is an environment module 12 | # module load R 13 | 14 | # Uncomment to set the working directory 15 | # cd {{ workdir | "$PBS_O_WORKDIR" }} 16 | 17 | ulimit -v $(( 1024 * {{ memory | 4096 }} )) 18 | CMQ_AUTH={{ auth }} R --no-save --no-restore -e 'clustermq:::worker("{{ master }}")' 19 | -------------------------------------------------------------------------------- /inst/SGE.tmpl: -------------------------------------------------------------------------------- 1 | #$ -N {{ job_name }} 2 | #$ -j y 3 | #$ -o {{ log_file | /dev/null }} 4 | #$ -cwd 5 | #$ -V 6 | #$ -t 1-{{ n_jobs }} 7 | #$ -pe smp {{ cores | 1 }} 8 | #$ -l m_mem_free={{ memory | 1073741824 }} 9 | 10 | ulimit -v $(( 1024 * {{ memory | 4096 }} )) 11 | CMQ_AUTH={{ auth }} R --no-save --no-restore -e 'clustermq:::worker("{{ master }}")' 12 | -------------------------------------------------------------------------------- /inst/SLURM.tmpl: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | #SBATCH --job-name={{ job_name }} 3 | #SBATCH --output={{ log_file | /dev/null }} 4 | #SBATCH --error={{ log_file | /dev/null }} 5 | 
#SBATCH --mem-per-cpu={{ memory | 4096 }} 6 | #SBATCH --array=1-{{ n_jobs }} 7 | #SBATCH --cpus-per-task={{ cores | 1 }} 8 | 9 | ulimit -v $(( 1024 * {{ memory | 4096 }} )) 10 | CMQ_AUTH={{ auth }} R --no-save --no-restore -e 'clustermq:::worker("{{ master }}")' 11 | -------------------------------------------------------------------------------- /inst/SSH.tmpl: -------------------------------------------------------------------------------- 1 | ssh -o "ExitOnForwardFailure yes" -f 2 | -R {{ ssh.hpc_fwd_port }}:127.0.0.1:{{ local_port }} 3 | {{ ssh_host }} 4 | "R --no-save --no-restore -e 5 | 'clustermq:::ssh_proxy({{ ssh.hpc_fwd_port }})' 6 | > {{ ssh_log | /dev/null }} 2>&1" 7 | -------------------------------------------------------------------------------- /inst/TORQUE.tmpl: -------------------------------------------------------------------------------- 1 | #PBS -N {{ job_name }} 2 | #PBS -l nodes={{ n_jobs }}:ppn={{ cores | 1 }},walltime={{ walltime | 12:00:00 }} 3 | #PBS -o {{ log_file | /dev/null }} 4 | #PBS -q default 5 | #PBS -j oe 6 | 7 | ulimit -v $(( 1024 * {{ memory | 4096 }} )) 8 | CMQ_AUTH={{ auth }} R --no-save --no-restore -e 'clustermq:::worker("{{ master }}")' 9 | -------------------------------------------------------------------------------- /man/LOCAL.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/qsys_local.r 3 | \name{LOCAL} 4 | \alias{LOCAL} 5 | \title{Placeholder for local processing} 6 | \description{ 7 | Mainly so tests pass without setting up a scheduler 8 | } 9 | \keyword{internal} 10 | -------------------------------------------------------------------------------- /man/LSF.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/qsys_lsf.r 3 | \name{LSF} 4 | \alias{LSF} 5 | \title{LSF scheduler 
functions} 6 | \description{ 7 | Derives from QSys to provide LSF-specific functions 8 | } 9 | \keyword{internal} 10 | -------------------------------------------------------------------------------- /man/MULTICORE.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/qsys_multicore.r 3 | \name{MULTICORE} 4 | \alias{MULTICORE} 5 | \title{Process on multiple cores on one machine} 6 | \description{ 7 | Derives from QSys to provide multicore-specific functions 8 | } 9 | \keyword{internal} 10 | -------------------------------------------------------------------------------- /man/MULTIPROCESS.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/qsys_multiprocess.r 3 | \name{MULTIPROCESS} 4 | \alias{MULTIPROCESS} 5 | \title{Process on multiple processes on one machine} 6 | \description{ 7 | Derives from QSys to provide callr-specific functions 8 | } 9 | \keyword{internal} 10 | -------------------------------------------------------------------------------- /man/Pool.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/pool.r 3 | \name{Pool} 4 | \alias{Pool} 5 | \title{Class for basic queuing system functions} 6 | \description{ 7 | Provides the basic functions needed to communicate between machines 8 | This should abstract most functions of rZMQ so the scheduler 9 | implementations can rely on the higher level functionality 10 | } 11 | \keyword{internal} 12 | -------------------------------------------------------------------------------- /man/Q.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/Q.r 3 | 
\name{Q} 4 | \alias{Q} 5 | \title{Queue function calls on the cluster} 6 | \usage{ 7 | Q( 8 | fun, 9 | ..., 10 | const = list(), 11 | export = list(), 12 | pkgs = c(), 13 | seed = 128965, 14 | memory = NULL, 15 | template = list(), 16 | n_jobs = NULL, 17 | job_size = NULL, 18 | rettype = "list", 19 | fail_on_error = TRUE, 20 | workers = NULL, 21 | log_worker = FALSE, 22 | chunk_size = NA, 23 | timeout = Inf, 24 | max_calls_worker = Inf, 25 | verbose = TRUE 26 | ) 27 | } 28 | \arguments{ 29 | \item{fun}{A function to call} 30 | 31 | \item{...}{Objects to be iterated in each function call} 32 | 33 | \item{const}{A list of constant arguments passed to each function call} 34 | 35 | \item{export}{List of objects to be exported to the worker} 36 | 37 | \item{pkgs}{Character vector of packages to load on the worker} 38 | 39 | \item{seed}{A seed to set for each function call} 40 | 41 | \item{memory}{Short for `template=list(memory=value)`} 42 | 43 | \item{template}{A named list of values to fill in the scheduler template} 44 | 45 | \item{n_jobs}{The number of jobs to submit; upper limit of jobs if job_size 46 | is given as well} 47 | 48 | \item{job_size}{The number of function calls per job} 49 | 50 | \item{rettype}{Return type of function call (vector type or 'list')} 51 | 52 | \item{fail_on_error}{If an error occurs on the workers, continue or fail?} 53 | 54 | \item{workers}{Optional instance of QSys representing a worker pool} 55 | 56 | \item{log_worker}{Write a log file for each worker} 57 | 58 | \item{chunk_size}{Number of function calls to chunk together 59 | defaults to 100 chunks per worker or max. 
10 kb per chunk} 60 | 61 | \item{timeout}{Maximum time in seconds to wait for worker (default: Inf)} 62 | 63 | \item{max_calls_worker}{Maximum number of chunks that will be sent to one worker} 64 | 65 | \item{verbose}{Print status messages and progress bar (default: TRUE)} 66 | } 67 | \value{ 68 | A list of whatever `fun` returned 69 | } 70 | \description{ 71 | Queue function calls on the cluster 72 | } 73 | \examples{ 74 | \dontrun{ 75 | # Run a simple multiplication for numbers 1 to 3 on a worker node 76 | fx = function(x) x * 2 77 | Q(fx, x=1:3, n_jobs=1) 78 | # list(2,4,6) 79 | 80 | # Run a mutate() call in dplyr on a worker node 81 | iris \%>\% 82 | mutate(area = Q(`*`, e1=Sepal.Length, e2=Sepal.Width, n_jobs=1)) 83 | # iris with an additional column 'area' 84 | } 85 | } 86 | -------------------------------------------------------------------------------- /man/QSys.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/qsys.r 3 | \name{QSys} 4 | \alias{QSys} 5 | \title{Class for basic queuing system functions} 6 | \description{ 7 | Provides the basic functions needed to communicate between machines 8 | This should abstract most functions of rZMQ so the scheduler 9 | implementations can rely on the higher level functionality 10 | } 11 | \keyword{internal} 12 | -------------------------------------------------------------------------------- /man/Q_rows.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/Q_rows.r 3 | \name{Q_rows} 4 | \alias{Q_rows} 5 | \title{Queue function calls defined by rows in a data.frame} 6 | \usage{ 7 | Q_rows( 8 | df, 9 | fun, 10 | const = list(), 11 | export = list(), 12 | pkgs = c(), 13 | seed = 128965, 14 | memory = NULL, 15 | template = list(), 16 | n_jobs = NULL, 17 | job_size = NULL, 18 | rettype = 
"list", 19 | fail_on_error = TRUE, 20 | workers = NULL, 21 | log_worker = FALSE, 22 | chunk_size = NA, 23 | timeout = Inf, 24 | max_calls_worker = Inf, 25 | verbose = TRUE 26 | ) 27 | } 28 | \arguments{ 29 | \item{df}{data.frame with iterated arguments} 30 | 31 | \item{fun}{A function to call} 32 | 33 | \item{const}{A list of constant arguments passed to each function call} 34 | 35 | \item{export}{List of objects to be exported to the worker} 36 | 37 | \item{pkgs}{Character vector of packages to load on the worker} 38 | 39 | \item{seed}{A seed to set for each function call} 40 | 41 | \item{memory}{Short for `template=list(memory=value)`} 42 | 43 | \item{template}{A named list of values to fill in the scheduler template} 44 | 45 | \item{n_jobs}{The number of jobs to submit; upper limit of jobs if job_size 46 | is given as well} 47 | 48 | \item{job_size}{The number of function calls per job} 49 | 50 | \item{rettype}{Return type of function call (vector type or 'list')} 51 | 52 | \item{fail_on_error}{If an error occurs on the workers, continue or fail?} 53 | 54 | \item{workers}{Optional instance of QSys representing a worker pool} 55 | 56 | \item{log_worker}{Write a log file for each worker} 57 | 58 | \item{chunk_size}{Number of function calls to chunk together 59 | defaults to 100 chunks per worker or max. 
10 kb per chunk} 60 | 61 | \item{timeout}{Maximum time in seconds to wait for worker (default: Inf)} 62 | 63 | \item{max_calls_worker}{Maximum number of chunks that will be sent to one worker} 64 | 65 | \item{verbose}{Print status messages and progress bar (default: TRUE)} 66 | } 67 | \description{ 68 | Queue function calls defined by rows in a data.frame 69 | } 70 | \examples{ 71 | \dontrun{ 72 | # Run a simple multiplication for data frame columns x and y on a worker node 73 | fx = function (x, y) x * y 74 | df = data.frame(x = 5, y = 10) 75 | Q_rows(df, fx, job_size = 1) 76 | # [1] 50 77 | 78 | # Q_rows also matches the names of a data frame with the function arguments 79 | fx = function (x, y) x - y 80 | df = data.frame(y = 5, x = 10) 81 | Q_rows(df, fx, job_size = 1) 82 | # [1] 5 83 | } 84 | } 85 | -------------------------------------------------------------------------------- /man/SGE.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/qsys_sge.r 3 | \name{SGE} 4 | \alias{SGE} 5 | \title{SGE scheduler functions} 6 | \description{ 7 | Derives from QSys to provide SGE-specific functions 8 | } 9 | \keyword{internal} 10 | -------------------------------------------------------------------------------- /man/SLURM.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/qsys_slurm.r 3 | \name{SLURM} 4 | \alias{SLURM} 5 | \title{SLURM scheduler functions} 6 | \description{ 7 | Derives from QSys to provide SLURM-specific functions 8 | } 9 | \keyword{internal} 10 | -------------------------------------------------------------------------------- /man/SSH.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/qsys_ssh.r 3 | 
\name{SSH} 4 | \alias{SSH} 5 | \title{SSH scheduler functions} 6 | \description{ 7 | Derives from QSys to provide SSH-specific functions 8 | } 9 | \keyword{internal} 10 | -------------------------------------------------------------------------------- /man/check_args.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/check_args.r 3 | \name{check_args} 4 | \alias{check_args} 5 | \title{Function to check arguments with which Q() is called} 6 | \usage{ 7 | check_args(fun, iter, const = list()) 8 | } 9 | \arguments{ 10 | \item{fun}{A function to call} 11 | 12 | \item{iter}{Objects to be iterated in each function call} 13 | 14 | \item{const}{A list of constant arguments passed to each function call} 15 | } 16 | \value{ 17 | Processed iterated argument list if 'iter' is a list 18 | } 19 | \description{ 20 | Function to check arguments with which Q() is called 21 | } 22 | \keyword{internal} 23 | -------------------------------------------------------------------------------- /man/chunk.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/chunk.r 3 | \name{chunk} 4 | \alias{chunk} 5 | \title{Subset index chunk for processing} 6 | \usage{ 7 | chunk(x, i) 8 | } 9 | \arguments{ 10 | \item{x}{Index data.frame} 11 | 12 | \item{i}{Rows to subset} 13 | } 14 | \value{ 15 | x[i,] 16 | } 17 | \description{ 18 | 'attr' in `[.data.frame` takes too much CPU time 19 | } 20 | \keyword{internal} 21 | -------------------------------------------------------------------------------- /man/clustermq-package.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/clustermq-package.r 3 | \docType{package} 4 | \name{clustermq-package} 5 | 
\alias{clustermq} 6 | \alias{clustermq-package} 7 | \title{Evaluate Function Calls on HPC Schedulers (LSF, SGE, SLURM)} 8 | \description{ 9 | Provides the \code{Q} function to send arbitrary function calls to 10 | workers on HPC schedulers without relying on network-mounted storage. 11 | Allows using remote schedulers via SSH. 12 | } 13 | \details{ 14 | Under the hood, this will submit a cluster job that connects to the master 15 | via TCP; the master will then send the function and argument chunks to the 16 | worker, and the worker will return the results to the master until everything 17 | is done and you get back your result. 18 | 19 | Computations are done entirely on the network and without any temporary 20 | files on network-mounted storage, so there is no strain on the file system 21 | apart from starting up R once per job. This removes the biggest bottleneck 22 | in distributed computing. 23 | 24 | Using this approach, we can easily do load-balancing, i.e. workers that get 25 | their jobs done faster will also receive more function calls to work on. This 26 | is especially useful if not all calls return after the same time, or one 27 | worker has a high load. 28 | 29 | For more detailed usage instructions, see the documentation of the \code{Q} 30 | function. 
31 | } 32 | \seealso{ 33 | Useful links: 34 | \itemize{ 35 | \item \url{https://mschubert.github.io/clustermq/} 36 | \item Report bugs at \url{https://github.com/mschubert/clustermq/issues} 37 | } 38 | 39 | } 40 | \author{ 41 | \strong{Maintainer}: Michael Schubert \email{mschu.dev@gmail.com} (\href{https://orcid.org/0000-0002-6862-5221}{ORCID}) [copyright holder] 42 | 43 | Authors: 44 | \itemize{ 45 | \item ZeroMQ authors (source files in 'src/libzmq' and 'src/cppzmq') [copyright holder] 46 | } 47 | 48 | } 49 | \keyword{internal} 50 | -------------------------------------------------------------------------------- /man/cmq_foreach.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/foreach.r 3 | \name{cmq_foreach} 4 | \alias{cmq_foreach} 5 | \title{clustermq foreach handler} 6 | \usage{ 7 | cmq_foreach(obj, expr, envir, data) 8 | } 9 | \arguments{ 10 | \item{obj}{Returned from foreach::foreach, containing the following variables: 11 | args : Arguments passed, each as a call 12 | argnames: character vector of arguments passed 13 | evalenv : Environment where to evaluate the arguments 14 | export : character vector of variable names to export to nodes 15 | packages: character vector of required packages 16 | verbose : whether to print status messages [logical] 17 | errorHandling: string of function name to call error with, e.g. "stop"} 18 | 19 | \item{expr}{An R expression in curly braces} 20 | 21 | \item{envir}{Environment where to evaluate the arguments} 22 | 23 | \item{data}{Common arguments passed by register_dopar_cmq(), e.g. 
n_jobs} 24 | } 25 | \description{ 26 | clustermq foreach handler 27 | } 28 | \keyword{internal} 29 | -------------------------------------------------------------------------------- /man/dot-onAttach.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/zzz.r 3 | \name{.onAttach} 4 | \alias{.onAttach} 5 | \title{Report queueing system on package attach if not set} 6 | \usage{ 7 | .onAttach(libname, pkgname) 8 | } 9 | \arguments{ 10 | \item{libname}{default arg for compatibility} 11 | 12 | \item{pkgname}{default arg for compatibility} 13 | } 14 | \description{ 15 | Report queueing system on package attach if not set 16 | } 17 | \keyword{internal} 18 | -------------------------------------------------------------------------------- /man/dot-onLoad.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/zzz.r 3 | \name{.onLoad} 4 | \alias{.onLoad} 5 | \title{Select the queueing system on package loading} 6 | \usage{ 7 | .onLoad(libname, pkgname) 8 | } 9 | \arguments{ 10 | \item{libname}{default arg for compatibility} 11 | 12 | \item{pkgname}{default arg for compatibility} 13 | } 14 | \description{ 15 | This is done by setting the variable 'qsys' in the package environment 16 | to the object that contains the desired queueing system. 
17 | } 18 | \keyword{internal} 19 | -------------------------------------------------------------------------------- /man/fill_template.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/util.r 3 | \name{fill_template} 4 | \alias{fill_template} 5 | \title{Fill a template string with supplied values} 6 | \usage{ 7 | fill_template(template, values, required = c()) 8 | } 9 | \arguments{ 10 | \item{template}{A character string of a submission template} 11 | 12 | \item{values}{A named list of key-value pairs} 13 | 14 | \item{required}{Keys that must be present in the template (default: none)} 15 | } 16 | \value{ 17 | A template where placeholder fields were replaced by values 18 | } 19 | \description{ 20 | Fill a template string with supplied values 21 | } 22 | \keyword{internal} 23 | -------------------------------------------------------------------------------- /man/host.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/util.r 3 | \name{host} 4 | \alias{host} 5 | \title{Construct the ZeroMQ host address} 6 | \usage{ 7 | host( 8 | node = getOption("clustermq.host", Sys.info()["nodename"]), 9 | ports = getOption("clustermq.ports", 6000:9999), 10 | n = 100 11 | ) 12 | } 13 | \arguments{ 14 | \item{node}{Node or device name} 15 | 16 | \item{ports}{Range of ports to consider} 17 | 18 | \item{n}{How many addresses to return} 19 | } 20 | \value{ 21 | The possible addresses as character vector 22 | } 23 | \description{ 24 | Construct the ZeroMQ host address 25 | } 26 | \keyword{internal} 27 | -------------------------------------------------------------------------------- /man/master.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit 
documentation in R/master.r 3 | \name{master} 4 | \alias{master} 5 | \title{Master controlling the workers} 6 | \usage{ 7 | master( 8 | pool, 9 | iter, 10 | rettype = "list", 11 | fail_on_error = TRUE, 12 | chunk_size = NA, 13 | timeout = Inf, 14 | max_calls_worker = Inf, 15 | verbose = TRUE 16 | ) 17 | } 18 | \arguments{ 19 | \item{pool}{Instance of Pool object} 20 | 21 | \item{iter}{Objects to be iterated in each function call} 22 | 23 | \item{rettype}{Return type of function} 24 | 25 | \item{fail_on_error}{If an error occurs on the workers, continue or fail?} 26 | 27 | \item{chunk_size}{Number of function calls to chunk together 28 | defaults to 100 chunks per worker or max. 500 kb per chunk} 29 | 30 | \item{timeout}{Maximum time in seconds to wait for worker (default: Inf)} 31 | 32 | \item{max_calls_worker}{Maximum number of function calls that will be sent to one worker} 33 | 34 | \item{verbose}{Print progress messages} 35 | } 36 | \value{ 37 | A list of whatever `fun` returned 38 | } 39 | \description{ 40 | exchanging messages between the master and workers works the following way: 41 | * we have submitted a job where we don't know when it will start up 42 | * it starts, sends us a message list(id=0) indicating it is ready 43 | * we send it the function definition and common data 44 | * we also send it the first data set to work on 45 | * when we get any id > 0, it is a result that we store 46 | * and send the next data set/index to work on 47 | * when computations are complete, we send id=0 to the worker 48 | * it responds with id=-1 (and usage stats) and shuts down 49 | } 50 | \keyword{internal} 51 | -------------------------------------------------------------------------------- /man/msg_fmt.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/util.r 3 | \name{msg_fmt} 4 | \alias{msg_fmt} 5 | \title{Message format for logging} 6 | \usage{ 7 
| msg_fmt(verbose = TRUE) 8 | } 9 | \description{ 10 | Message format for logging 11 | } 12 | \keyword{internal} 13 | -------------------------------------------------------------------------------- /man/register_dopar_cmq.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/foreach.r 3 | \name{register_dopar_cmq} 4 | \alias{register_dopar_cmq} 5 | \title{Register clustermq as `foreach` parallel handler} 6 | \usage{ 7 | register_dopar_cmq(...) 8 | } 9 | \arguments{ 10 | \item{...}{List of arguments passed to the `Q` function, e.g. n_jobs} 11 | } 12 | \description{ 13 | Register clustermq as `foreach` parallel handler 14 | } 15 | -------------------------------------------------------------------------------- /man/ssh_proxy.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/ssh_proxy.r 3 | \name{ssh_proxy} 4 | \alias{ssh_proxy} 5 | \title{SSH proxy for different schedulers} 6 | \usage{ 7 | ssh_proxy(fwd_port, qsys_id = qsys_default) 8 | } 9 | \arguments{ 10 | \item{fwd_port}{The port of the master address to connect to 11 | (remote end of reverse tunnel)} 12 | 13 | \item{qsys_id}{Character string of QSys class to use} 14 | } 15 | \description{ 16 | Do not call this manually, the SSH qsys will do that 17 | } 18 | \keyword{internal} 19 | -------------------------------------------------------------------------------- /man/summarize_result.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/summarize_result.r 3 | \name{summarize_result} 4 | \alias{summarize_result} 5 | \title{Print a summary of errors and warnings that occurred during processing} 6 | \usage{ 7 | summarize_result( 8 | result, 9 | n_errors, 10 | 
n_warnings, 11 | cond_msgs, 12 | at = length(result), 13 | fail_on_error = TRUE 14 | ) 15 | } 16 | \arguments{ 17 | \item{result}{A list or vector of the processing result} 18 | 19 | \item{n_errors}{How many errors occurred} 20 | 21 | \item{n_warnings}{How many warnings occurred} 22 | 23 | \item{cond_msgs}{Error and warning messages, we display first 50} 24 | 25 | \item{at}{How many calls were processed up to this point} 26 | 27 | \item{fail_on_error}{Stop if error(s) occurred} 28 | } 29 | \description{ 30 | Print a summary of errors and warnings that occurred during processing 31 | } 32 | \keyword{internal} 33 | -------------------------------------------------------------------------------- /man/vec_lookup.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/util.r 3 | \docType{data} 4 | \name{vec_lookup} 5 | \alias{vec_lookup} 6 | \title{Lookup table for return types to vector NAs} 7 | \format{ 8 | An object of class \code{list} of length 9. 
9 | } 10 | \usage{ 11 | vec_lookup 12 | } 13 | \description{ 14 | Lookup table for return types to vector NAs 15 | } 16 | \keyword{internal} 17 | -------------------------------------------------------------------------------- /man/work_chunk.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/work_chunk.r 3 | \name{work_chunk} 4 | \alias{work_chunk} 5 | \title{Function to process a chunk of calls} 6 | \usage{ 7 | work_chunk( 8 | df, 9 | fun, 10 | const = list(), 11 | rettype = "list", 12 | common_seed = NULL, 13 | progress = FALSE 14 | ) 15 | } 16 | \arguments{ 17 | \item{df}{A data.frame with call IDs as rownames and arguments as columns} 18 | 19 | \item{fun}{The function to call} 20 | 21 | \item{const}{Constant arguments passed to each call} 22 | 23 | \item{rettype}{Return type of function} 24 | 25 | \item{common_seed}{A seed offset common to all function calls} 26 | 27 | \item{progress}{Logical indicating whether to display a progress bar} 28 | } 29 | \value{ 30 | A list of call results (or try-error if they failed) 31 | } 32 | \description{ 33 | Each chunk comes encapsulated in a data.frame 34 | } 35 | \keyword{internal} 36 | -------------------------------------------------------------------------------- /man/worker.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/worker.r 3 | \name{worker} 4 | \alias{worker} 5 | \title{R worker submitted as cluster job} 6 | \usage{ 7 | worker(master, ..., verbose = TRUE, context = NULL) 8 | } 9 | \arguments{ 10 | \item{master}{The master address (tcp://ip:port)} 11 | 12 | \item{...}{Catch-all to not break older template values (ignored)} 13 | 14 | \item{verbose}{Whether to print debug messages} 15 | 16 | \item{context}{ZeroMQ context (for internal testing)} 17 | } 18 | \description{ 19 
Do not call this manually, the master will do that 20 | } 21 | \keyword{internal} 22 | -------------------------------------------------------------------------------- /man/workers.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/workers.r 3 | \name{workers} 4 | \alias{workers} 5 | \title{Creates a pool of workers} 6 | \usage{ 7 | workers( 8 | n_jobs, 9 | data = NULL, 10 | reuse = TRUE, 11 | template = list(), 12 | log_worker = FALSE, 13 | qsys_id = getOption("clustermq.scheduler", qsys_default), 14 | verbose = FALSE, 15 | ... 16 | ) 17 | } 18 | \arguments{ 19 | \item{n_jobs}{Number of jobs to submit (0 implies local processing)} 20 | 21 | \item{data}{Set common data (function, constant args, seed)} 22 | 23 | \item{reuse}{Whether workers are reusable or get shut down after call} 24 | 25 | \item{template}{A named list of values to fill in template} 26 | 27 | \item{log_worker}{Write a log file for each worker} 28 | 29 | \item{qsys_id}{Character string of QSys class to use} 30 | 31 | \item{verbose}{Print message about worker startup} 32 | 33 | \item{...}{Additional arguments passed to the qsys constructor} 34 | } 35 | \value{ 36 | An instance of the QSys class 37 | } 38 | \description{ 39 | Creates a pool of workers 40 | } 41 | -------------------------------------------------------------------------------- /man/wrap_error.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/util.r 3 | \name{wrap_error} 4 | \alias{wrap_error} 5 | \title{Wraps an error in a condition object} 6 | \usage{ 7 | wrap_error(call) 8 | } 9 | \description{ 10 | Wraps an error in a condition object 11 | } 12 | \keyword{internal} 13 | -------------------------------------------------------------------------------- /src/CMQMaster.cpp: 
-------------------------------------------------------------------------------- 1 | #include 2 | #include "CMQMaster.h" 3 | 4 | RCPP_MODULE(cmq_master) { 5 | using namespace Rcpp; 6 | class_("CMQMaster") 7 | .constructor() 8 | .method("context", &CMQMaster::context) 9 | .method("listen", &CMQMaster::listen) 10 | .method("close", &CMQMaster::close) 11 | .method("recv", &CMQMaster::recv) 12 | .method("send", &CMQMaster::send) 13 | .method("send_shutdown", &CMQMaster::send_shutdown) 14 | .method("proxy_submit_cmd", &CMQMaster::proxy_submit_cmd) 15 | .method("add_env", &CMQMaster::add_env) 16 | .method("add_pkg", &CMQMaster::add_pkg) 17 | .method("list_env", &CMQMaster::list_env) 18 | .method("add_pending_workers", &CMQMaster::add_pending_workers) 19 | .method("list_workers", &CMQMaster::list_workers) 20 | .method("current", &CMQMaster::current) 21 | .method("workers_running", &CMQMaster::workers_running) 22 | .method("workers_total", &CMQMaster::workers_total) 23 | ; 24 | } 25 | -------------------------------------------------------------------------------- /src/CMQMaster.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include "common.h" 3 | 4 | class CMQMaster { 5 | public: 6 | CMQMaster(): ctx(new zmq::context_t(3)) {} 7 | ~CMQMaster() { close(); } 8 | 9 | SEXP context() const { 10 | Rcpp::XPtr p(ctx, true); 11 | return p; 12 | } 13 | 14 | std::string listen(Rcpp::CharacterVector addrs) { 15 | sock = zmq::socket_t(*ctx, ZMQ_ROUTER); 16 | sock.set(zmq::sockopt::router_mandatory, 1); 17 | #ifdef ZMQ_BUILD_DRAFT_API 18 | sock.set(zmq::sockopt::router_notify, ZMQ_NOTIFY_DISCONNECT); 19 | #endif 20 | 21 | int i; 22 | for (i=0; i(addrs[i]); 24 | try { 25 | sock.bind(addr); 26 | return sock.get(zmq::sockopt::last_endpoint); 27 | } catch(zmq::error_t const &e) { 28 | if ((errno != EADDRINUSE && errno != EINTR) || pending_interrupt()) 29 | Rcpp::stop(std::string("Binding port failed (") + e.what() + ")"); 30 | } 31 | } 
32 | Rcpp::stop("Could not bind port to any address in provided pool"); 33 | } 34 | 35 | bool close(int timeout=1000) { 36 | if (ctx == nullptr) 37 | return is_cleaned_up; 38 | 39 | auto pitems = std::vector(1); 40 | pitems[0].socket = sock; 41 | pitems[0].events = ZMQ_POLLIN; 42 | 43 | auto time_ms = std::chrono::milliseconds(timeout); 44 | auto time_left = time_ms; 45 | auto start = Time::now(); 46 | while (time_left.count() > 0) { 47 | if (std::find_if(peers.begin(), peers.end(), [](const std::pair &w) { // 'const auto &w' is C++14 48 | return w.second.status == wlife_t::active; }) == peers.end()) { 49 | is_cleaned_up = true; 50 | break; 51 | } 52 | 53 | if (peers.find(cur) != peers.end()) { 54 | auto &w = peers[cur]; 55 | if (w.status == wlife_t::active && w.call == R_NilValue) 56 | try { 57 | send_shutdown(); 58 | } catch (...) {} 59 | } 60 | 61 | try { 62 | int rc = zmq::poll(pitems, time_left); 63 | if (pitems[0].revents) { 64 | std::vector msgs; 65 | auto n = recv_multipart(sock, std::back_inserter(msgs)); 66 | register_peer(msgs); 67 | } 68 | } catch (zmq::error_t const &e) { 69 | if (errno != EINTR || pending_interrupt()) 70 | throw; 71 | } catch (...) 
{ 72 | timeout = 0; 73 | break; 74 | } 75 | time_left = time_ms - std::chrono::duration_cast(Time::now() - start); 76 | }; 77 | 78 | env.clear(); 79 | pending_workers = 0; 80 | 81 | if (sock.handle() != nullptr) { 82 | sock.set(zmq::sockopt::linger, timeout); 83 | sock.close(); 84 | } 85 | if (ctx != nullptr) { 86 | ctx->close(); 87 | ctx = nullptr; 88 | } 89 | return is_cleaned_up; 90 | } 91 | 92 | SEXP recv(int timeout=-1) { 93 | int data_offset; 94 | std::vector msgs; 95 | 96 | do { 97 | int w_active = pending_workers; 98 | for (const auto &kv: peers) { 99 | if (kv.second.status == wlife_t::active || kv.second.status == wlife_t::proxy_cmd) 100 | w_active++; 101 | } 102 | if (w_active <= 0) 103 | Rcpp::stop("Trying to receive data without workers"); 104 | 105 | msgs.clear(); 106 | timeout = poll(timeout); 107 | auto n = recv_multipart(sock, std::back_inserter(msgs)); 108 | data_offset = register_peer(msgs); 109 | } while(data_offset >= msgs.size()); 110 | 111 | return msg2r(std::move(msgs[data_offset]), true); 112 | } 113 | 114 | int send(SEXP cmd) { 115 | auto &w = check_current_worker(wlife_t::active); 116 | auto add_to_worker = set_difference(env_names, w.env); 117 | auto mp = init_multipart(w, wlife_t::active); 118 | mp.push_back(r2msg(cmd)); 119 | 120 | if (w.via.empty()) { 121 | for (auto &str : add_to_worker) 122 | multipart_add_obj(mp, str, w.env); 123 | } else { 124 | std::vector proxy_add_env; 125 | auto &via_env = peers[w.via].env; 126 | for (auto &str : add_to_worker) { 127 | w.env.insert(str); 128 | if (via_env.find(str) == via_env.end()) 129 | multipart_add_obj(mp, str, via_env); 130 | else 131 | proxy_add_env.push_back(str); 132 | } 133 | mp.push_back(r2msg(Rcpp::wrap(proxy_add_env))); 134 | } 135 | 136 | w.call = cmd; 137 | w.call_ref = ++call_counter; 138 | mp.send(sock); 139 | return w.call_ref; 140 | } 141 | void send_shutdown() { 142 | auto &w = check_current_worker(wlife_t::active); 143 | auto mp = init_multipart(w, wlife_t::shutdown); 144 | 
w.call = R_NilValue; 145 | w.status = wlife_t::shutdown; 146 | mp.send(sock); 147 | } 148 | 149 | void proxy_submit_cmd(SEXP args, int timeout=10000) { 150 | poll(timeout); 151 | std::vector msgs; 152 | auto n = recv_multipart(sock, std::back_inserter(msgs)); 153 | register_peer(msgs); 154 | // msgs[0] == "proxy" routing id 155 | // msgs[1] == delimiter 156 | // msgs[2] == wlife_t::proxy_cmd 157 | 158 | auto &w = check_current_worker(wlife_t::proxy_cmd); 159 | auto mp = init_multipart(w, wlife_t::proxy_cmd); 160 | mp.push_back(r2msg(args)); 161 | mp.send(sock); 162 | } 163 | 164 | void add_env(std::string name, SEXP obj) { 165 | for (auto &w : peers) 166 | w.second.env.erase(name); 167 | env_names.insert(name); 168 | env[name] = r2msg(R_serialize(obj, R_NilValue)); 169 | } 170 | void add_pkg(Rcpp::CharacterVector pkg) { 171 | add_env("package:" + Rcpp::as(pkg), pkg); 172 | } 173 | Rcpp::DataFrame list_env() const { 174 | std::vector names; 175 | names.reserve(env.size()); 176 | std::vector sizes; 177 | sizes.reserve(env.size()); 178 | for (const auto &kv: env) { 179 | names.push_back(kv.first); 180 | sizes.push_back(kv.second.size()); 181 | } 182 | return Rcpp::DataFrame::create(Rcpp::_["object"] = Rcpp::wrap(names), 183 | Rcpp::_["size"] = Rcpp::wrap(sizes)); 184 | } 185 | 186 | void add_pending_workers(int n) { 187 | pending_workers += n; 188 | } 189 | 190 | Rcpp::List list_workers() const { 191 | std::vector names, status; 192 | std::vector calls; 193 | names.reserve(peers.size()); 194 | status.reserve(peers.size()); 195 | calls.reserve(peers.size()); 196 | Rcpp::List wtime, mem; 197 | std::string cur_z85; 198 | for (const auto &kv: peers) { 199 | if (kv.second.status == wlife_t::proxy_cmd || kv.second.status == wlife_t::error) 200 | continue; 201 | names.push_back(z85_encode_routing_id(kv.first)); 202 | if (kv.first == cur) 203 | cur_z85 = names.back(); 204 | status.push_back(std::string(wlife_t2str(kv.second.status))); 205 | calls.push_back(kv.second.n_calls); 
206 | wtime.push_back(kv.second.time); 207 | mem.push_back(kv.second.mem); 208 | } 209 | return Rcpp::List::create( 210 | Rcpp::_["worker"] = Rcpp::wrap(names), 211 | Rcpp::_["status"] = Rcpp::wrap(status), 212 | Rcpp::_["current"] = cur_z85, 213 | Rcpp::_["calls"] = calls, 214 | Rcpp::_["time"] = wtime, 215 | Rcpp::_["mem"] = mem, 216 | Rcpp::_["pending"] = pending_workers 217 | ); 218 | } 219 | Rcpp::List current() { 220 | if (peers.find(cur) == peers.end()) 221 | return Rcpp::List::create(); 222 | const auto &w = peers[cur]; 223 | return Rcpp::List::create( 224 | Rcpp::_["worker"] = z85_encode_routing_id(cur), 225 | Rcpp::_["status"] = Rcpp::wrap(wlife_t2str(w.status)), 226 | Rcpp::_["call_ref"] = w.call_ref, 227 | Rcpp::_["calls"] = w.n_calls, 228 | Rcpp::_["time"] = w.time, 229 | Rcpp::_["mem"] = w.mem 230 | ); 231 | } 232 | int workers_running() { 233 | return std::count_if(peers.begin(), peers.end(), [](const std::pair &w) { // 'const auto &w' is C++14 234 | return w.second.status == wlife_t::active; }); 235 | } 236 | int workers_total() { 237 | return workers_running() + pending_workers; 238 | } 239 | 240 | private: 241 | struct worker_t { 242 | std::set env; 243 | Rcpp::RObject call {R_NilValue}; 244 | Rcpp::RObject time {R_NilValue}; 245 | Rcpp::RObject mem {R_NilValue}; 246 | wlife_t status; 247 | std::string via; 248 | int n_calls {-1}; 249 | int call_ref {-1}; 250 | }; 251 | 252 | zmq::context_t *ctx {nullptr}; 253 | bool is_cleaned_up {false}; 254 | int pending_workers {0}; 255 | int call_counter {-1}; 256 | zmq::socket_t sock; 257 | std::string cur; 258 | std::unordered_map peers; 259 | std::unordered_map env; 260 | std::set env_names; 261 | 262 | worker_t &check_current_worker(const wlife_t status) { 263 | if (peers.find(cur) == peers.end()) 264 | Rcpp::stop("Trying to send to worker that does not exist"); 265 | auto &w = peers[cur]; 266 | if (w.status != status) 267 | Rcpp::stop("Trying to send to worker with invalid status"); 268 | return w; 269 | 
} 270 | zmq::multipart_t init_multipart(const worker_t &w, const wlife_t status) const { 271 | zmq::multipart_t mp; 272 | if (!w.via.empty()) 273 | mp.push_back(zmq::message_t(w.via)); 274 | mp.push_back(zmq::message_t(cur)); 275 | mp.push_back(zmq::message_t(0)); 276 | mp.push_back(int2msg(status)); 277 | return mp; 278 | } 279 | 280 | void multipart_add_obj(zmq::multipart_t &mp, std::string str, std::set &tracker) { 281 | auto &obj = env[str]; 282 | tracker.insert(str); 283 | mp.push_back(zmq::message_t(str)); 284 | mp.push_back(zmq::message_t(obj.data(), obj.size(), [](void*, void*){})); 285 | } 286 | 287 | int poll(int timeout=-1) { 288 | auto pitems = std::vector(1); 289 | pitems[0].socket = sock; 290 | pitems[0].events = ZMQ_POLLIN; 291 | 292 | auto time_ms = std::chrono::milliseconds(timeout); 293 | auto time_left = time_ms; 294 | auto start = Time::now(); 295 | 296 | int rc = 0; 297 | do { 298 | try { 299 | rc = zmq::poll(pitems, time_left); 300 | } catch (zmq::error_t const &e) { 301 | if (errno != EINTR || pending_interrupt()) 302 | Rcpp::stop(e.what()); 303 | } 304 | 305 | if (timeout != -1) { 306 | auto ms_diff = std::chrono::duration_cast(Time::now() - start); 307 | time_left = time_ms - ms_diff; 308 | timeout = time_left.count(); 309 | if (timeout < 0) { 310 | std::ostringstream err; 311 | err << "Socket timed out after " << ms_diff.count() << " ms\n"; 312 | throw Rcpp::exception(err.str().c_str()); 313 | } 314 | } 315 | } while (rc == 0); 316 | 317 | return timeout; 318 | } 319 | 320 | int register_peer(std::vector &msgs) { 321 | // std::cout << "Received message: "; 322 | // for (int i=0; i ++cur_i) { 342 | w.status = msg2wlife_t(msgs[cur_i]); 343 | w.n_calls++; 344 | } else { 345 | if (w.status == wlife_t::proxy_cmd) { 346 | for (const auto &w: peers) { 347 | if (w.second.via == cur && w.second.status == wlife_t::active) 348 | Rcpp::stop("Proxy disconnect with active worker(s)"); 349 | } 350 | } else if (w.status == wlife_t::shutdown) { 351 | 
w.status = wlife_t::finished; 352 | } else 353 | Rcpp::stop("Unexpected worker disconnect"); 354 | } 355 | 356 | if (peers.size() > prev_size && w.status == wlife_t::active) { 357 | if (--pending_workers < 0) 358 | Rcpp::stop("More workers registered than expected"); 359 | } 360 | 361 | if (msgs.size() > cur_i+2) { 362 | w.time = msg2r(std::move(msgs[++cur_i]), true); 363 | w.mem = msg2r(std::move(msgs[++cur_i]), true); 364 | } 365 | return ++cur_i; 366 | } 367 | }; 368 | -------------------------------------------------------------------------------- /src/CMQProxy.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include "CMQProxy.h" 3 | 4 | RCPP_MODULE(cmq_proxy) { 5 | using namespace Rcpp; 6 | class_("CMQProxy") 7 | .constructor() 8 | .constructor() 9 | .method("listen", &CMQProxy::listen) 10 | .method("connect", &CMQProxy::connect) 11 | .method("proxy_request_cmd", &CMQProxy::proxy_request_cmd) 12 | .method("proxy_receive_cmd", &CMQProxy::proxy_receive_cmd) 13 | .method("add_pending_workers", &CMQProxy::add_pending_workers) 14 | .method("close", &CMQProxy::close) 15 | .method("process_one", &CMQProxy::process_one) 16 | ; 17 | } 18 | -------------------------------------------------------------------------------- /src/CMQProxy.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include "common.h" 3 | #include "CMQMaster.h" 4 | 5 | class CMQProxy { 6 | public: 7 | CMQProxy(): ctx(new zmq::context_t(1)) { 8 | external_context = false; 9 | } 10 | CMQProxy(SEXP ctx_): ctx(Rcpp::as>(ctx_)) {} 11 | ~CMQProxy() { close(); } 12 | 13 | void close(int timeout=1000L) { 14 | if (mon.handle() != nullptr) { 15 | mon.set(zmq::sockopt::linger, 0); 16 | mon.close(); 17 | } 18 | if (to_worker.handle() != nullptr) { 19 | to_worker.set(zmq::sockopt::linger, timeout); 20 | to_worker.close(); 21 | } 22 | if (to_master.handle() != nullptr) { 23 | to_master.set(zmq::sockopt::linger, 
timeout); 24 | to_master.close(); 25 | } 26 | if (!external_context && ctx != nullptr) { 27 | ctx->close(); 28 | delete ctx; 29 | ctx = nullptr; 30 | } 31 | } 32 | 33 | void connect(std::string addr, int timeout=-1) { 34 | to_master = zmq::socket_t(*ctx, ZMQ_DEALER); 35 | to_master.set(zmq::sockopt::connect_timeout, timeout); 36 | to_master.set(zmq::sockopt::routing_id, "proxy"); 37 | 38 | if (zmq_socket_monitor(to_master, "inproc://monitor", ZMQ_EVENT_DISCONNECTED) < 0) 39 | Rcpp::stop("failed to create socket monitor"); 40 | mon = zmq::socket_t(*ctx, ZMQ_PAIR); 41 | mon.connect("inproc://monitor"); 42 | 43 | to_master.connect(addr); 44 | } 45 | 46 | void proxy_request_cmd() { 47 | to_master.send(zmq::message_t(0), zmq::send_flags::sndmore); 48 | to_master.send(int2msg(wlife_t::proxy_cmd), zmq::send_flags::sndmore); 49 | to_master.send(r2msg(proc_time()), zmq::send_flags::sndmore); 50 | to_master.send(r2msg(gc()), zmq::send_flags::none); 51 | } 52 | SEXP proxy_receive_cmd() { 53 | std::vector msgs; 54 | auto n = recv_multipart(to_master, std::back_inserter(msgs)); 55 | auto status = msg2wlife_t(msgs[1]); 56 | return msg2r(std::move(msgs[2]), true); 57 | } 58 | 59 | void add_pending_workers(int n) { 60 | // proxy will always wait 61 | } 62 | 63 | std::string listen(Rcpp::CharacterVector addrs) { 64 | to_worker = zmq::socket_t(*ctx, ZMQ_ROUTER); 65 | to_worker.set(zmq::sockopt::router_mandatory, 1); 66 | #ifdef ZMQ_BUILD_DRAFT_API 67 | to_worker.set(zmq::sockopt::router_notify, ZMQ_NOTIFY_DISCONNECT); 68 | #endif 69 | 70 | int i; 71 | for (i=0; i(addrs[i]); 73 | try { 74 | to_worker.bind(addr); 75 | return to_worker.get(zmq::sockopt::last_endpoint); 76 | } catch(zmq::error_t const &e) { 77 | if (errno != EADDRINUSE) 78 | Rcpp::stop(e.what()); 79 | } 80 | } 81 | Rcpp::stop("Could not bind port to any address in provided pool"); 82 | } 83 | 84 | bool process_one() { 85 | auto pitems = std::vector(3); 86 | pitems[0].socket = to_master; 87 | pitems[0].events = 
ZMQ_POLLIN; 88 | pitems[1].socket = to_worker; 89 | pitems[1].events = ZMQ_POLLIN; 90 | pitems[2].socket = mon; 91 | pitems[2].events = ZMQ_POLLIN; 92 | 93 | auto time_left = std::chrono::milliseconds(-1); 94 | int rc = 0; 95 | do { 96 | try { 97 | rc = zmq::poll(pitems, time_left); 98 | } catch (zmq::error_t const &e) { 99 | if (errno != EINTR || pending_interrupt()) 100 | Rcpp::stop(e.what()); 101 | } 102 | } while (rc == 0); 103 | 104 | // master to worker communication -> add R env objects 105 | // frames: id, delim, status, call, [objs{1..n},] env_add 106 | if (pitems[0].revents > 0) { 107 | std::vector msgs; 108 | auto n = recv_multipart(to_master, std::back_inserter(msgs)); 109 | std::vector add_from_proxy; 110 | if (msgs.size() >= 5) { 111 | add_from_proxy = Rcpp::as>(msg2r(std::move(msgs.back()), true)); 112 | msgs.pop_back(); 113 | } 114 | 115 | zmq::multipart_t mp; 116 | for (int i=0; i= 4) { 119 | auto name = msgs[i++].to_string(); 120 | mp.push_back(zmq::message_t(msgs[i].data(), msgs[i].size())); 121 | env[name] = zmq::message_t(msgs[i].data(), msgs[i].size()); 122 | } 123 | } 124 | 125 | // std::cout << "adding from proxy env: (" << add_from_proxy.size() << ")"; 126 | for (auto &name : add_from_proxy) { 127 | mp.push_back(zmq::message_t(name)); 128 | mp.push_back(zmq::message_t(env[name].data(), env[name].size(), [](void*, void*){})); 129 | } 130 | // std::cout << "\nMESSAGE SIZE to worker: " << mp.size() << "\n\n"; 131 | mp.send(to_worker); 132 | } 133 | 134 | // worker to master communication -> simple forward 135 | if (pitems[1].revents > 0) { 136 | std::vector msgs; 137 | auto n = recv_multipart(to_worker, std::back_inserter(msgs)); 138 | zmq::multipart_t mp; 139 | for (int i=0; i 0) 145 | return false; 146 | 147 | return true; 148 | } 149 | 150 | private: 151 | Rcpp::Function proc_time {"proc.time"}; 152 | Rcpp::Function gc {"gc"}; 153 | bool external_context {true}; 154 | zmq::context_t *ctx {nullptr}; 155 | zmq::socket_t to_master; 156 | 
zmq::socket_t to_worker; 157 | zmq::socket_t mon; 158 | std::unordered_map env; 159 | }; 160 | -------------------------------------------------------------------------------- /src/CMQWorker.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include "CMQWorker.h" 3 | 4 | RCPP_MODULE(cmq_worker) { 5 | using namespace Rcpp; 6 | class_("CMQWorker") 7 | .constructor() 8 | .constructor() 9 | .method("connect", &CMQWorker::connect) 10 | .method("close", &CMQWorker::close) 11 | .method("poll", &CMQWorker::poll) 12 | .method("process_one", &CMQWorker::process_one) 13 | ; 14 | } 15 | -------------------------------------------------------------------------------- /src/CMQWorker.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include "common.h" 3 | 4 | class CMQWorker { 5 | public: 6 | CMQWorker(): ctx(new zmq::context_t(1)) { 7 | external_context = false; 8 | } 9 | CMQWorker(SEXP ctx_): ctx(Rcpp::as>(ctx_)) {} 10 | ~CMQWorker() { close(); } 11 | 12 | void connect(std::string addr, int timeout=5000) { 13 | sock = zmq::socket_t(*ctx, ZMQ_REQ); 14 | // timeout would need ZMQ_RECONNECT_STOP_CONN_REFUSED (draft, no C++ yet) to work 15 | sock.set(zmq::sockopt::connect_timeout, timeout); 16 | sock.set(zmq::sockopt::immediate, 1); 17 | 18 | if (mon.handle() == nullptr) { 19 | if (zmq_socket_monitor(sock, "inproc://monitor", ZMQ_EVENT_DISCONNECTED) < 0) 20 | Rcpp::stop("failed to create socket monitor"); 21 | mon = zmq::socket_t(*ctx, ZMQ_PAIR); 22 | mon.connect("inproc://monitor"); 23 | } 24 | 25 | try { 26 | sock.connect(addr); 27 | check_send_ready(timeout); 28 | sock.send(int2msg(wlife_t::active), zmq::send_flags::sndmore); 29 | sock.send(r2msg(proc_time()), zmq::send_flags::sndmore); 30 | sock.send(r2msg(gc()), zmq::send_flags::sndmore); 31 | sock.send(r2msg(R_NilValue), zmq::send_flags::none); 32 | } catch (zmq::error_t const &e) { 33 | Rcpp::stop(e.what()); 34 | } 35 | } 
36 | 37 | void close() { 38 | if (mon.handle() != nullptr) { 39 | mon.set(zmq::sockopt::linger, 0); 40 | mon.close(); 41 | } 42 | if (sock.handle() != nullptr) { 43 | sock.set(zmq::sockopt::linger, 10000); 44 | sock.close(); 45 | } 46 | if (!external_context && ctx != nullptr) { 47 | ctx->close(); 48 | delete ctx; 49 | ctx = nullptr; 50 | } 51 | } 52 | 53 | void poll() { 54 | auto pitems = std::vector(2); 55 | pitems[0].socket = sock; 56 | pitems[0].events = ZMQ_POLLIN; 57 | pitems[1].socket = mon; 58 | pitems[1].events = ZMQ_POLLIN; 59 | 60 | int total_sock_ev = 0; 61 | do { 62 | try { 63 | zmq::poll(pitems, std::chrono::milliseconds{-1}); 64 | } catch (zmq::error_t const &e) { 65 | if (errno != EINTR || pending_interrupt()) 66 | Rcpp::stop(e.what()); 67 | } 68 | if (pitems[1].revents > 0) 69 | Rcpp::stop("Unexpected peer disconnect"); 70 | total_sock_ev = pitems[0].revents; 71 | } while (total_sock_ev == 0); 72 | } 73 | 74 | bool process_one() { 75 | std::vector msgs; 76 | auto n = recv_multipart(sock, std::back_inserter(msgs)); 77 | 78 | // std::cout << "Received message: "; 79 | // for (int i=0; ito_string(); 91 | if (name.compare(0, 8, "package:") == 0) 92 | load_pkg(name.substr(8, std::string::npos)); 93 | else 94 | env.assign(name, msg2r(std::move(*it), true)); 95 | } 96 | 97 | SEXP cmd, eval, time, mem; 98 | PROTECT(cmd = msg2r(std::move(msgs[1]), true)); 99 | int err = 0; 100 | PROTECT(eval = R_tryEvalSilent(Rcpp::as(cmd)[0], env, &err)); 101 | if (err) { 102 | auto cmq = Rcpp::Environment::namespace_env("clustermq"); 103 | Rcpp::Function wrap_error = cmq["wrap_error"]; 104 | UNPROTECT(1); 105 | PROTECT(eval = wrap_error(cmd)); 106 | } 107 | PROTECT(time = proc_time()); 108 | PROTECT(mem = gc()); 109 | sock.send(int2msg(wlife_t::active), zmq::send_flags::sndmore); 110 | sock.send(r2msg(time), zmq::send_flags::sndmore); 111 | sock.send(r2msg(mem), zmq::send_flags::sndmore); 112 | sock.send(r2msg(eval), zmq::send_flags::none); 113 | UNPROTECT(4); 114 | 
return true; 115 | } 116 | 117 | private: 118 | bool external_context {true}; 119 | zmq::context_t *ctx {nullptr}; 120 | zmq::socket_t sock; 121 | zmq::socket_t mon; 122 | Rcpp::Environment env {1}; 123 | Rcpp::Function load_pkg {"library"}; 124 | Rcpp::Function proc_time {"proc.time"}; 125 | Rcpp::Function gc {"gc"}; 126 | 127 | void check_send_ready(int timeout=5000) { 128 | auto pitems = std::vector(1); 129 | pitems[0].socket = sock; 130 | pitems[0].events = ZMQ_POLLOUT; 131 | 132 | auto time_ms = std::chrono::milliseconds(timeout); 133 | auto time_left = time_ms; 134 | auto start = Time::now(); 135 | 136 | do { 137 | try { 138 | zmq::poll(pitems, time_left); 139 | } catch (zmq::error_t const &e) { 140 | if (errno != EINTR || pending_interrupt()) 141 | Rcpp::stop(e.what()); 142 | } 143 | 144 | auto ms_diff = std::chrono::duration_cast(Time::now() - start); 145 | time_left = time_ms - ms_diff; 146 | if (time_left.count() < 0) { 147 | std::ostringstream err; 148 | err << "Connection failed after " << ms_diff.count() << " ms\n"; 149 | throw Rcpp::exception(err.str().c_str()); 150 | } 151 | } while (pitems[0].revents == 0); 152 | } 153 | }; 154 | -------------------------------------------------------------------------------- /src/Makevars.in: -------------------------------------------------------------------------------- 1 | PKG_CPPFLAGS = @cflags@ 2 | PKG_CFLAGS = @cflags@ 3 | PKG_LIBS = @libs@ 4 | -------------------------------------------------------------------------------- /src/Makevars.win: -------------------------------------------------------------------------------- 1 | PKG_CPPFLAGS = -DZMQ_STATIC -Icppzmq -I"$(R_TOOLS_SOFT)/include" 2 | PKG_LIBS = -L"$(R_TOOLS_SOFT)/lib" -lzmq -lsodium -lpthread -liphlpapi -lws2_32 3 | -------------------------------------------------------------------------------- /src/RcppExports.cpp: -------------------------------------------------------------------------------- 1 | // Generated by using Rcpp::compileAttributes() 
-> do not edit by hand 2 | // Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 3 | 4 | #include 5 | 6 | using namespace Rcpp; 7 | 8 | #ifdef RCPP_USE_GLOBAL_ROSTREAM 9 | Rcpp::Rostream& Rcpp::Rcout = Rcpp::Rcpp_cout_get(); 10 | Rcpp::Rostream& Rcpp::Rcerr = Rcpp::Rcpp_cerr_get(); 11 | #endif 12 | 13 | // has_connectivity 14 | bool has_connectivity(std::string host); 15 | RcppExport SEXP _clustermq_has_connectivity(SEXP hostSEXP) { 16 | BEGIN_RCPP 17 | Rcpp::RObject rcpp_result_gen; 18 | Rcpp::RNGScope rcpp_rngScope_gen; 19 | Rcpp::traits::input_parameter< std::string >::type host(hostSEXP); 20 | rcpp_result_gen = Rcpp::wrap(has_connectivity(host)); 21 | return rcpp_result_gen; 22 | END_RCPP 23 | } 24 | // libzmq_has_draft 25 | bool libzmq_has_draft(); 26 | RcppExport SEXP _clustermq_libzmq_has_draft() { 27 | BEGIN_RCPP 28 | Rcpp::RObject rcpp_result_gen; 29 | Rcpp::RNGScope rcpp_rngScope_gen; 30 | rcpp_result_gen = Rcpp::wrap(libzmq_has_draft()); 31 | return rcpp_result_gen; 32 | END_RCPP 33 | } 34 | 35 | RcppExport SEXP _rcpp_module_boot_cmq_master(); 36 | RcppExport SEXP _rcpp_module_boot_cmq_proxy(); 37 | RcppExport SEXP _rcpp_module_boot_cmq_worker(); 38 | 39 | static const R_CallMethodDef CallEntries[] = { 40 | {"_clustermq_has_connectivity", (DL_FUNC) &_clustermq_has_connectivity, 1}, 41 | {"_clustermq_libzmq_has_draft", (DL_FUNC) &_clustermq_libzmq_has_draft, 0}, 42 | {"_rcpp_module_boot_cmq_master", (DL_FUNC) &_rcpp_module_boot_cmq_master, 0}, 43 | {"_rcpp_module_boot_cmq_proxy", (DL_FUNC) &_rcpp_module_boot_cmq_proxy, 0}, 44 | {"_rcpp_module_boot_cmq_worker", (DL_FUNC) &_rcpp_module_boot_cmq_worker, 0}, 45 | {NULL, NULL, 0} 46 | }; 47 | 48 | RcppExport void R_init_clustermq(DllInfo *dll) { 49 | R_registerRoutines(dll, NULL, CallEntries, NULL, NULL); 50 | R_useDynamicSymbols(dll, FALSE); 51 | } 52 | -------------------------------------------------------------------------------- /src/common.cpp: 
-------------------------------------------------------------------------------- 1 | #include "common.h" 2 | 3 | Rcpp::Function R_serialize("serialize"); 4 | Rcpp::Function R_unserialize("unserialize"); 5 | 6 | const char* wlife_t2str(wlife_t status) { 7 | switch(status) { 8 | case wlife_t::active: return "active"; 9 | case wlife_t::shutdown: return "shutdown"; 10 | case wlife_t::finished: return "finished"; 11 | case wlife_t::error: return "error"; 12 | case wlife_t::proxy_cmd: return "proxy_cmd"; 13 | case wlife_t::proxy_error: return "proxy_error"; 14 | default: Rcpp::stop("Invalid worker status"); 15 | } 16 | } 17 | 18 | void check_interrupt_fn(void *dummy) { 19 | R_CheckUserInterrupt(); 20 | } 21 | 22 | int pending_interrupt() { 23 | return !(R_ToplevelExec(check_interrupt_fn, NULL)); 24 | } 25 | 26 | zmq::message_t int2msg(const int val) { 27 | zmq::message_t msg(sizeof(int)); 28 | memcpy(msg.data(), &val, sizeof(int)); 29 | return msg; 30 | } 31 | 32 | zmq::message_t r2msg(SEXP data) { 33 | if (TYPEOF(data) != RAWSXP) 34 | data = R_serialize(data, R_NilValue); 35 | zmq::message_t msg(Rf_xlength(data)); 36 | memcpy(msg.data(), RAW(data), Rf_xlength(data)); 37 | return msg; 38 | } 39 | 40 | SEXP msg2r(const zmq::message_t &&msg, const bool unserialize) { 41 | SEXP ans = Rf_allocVector(RAWSXP, msg.size()); 42 | memcpy(RAW(ans), msg.data(), msg.size()); 43 | if (unserialize) 44 | return R_unserialize(ans); 45 | else 46 | return ans; 47 | } 48 | 49 | wlife_t msg2wlife_t(const zmq::message_t &msg) { 50 | wlife_t res; 51 | memcpy(&res, msg.data(), msg.size()); 52 | return res; 53 | } 54 | 55 | std::string z85_encode_routing_id(const std::string rid) { 56 | std::string dest(5, 0); 57 | zmq_z85_encode(&dest[0], reinterpret_cast(&rid[1]), 4); 58 | return dest; 59 | } 60 | 61 | std::set set_difference(std::set &set1, std::set &set2) { 62 | std::set diff; 63 | std::set_difference(set1.begin(), set1.end(), set2.begin(), set2.end(), 64 | std::inserter(diff, diff.end())); 
65 | return diff; 66 | } 67 | -------------------------------------------------------------------------------- /src/common.h: -------------------------------------------------------------------------------- 1 | #ifndef _COMMON_H_ 2 | #define _COMMON_H_ 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include "zmq.hpp" 10 | #include "zmq_addon.hpp" 11 | 12 | #if ! ZMQ_VERSION >= ZMQ_MAKE_VERSION(4, 3, 0) || \ 13 | ! CPPZMQ_VERSION >= ZMQ_MAKE_VERSION(4, 10, 0) 14 | #define XSTR(x) STR(x) 15 | #define STR(x) #x 16 | #pragma message "libzmq version is: " XSTR(ZMQ_VERSION_MAJOR) "." \ 17 | XSTR(ZMQ_VERSION_MINOR) "." XSTR(ZMQ_VERSION_PATCH) 18 | #pragma message "cppzmq version is: " XSTR(CPPZMQ_VERSION_MAJOR) "." \ 19 | XSTR(CPPZMQ_VERSION_MINOR) "." XSTR(CPPZMQ_VERSION_PATCH) 20 | #error clustermq needs libzmq>=4.3.0 and cppzmq>=4.10.0 21 | #endif 22 | 23 | enum wlife_t { 24 | active, 25 | shutdown, 26 | finished, 27 | error, 28 | proxy_cmd, 29 | proxy_error 30 | }; 31 | const char* wlife_t2str(wlife_t status); 32 | typedef std::chrono::high_resolution_clock Time; 33 | typedef std::chrono::milliseconds ms; 34 | extern Rcpp::Function R_serialize; 35 | extern Rcpp::Function R_unserialize; 36 | 37 | void check_interrupt_fn(void *dummy); 38 | int pending_interrupt(); 39 | zmq::message_t int2msg(const int val); 40 | zmq::message_t r2msg(SEXP data); 41 | SEXP msg2r(const zmq::message_t &&msg, const bool unserialize); 42 | wlife_t msg2wlife_t(const zmq::message_t &msg); 43 | std::string z85_encode_routing_id(const std::string rid); 44 | std::set set_difference(std::set &set1, std::set &set2); 45 | 46 | #endif // _COMMON_H_ 47 | -------------------------------------------------------------------------------- /src/util.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "zmq.hpp" 4 | 5 | // [[Rcpp::export]] 6 | bool has_connectivity(std::string host) { 7 | bool success = false; 8 
| zmq::context_t ctx; 9 | zmq::socket_t server = zmq::socket_t(ctx, ZMQ_REP); 10 | zmq::socket_t client = zmq::socket_t(ctx, ZMQ_REQ); 11 | 12 | try { 13 | server.bind("tcp://*:*"); 14 | std::string addr = server.get(zmq::sockopt::last_endpoint); 15 | const std::string all_hosts = "0.0.0.0"; 16 | addr.replace(addr.find(all_hosts), all_hosts.size(), host); 17 | 18 | client.connect(addr); 19 | const std::string msg1 = "testing connection"; 20 | client.send(zmq::buffer(msg1), zmq::send_flags::none); 21 | 22 | zmq::message_t msg2; 23 | auto time_ms = std::chrono::milliseconds(200); 24 | auto pitems = std::vector(1); 25 | pitems[0].socket = server; 26 | pitems[0].events = ZMQ_POLLIN; 27 | zmq::poll(pitems, time_ms); 28 | auto n = server.recv(msg2, zmq::recv_flags::dontwait); 29 | auto msg2_s = std::string(reinterpret_cast(msg2.data()), msg2.size()); 30 | 31 | if (msg1 == msg2_s) 32 | success = true; 33 | } catch(zmq::error_t const &e) { 34 | // std::cerr << e.what() << "\n"; 35 | success = false; 36 | } 37 | 38 | client.set(zmq::sockopt::linger, 0); 39 | client.close(); 40 | server.set(zmq::sockopt::linger, 0); 41 | server.close(); 42 | ctx.close(); 43 | 44 | return success; 45 | } 46 | 47 | // [[Rcpp::export]] 48 | bool libzmq_has_draft() { 49 | #ifdef ZMQ_BUILD_DRAFT_API 50 | return true; 51 | #else 52 | return false; 53 | #endif 54 | } 55 | -------------------------------------------------------------------------------- /src/util/build_libzmq.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | cd "$(dirname $0)"/../libzmq 4 | 5 | if [ ! -f Makefile.in ]; then 6 | ./autogen.sh || exit 1 7 | fi 8 | 9 | if [ ! 
#!/bin/sh
# Build a static libzmq (with the draft API enabled) from the bundled
# submodule. Presumably invoked by the package configure script when no
# usable system libzmq is found — TODO confirm against ../configure.
# Idempotent: each step is skipped if its output already exists.

cd "$(dirname $0)"/../libzmq

# a fresh git checkout has no ./configure yet; release tarballs ship Makefile.in
if [ ! -f Makefile.in ]; then
    ./autogen.sh || exit 1
fi

# configure and build only if the static archive is not already present;
# -fPIC is required because the archive gets linked into the R shared object
if [ ! -f src/.libs/libzmq.a ]; then
    CXX="$CXX" CXXFLAGS="$CXXFLAGS -fPIC" CPPFLAGS="$CPPFLAGS" ./configure \
        --enable-drafts \
        --enable-static \
        --disable-shared \
        --disable-maintainer-mode \
        --disable-Werror \
        --disable-libbsd \
        --disable-libunwind \
        --disable-perf \
        --disable-curve \
        --disable-curve-keygen \
        --disable-ws \
        --disable-radix-tree \
        --without-docs
    make || exit 1
fi
-f include/zmq_utils.h.orig ]; then 25 | mv include/zmq_utils.h include/zmq_utils.h.orig 26 | sed '/^#pragma/s|^|//|' include/zmq_utils.h.orig > include/zmq_utils.h 27 | fi 28 | -------------------------------------------------------------------------------- /src/util/test_cpp11.cpp: -------------------------------------------------------------------------------- 1 | #if (!defined(__llvm__) && !defined(__INTEL_COMPILER) && defined(__GNUC__) && __GNUC__ < 5) || \ 2 | (defined(__GLIBCXX__) && __GLIBCXX__ < 20160805) 3 | #error "gcc with no or only partial c++11 support" 4 | #endif 5 | 6 | int main() {} 7 | -------------------------------------------------------------------------------- /src/util/test_libzmq.c: -------------------------------------------------------------------------------- 1 | #include 2 | #if ZMQ_VERSION < ZMQ_MAKE_VERSION(4, 3, 0) 3 | #error clustermq needs libzmq>=4.3.0 4 | #endif 5 | int main() { 6 | #ifndef ZMQ_BUILD_DRAFT_API 7 | return 1; 8 | #endif 9 | } 10 | -------------------------------------------------------------------------------- /tests/bin/bkill: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | -------------------------------------------------------------------------------- /tests/bin/bsub: -------------------------------------------------------------------------------- 1 | fake_scheduler.sh -------------------------------------------------------------------------------- /tests/bin/fake_scheduler.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | OUT=/dev/stderr 3 | echo "starting PID $$" > $OUT 4 | timeout 30 sh < /dev/stdin >> $OUT 2>&1 & 5 | [[ $? 
== 0 ]] && echo "started PID $$" >> $OUT 6 | -------------------------------------------------------------------------------- /tests/bin/qdel: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | -------------------------------------------------------------------------------- /tests/bin/qsub: -------------------------------------------------------------------------------- 1 | fake_scheduler.sh -------------------------------------------------------------------------------- /tests/bin/sbatch: -------------------------------------------------------------------------------- 1 | fake_scheduler.sh -------------------------------------------------------------------------------- /tests/bin/scancel: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | -------------------------------------------------------------------------------- /tests/testthat.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | test_check("clustermq") 3 | -------------------------------------------------------------------------------- /tests/testthat/helper-util.r: -------------------------------------------------------------------------------- 1 | ssh_opts = "-oPasswordAuthentication=no -oChallengeResponseAuthentication=no" 2 | 3 | has_ssh = function(host) { 4 | status = system(paste("ssh", ssh_opts, host, "'exit'"), wait=TRUE, 5 | ignore.stdout=TRUE, ignore.stderr=TRUE) 6 | status == 0 7 | } 8 | 9 | has_ssh_cmq = function(host) { 10 | status = suppressWarnings( 11 | system(paste("ssh", ssh_opts, host, "'R -e \"library(clustermq)\"'"), 12 | wait=TRUE, ignore.stdout=TRUE, ignore.stderr=TRUE)) 13 | status == 0 14 | } 15 | 16 | has_cmq = function(host) { 17 | status = system("R -e 'library(clustermq)'", wait=TRUE, 18 | ignore.stdout=TRUE, ignore.stderr=TRUE) 19 | status == 0 20 | } 21 | 
-------------------------------------------------------------------------------- /tests/testthat/test-0-util.r: -------------------------------------------------------------------------------- 1 | context("util") 2 | 3 | test_that("template filler", { 4 | tmpl = "this is my {{ template }}" 5 | values = list(template = "filled") 6 | 7 | filled = fill_template(tmpl, values) 8 | expect_equal(filled, "this is my filled") 9 | 10 | expect_error(fill_template(tmpl, list(key="unrelated"))) 11 | }) 12 | 13 | test_that("template default values", { 14 | tmpl = "this is my {{ template | default }}" 15 | values = list(template = "filled") 16 | 17 | filled1 = fill_template(tmpl, values) 18 | expect_equal(filled1, "this is my filled") 19 | 20 | filled2 = fill_template(tmpl, list()) 21 | expect_equal(filled2, "this is my default") 22 | }) 23 | 24 | test_that("template required key", { 25 | tmpl = "this is my {{ template }}" 26 | values = list(template = "filled") 27 | 28 | expect_error(fill_template(tmpl, values, required="missing")) 29 | }) 30 | 31 | test_that("template filling works with vectors", { 32 | tmpl = "{{ var1 }} and {{ var2 }}" 33 | values = c(var1=1, var2=2) 34 | 35 | expect_equal(fill_template(tmpl, values), "1 and 2") 36 | }) 37 | 38 | test_that("template numbers are not converted to sci format", { 39 | tmpl = "this is my {{ template }}" 40 | values = list(template = 100000) 41 | 42 | expect_equal(fill_template(tmpl, values), "this is my 100000") 43 | }) 44 | 45 | test_that("no sci format when passing vectors", { 46 | tmpl = "{{ var1 }} and {{ var2 }}" 47 | values = c(var1=1, var2=1e6) 48 | 49 | expect_equal(fill_template(tmpl, values), "1 and 1000000") 50 | }) 51 | 52 | test_that("BiocGenerics changes format dispatch (#337)", { 53 | # see: https://github.com/Bioconductor/BiocGenerics/blob/RELEASE_3_20/R/format.R 54 | setGeneric("format") 55 | format.list = base::format.AsIs 56 | 57 | tmpl = "{{ var1 }} and {{ var2 }}" 58 | values = c(var1=1, var2=100) 59 | 
expect_equal(fill_template(tmpl, values), "1 and 100") 60 | }) 61 | -------------------------------------------------------------------------------- /tests/testthat/test-1-check_args.r: -------------------------------------------------------------------------------- 1 | context("check_args") 2 | 3 | test_that("required args are provided", { 4 | f1 = function(x) x 5 | # x is provided 6 | expect_is(check_args(f1, iter=list(x=1)), "data.frame") 7 | expect_error(check_args(f1, iter=list(y=1))) 8 | 9 | # don't allow empty iter argument 10 | expect_error(check_args(f1, iter=list())) 11 | expect_error(check_args(f1, const=list(x=1))) 12 | }) 13 | 14 | test_that("no superfluous args unless function takes `...`", { 15 | f1 = function(x) x 16 | expect_error(check_args(f1, iter=list(x=1, y=1))) 17 | expect_error(check_args(f1, iter=list(x=1), const=list(y=1))) 18 | 19 | f2 = function(x, ...) x 20 | expect_is(check_args(f2, iter=list(x=1, y=1)), "data.frame") 21 | expect_is(check_args(f2, iter=list(x=1), const=list(y=1)), "data.frame") 22 | }) 23 | 24 | test_that("allow 1 non-optional unnamed arg", { 25 | f1 = function(x) x 26 | f2 = function(x, y=1) x+y 27 | f3 = function(x, y) x+y 28 | 29 | # allow 1 unnamed arg, but not wrong name 30 | expect_is(check_args(f1, iter=list(1)), "data.frame") 31 | expect_is(check_args(f2, iter=list(1)), "data.frame") 32 | expect_error(check_args(f3, iter=list(1))) 33 | }) 34 | -------------------------------------------------------------------------------- /tests/testthat/test-2-worker.r: -------------------------------------------------------------------------------- 1 | context("worker usage") 2 | 3 | test_that("connect to invalid endpoint errors", { 4 | w = methods::new(CMQWorker) 5 | expect_error(w$connect("tcp://localhost:12345", 0L)) 6 | w$close() 7 | }) 8 | 9 | test_that("recv without pending workers errors before timeout", { 10 | m = methods::new(CMQMaster) 11 | addr = m$listen("inproc://endpoint") 12 | expect_error(m$recv(-1L)) 13 | 
m$close(500L) 14 | }) 15 | 16 | test_that("recv timeout works", { 17 | m = methods::new(CMQMaster) 18 | addr = m$listen("inproc://endpoint") 19 | m$add_pending_workers(1L) 20 | expect_error(m$recv(0L)) 21 | m$close(500L) 22 | }) 23 | 24 | test_that("worker evaluation", { 25 | m = methods::new(CMQMaster) 26 | w = methods::new(CMQWorker, m$context()) 27 | addr = m$listen("inproc://endpoint") 28 | m$add_pending_workers(1L) 29 | w$connect(addr, 500L) 30 | 31 | m$recv(500L) 32 | m$send(expression(5 * 2)) 33 | status = w$process_one() 34 | result = m$recv(500L) 35 | 36 | expect_true(status) 37 | expect_equal(result, 10) 38 | 39 | w$close() 40 | m$close(500L) 41 | }) 42 | 43 | test_that("export variable to worker", { 44 | m = methods::new(CMQMaster) 45 | w = methods::new(CMQWorker, m$context()) 46 | addr = m$listen("inproc://endpoint") 47 | m$add_pending_workers(1L) 48 | w$connect(addr, 500L) 49 | 50 | m$add_env("x", 3) 51 | m$recv(500L) 52 | m$send(expression(5 + x)) 53 | status = w$process_one() 54 | result = m$recv(500L) 55 | expect_true(status) 56 | expect_equal(result, 8) 57 | 58 | m$add_env("x", 5) 59 | m$send(expression(5 + x)) 60 | status = w$process_one() 61 | result = m$recv(500L) 62 | expect_true(status) 63 | expect_equal(result, 10) 64 | 65 | w$close() 66 | m$close(500L) 67 | }) 68 | 69 | test_that("load package on worker", { 70 | m = methods::new(CMQMaster) 71 | w = methods::new(CMQWorker, m$context()) 72 | addr = m$listen("inproc://endpoint") 73 | m$add_pending_workers(1L) 74 | w$connect(addr, 500L) 75 | 76 | m$add_pkg("parallel") 77 | 78 | m$recv(500L) 79 | m$send(expression(splitIndices(1, 1)[[1]])) 80 | status = w$process_one() 81 | result = m$recv(500L) 82 | 83 | expect_true(status) 84 | expect_equal(result, 1) 85 | 86 | w$close() 87 | m$close(500L) 88 | }) 89 | 90 | test_that("errors are sent back to master", { 91 | skip("this works interactively but evaluates the error on testthat") 92 | 93 | m = methods::new(CMQMaster) 94 | w = methods::new(CMQWorker, 
m$context()) 95 | addr = m$listen("inproc://endpoint") 96 | m$add_pending_workers(1L) 97 | w$connect(addr, 500L) 98 | 99 | m$recv(500L) 100 | m$send(expression(stop("errmsg"))) 101 | status = w$process_one() 102 | result = m$recv(500L) 103 | 104 | expect_true(status) 105 | expect_true(inherits(result, c("condition", "worker_error"))) 106 | 107 | w$close() 108 | m$close(500L) 109 | }) 110 | 111 | test_that("worker R API", { 112 | skip_on_os("windows") 113 | skip_if_not(has_connectivity("127.0.0.1")) # -> this or inproc w/ passing context 114 | 115 | m = methods::new(CMQMaster) 116 | addr = m$listen("tcp://127.0.0.1:*") 117 | m$add_pending_workers(1L) 118 | # addr = m$listen("inproc://endpoint") # mailbox.cpp assertion error 119 | 120 | p = parallel::mcparallel(worker(addr)) 121 | expect_null(m$recv(5000L)) 122 | m$send(expression(5 + 1)) 123 | res = m$recv(500L) 124 | expect_equal(res[[1]], 6) 125 | 126 | m$send_shutdown() 127 | pc = parallel::mccollect(p, wait=TRUE, timeout=0.5) 128 | expect_equal(pc[[1]], NULL) 129 | m$close(500L) 130 | }) 131 | 132 | test_that("communication with two workers", { 133 | skip_on_os("windows") 134 | skip_if_not(has_connectivity("127.0.0.1")) 135 | 136 | m = methods::new(CMQMaster) 137 | addr = m$listen("tcp://127.0.0.1:*") 138 | m$add_pending_workers(2L) 139 | w1 = parallel::mcparallel(worker(addr)) 140 | w2 = parallel::mcparallel(worker(addr)) 141 | 142 | expect_null(m$recv(5000L)) # worker 1 up 143 | m$send(expression({ Sys.sleep(0.5); 5 + 2 })) 144 | expect_null(m$recv(500L)) # worker 2 up 145 | m$send(expression({ Sys.sleep(0.5); 3 + 1 })) 146 | r1 = m$recv(1000L) 147 | m$send_shutdown() 148 | r2 = m$recv(1000L) 149 | m$send_shutdown() 150 | expect_equal(sort(c(r1, r2)), c(4,7)) 151 | 152 | coll1 = parallel::mccollect(w1, wait=TRUE, timeout=0.5) 153 | expect_equal(names(coll1), as.character(w1$pid)) 154 | coll2 = parallel::mccollect(w2, wait=TRUE, timeout=0.5) 155 | expect_equal(names(coll2), as.character(w2$pid)) 156 | 157 | 
m$close(500L) 158 | }) 159 | -------------------------------------------------------------------------------- /tests/testthat/test-3-work_chunk.r: -------------------------------------------------------------------------------- 1 | context("work_chunk") 2 | 3 | df = structure(row.names=c(NA, -3), class="data.frame", .Data=list( 4 | a = 1:3, 5 | b = as.list(letters[1:3]), 6 | c = setNames(as.list(3:1), letters[1:3]) 7 | )) 8 | 9 | test_that("data types and arg names", { 10 | fx = function(c, a, b) a + c 11 | expect_equal(work_chunk(df, fx)$result, 12 | setNames(as.list(rep(4,3)), rownames(df))) 13 | 14 | expect_equal(work_chunk(df, fx, rettype="numeric")$result, 15 | setNames(rep(4,3), rownames(df))) 16 | }) 17 | 18 | test_that("check call classes", { 19 | df2 = df 20 | df2$a = list(matrix(1:4, nrow=2)) 21 | fx = function(...) sapply(list(...), class) 22 | 23 | re = sapply(colnames(df2), function(i) class(df2[[1,i]])) 24 | expect_equal(work_chunk(df2, fx)$result, setNames(rep(list(re), 3), c(1:3))) 25 | }) 26 | 27 | test_that("do not unlist matrix in data.frame", { 28 | elm = structure(1:4, .Dim = c(2,2), .Dimnames=list(c("r1","r2"), c("c1","c2"))) 29 | df2 = structure(list(expr = structure(list(expr = elm)))) 30 | 31 | fx = function(...) list(...) 32 | expect_equal(work_chunk(df2, fx)$result$'1', list(expr=elm)) 33 | }) 34 | 35 | test_that("warning and error handling", { 36 | fx = function(a, ...) 
{ 37 | if (a %% 3 == 0) 38 | warning("warning") 39 | if (a %% 2 == 0) 40 | stop("error") 41 | a 42 | } 43 | 44 | re = work_chunk(data.frame(a=1:6), fx) 45 | expect_equal(sapply(re$result, class) == "error", 46 | setNames(rep(c(FALSE,TRUE), 3), 1:6)) 47 | expect_equal(c(1,3,5), unname(unlist(re$result[c(1,3,5)]))) 48 | expect_equal(c(1,3,5), as.integer(names(re$result[c(1,3,5)]))) 49 | expect_equal(length(re$warnings), 2) 50 | expect_true(grepl("3", re$warnings[[1]])) 51 | expect_true(grepl("warning", re$warnings[[1]])) 52 | expect_true(grepl("6", re$warnings[[2]])) 53 | expect_true(grepl("warning", re$warnings[[2]])) 54 | }) 55 | 56 | test_that("call can have multiple warnings", { 57 | fx = function(a) { 58 | if (a == 1) { 59 | warning("warning 1") 60 | warning("warning 2") 61 | } 62 | } 63 | re = work_chunk(data.frame(a=1:2), fx) 64 | expect_equal(length(re$warnings[['1']]), 2) 65 | }) 66 | 67 | test_that("const args", { 68 | fx = function(a, ..., x=23) a + x 69 | 70 | re = work_chunk(df, fx, const=list(x=5))$result 71 | expect_equal(re, setNames(as.list(df$a + 5), 1:3)) 72 | }) 73 | 74 | test_that("seed reproducibility", { 75 | fx = function(a, ...) 
sample(1:1000, 1) 76 | 77 | # seed should be set by common + df row name 78 | expect_equal(work_chunk(df[1:2,], fx, common_seed=123)$result$'2', 79 | work_chunk(df[2:3,], fx, common_seed=123)$result$'2') 80 | }) 81 | 82 | test_that("env separation", { 83 | seed = 123 84 | fx = function(x, common_seed=seed) { 85 | fun = function(x) stop("overwrite function") 86 | df = data.frame() 87 | common_seed 88 | } 89 | df2 = data.frame(x=1:5) 90 | expect_equal(work_chunk(df2, fx)$result, setNames(rep(list(seed), 5), 1:5)) 91 | }) 92 | -------------------------------------------------------------------------------- /tests/testthat/test-4-pool.r: -------------------------------------------------------------------------------- 1 | context("pool") 2 | 3 | skip_if_not(has_connectivity("127.0.0.1")) 4 | 5 | test_that("starting and stopping multicore", { 6 | skip_on_os("windows") 7 | 8 | w = workers(1, qsys_id="multicore") 9 | expect_equal(w$workers_total, 1) 10 | expect_equal(w$workers_running, 0) 11 | expect_null(w$recv(5000L)) 12 | expect_equal(w$workers_running, 1) 13 | w$send(3 + 4) 14 | expect_equal(w$workers_running, 1) 15 | expect_equal(w$recv(1000L), 7) 16 | expect_equal(w$workers_running, 1) 17 | w$send_shutdown() 18 | expect_equal(w$workers_running, 0) 19 | expect_equal(w$workers_total, 0) 20 | expect_error(w$send(1)) 21 | expect_error(w$recv(1000L)) 22 | w$cleanup() 23 | expect_equal(w$workers_running, 0) 24 | expect_equal(w$workers_total, 0) 25 | expect_error(w$send(2)) 26 | expect_error(w$recv(1000L)) 27 | expect_equal(w$workers_running, 0) 28 | expect_equal(w$workers_total, 0) 29 | }) 30 | 31 | test_that("pending workers area cleaned up properly", { 32 | skip_on_os("windows") 33 | w = workers(1, qsys_id="multicore") 34 | w$cleanup() 35 | expect_equal(w$workers_running, 0) 36 | expect_equal(w$workers_total, 0) 37 | }) 38 | 39 | test_that("calculations are really done on the worker", { 40 | skip_on_os("windows") 41 | x = 1 42 | y = 2 43 | w = workers(1, 
qsys_id="multicore") 44 | expect_null(w$recv(5000L)) 45 | w$env(y = 3) 46 | w$send(x + y, x=4) 47 | expect_equal(w$recv(1000L), 7) 48 | w$send_shutdown() 49 | w$cleanup() 50 | }) 51 | 52 | test_that("call references are matched properly", { 53 | skip_on_os("windows") 54 | skip_on_cran() 55 | 56 | w = workers(2, qsys_id="multicore") 57 | expect_null(w$recv(5000L)) 58 | 59 | r1 = w$send({Sys.sleep(1); 1}) 60 | expect_null(w$recv(1000L)) 61 | r2 = w$send(2) 62 | expect_equal(w$recv(500L), 2) 63 | expect_equal(w$current()$call_ref, r2) 64 | w$send_shutdown() 65 | expect_equal(w$recv(2000L), 1) 66 | expect_equal(w$current()$call_ref, r1) 67 | w$cleanup() 68 | }) 69 | 70 | test_that("multiprocess", { 71 | skip("https://github.com/r-lib/processx/issues/236") 72 | 73 | w = workers(1, qsys_id="multiprocess") 74 | expect_null(w$recv()) 75 | w$send(3 + 5) 76 | expect_equal(w$recv(), 8) 77 | w$send_shutdown() 78 | w$cleanup() 79 | }) 80 | 81 | test_that("work_chunk on multiprocess", { 82 | skip("https://github.com/r-lib/processx/issues/236") 83 | 84 | w = workers(1, qsys_id="multiprocess") 85 | expect_null(w$recv()) 86 | w$send(clustermq:::work_chunk(chunk, `+`), chunk=list(a=1:3, b=4:6)) 87 | res = w$recv() 88 | expect_equal(res$result, list(`1`=5, `2`=7, `3`=9)) 89 | expect_equal(res$warnings, list()) 90 | expect_equal(res$errors, list()) 91 | w$send_shutdown() 92 | w$cleanup() 93 | }) 94 | 95 | test_that("worker creation passes template filling values", { 96 | TMPL_FILLER <<- R6::R6Class("TMPL_FILLER", 97 | inherit = QSys, 98 | public = list( 99 | initialize = function(addr, n_jobs, master, ...) { 100 | super$initialize(addr=addr, master=master, template="LSF") 101 | self$filled = private$fill_options(...) 
102 | }, 103 | filled = list() 104 | ) 105 | ) 106 | old_defaults = getOption("clustermq.defaults") 107 | on.exit(options(clustermq.defaults = old_defaults)) 108 | options(clustermq.defaults = list(cores="defaults_test", memory="invalid")) 109 | 110 | w = workers(1, qsys_id="tmpl_filler", template=list(memory="test")) 111 | rm(TMPL_FILLER, envir=.GlobalEnv) 112 | 113 | expect_equal(w$workers$filled$memory, "test") 114 | expect_equal(w$workers$filled$cores, "defaults_test") 115 | }) 116 | -------------------------------------------------------------------------------- /tests/testthat/test-5-queue.r: -------------------------------------------------------------------------------- 1 | context("queue") 2 | 3 | skip_if_not(has_connectivity("127.0.0.1")) 4 | 5 | test_that("control flow", { 6 | skip_on_os("windows") 7 | fx = function(x) x*2 8 | w = workers(n_jobs=1, qsys_id="multicore", reuse=FALSE) 9 | r = Q(fx, x=1:3, workers=w, timeout=10L) 10 | expect_equal(r, as.list(1:3*2)) 11 | }) 12 | 13 | test_that("control flow with automatic workers", { 14 | skip_on_os("windows") 15 | 16 | old_sched = getOption("clustermq.scheduler") 17 | on.exit(options(clustermq.scheduler = old_sched)) 18 | options(clustermq.scheduler = "multicore") 19 | 20 | fx = function(x) x*2 21 | r = Q(fx, x=1:3, n_jobs=1, timeout=10L) 22 | expect_equal(r, as.list(1:3*2)) 23 | }) 24 | 25 | test_that("common data", { 26 | skip_on_os("windows") 27 | fx = function(x, y) x*2 + y 28 | w = workers(n_jobs=1, qsys_id="multicore", reuse=FALSE) 29 | r = Q(fx, x=1:3, const=list(y=10), workers=w, timeout=10L) 30 | expect_equal(r, as.list(1:3*2+10)) 31 | }) 32 | 33 | test_that("export", { 34 | skip_on_os("windows") 35 | fx = function(x) x*2 + z 36 | w = workers(n_jobs=1, qsys_id="multicore", reuse=FALSE) 37 | r = Q(fx, x=1:3, export=list(z=20), workers=w, timeout=10L) 38 | expect_equal(r, as.list(1:3*2+20)) 39 | }) 40 | 41 | test_that("load package on worker", { 42 | skip_on_os("windows") 43 | fx = function(x) 
splitIndices(1,1) 44 | x = "a string" 45 | w = workers(n_jobs=1, qsys_id="multicore", reuse=FALSE) 46 | r = Q(fx, x=x, pkgs="parallel", workers=w, rettype="character", timeout=10L) 47 | expect_equal(r, "1") 48 | }) 49 | 50 | test_that("seed reproducibility", { 51 | skip_on_os("windows") 52 | fx = function(x) sample(1:100, 1) 53 | w1 = workers(n_jobs=1, qsys_id="multicore", reuse=FALSE) 54 | w2 = workers(n_jobs=1, qsys_id="multicore", reuse=FALSE) 55 | r1 = Q(fx, x=1:3, workers=w1, timeout=10L) 56 | r2 = Q(fx, x=1:3, workers=w2, timeout=10L) 57 | expect_equal(r1, r2) 58 | }) 59 | 60 | test_that("master does not exit loop prematurely", { 61 | skip_on_os("windows") 62 | fx = function(x) { 63 | Sys.sleep(0.5) 64 | x*2 65 | } 66 | w = workers(n_jobs=2, qsys_id="multicore", reuse=FALSE) 67 | r = Q(fx, x=1:3, workers=w, timeout=10L) 68 | expect_equal(r, as.list(1:3*2)) 69 | }) 70 | 71 | test_that("rettype is respected", { 72 | skip_on_os("windows") 73 | fx = function(x) x*2 74 | w = workers(n_jobs=1, qsys_id="multicore", reuse=FALSE) 75 | r = Q(fx, x=1:3, rettype="numeric", workers=w, timeout=10L) 76 | expect_equal(r, 1:3*2) 77 | }) 78 | 79 | test_that("worker timeout throws error", { 80 | skip_on_os("windows") 81 | w = workers(n_jobs=1, qsys_id="multicore", reuse=FALSE) 82 | expect_error(expect_warning( 83 | Q(Sys.sleep, 3, rettype="numeric", workers=w, timeout=1L))) 84 | }) 85 | 86 | test_that("Q with expired workers throws error quickly", { 87 | skip_on_cran() 88 | skip_on_os("windows") 89 | 90 | w = workers(n_jobs=1, qsys_id="multicore", reuse=FALSE) 91 | w$cleanup() 92 | 93 | times = system.time({ 94 | expect_error(Q(identity, x=1:3, rettype="numeric", workers=w, timeout=10L)) 95 | }) 96 | expect_true(times[["elapsed"]] < 5) 97 | }) 98 | 99 | test_that("shutdown monitor does not fire on clean disconnects", { 100 | skip_on_os("windows") 101 | skip_if_not(libzmq_has_draft()) 102 | 103 | w = workers(n_jobs=2, qsys_id="multicore", reuse=FALSE) 104 | res = Q(Sys.sleep, 
time=c(0,1), workers=w, timeout=10L) 105 | expect_equal(res, list(NULL, NULL)) 106 | }) 107 | 108 | test_that("max_calls_worker is respected", { 109 | skip_on_cran() 110 | skip_on_os("windows") 111 | 112 | fx = function(x) { Sys.sleep(x==1); Sys.getpid() } 113 | 114 | w = workers(n_jobs=2, qsys_id="multicore", reuse=FALSE) 115 | res = table(unlist(Q(fx, x=1:4, workers=w))) 116 | expect_true(setequal(res, c(1,3))) 117 | 118 | w = workers(n_jobs=2, qsys_id="multicore", reuse=FALSE) 119 | res = table(unlist(Q(fx, x=1:4, workers=w, max_calls_worker=2))) 120 | expect_true(setequal(res, 2)) 121 | }) 122 | -------------------------------------------------------------------------------- /tests/testthat/test-6-queue_impl.r: -------------------------------------------------------------------------------- 1 | context("qsys implementations") 2 | 3 | avail = Sys.which(c("bsub", "qsub", "sbatch", "fake_scheduler.sh")) 4 | avail = as.list(nchar(avail) != 0) 5 | fx = function(x) x*2 6 | 7 | test_that("local, explicit", { 8 | w = workers(n_jobs=4, qsys_id="local") 9 | r = Q(fx, x=1:3, workers=w, timeout=10L) 10 | success = w$cleanup() 11 | expect_equal(r, as.list(1:3*2)) 12 | expect_true(success) 13 | }) 14 | 15 | test_that("local, n_jobs=0", { 16 | fx = function(x) x*2 17 | r = Q(fx, x=1:3, n_jobs=0, timeout=10L) 18 | expect_equal(r, as.list(1:3*2)) 19 | }) 20 | 21 | test_that("qsys_multicore", { 22 | skip_on_os("windows") 23 | w = workers(n_jobs=4, qsys_id="multicore", reuse=FALSE) 24 | r = Q(fx, x=1:3, workers=w, timeout=10L) 25 | expect_equal(r, as.list(1:3*2)) 26 | }) 27 | 28 | test_that("qsys_multicore with reuse=TRUE", { 29 | skip_on_os("windows") 30 | w = workers(n_jobs=4, qsys_id="multicore", reuse=TRUE) 31 | r = Q(fx, x=1:3, workers=w, timeout=10L) 32 | success = w$cleanup() 33 | expect_equal(r, as.list(1:3*2)) 34 | expect_true(success) 35 | }) 36 | 37 | test_that("qsys_multiprocess (callr)", { 38 | skip("https://github.com/r-lib/processx/issues/236") 39 | 40 | w = 
workers(n_jobs=2, qsys_id="multiprocess", reuse=TRUE) 41 | r = Q(fx, x=1:3, workers=w, timeout=10L) 42 | success = w$cleanup() 43 | expect_equal(r, as.list(1:3*2)) 44 | expect_equal(success, TRUE) 45 | }) 46 | 47 | test_that("qsys_lsf", { 48 | skip_on_cran() 49 | skip_if_not(with(avail, bsub)) 50 | skip_if_not(has_cmq()) 51 | skip_if_not(has_connectivity(Sys.info()["nodename"])) 52 | skip_on_os("windows") 53 | w = workers(n_jobs=1, qsys_id="lsf", reuse=FALSE) 54 | r = Q(fx, x=1:3, workers=w, timeout=10L) 55 | expect_equal(r, as.list(1:3*2)) 56 | }) 57 | 58 | test_that("qsys_sge", { 59 | skip_on_cran() 60 | skip_if_not(with(avail, qsub)) 61 | skip_if_not(has_cmq()) 62 | skip_if_not(has_connectivity(Sys.info()["nodename"])) 63 | skip_on_os("windows") 64 | w = workers(n_jobs=1, qsys_id="sge", reuse=FALSE) 65 | r = Q(fx, x=1:3, workers=w, timeout=10L) 66 | expect_equal(r, as.list(1:3*2)) 67 | }) 68 | 69 | test_that("qsys_slurm", { 70 | skip_on_cran() 71 | skip_if_not(with(avail, sbatch)) 72 | skip_if_not(has_cmq()) 73 | skip_if_not(has_connectivity(Sys.info()["nodename"])) 74 | skip_on_os("windows") 75 | w = workers(n_jobs=1, qsys_id="slurm", reuse=FALSE) 76 | r = Q(fx, x=1:3, workers=w, timeout=10L) 77 | expect_equal(r, as.list(1:3*2)) 78 | }) 79 | -------------------------------------------------------------------------------- /tests/testthat/test-7-ssh_proxy.r: -------------------------------------------------------------------------------- 1 | context("ssh proxy") 2 | 3 | has_localhost = has_connectivity("127.0.0.1") 4 | 5 | # in the following 2 tests, passing the context is deactivated because running 6 | # the first test twice leads to a segfault; not sure why, fix this eventually 7 | test_that("simple forwarding works", { 8 | skip_if_not(has_localhost) 9 | 10 | m = methods::new(CMQMaster) 11 | p = methods::new(CMQProxy)#, m$context()) 12 | w = methods::new(CMQWorker)#, m$context()) 13 | addr1 = m$listen("tcp://127.0.0.1:*")#"inproc://master") 14 | addr2 = 
p$listen("tcp://127.0.0.1:*")#"inproc://proxy") 15 | m$add_pending_workers(1L) 16 | p$connect(addr1, 500L) 17 | w$connect(addr2, 500L) 18 | expect_true(p$process_one()) 19 | expect_null(m$recv(500L)) # worker up 20 | m$send(5 + 2) 21 | expect_true(p$process_one()) 22 | expect_true(w$process_one()) 23 | expect_true(p$process_one()) 24 | result = m$recv(500L) 25 | expect_equal(result, 7) 26 | 27 | w$close() 28 | p$close(0L) 29 | m$close(0L) 30 | }) 31 | 32 | test_that("proxy communication yields submit args", { 33 | skip_if_not(has_localhost) 34 | skip_on_cran() 35 | 36 | m = methods::new(CMQMaster) 37 | p = methods::new(CMQProxy)#, m$context()) 38 | addr1 = m$listen("tcp://127.0.0.1:*")#"inproc://master") 39 | addr2 = p$listen("tcp://127.0.0.1:*")#"inproc://proxy") 40 | 41 | # direct connection, no ssh forward here 42 | p$connect(addr1, 500L) 43 | p$proxy_request_cmd() 44 | m$proxy_submit_cmd(list(n_jobs=1), 500L) 45 | args = p$proxy_receive_cmd() 46 | 47 | expect_true(inherits(args, "list")) 48 | expect_true("n_jobs" %in% names(args)) 49 | 50 | p$close(0L) 51 | m$close(0L) 52 | }) 53 | 54 | test_that("using the proxy without pool and forward", { 55 | skip_on_cran() 56 | skip_on_os("windows") 57 | skip_if_not(has_localhost) 58 | skip_if(toupper(getOption("clustermq.scheduler", qsys_default)) != "MULTICORE", 59 | message="options(clustermq.scheduler') must be 'MULTICORE'") 60 | 61 | m = methods::new(CMQMaster) 62 | addr = m$listen("tcp://127.0.0.1:*") 63 | p = parallel::mcparallel(ssh_proxy(sub(".*:", "", addr))) 64 | 65 | m$proxy_submit_cmd(list(n_jobs=1), 10000L) 66 | m$add_pending_workers(1L) 67 | expect_null(m$recv(2000L)) # worker 1 up 68 | m$send(5 + 2) 69 | expect_equal(m$recv(500L), 7) # collect results 70 | 71 | m$send_shutdown() 72 | m$close(500L) 73 | 74 | pr = parallel::mccollect(p, wait=TRUE, timeout=0.5) 75 | expect_equal(names(pr), as.character(p$pid)) 76 | }) 77 | 78 | test_that("using the proxy without pool and forward, 2 workers", { 79 | 
skip_on_cran() 80 | skip_on_os("windows") 81 | skip_if_not(has_localhost) 82 | skip_if(toupper(getOption("clustermq.scheduler", qsys_default)) != "MULTICORE", 83 | message="options(clustermq.scheduler') must be 'MULTICORE'") 84 | 85 | m = methods::new(CMQMaster) 86 | addr = m$listen("tcp://127.0.0.1:*") 87 | p = parallel::mcparallel(ssh_proxy(sub(".*:", "", addr))) 88 | 89 | m$proxy_submit_cmd(list(n_jobs=2), 10000L) 90 | m$add_pending_workers(2L) 91 | expect_null(m$recv(2000L)) # worker 1 up 92 | m$send({ Sys.sleep(0.5); 5 + 2 }) 93 | expect_null(m$recv(500L)) # worker 2 up 94 | m$send({ Sys.sleep(0.5); 3 + 1 }) 95 | r1 = m$recv(1000L) 96 | m$send_shutdown() 97 | r2 = m$recv(500L) 98 | m$send_shutdown() 99 | expect_equal(sort(c(r1,r2)), c(4,7)) 100 | 101 | m$close(500L) 102 | pr = parallel::mccollect(p, wait=TRUE, timeout=0.5) 103 | expect_equal(names(pr), as.character(p$pid)) 104 | }) 105 | 106 | test_that("full SSH connection", { 107 | skip_on_cran() 108 | skip_on_os("windows") 109 | skip_if_not(has_localhost) 110 | skip_if_not(has_ssh_cmq("127.0.0.1")) 111 | 112 | # 'LOCAL' mode (default) will not set up required sockets 113 | # 'SSH' mode would lead to circular connections 114 | # schedulers may have long delay (they start in fresh session, so no path) 115 | sched = getOption("clustermq.scheduler", qsys_default) 116 | skip_if(is.null(sched) || toupper(sched) != "MULTICORE", 117 | message="options(clustermq.scheduler') must be 'MULTICORE'") 118 | options(clustermq.template = "SSH", clustermq.ssh.host="127.0.0.1") 119 | 120 | w = workers(n_jobs=1, qsys_id="ssh", reuse=FALSE) 121 | result = Q(identity, 42, n_jobs=1, timeout=10L, workers=w) 122 | expect_equal(result, list(42)) 123 | 124 | w = workers(n_jobs=2, qsys_id="ssh", reuse=FALSE) 125 | result = clustermq::Q(Sys.sleep, time=c(1,2), n_jobs=2) 126 | expect_equal(result, list(NULL, NULL)) 127 | }) 128 | -------------------------------------------------------------------------------- 
/tests/testthat/test-8-foreach.r: -------------------------------------------------------------------------------- 1 | context("foreach") 2 | 3 | skip_if_not_installed("foreach") 4 | 5 | foreach = foreach::foreach 6 | `%dopar%` = foreach::`%dopar%` 7 | `%do%` = foreach::`%do%` 8 | register_dopar_cmq(n_jobs=0) 9 | 10 | test_that("foreach::getDoParWorkers() returns n_jobs", { 11 | expect_equal(foreach::getDoParWorkers(), 0) 12 | }) 13 | 14 | test_that("simple foreach registration works", { 15 | res = foreach(i=1:3) %dopar% sqrt(i) 16 | cmp = foreach(i=1:3) %do% sqrt(i) 17 | 18 | expect_equal(res, cmp) 19 | }) 20 | 21 | test_that(".export objects are exported", { 22 | y = 5 23 | res = foreach(x=1:3, .export="y") %dopar% { x + y } 24 | cmp = foreach(x=1:3, .export="y") %do% { x + y } 25 | 26 | expect_equal(res, cmp) 27 | # expect_error(foreach(x=1:3) %dopar% { x + y }) 28 | }) 29 | 30 | test_that(".packages are loaded", { 31 | expect_error(foreach(i="a string") %dopar% { md5sum(i) }) 32 | res = foreach(i="a string", .packages="tools") %dopar% { md5sum(i) } 33 | cmp = foreach(i="a string") %do% { md5sum(i) } 34 | expect_equal(res, cmp) 35 | }) 36 | 37 | test_that(".combine is respected", { 38 | res = foreach(i=1:3, .combine=c) %dopar% sqrt(i) 39 | cmp = foreach(i=1:3, .combine=c) %do% sqrt(i) 40 | expect_equal(res, cmp) 41 | 42 | res = foreach(i=1:3, .combine=append) %dopar% list(a=1, b=2) 43 | cmp = foreach(i=1:3, .combine=append) %do% list(a=1, b=2) 44 | expect_equal(res, cmp) 45 | 46 | res = foreach(i=1:3, .combine=cbind) %dopar% sqrt(i) 47 | cmp = foreach(i=1:3, .combine=cbind) %do% sqrt(i) 48 | expect_equal(res, cmp) 49 | 50 | res = foreach(i=1:3, .combine=rbind) %dopar% sqrt(i) 51 | cmp = foreach(i=1:3, .combine=rbind) %do% sqrt(i) 52 | expect_equal(res, cmp) 53 | }) 54 | 55 | test_that("no matrix unlisting (#143)", { 56 | fx = function(x) matrix(c(1,2)+x, ncol=1) 57 | res = foreach(i=1:3) %dopar% fx(i) 58 | cmp = foreach(i=1:3) %do% fx(i) 59 | expect_equal(res, 
cmp) 60 | }) 61 | 62 | test_that("automatic export in foreach", { 63 | fx = function(x) x + y 64 | y = 5 65 | res = foreach(x=1:3) %dopar% { x + y } 66 | cmp = foreach(x=1:3) %do% { x + y } 67 | expect_equal(res, cmp) 68 | }) 69 | 70 | test_that("NULL objects are exported", { 71 | fx = function(x) is.null(x) 72 | y = NULL 73 | res = foreach(i=1) %dopar% fx(y) 74 | cmp = foreach(i=1) %do% fx(y) 75 | expect_equal(res, cmp) 76 | }) 77 | 78 | test_that("external worker", { 79 | skip_on_os("windows") 80 | 81 | old_sched = getOption("clustermq.scheduler") 82 | on.exit(options(clustermq.scheduler = old_sched)) 83 | options(clustermq.scheduler = "multicore") 84 | 85 | register_dopar_cmq(n_jobs=1) 86 | res = foreach(i=1:3) %dopar% sqrt(i) 87 | cmp = foreach(i=1:3) %do% sqrt(i) 88 | expect_equal(res, cmp) 89 | }) 90 | 91 | test_that("foreach works via BiocParallel", { 92 | skip_on_os("windows") 93 | skip_if_not_installed("BiocParallel") 94 | 95 | old_sched = getOption("clustermq.scheduler") 96 | on.exit(options(clustermq.scheduler = old_sched)) 97 | options(clustermq.scheduler = "multicore") 98 | 99 | register_dopar_cmq(n_jobs=1) 100 | BiocParallel::register(BiocParallel::DoparParam()) 101 | res = BiocParallel::bplapply(1:3, sqrt) 102 | cmp = foreach(i=1:3) %do% sqrt(i) 103 | 104 | expect_equal(res, cmp) 105 | }) 106 | -------------------------------------------------------------------------------- /tools/winlibs.R: -------------------------------------------------------------------------------- 1 | if(!file.exists("../windows/zeromq/include")){ 2 | unlink("../windows", recursive = TRUE) 3 | url <- if(grepl("aarch", R.version$platform)){ 4 | "https://github.com/r-windows/bundles/releases/download/zeromq-4.3.4/zeromq-4.3.4-clang-aarch64.tar.xz" 5 | } else if(grepl("clang", Sys.getenv('R_COMPILED_BY'))){ 6 | "https://github.com/r-windows/bundles/releases/download/zeromq-4.3.4/zeromq-4.3.4-clang-x86_64.tar.xz" 7 | } else if(getRversion() >= "4.3") { 8 | 
"https://github.com/r-windows/bundles/releases/download/zeromq-4.3.4/zeromq-4.3.4-ucrt-x86_64.tar.xz" 9 | } else { 10 | "https://github.com/rwinlib/zeromq/archive/4.3.4.tar.gz" 11 | } 12 | download.file(url, basename(url), quiet = TRUE) 13 | dir.create("../windows", showWarnings = FALSE) 14 | untar(basename(url), exdir = "../windows", tar = 'internal') 15 | unlink(basename(url)) 16 | setwd("../windows") 17 | file.rename(list.files(), 'zeromq') 18 | } 19 | -------------------------------------------------------------------------------- /vignettes/faq.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Frequently asked questions" 3 | output: 4 | rmarkdown::html_vignette 5 | vignette: > 6 | %\VignetteIndexEntry{FAQ} 7 | %\VignetteEngine{knitr::rmarkdown} 8 | %\VignetteEncoding{UTF-8} 9 | --- 10 | 11 | ```{css echo=FALSE} 12 | img { 13 | border: 0px !important; 14 | margin: 2em 2em 2em 2em !important; 15 | } 16 | code { 17 | border: 0px !important; 18 | } 19 | ``` 20 | 21 | ```{r echo=FALSE, results="hide"} 22 | knitr::opts_chunk$set( 23 | cache = FALSE, 24 | echo = TRUE, 25 | collapse = TRUE, 26 | comment = "#>" 27 | ) 28 | options(clustermq.scheduler = "local", rmarkdown.html_vignette.check_title = FALSE) 29 | suppressPackageStartupMessages(library(clustermq)) 30 | ``` 31 | 32 | ## Installation errors {#install} 33 | 34 | To compile this package a fully C++11 compliant compiler is required. This is 35 | [implicit for CRAN packages](https://www.tidyverse.org/blog/2023/03/cran-checks-compiled-code/) 36 | since `R=3.6.2` and is hence not listed in _SystemRequirements_. 37 | 38 | If you encounter an error saying that no matching function for call to 39 | `zmq::message_t::message_t(std::string&)` exists, your compiler does not 40 | (fully) support this and the automated check failed for some reason.
41 | 42 | ```{sh eval=FALSE} 43 | In file included from CMQMaster.cpp:2:0: 44 | CMQMaster.h: In member function ‘void CMQMaster::proxy_submit_cmd(SEXP, int)’: 45 | CMQMaster.h:146:40: error: no matching function for call to ‘zmq::message_t::message_t(std::string&)’ 46 | mp.push_back(zmq::message_t(cur)); 47 | ``` 48 | 49 | This happens for instance for old versions of the `gcc` compiler (default on 50 | most Linux distributions). You can check your version in the terminal using: 51 | 52 | ```{sh eval=FALSE} 53 | # the minimum required gcc version is 5.5 for full C++11 support (3.3 for clang) 54 | cc --version 55 | ``` 56 | 57 | In this case, it is _very_ likely that your HPC system already has a newer 58 | compiler installed that you need to add to your `$PATH` or load as a module. 59 | Once this is set, you can install the package from R *that was started in a 60 | terminal that has this module/path active*. 61 | 62 | ## Session gets stuck at "Running calculations" {#stuck} 63 | 64 | Your R session may be stuck at something like the following: 65 | 66 | ```{r eval=FALSE} 67 | > clustermq::Q(identity, x=42, n_jobs=1) 68 | Submitting 1 worker jobs (ID: cmq8480) ... 69 | Running 1 calculations (5 objs/19.4 Kb common; 1 calls/chunk) ... 70 | ``` 71 | 72 | You will see this every time your jobs are queued but not yet started. 73 | Depending on how busy your HPC is, this may take a long time. You can check the 74 | queueing status of your jobs in the terminal with _e.g._ `qstat` (SGE), `bjobs` 75 | (LSF), or `sinfo` (SLURM). 76 | 77 | If your jobs are already finished, this likely means that the `clustermq` 78 | workers can not connect to the main session. You can confirm this by passing 79 | [`log_worker=TRUE`](https://mschubert.github.io/clustermq/articles/userguide.html#debugging-workers) 80 | to `Q` and inspect the logs created in your current working directory. 
If they 81 | state something like: 82 | 83 | ```{sh eval=FALSE} 84 | > clustermq:::worker("tcp://my.headnode:9091") 85 | 2023-12-11 10:22:58.485529 | Master: tcp://my.headnode:9091 86 | 2023-12-11 10:22:58.488892 | connecting to: tcp://my.headnode:9091: 87 | Error: Connection failed after 10016 ms 88 | Execution halted 89 | ``` 90 | 91 | the submitted job is indeed unable to establish a network connection with the 92 | head node. This can happen if your HPC does not allow incoming connections at 93 | all, but more likely happens because (1) only certain ports are allowed, or (2) 94 | there are multiple network interfaces, only some of which have access to the 95 | head node. 96 | 97 | 1. If the head node only allows incoming connections on certain ports, set the 98 | [R 99 | option](https://mschubert.github.io/clustermq/articles/userguide.html#options) 100 | `clustermq.ports=`. 101 | 2. You can list the available network interfaces using the `ifconfig` command 102 | in the terminal. Find the interface that shares a subnetwork with the head 103 | node and add the [R 104 | option](https://mschubert.github.io/clustermq/articles/userguide.html#options) 105 | `clustermq.host=`. If this is unclear, contact your system 106 | administrators to see which interface to use. 107 | 108 | ## SSH not working {#ssh} 109 | 110 | Before trying remote schedulers via SSH, make sure that the scheduler works 111 | when you first connect to the cluster and run a job from there. 112 | 113 | If the terminal is stuck at 114 | 115 | ``` 116 | Connecting via SSH ... 
117 | ``` 118 | 119 | make sure that each step of your SSH connection works by typing the following 120 | commands in your **local** terminal and make sure that you don't get errors or 121 | warnings in each step: 122 | 123 | ```{sh eval=FALSE} 124 | # test your ssh login that you set up in ~/.ssh/config 125 | # if this fails you have not set up SSH correctly 126 | ssh  127 | 128 | # test port forwarding from 54709 remote to 6687 local (ports are random) 129 | # if this fails you will not be able to use clustermq via SSH 130 | ssh -R 54709:localhost:6687 R --vanilla 131 | ``` 132 | 133 | If you get a `Command not found: R` error, make sure your `$PATH` is set up 134 | correctly in your `~/.bash_profile` and/or your `~/.bashrc` (depending on your 135 | cluster config you might need either). You may also need to modify your [SSH 136 | template](https://mschubert.github.io/clustermq/articles/userguide.html#ssh-template) 137 | to load R as a module or conda environment. 138 | 139 | If you get an SSH warning or error try again with `ssh -v` to enable verbose 140 | output. If the forward itself works, run the following in your local R session 141 | (ideally also in command-line R, [not only in 142 | RStudio](https://github.com/mschubert/clustermq/issues/206)): 143 | 144 | ```{r eval=FALSE} 145 | options(clustermq.scheduler = "ssh", 146 | clustermq.ssh.log = "~/ssh_proxy.log") 147 | Q(identity, x=1, n_jobs=1) 148 | ``` 149 | 150 | This will create a log file *on the remote server* that will contain any errors 151 | that might have occurred during `ssh_proxy` startup. 152 | 153 | If the `ssh_proxy` startup fails on your local machine with the error 154 | 155 | ``` 156 | Remote R process did not respond after 5 seconds. Check your SSH server log.
157 | ``` 158 | 159 | but the server log does not show any errors, then you can try increasing the 160 | timeout: 161 | 162 | ```{r eval=FALSE} 163 | options(clustermq.ssh.timeout = 30) # in seconds 164 | ``` 165 | 166 | This can happen when your SSH startup template includes additional steps before 167 | starting R, such as activating a module or conda environment, or having to 168 | confirm the connection via two-factor authentication. 169 | 170 | ## Running the master inside containers {#master-in-container} 171 | 172 | If your master process is inside a container, accessing the HPC scheduler is 173 | more difficult. Containers, including singularity and docker, isolate the 174 | processes inside the container from the host. The *R* process will not be able 175 | to submit a job because the scheduler cannot be found. 176 | 177 | Note that the HPC node running the master process must be allowed to submit 178 | jobs. Not all HPC systems allow compute nodes to submit jobs. If that is the 179 | case, you may need to run the master process on the login node, and discuss the 180 | issue with your system administrator. 181 | 182 | If your container is binary compatible with the host, you may be able to bind 183 | in the scheduler executable to the container. 184 | 185 | For example, PBS might look something like: 186 | 187 | ```{sh eval=FALSE} 188 | #PBS directives ... 189 | 190 | module load singularity 191 | 192 | SINGULARITYENV_APPEND_PATH=/opt/pbs/bin 193 | singularity exec --bind /opt/pbs/bin r_image.sif Rscript master_script.R 194 | ``` 195 | 196 | A working example of binding SLURM into a CentOS 7 container image from a 197 | CentOS 7 host is available at 198 | https://groups.google.com/a/lbl.gov/d/msg/singularity/syLcsIWWzdo/NZvF2Ud2AAAJ 199 | 200 | Alternatively, you can create a script that uses SSH to execute the scheduler 201 | on the login node. 
For this, you will need an SSH client in the container, 202 | [keys set up for password-less login](https://www.digitalocean.com/community/tutorials/how-to-configure-ssh-key-based-authentication-on-a-linux-server), 203 | and create a script to call the scheduler on the login node via ssh (e.g. 204 | `~/bin/qsub` for SGE/PBS/Torque, `bsub` for LSF and `sbatch` for Slurm): 205 | 206 | ```{sh eval=FALSE} 207 | #!/bin/bash 208 | ssh -i ~/.ssh/ ${PBS_O_HOST:-"no_host_not_in_a_pbs_job"} qsub "$@" 209 | ``` 210 | 211 | Make sure the script is executable, and bind/copy it into the container 212 | somewhere on `$PATH`. Home directories are bound in by default in singularity. 213 | 214 | ```{sh eval=FALSE} 215 | chmod u+x ~/bin/qsub 216 | SINGULARITYENV_APPEND_PATH=~/bin 217 | ``` 218 | -------------------------------------------------------------------------------- /vignettes/technicaldocs.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Technical Documentation" 3 | output: 4 | rmarkdown::html_vignette 5 | vignette: > 6 | %\VignetteIndexEntry{Technical Documentation} 7 | %\VignetteEngine{knitr::rmarkdown} 8 | %\VignetteEncoding{UTF-8} 9 | --- 10 | 11 | ```{css echo=FALSE} 12 | img { 13 | border: 0px !important; 14 | margin: 2em 2em 2em 2em !important; 15 | } 16 | code { 17 | border: 0px !important; 18 | } 19 | ``` 20 | 21 | ```{r echo=FALSE, results="hide"} 22 | knitr::opts_chunk$set( 23 | cache = FALSE, 24 | echo = TRUE, 25 | collapse = TRUE, 26 | comment = "#>" 27 | ) 28 | options(clustermq.scheduler = "local") 29 | suppressPackageStartupMessages(library(clustermq)) 30 | ``` 31 | 32 | ## Worker API 33 | 34 | ### Base API and schedulers 35 | 36 | The main worker functions are wrapped in an _R6_ class with the name of `QSys`. 
37 | This provides a standardized API to the [lower-level 38 | messages](https://mschubert.github.io/clustermq/articles/technicaldocs.html#zeromq-message-specification) 39 | that are sent via [_ZeroMQ_](https://zeromq.org/). 40 | 41 | The base class itself is derived in scheduler classes that add the required 42 | functions for submitting and cleaning up jobs: 43 | 44 | ``` 45 | + QSys 46 | |- Multicore 47 | |- LSF 48 | + SGE 49 | |- PBS 50 | |- Torque 51 | |- etc. 52 | ``` 53 | 54 | The user-visible object is a worker `Pool` that wraps this, and will eventually 55 | allow to manage different workers. 56 | 57 | ### Workers 58 | 59 | #### Creating a worker pool 60 | 61 | A pool of workers can be created using the `workers()` function, which 62 | instantiates a `Pool` object of the corresponding `QSys`-derived scheduler 63 | class. See `?workers` for details. 64 | 65 | ```{r eval=FALSE} 66 | # start up a pool of three workers using the default scheduler 67 | w = workers(n_jobs=3) 68 | 69 | # if we make an unclean exit for whatever reason, clean up the jobs 70 | on.exit(w$cleanup()) 71 | ``` 72 | 73 | #### Worker startup 74 | 75 | For workers that are started up via a scheduler, we do not know which machine 76 | they will run on. This is why we start up every worker with a TCP/IP address of 77 | the master socket that will distribute work. 78 | 79 | This is achieved by the call to R common to all schedulers: 80 | 81 | ```{sh eval=FALSE} 82 | R --no-save --no-restore -e 'clustermq:::worker("{{ master }}")' 83 | ``` 84 | 85 | #### Worker communication 86 | 87 | On the master's side, we wait until a worker connects: 88 | 89 | ```{r eval=FALSE} 90 | msg = w$recv() # this will block until a worker is ready 91 | ``` 92 | 93 | We can then send any expression to be evaluated on the worker using the `send` 94 | method: 95 | 96 | ```{r eval=FALSE} 97 | w$send(expression, ...) 
98 | ``` 99 | 100 | After the expression (in `...`), any variables that should be passed along with 101 | the call can be added. For batch processing that `clustermq` usually does, this 102 | command is `work_chunk`, where the `chunk` data is added: 103 | 104 | ```{r eval=FALSE} 105 | w$send(clustermq:::work_chunk(chunk, fun, const, rettype, common_seed), 106 | chunk = chunk(iter, submit_index)) 107 | ``` 108 | 109 | #### Worker environment 110 | 111 | We can add any number of objects to a worker environment using the `env` 112 | method: 113 | 114 | ```{r eval=FALSE} 115 | w$env(object=value, ...) 116 | ``` 117 | 118 | This will also invisibly return a `data.frame` with all objects currently in 119 | the environment. If a user wants to inspect the environment without changing it 120 | they can call `w$env()` without arguments. The environment will be propagated 121 | to all workers automatically in a greedy fashion. 122 | 123 | ### Main event loop 124 | 125 | Putting the above together in an event loop, we get what is essentially 126 | implemented in `master`. `w$send` invisibly returns an identifier to track 127 | which call was submitted, and `w$current()` matches the same to `w$recv()`. 128 | 129 | ```{r eval=FALSE} 130 | w = workers(3) 131 | on.exit(w$cleanup()) 132 | w$env(...) 133 | 134 | while (we have new work to send || jobs pending) { 135 | res = w$recv() # the result of the call, or NULL for a new worker 136 | w$current()$call_ref # matches answer to request, -1 otherwise 137 | # handle result 138 | 139 | if (more work) 140 | call_ref = w$send(expression, ...) # call_ref tracks request identity 141 | else 142 | w$send_shutdown() 143 | } 144 | ``` 145 | 146 | A loop of a similar structure can be used to extend `clustermq`. As an example, 147 | [this was done by the _targets_ 148 | package](https://github.com/ropensci/targets/blob/1.2.2/R/class_clustermq.R). 
149 | 150 | ## ZeroMQ message specification 151 | 152 | Communication between the `master` (main event loop) and workers (`QSys` base 153 | class) is organised in _messages_. These are chunks of serialized data sent via 154 | _ZeroMQ_'s protocol (_ZMTP_). The parts of each message are called *frames*. 155 | 156 | ### Master - Worker communication 157 | 158 | The master requests an evaluation in a message with X frames (direct) or Y if 159 | proxied. This is all handled by _clustermq_ internally. 160 | 161 | * The worker identity frame or routing identifier 162 | * A delimiter frame 163 | * Worker status (`wlife_t`) 164 | * The call to be evaluated 165 | * _N_ repetitions of: 166 | * The variable name of an environment object that is not yet present on the 167 | worker 168 | * The variable value 169 | 170 | If using a proxy, this will be followed by a `SEXP` that contains variable 171 | names the proxy should add before forwarding to the worker. 172 | 173 | ### Worker evaluation 174 | 175 | A worker evaluates the call using the R C API: 176 | 177 | ```{r eval=FALSE} 178 | R_tryEvalSilent(cmd, env, &err); 179 | ``` 180 | 181 | If an error occurs in this evaluation, it will be returned as a structure with 182 | class `worker_error`. If a developer wants to catch errors and warnings in a 183 | more fine-grained manner, it is recommended to add their own `callingHandlers` 184 | to `cmd` (as _clustermq_ does in its `work_chunk`).
185 | 186 | ### Worker - Master communication 187 | 188 | The result of this evaluation is then returned in a message with four (direct) 189 | or five (proxied) frames: 190 | 191 | * Worker identity frame (handled internally by _ZeroMQ_'s `ZMQ_REQ` socket) 192 | * Empty frame (handled internally by _ZeroMQ_'s `ZMQ_REQ` socket) 193 | * Worker status (`wlife_t`) that is handled internally by _clustermq_ 194 | * The result of the call (`SEXP`), visible to the user 195 | 196 | If using a worker via SSH, these frames will be preceded by a routing identity 197 | frame that is handled internally by _ZeroMQ_ and added or peeled off by the 198 | proxy. 199 | --------------------------------------------------------------------------------