├── .github
│   ├── .gitignore
│   └── workflows
│       ├── pkgdown.yaml
│       └── R-CMD-check.yaml
├── LICENSE
├── inst
│   └── logo.png
├── man
│   ├── figures
│   │   ├── logo.png
│   │   ├── README-plot-cpu-1.png
│   │   └── README-plot-mem-1.png
│   ├── syrup-package.Rd
│   └── syrup.Rd
├── _pkgdown.yml
├── .gitignore
├── R
│   ├── imports.R
│   ├── syrup-package.R
│   ├── utils.R
│   └── syrup.R
├── .Rbuildignore
├── NAMESPACE
├── NEWS.md
├── tests
│   ├── testthat.R
│   └── testthat
│       ├── _snaps
│       │   └── syrup.md
│       └── test-syrup.R
├── syrup.Rproj
├── DESCRIPTION
├── LICENSE.md
├── README.Rmd
└── README.md
/.github/.gitignore:
--------------------------------------------------------------------------------
1 | *.html
2 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | YEAR: 2024
2 | COPYRIGHT HOLDER: Posit Software, PBC
3 |
--------------------------------------------------------------------------------
/inst/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/simonpcouch/syrup/HEAD/inst/logo.png
--------------------------------------------------------------------------------
/man/figures/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/simonpcouch/syrup/HEAD/man/figures/logo.png
--------------------------------------------------------------------------------
/_pkgdown.yml:
--------------------------------------------------------------------------------
1 | url: https://simonpcouch.github.io/syrup/
2 | template:
3 | bootstrap: 5
4 |
5 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .Rproj.user
2 | *.psd
3 | .Rhistory
4 | .Rdata
5 | .httr-oauth
6 | .DS_Store
7 | .quarto
8 | docs
9 |
--------------------------------------------------------------------------------
/man/figures/README-plot-cpu-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/simonpcouch/syrup/HEAD/man/figures/README-plot-cpu-1.png
--------------------------------------------------------------------------------
/man/figures/README-plot-mem-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/simonpcouch/syrup/HEAD/man/figures/README-plot-mem-1.png
--------------------------------------------------------------------------------
/R/imports.R:
--------------------------------------------------------------------------------
1 | #' @import rlang
2 | #' @importFrom bench bench_bytes
3 | #' @importFrom purrr map map_chr map_dbl map_int map_lgl
4 | NULL
5 |
--------------------------------------------------------------------------------
/R/syrup-package.R:
--------------------------------------------------------------------------------
1 | #' @keywords internal
2 | "_PACKAGE"
3 |
4 | ## usethis namespace: start
5 | ## usethis namespace: end
6 | NULL
7 |
8 | utils::globalVariables(c("time", "user", "name", "pid"))
9 |
--------------------------------------------------------------------------------
/.Rbuildignore:
--------------------------------------------------------------------------------
1 | ^syrup\.Rproj$
2 | ^\.Rproj\.user$
3 | ^LICENSE\.md$
4 | ^README\.Rmd$
5 | ^cran-comments\.md$
6 | inst/
7 | ^vignettes/articles$
8 | ^\.github$
9 | ^_pkgdown\.yml$
10 | ^docs$
11 | ^pkgdown$
12 |
--------------------------------------------------------------------------------
/NAMESPACE:
--------------------------------------------------------------------------------
1 | # Generated by roxygen2: do not edit by hand
2 |
3 | export(syrup)
4 | import(rlang)
5 | importFrom(bench,bench_bytes)
6 | importFrom(purrr,map)
7 | importFrom(purrr,map_chr)
8 | importFrom(purrr,map_dbl)
9 | importFrom(purrr,map_int)
10 | importFrom(purrr,map_lgl)
11 |
--------------------------------------------------------------------------------
/NEWS.md:
--------------------------------------------------------------------------------
1 | # syrup (development version)
2 |
3 | # syrup 0.1.4
4 |
5 | * Resolves issues with the MKL and noLD Additional Issues checks.
6 |
7 | # syrup 0.1.3
8 |
9 | * Resolves another issue with Fedora in R-devel.
10 |
11 | # syrup 0.1.2
12 |
13 | * Resolves issue with Fedora in R-devel.
14 |
15 | # syrup 0.1.1
16 |
17 | * Resolves CRAN test failures.
18 |
19 | # syrup 0.1.0
20 |
21 | * Initial CRAN submission.
22 |
--------------------------------------------------------------------------------
/tests/testthat.R:
--------------------------------------------------------------------------------
1 | # This file is part of the standard setup for testthat.
2 | # It is recommended that you do not modify it.
3 | #
4 | # Where should you do additional test configuration?
5 | # Learn more about the roles of various files in:
6 | # * https://r-pkgs.org/testing-design.html#sec-tests-files-overview
7 | # * https://testthat.r-lib.org/articles/special-files.html
8 |
9 | library(testthat)
10 | library(syrup)
11 |
12 | test_check("syrup")
13 |
--------------------------------------------------------------------------------
/syrup.Rproj:
--------------------------------------------------------------------------------
1 | Version: 1.0
2 |
3 | RestoreWorkspace: No
4 | SaveWorkspace: No
5 | AlwaysSaveHistory: Default
6 |
7 | EnableCodeIndexing: Yes
8 | UseSpacesForTab: Yes
9 | NumSpacesForTab: 2
10 | Encoding: UTF-8
11 |
12 | RnwWeave: Sweave
13 | LaTeX: pdfLaTeX
14 |
15 | AutoAppendNewline: Yes
16 | StripTrailingWhitespace: Yes
17 | LineEndingConversion: Posix
18 |
19 | BuildType: Package
20 | PackageUseDevtools: Yes
21 | PackageInstallArgs: --no-multiarch --with-keep.source
22 | PackageRoxygenize: rd,collate,namespace
23 |
--------------------------------------------------------------------------------
/tests/testthat/_snaps/syrup.md:
--------------------------------------------------------------------------------
1 | # syrup does basic type checks
2 |
3 | Code
4 | syrup(1, interval = "boop")
5 | Condition
6 | Error in `syrup()`:
7 | ! `interval` must be a single, finite numeric.
8 |
9 | ---
10 |
11 | Code
12 | syrup(1, peak = "no")
13 | Condition
14 | Error in `syrup()`:
15 | ! `peak` must be `TRUE` or `FALSE`.
16 |
17 | ---
18 |
19 | Code
20 | syrup(1, env = "schmenv")
21 | Condition
22 | Error in `syrup()`:
23 | ! `env` must be an environment.
24 |
25 | # syrup warns with only one ID
26 |
27 | ! `expr` evaluated fully before syrup could take a snapshot of memory usage.
28 | * Results likely represent memory usage before `expr` was evaluated.
29 |
30 |
--------------------------------------------------------------------------------
/man/syrup-package.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/syrup-package.R
3 | \docType{package}
4 | \name{syrup-package}
5 | \alias{syrup-package}
6 | \title{syrup: Measure Memory and CPU Usage for Parallel R Code}
7 | \description{
8 | \if{html}{\figure{logo.png}{options: style='float: right' alt='logo' width='120'}}
9 |
10 | Measures memory and CPU usage of R code by regularly taking snapshots of calls to the system command 'ps'. The package provides an entry point (albeit coarse) to profile usage of system resources by R code run in parallel.
11 | }
12 | \seealso{
13 | Useful links:
14 | \itemize{
15 | \item \url{https://github.com/simonpcouch/syrup}
16 | \item \url{https://simonpcouch.github.io/syrup/}
17 | \item Report bugs at \url{https://github.com/simonpcouch/syrup/issues}
18 | }
19 |
20 | }
21 | \author{
22 | \strong{Maintainer}: Simon Couch \email{simon.couch@posit.co} (\href{https://orcid.org/0000-0001-5676-5107}{ORCID})
23 |
24 | Other contributors:
25 | \itemize{
26 | \item Posit Software, PBC [copyright holder, funder]
27 | }
28 |
29 | }
30 | \keyword{internal}
31 |
--------------------------------------------------------------------------------
/DESCRIPTION:
--------------------------------------------------------------------------------
1 | Package: syrup
2 | Title: Measure Memory and CPU Usage for Parallel R Code
3 | Version: 0.1.4.9000
4 | Authors@R: c(
5 | person("Simon", "Couch", , "simon.couch@posit.co", role = c("aut", "cre"),
6 | comment = c(ORCID = "0000-0001-5676-5107")),
7 | person(given = "Posit Software, PBC", role = c("cph", "fnd"))
8 | )
9 | Description: Measures memory and CPU usage of R code by regularly taking
10 | snapshots of calls to the system command 'ps'. The package provides an entry
11 | point (albeit coarse) to profile usage of system resources by R code run
12 | in parallel.
13 | License: MIT + file LICENSE
14 | Suggests:
15 | testthat (>= 3.0.0)
16 | Config/testthat/edition: 3
17 | Encoding: UTF-8
18 | Roxygen: list(markdown = TRUE)
19 | RoxygenNote: 7.3.2
20 | Depends:
21 | bench
22 | Imports:
23 | callr,
24 | dplyr,
25 | ps,
26 | purrr,
27 | rlang,
28 | tibble,
29 | vctrs,
30 | withr
31 | Config/Needs/website: rmarkdown
32 | URL: https://github.com/simonpcouch/syrup, https://simonpcouch.github.io/syrup/
33 | BugReports: https://github.com/simonpcouch/syrup/issues
34 |
--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
1 | # MIT License
2 |
3 | Copyright (c) 2024 Posit Software, PBC
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/.github/workflows/pkgdown.yaml:
--------------------------------------------------------------------------------
1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples
2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
3 | on:
4 | push:
5 | branches: [main, master]
6 | pull_request:
7 | branches: [main, master]
8 | release:
9 | types: [published]
10 | workflow_dispatch:
11 |
12 | name: pkgdown
13 |
14 | permissions: read-all
15 |
16 | jobs:
17 | pkgdown:
18 | runs-on: ubuntu-latest
19 | # Only restrict concurrency for non-PR jobs
20 | concurrency:
21 | group: pkgdown-${{ github.event_name != 'pull_request' || github.run_id }}
22 | env:
23 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
24 | permissions:
25 | contents: write
26 | steps:
27 | - uses: actions/checkout@v4
28 |
29 | - uses: r-lib/actions/setup-pandoc@v2
30 |
31 | - uses: r-lib/actions/setup-r@v2
32 | with:
33 | use-public-rspm: true
34 |
35 | - uses: r-lib/actions/setup-r-dependencies@v2
36 | with:
37 | extra-packages: any::pkgdown, local::.
38 | needs: website
39 |
40 | - name: Build site
41 | run: pkgdown::build_site_github_pages(new_process = FALSE, install = FALSE)
42 | shell: Rscript {0}
43 |
44 | - name: Deploy to GitHub pages 🚀
45 | if: github.event_name != 'pull_request'
46 | uses: JamesIves/github-pages-deploy-action@v4.5.0
47 | with:
48 | clean: false
49 | branch: gh-pages
50 | folder: docs
51 |
--------------------------------------------------------------------------------
/.github/workflows/R-CMD-check.yaml:
--------------------------------------------------------------------------------
1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples
2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
3 | #
4 | # NOTE: This workflow is overkill for most R packages and
5 | # check-standard.yaml is likely a better choice.
6 | # usethis::use_github_action("check-standard") will install it.
7 | on:
8 | push:
9 | branches: [main, master]
10 | pull_request:
11 | branches: [main, master]
12 |
13 | name: R-CMD-check
14 |
15 | permissions: read-all
16 |
17 | jobs:
18 | R-CMD-check:
19 | runs-on: ${{ matrix.config.os }}
20 |
21 | name: ${{ matrix.config.os }} (${{ matrix.config.r }})
22 |
23 | strategy:
24 | fail-fast: false
25 | matrix:
26 | config:
27 | - {os: macos-latest, r: 'release'}
28 |
29 | - {os: windows-latest, r: 'release'}
30 | # use 4.1 to check with rtools40's older compiler
31 | - {os: windows-latest, r: '4.1'}
32 |
33 | - {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'}
34 | - {os: ubuntu-latest, r: 'release'}
35 | - {os: ubuntu-latest, r: 'oldrel-1'}
36 | - {os: ubuntu-latest, r: 'oldrel-2'}
37 | - {os: ubuntu-latest, r: 'oldrel-3'}
38 | - {os: ubuntu-latest, r: 'oldrel-4'}
39 |
40 | env:
41 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
42 | R_KEEP_PKG_SOURCE: yes
43 |
44 | steps:
45 | - uses: actions/checkout@v4
46 |
47 | - uses: r-lib/actions/setup-pandoc@v2
48 |
49 | - uses: r-lib/actions/setup-r@v2
50 | with:
51 | r-version: ${{ matrix.config.r }}
52 | http-user-agent: ${{ matrix.config.http-user-agent }}
53 | use-public-rspm: true
54 |
55 | - uses: r-lib/actions/setup-r-dependencies@v2
56 | with:
57 | extra-packages: any::rcmdcheck
58 | needs: check
59 |
60 | - uses: r-lib/actions/check-r-package@v2
61 | with:
62 | upload-snapshots: true
63 | build_args: 'c("--no-manual","--compact-vignettes=gs+qpdf")'
64 |
--------------------------------------------------------------------------------
/tests/testthat/test-syrup.R:
--------------------------------------------------------------------------------
1 | skip_if_not(ps::ps_os_type()[["POSIX"]])
2 | skip_on_cran()
3 |
4 | test_that("syrup works", {
5 | set.seed(1)
6 | expect_no_error(
7 | res <- syrup(
8 | res_with_syrup <- Sys.sleep(1)
9 | )
10 | )
11 |
12 | set.seed(1)
13 | res_no_syrup <- Sys.sleep(1)
14 |
15 | expect_equal(res_with_syrup, res_no_syrup)
16 |
17 | expect_s3_class(res, "tbl_df")
18 |
19 | expect_named(res, c("id", "time", "pid", "ppid", "name", "pct_cpu", "rss", "vms"))
20 | expect_gte(nrow(res), 1)
21 | expect_equal(unique(res$id), 1:max(res$id, na.rm = TRUE))
22 | expect_type(res$pid, "integer")
23 | expect_true(ps::ps_pid() %in% res$pid)
24 | expect_type(res$ppid, "integer")
25 | expect_type(res$name, "character")
26 | expect_s3_class(res$rss, "bench_bytes")
27 | expect_s3_class(res$vms, "bench_bytes")
28 | })
29 |
30 | test_that("syrup(peak = TRUE) works", {
31 | expect_no_error(
32 | res <- syrup(
33 | Sys.sleep(1),
34 | peak = TRUE
35 | )
36 | )
37 |
38 | expect_s3_class(res, "tbl_df")
39 | expect_equal(length(unique(res$id)), 1)
40 | })
41 |
42 | test_that("syrup(interval) works", {
43 | # can't expect that nrow will grow strictly proportionally, as
44 | # the number of other processes running may change and there's some
45 | # overhead associated with the inter-process communication
46 | expect_no_error(
47 | res_01 <- syrup(
48 | Sys.sleep(1),
49 | interval = .01
50 | )
51 | )
52 |
53 | expect_no_error(
54 | res_1 <- syrup(
55 | Sys.sleep(1),
56 | interval = .1
57 | )
58 | )
59 |
60 | expect_s3_class(res_01, "tbl_df")
61 | expect_s3_class(res_1, "tbl_df")
62 |
63 | skip_on_cran()
64 |
65 | expect_true(length(unique(res_01$id)) > length(unique(res_1$id)))
66 | })
67 |
68 | test_that("syrup does basic type checks", {
69 | # the rlang type check standalone is probably overkill for this project,
70 | # but some simple type checks should still be helpful.
71 | expect_snapshot(error = TRUE, syrup(1, interval = "boop"))
72 | expect_snapshot(error = TRUE, syrup(1, peak = "no"))
73 | expect_snapshot(error = TRUE, syrup(1, env = "schmenv"))
74 | })
75 |
76 | test_that("syrup warns with only one ID", {
77 | skip_on_cran()
78 |
79 | expect_snapshot_warning(syrup(1))
80 | })
81 |
--------------------------------------------------------------------------------
/R/utils.R:
--------------------------------------------------------------------------------
1 | # a wrapper around ps::ps() that returns info only on live R-ish processes
2 | ps_r_processes <- function(id) {
3 | ps <- ps::ps()
4 |
5 | ps <-
6 | vctrs::vec_slice(
7 | ps,
8 | (ps$name %in% c("R", "ark", "R.exe") | grepl("rsession", ps$name)) &
9 | ps$status != "zombie" &
10 | ps$pid != ps::ps_pid()
11 | )
12 |
13 | ps$rss <- bench::bench_bytes(ps$rss)
14 | ps$vms <- bench::bench_bytes(ps$vms)
15 |
16 | vctrs::vec_cbind(
17 | tibble::new_tibble(
18 | list(id = rep(id, nrow(ps)), time = rep(Sys.time(), nrow(ps)))
19 | ),
20 | ps[!colnames(ps) %in% c("username", "status", "created", "ps_handle")]
21 | )
22 | }
23 |
24 | # x is a data frame of row-binded ps_r_processes() outputs
25 | mutate_pct_cpu <- function(x) {
26 | x <- dplyr::mutate(
27 | x,
28 | pct_cpu = calculate_pct_cpu(time, user, system),
29 | .after = name,
30 | .by = pid
31 | )
32 | x <- dplyr::select(x, -c(user, system))
33 | }
34 |
35 | # time, user, and system are vectors of repeated measures from a given pid
36 | calculate_pct_cpu <- function(time, user, system) {
37 | intervals <- as.numeric(diff(time))
38 | user_diffs <- diff(user)
39 | system_diffs <- diff(system)
40 |
41 | c(NA_real_, (user_diffs + system_diffs) * 100 / intervals)
42 | }
43 |
44 | # grab the result from sesh and close it.
45 | # there may be a slight delay before sesh is able to return the result, so
46 | # query iteratively until we get one back.
47 | retrieve_results <- function(sesh, call = caller_env()) {
48 | sesh_res <- sesh$read()
49 | cnt <- 1
50 |   while (is.null(sesh_res) && cnt < 10) {
51 | Sys.sleep(.2)
52 | sesh_res <- sesh$read()
53 | cnt <- cnt + 1
54 | }
55 |
56 | sesh$close()
57 |
58 |   if (is.null(sesh_res)) {
59 | rlang::abort(
60 | "Unable to retrieve resource usage results from the temporary session.",
61 | .internal = TRUE,
62 | call = call
63 | )
64 | }
65 |
66 | sesh_res$result
67 | }
68 |
69 | is_unix <- function() {
70 | identical(.Platform$OS.type, "unix")
71 | }
72 |
73 | # from rstudio/reticulate
74 | is_fedora <- function() {
75 | if (is_unix() && file.exists("/etc/os-release")) {
76 | os_info <- readLines("/etc/os-release")
77 | any(grepl("Fedora", os_info))
78 | } else {
79 | FALSE
80 | }
81 | }
82 |
--------------------------------------------------------------------------------
/man/syrup.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/syrup.R
3 | \name{syrup}
4 | \alias{syrup}
5 | \title{Memory and CPU Usage Information for Parallel R Code}
6 | \usage{
7 | syrup(expr, interval = 0.5, peak = FALSE, env = caller_env())
8 | }
9 | \arguments{
10 | \item{expr}{An expression.}
11 |
12 | \item{interval}{The interval at which to take snapshots of resource usage.
13 | In practice, there's an overhead on top of each of these intervals.}
14 |
15 | \item{peak}{Whether to return rows for only the "peak" memory usage.
16 | Interpreted as the \code{id} with the maximum \code{rss} sum. Defaults to \code{FALSE},
17 | but may be helpful to set \code{peak = TRUE} for potentially very long-running
18 | processes so that the tibble doesn't grow too large.}
19 |
20 | \item{env}{The environment to evaluate \code{expr} in.}
21 | }
22 | \value{
23 | A tibble with columns \code{id} and \code{time} and a number of columns from
24 | \code{\link[ps:ps]{ps::ps()}} output describing memory and CPU usage. Notably, the process ID
25 | \code{pid}, parent process ID \code{ppid}, percent CPU usage, and resident set size
26 | \code{rss} (a measure of memory usage).
27 | }
28 | \description{
29 | This function is a wrapper around the system command \code{ps} that can
30 | be used to benchmark (peak) memory and CPU usage of parallel R code.
31 | By taking snapshots of the memory usage of R processes at a regular \code{interval},
32 | the function dynamically builds up a profile of their usage of system
33 | resources.
34 | }
35 | \details{
36 | While much of the verbiage in the package assumes that the supplied
37 | expression will be distributed across CPU cores, there's nothing specific
38 | about this package that necessitates that the expression provided to \code{syrup()}
39 | be run in parallel. Said another way, \code{syrup()} will work just fine
40 | with "normal," sequentially-run R code (as in the examples). That said,
41 | there are many better, more fine-grained tools for the job in the case of
42 | sequential R code, such as \code{\link[=Rprofmem]{Rprofmem()}}, the
43 | \href{https://CRAN.R-project.org/package=profmem}{profmem}
44 | package, the \link[bench:mark]{bench} package, and packages in
45 | the \href{https://github.com/r-prof}{R-prof} GitHub organization.
46 |
47 | Loosely, the function works by:
48 | \itemize{
49 | \item Setting up another R process (call it \code{sesh}) that queries system
50 | information using \code{\link[ps:ps]{ps::ps()}} at a regular interval,
51 | \item Evaluating the supplied expression,
52 | \item Reading the queried system information back into the main process from \code{sesh},
53 | \item Closing \code{sesh}, and then
54 | \item Returning the queried system information.
55 | }
56 |
57 | Note that information on the R process \code{sesh} is filtered out from the results
58 | automatically.
59 | }
60 | \examples{
61 | \dontshow{if (ps::ps_os_type()[["POSIX"]] && !syrup:::is_fedora()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
62 | # pass any expression to syrup. first, sequentially:
63 | res_syrup <- syrup({res_output <- Sys.sleep(1)})
64 |
65 | res_syrup
66 |
67 | # to snapshot memory and CPU information more (or less) often, set `interval`
68 | syrup(Sys.sleep(1), interval = .01)
69 |
70 | # use `peak = TRUE` to return only the snapshot with
71 | # the highest memory usage (as `sum(rss)`)
72 | syrup(Sys.sleep(1), interval = .01, peak = TRUE)
73 |
74 | # results from syrup are more---or maybe only---useful when
75 | # computations are evaluated in parallel. see package README
76 | # for an example.
77 | \dontshow{\}) # examplesIf}
78 | }
79 |
--------------------------------------------------------------------------------
/R/syrup.R:
--------------------------------------------------------------------------------
1 | #' Memory and CPU Usage Information for Parallel R Code
2 | #'
3 | #' @description
4 | #' This function is a wrapper around the system command `ps` that can
5 | #' be used to benchmark (peak) memory and CPU usage of parallel R code.
6 | #' By taking snapshots of the memory usage of R processes at a regular `interval`,
7 | #' the function dynamically builds up a profile of their usage of system
8 | #' resources.
9 | #'
10 | #' @param expr An expression.
11 | #' @param interval The interval at which to take snapshots of resource usage.
12 | #' In practice, there's an overhead on top of each of these intervals.
13 | #' @param peak Whether to return rows for only the "peak" memory usage.
14 | #' Interpreted as the `id` with the maximum `rss` sum. Defaults to `FALSE`,
15 | #' but may be helpful to set `peak = TRUE` for potentially very long-running
16 | #' processes so that the tibble doesn't grow too large.
17 | #' @param env The environment to evaluate `expr` in.
18 | #'
19 | #' @returns A tibble with columns `id` and `time` and a number of columns from
20 | #' [ps::ps()] output describing memory and CPU usage. Notably, the process ID
21 | #' `pid`, parent process ID `ppid`, percent CPU usage, and resident set size
22 | #' `rss` (a measure of memory usage).
23 | #'
24 | #' @details
25 | #' While much of the verbiage in the package assumes that the supplied
26 | #' expression will be distributed across CPU cores, there's nothing specific
27 | #' about this package that necessitates that the expression provided to `syrup()`
28 | #' be run in parallel. Said another way, `syrup()` will work just fine
29 | #' with "normal," sequentially-run R code (as in the examples). That said,
30 | #' there are many better, more fine-grained tools for the job in the case of
31 | #' sequential R code, such as [Rprofmem()], the
32 | #' [profmem](https://CRAN.R-project.org/package=profmem)
33 | #' package, the [bench][bench::mark()] package, and packages in
34 | #' the [R-prof](https://github.com/r-prof) GitHub organization.
35 | #'
36 | #' Loosely, the function works by:
37 | #'
38 | #' * Setting up another R process (call it `sesh`) that queries system
39 | #' information using [ps::ps()] at a regular interval,
40 | #' * Evaluating the supplied expression,
41 | #' * Reading the queried system information back into the main process from `sesh`,
42 | #' * Closing `sesh`, and then
43 | #' * Returning the queried system information.
44 | #'
45 | #' Note that information on the R process `sesh` is filtered out from the results
46 | #' automatically.
47 | #'
48 | #' @examplesIf ps::ps_os_type()[["POSIX"]] && !syrup:::is_fedora()
49 | #' # pass any expression to syrup. first, sequentially:
50 | #' res_syrup <- syrup({res_output <- Sys.sleep(1)})
51 | #'
52 | #' res_syrup
53 | #'
54 | #' # to snapshot memory and CPU information more (or less) often, set `interval`
55 | #' syrup(Sys.sleep(1), interval = .01)
56 | #'
57 | #' # use `peak = TRUE` to return only the snapshot with
58 | #' # the highest memory usage (as `sum(rss)`)
59 | #' syrup(Sys.sleep(1), interval = .01, peak = TRUE)
60 | #'
61 | #' # results from syrup are more---or maybe only---useful when
62 | #' # computations are evaluated in parallel. see package README
63 | #' # for an example.
64 | #' @export
65 | syrup <- function(expr, interval = .5, peak = FALSE, env = caller_env()) {
66 | expr <- substitute(expr)
67 | if (!is_double(interval, n = 1, finite = TRUE)) {
68 | abort("`interval` must be a single, finite numeric.")
69 | }
70 | if (!is_bool(peak)) {
71 | abort("`peak` must be `TRUE` or `FALSE`.")
72 | }
73 | if (!is_environment(env)) {
74 | abort("`env` must be an environment.")
75 | }
76 |
77 | # create a new temporary R session `sesh`
78 | sesh <- callr::r_session$new()
79 | withr::defer(sesh$close())
80 |
81 | # communicate with `sesh` through existence of a tempfile:
82 | keep_going_file <- tempfile()
83 | file.create(keep_going_file)
84 | withr::defer(if (file.exists(keep_going_file)) file.remove(keep_going_file))
85 |
86 | # regularly take snapshots of memory usage of R sessions
87 | sesh$call(
88 |     function(interval, keep_going_file, ps_r_processes, peak) {
89 | id <- 1
90 | res <- ps_r_processes(id = id)
91 | current_peak <- sum(res$rss, na.rm = TRUE)
92 |
93 | while (file.exists(keep_going_file)) {
94 | id <- id + 1
95 | Sys.sleep(interval)
96 | new_res <- ps_r_processes(id = id)
97 | if (peak) {
98 | new_peak <- sum(new_res$rss, na.rm = TRUE)
99 | if (new_peak > current_peak) {
100 | current_peak <- new_peak
101 | res <- new_res
102 | }
103 | } else {
104 | res <- vctrs::vec_rbind(res, new_res)
105 | }
106 | }
107 |
108 | res
109 | },
110 | args = list(
111 | interval = interval,
112 | keep_going_file = keep_going_file,
113 | ps_r_processes = ps_r_processes,
114 | peak = peak
115 | )
116 | )
117 |
118 | # run the expression
119 | eval(expr, envir = env)
120 |
121 | # tell `sesh` to stop taking snapshots
122 | file.remove(keep_going_file)
123 | Sys.sleep(interval + .2)
124 |
125 | res <- retrieve_results(sesh)
126 |
127 | withr::deferred_clear()
128 |
129 | if (identical(res$id[length(res$id)], 1) && !isTRUE(peak)) {
130 | rlang::warn(c(
131 | "!" = "`expr` evaluated fully before syrup could take a snapshot of memory usage.",
132 | "*" = "Results likely represent memory usage before `expr` was evaluated."
133 | ))
134 | }
135 |
136 | res <- mutate_pct_cpu(res)
137 |
138 | res
139 | }
140 |
--------------------------------------------------------------------------------
/README.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | output: github_document
3 | ---
4 |
5 |
6 |
7 | ```{r, include = FALSE}
8 | knitr::opts_chunk$set(
9 | collapse = TRUE,
10 | comment = "#>",
11 | fig.path = "man/figures/README-",
12 | out.width = "100%"
13 | )
14 | ```
15 |
16 | # syrup
17 |
18 |
19 | [](https://lifecycle.r-lib.org/articles/stages.html#experimental)
20 | [](https://CRAN.R-project.org/package=syrup)
21 | [](https://github.com/simonpcouch/syrup/actions/workflows/R-CMD-check.yaml)
22 |
23 |
24 | The goal of syrup is to measure memory and CPU usage of R code by regularly taking snapshots of calls to the system command `ps`. The package provides an entry point (albeit coarse) to profile usage of system resources by R code run in parallel.
25 |
26 | The package name is an homage to syrupy (**SY**stem **R**esource **U**sage **P**rofile ...um, **Y**eah), a Python tool at [jeetsukumaran/Syrupy](https://github.com/jeetsukumaran/Syrupy).
27 |
28 | ## Installation
29 |
30 | Install the latest release of syrup from CRAN like so:
31 |
32 | ``` r
33 | install.packages("syrup")
34 | ```
35 |
36 | You can install the development version of syrup like so:
37 |
38 | ``` r
39 | pak::pak("simonpcouch/syrup")
40 | ```
41 |
42 | ## Example
43 |
44 | ```{r}
45 | library(syrup)
46 | ```
47 |
48 | The main function in the syrup package is the function by the same name. The main argument to `syrup()` is an expression, and the function outputs a tibble. Supplying a rather boring expression:
49 |
50 | ```{r}
51 | syrup(Sys.sleep(1))
52 | ```
53 |
54 | In this tibble, `id` defines a specific time point at which process usage was snapshotted, and the remaining columns show output derived from [ps::ps()](https://ps.r-lib.org/reference/ps.html). Notably, `pid` is the process ID, `ppid` is the process ID of the parent process, `pct_cpu` is the percent CPU usage, and `rss` is the resident set size (a measure of memory usage).
55 |
56 | The function works by:
57 |
58 | * Setting up another R process `sesh` that queries memory information at a regular interval,
59 | * Evaluating the supplied expression,
60 | * Reading the memory information back into the main process from `sesh`,
61 | * Closing `sesh`, and then
62 | * Returning the memory information.
63 |
64 | ## Application: model tuning
65 |
66 | For a more interesting demo, we'll tune a regularized linear model using cross-validation with tidymodels. First, loading needed packages:
67 |
68 | ```{r load-pkgs, message = FALSE, warning = FALSE}
69 | library(future)
70 | library(tidymodels)
71 | library(rlang)
72 | ```
73 |
74 | Using future to define our parallelism strategy, we'll set `plan(multicore, workers = 5)`, indicating that we'd like to use forking with 5 workers. By default, future disables forking from RStudio; I know that, in the context of building this README, this usage of forking is safe, so I'll temporarily override that default with `parallelly.fork.enable`.
75 |
76 | ```{r parellel}
77 | local_options(parallelly.fork.enable = TRUE)
78 | plan(multicore, workers = 5)
79 | ```
80 |
81 | Now, simulating some data:
82 |
83 | ```{r dat}
84 | set.seed(1)
85 | dat <- sim_regression(1000000)
86 |
87 | dat
88 | ```
89 |
90 | The call to `tune_grid()` does some setup sequentially before sending data off to the five child processes to actually carry out the model fitting. After models are fitted, data is sent back to the parent process to be combined. To better understand system resource usage throughout that process, we wrap the call in `syrup()`:
91 |
92 | ```{r syrup}
93 | res_mem <- syrup({
94 | res <-
95 | tune_grid(
96 | linear_reg(engine = "glmnet", penalty = tune()),
97 | outcome ~ .,
98 | vfold_cv(dat)
99 | )
100 | })
101 |
102 | res_mem
103 | ```
104 |
105 | These results are a bit more interesting than the sequential results from `Sys.sleep(1)`. Look closely at the `ppid`s for each `id`; after a snapshot or two, you'll see five identical `ppid`s for each `id`, and those `ppid`s match the `pid` of the one remaining R process. This shows us that we've indeed distributed computations using forking: that one remaining R process, the "parent," has spawned off five child processes from itself.
106 |
107 | We can plot the result to get a better sense of how memory usage of these processes changes over time:
108 |
109 | ```{r plot-mem, warning = FALSE}
110 | worker_ppid <- ps::ps_pid()
111 |
112 | res_mem %>%
113 | filter(ppid == worker_ppid | pid == worker_ppid) %>%
114 | ggplot() +
115 | aes(x = id, y = rss, group = pid) +
116 | geom_line() +
117 | scale_x_continuous(breaks = 1:max(res_mem$id))
118 | ```
119 |
120 | At first, only the parent process has non-`NA` `rss`, as tidymodels hasn't sent data off to any workers yet. Then, each of the 5 workers receives data from tidymodels and begins fitting models. Eventually, each of those workers returns their results to the parent process, and their `rss` is once again `NA`. The parent process wraps up its computations before completing evaluation of the expression, at which point `syrup()` returns. (Keep in mind: memory is weird. In the above plot, the total memory allotted to the parent session and its five workers at each ID is not simply the sum of those `rss` values, as memory is shared among them.) We see another side of the story come together for CPU usage:
121 |
122 | ```{r plot-cpu, message = FALSE, warning = FALSE}
123 | res_mem %>%
124 | filter(ppid == worker_ppid | pid == worker_ppid) %>%
125 | ggplot() +
126 | aes(x = id, y = pct_cpu, group = pid) +
127 | geom_line() +
128 | scale_x_continuous(breaks = 1:max(res_mem$id))
129 | ```
130 |
131 | The percent CPU usage will always be `NA` the first time a process ID is seen, as the usage calculation is based on change since the previous recorded value. As soon as we're able to start measuring, we see the workers at 100% usage, while the parent process is largely idle once it has sent data off to workers.
132 |
133 | ## Scope
134 |
135 | While much of the verbiage in the package assumes that the supplied expression will be distributed across CPU cores, there's nothing specific about this package that necessitates that the expression provided to `syrup()` be run in parallel. Said another way, syrup will work just fine with "normal," sequentially-run R code. That said, there are many better, more fine-grained tools for the job in the case of sequential R code, such as `Rprofmem()`, the [profmem](https://CRAN.R-project.org/package=profmem) package, the [bench](https://bench.r-lib.org/) package, and packages in the [R-prof](https://github.com/r-prof) GitHub organization.
136 |
137 | Results from syrup only provide enough detail for the coarsest analyses of memory and CPU usage, but they do provide an entry point to "profiling" system resource usage for R code that runs in parallel.
138 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | # syrup
5 |
6 |
7 |
8 | [](https://lifecycle.r-lib.org/articles/stages.html#experimental)
10 | [![CRAN
11 | status](https://www.r-pkg.org/badges/version/syrup)](https://CRAN.R-project.org/package=syrup)
12 | [![R-CMD-check](https://github.com/simonpcouch/syrup/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/simonpcouch/syrup/actions/workflows/R-CMD-check.yaml)
13 |
14 |
15 | The goal of syrup is to measure memory and CPU usage of R code by
16 | regularly taking snapshots of calls to the system command `ps`. The
17 | package provides an entry point (albeit coarse) to profile usage of
18 | system resources by R code run in parallel.
19 |
20 | The package name is an homage to syrupy (**SY**stem **R**esource
21 | **U**sage **P**rofile …um, **Y**eah), a Python tool at
22 | [jeetsukumaran/Syrupy](https://github.com/jeetsukumaran/Syrupy).
23 |
24 | ## Installation
25 |
26 | Install the latest release of syrup from CRAN like so:
27 |
28 | ``` r
29 | install.packages("syrup")
30 | ```
31 |
32 | You can install the development version of syrup like so:
33 |
34 | ``` r
35 | pak::pak("simonpcouch/syrup")
36 | ```
37 |
38 | ## Example
39 |
40 | ``` r
41 | library(syrup)
42 | #> Loading required package: bench
43 | ```
44 |
45 | The main function in the syrup package is the function by the same name.
46 | The main argument to `syrup()` is an expression, and the function
47 | outputs a tibble. Supplying a rather boring expression:
48 |
49 | ``` r
50 | syrup(Sys.sleep(1))
51 | #> # A tibble: 48 × 8
52 | #> id time pid ppid name pct_cpu rss vms
53 | #>   <dbl> <dttm> <int> <int> <chr> <dbl> <bch:byt> <bch:byt>
54 | #> 1 1 2024-07-03 11:42:33 67101 60522 R NA 112MB 392GB
55 | #> 2 1 2024-07-03 11:42:33 60522 60300 rsession-arm64 NA 653MB 394GB
56 | #> 3 1 2024-07-03 11:42:33 58919 1 R NA 773MB 393GB
57 | #> 4 1 2024-07-03 11:42:33 97009 1 rsession-arm64 NA 128KB 394GB
58 | #> 5 1 2024-07-03 11:42:33 97008 1 rsession-arm64 NA 128KB 394GB
59 | #> 6 1 2024-07-03 11:42:33 97007 1 rsession-arm64 NA 240KB 394GB
60 | #> 7 1 2024-07-03 11:42:33 97006 1 rsession-arm64 NA 240KB 394GB
61 | #> 8 1 2024-07-03 11:42:33 97005 1 rsession-arm64 NA 128KB 394GB
62 | #> 9 1 2024-07-03 11:42:33 91012 1 R NA 128KB 393GB
63 | #> 10 1 2024-07-03 11:42:33 90999 1 R NA 128KB 393GB
64 | #> # ℹ 38 more rows
65 | ```
66 |
67 | In this tibble, `id` defines a specific time point at which process
68 | usage was snapshotted, and the remaining columns show output derived
69 | from [ps::ps()](https://ps.r-lib.org/reference/ps.html). Notably, `pid`
70 | is the process ID, `ppid` is the process ID of the parent process,
71 | `pct_cpu` is the percent CPU usage, and `rss` is the resident set size
72 | (a measure of memory usage).
73 |
74 | The function works by:
75 |
76 | - Setting up another R process `sesh` that queries memory information at
77 | a regular interval,
78 | - Evaluating the supplied expression,
79 | - Reading the memory information back into the main process from `sesh`,
80 | - Closing `sesh`, and then
81 | - Returning the memory information.
82 |
83 | ## Application: model tuning
84 |
85 | For a more interesting demo, we’ll tune a regularized linear model using
86 | cross-validation with tidymodels. First, loading needed packages:
87 |
88 | ``` r
89 | library(future)
90 | library(tidymodels)
91 | library(rlang)
92 | ```
93 |
94 | Using future to define our parallelism strategy, we’ll set
95 | `plan(multicore, workers = 5)`, indicating that we’d like to use forking
96 | with 5 workers. By default, future disables forking from RStudio; I know
97 | that, in the context of building this README, this usage of forking is
98 | safe, so I’ll temporarily override that default with
99 | `parallelly.fork.enable`.
100 |
101 | ``` r
102 | local_options(parallelly.fork.enable = TRUE)
103 | plan(multicore, workers = 5)
104 | ```
105 |
106 | Now, simulating some data:
107 |
108 | ``` r
109 | set.seed(1)
110 | dat <- sim_regression(1000000)
111 |
112 | dat
113 | #> # A tibble: 1,000,000 × 21
114 | #> outcome predictor_01 predictor_02 predictor_03 predictor_04 predictor_05
115 | #>   <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
116 | #> 1 3.63 -1.88 0.872 -0.799 -0.0379 2.68
117 | #> 2 41.6 0.551 -2.47 2.37 3.90 5.18
118 | #> 3 -6.99 -2.51 -3.15 2.61 2.13 3.08
119 | #> 4 33.2 4.79 1.86 -2.37 4.27 -3.59
120 | #> 5 34.3 0.989 -0.315 3.08 2.56 -5.91
121 | #> 6 26.7 -2.46 -0.459 1.75 -5.24 5.04
122 | #> 7 21.4 1.46 -0.674 -0.894 -3.91 -3.38
123 | #> 8 21.7 2.21 1.28 -1.05 -0.561 2.99
124 | #> 9 -8.84 1.73 0.0725 0.0976 5.40 4.30
125 | #> 10 24.5 -0.916 -0.223 -0.561 -4.12 0.0508
126 | #> # ℹ 999,990 more rows
127 | #> # ℹ 15 more variables: predictor_06 <dbl>, predictor_07 <dbl>,
128 | #> #   predictor_08 <dbl>, predictor_09 <dbl>, predictor_10 <dbl>,
129 | #> #   predictor_11 <dbl>, predictor_12 <dbl>, predictor_13 <dbl>,
130 | #> #   predictor_14 <dbl>, predictor_15 <dbl>, predictor_16 <dbl>,
131 | #> #   predictor_17 <dbl>, predictor_18 <dbl>, predictor_19 <dbl>,
132 | #> #   predictor_20 <dbl>
133 | ```
134 |
135 | The call to `tune_grid()` does some setup sequentially before sending
136 | data off to the five child processes to actually carry out the model
137 | fitting. After models are fitted, data is sent back to the parent
138 | process to be combined. To better understand system resource usage
139 | throughout that process, we wrap the call in `syrup()`:
140 |
141 | ``` r
142 | res_mem <- syrup({
143 | res <-
144 | tune_grid(
145 | linear_reg(engine = "glmnet", penalty = tune()),
146 | outcome ~ .,
147 | vfold_cv(dat)
148 | )
149 | })
150 |
151 | res_mem
152 | #> # A tibble: 158 × 8
153 | #> id time pid ppid name pct_cpu rss vms
154 | #>   <dbl> <dttm> <int> <int> <chr> <dbl> <bch:byt> <bch:byt>
155 | #> 1 1 2024-07-03 11:42:38 67101 60522 R NA 1.05GB 393GB
156 | #> 2 1 2024-07-03 11:42:38 60522 60300 rsession-arm64 NA 653.44MB 394GB
157 | #> 3 1 2024-07-03 11:42:38 58919 1 R NA 838.56MB 393GB
158 | #> 4 1 2024-07-03 11:42:38 97009 1 rsession-arm64 NA 128KB 394GB
159 | #> 5 1 2024-07-03 11:42:38 97008 1 rsession-arm64 NA 128KB 394GB
160 | #> 6 1 2024-07-03 11:42:38 97007 1 rsession-arm64 NA 240KB 394GB
161 | #> 7 1 2024-07-03 11:42:38 97006 1 rsession-arm64 NA 240KB 394GB
162 | #> 8 1 2024-07-03 11:42:38 97005 1 rsession-arm64 NA 128KB 394GB
163 | #> 9 1 2024-07-03 11:42:38 91012 1 R NA 128KB 393GB
164 | #> 10 1 2024-07-03 11:42:38 90999 1 R NA 128KB 393GB
165 | #> # ℹ 148 more rows
166 | ```
167 |
168 | These results are a bit more interesting than the sequential results
169 | from `Sys.sleep(1)`. Look closely at the `ppid`s for each `id`; after a
170 | snapshot or two, you’ll see five identical `ppid`s for each `id`, and
171 | those `ppid`s match the `pid` of the one remaining R process. This
172 | shows us that we’ve indeed distributed computations using forking: that
173 | one remaining R process, the “parent,” has spawned off five child
174 | processes from itself.
175 |
176 | We can plot the result to get a better sense of how memory usage of
177 | these processes changes over time:
178 |
179 | ``` r
180 | worker_ppid <- ps::ps_pid()
181 |
182 | res_mem %>%
183 | filter(ppid == worker_ppid | pid == worker_ppid) %>%
184 | ggplot() +
185 | aes(x = id, y = rss, group = pid) +
186 | geom_line() +
187 | scale_x_continuous(breaks = 1:max(res_mem$id))
188 | ```
189 |
190 | ![](man/figures/README-plot-mem-1.png)<!-- -->
191 |
192 | At first, only the parent process has non-`NA` `rss`, as tidymodels
193 | hasn’t sent data off to any workers yet. Then, each of the 5 workers
194 | receives data from tidymodels and begins fitting models. Eventually,
195 | each of those workers returns their results to the parent process, and
196 | their `rss` is once again `NA`. The parent process wraps up its
197 | computations before completing evaluation of the expression, at which
198 | point `syrup()` returns. (Keep in mind: memory is weird. In the above
199 | plot, the total memory allotted to the parent session and its five
200 | workers at each ID is not simply the sum of those `rss` values, as
201 | memory is shared among them.) We see another side of the story come
202 | together for CPU usage:
203 |
204 | ``` r
205 | res_mem %>%
206 | filter(ppid == worker_ppid | pid == worker_ppid) %>%
207 | ggplot() +
208 | aes(x = id, y = pct_cpu, group = pid) +
209 | geom_line() +
210 | scale_x_continuous(breaks = 1:max(res_mem$id))
211 | ```
212 |
213 | ![](man/figures/README-plot-cpu-1.png)<!-- -->
214 |
215 | The percent CPU usage will always be `NA` the first time a process ID is
216 | seen, as the usage calculation is based on change since the previous
217 | recorded value. As soon as we’re able to start measuring, we see the
218 | workers at 100% usage, while the parent process is largely idle once it
219 | has sent data off to workers.
220 |
221 | ## Scope
222 |
223 | While much of the verbiage in the package assumes that the supplied
224 | expression will be distributed across CPU cores, there’s nothing
225 | specific about this package that necessitates that the expression provided
226 | to `syrup()` be run in parallel. Said another way, syrup will work just
227 | fine with “normal,” sequentially-run R code. That said, there are many
228 | better, more fine-grained tools for the job in the case of sequential R
229 | code, such as `Rprofmem()`, the
230 | [profmem](https://CRAN.R-project.org/package=profmem) package, the
231 | [bench](https://bench.r-lib.org/) package, and packages in the
232 | [R-prof](https://github.com/r-prof) GitHub organization.
233 |
234 | Results from syrup only provide enough detail for the coarsest analyses
235 | of memory and CPU usage, but they do provide an entry point to
236 | “profiling” system resource usage for R code that runs in parallel.
237 |
--------------------------------------------------------------------------------