├── inst
    ├── documents
    │   ├── _quarto.yml
    │   ├── .gitignore
    │   ├── _environment
    │   ├── style.css
    │   ├── files.qmd
    │   ├── python.qmd
    │   ├── summary.qmd
    │   ├── title-block.html
    │   ├── python_modules.py
    │   ├── dummy.qmd
    │   └── log.qmd
    ├── use_whirl
    │   ├── _parametrics.yml
    │   ├── _whirl.yml
    │   └── _biocompute.yml
    ├── .gitignore
    ├── examples
    │   ├── error.R
    │   ├── success.R
    │   ├── _whirl.yaml
    │   ├── warning.R
    │   └── prg1.R
    ├── rstudio
    │   └── addins.dcf
    └── WORDLIST
├── .github
    ├── .gitignore
    ├── pull_request_template.md
    ├── actions
    │   └── setup
    │   │   └── action.yaml
    ├── ISSUE_TEMPLATE
    │   ├── feature_request.md
    │   └── bug_report.md
    └── workflows
    │   └── check_and_co.yaml
├── vignettes
    ├── .gitignore
    ├── articles
    │   └── example.Rmd
    └── whirl.Rmd
├── .gitattributes
├── tests
    ├── testthat
    │   ├── scripts
    │   │   ├── py_success.py
    │   │   ├── py_warning.py
    │   │   ├── _biocompute_parametrics.yml
    │   │   ├── error.R
    │   │   ├── _whirl_r_programs.yaml
    │   │   ├── _whirl_expression.yaml
    │   │   ├── render_error.R
    │   │   ├── success.R
    │   │   ├── _whirl_unnamed.yaml
    │   │   ├── _whirl.yaml
    │   │   ├── _whirl_to_config.yaml
    │   │   ├── biocompute.R
    │   │   ├── py_error.py
    │   │   ├── warning.R
    │   │   ├── py_dependencies.py
    │   │   ├── packages.R
    │   │   └── _whirl_biocompute.yaml
    │   ├── test-errors.R
    │   ├── _snaps
    │   │   ├── custom_logging.md
    │   │   ├── whirl_queue.md
    │   │   ├── examples.md
    │   │   └── biocompute.md
    │   ├── setup.R
    │   ├── test-render_summary.R
    │   ├── test-biocompute.R
    │   ├── test-internal_run.R
    │   ├── test-approvedpkgs.R
    │   ├── test-read_glob.R
    │   ├── test-use_whirl.R
    │   ├── test-util_queue_summary.R
    │   ├── test-whirl_queue.R
    │   ├── test-progress_bar.R
    │   ├── helper.R
    │   ├── test-examples.R
    │   ├── test-renv.R
    │   ├── test-whirl_r_session.R
    │   ├── test-mdformats.R
    │   ├── test-python.R
    │   ├── test-utils.R
    │   ├── test-custom_logging.R
    │   ├── test-enrich_input.R
    │   ├── test-run.R
    │   └── test-strace.R
    └── testthat.R
├── man
    ├── figures
    │   └── logo.png
    ├── use_whirl.Rd
    ├── use_biocompute.Rd
    ├── custom_logging.Rd
    ├── whirl-package.Rd
    ├── whirl-options-params.Rd
    ├── write_biocompute.Rd
    ├── run.Rd
    └── whirl-options.Rd
├── pkgdown
    └── favicon
    │   ├── favicon.ico
    │   ├── favicon-16x16.png
    │   ├── favicon-32x32.png
    │   ├── apple-touch-icon.png
    │   ├── apple-touch-icon-60x60.png
    │   ├── apple-touch-icon-76x76.png
    │   ├── apple-touch-icon-120x120.png
    │   ├── apple-touch-icon-152x152.png
    │   └── apple-touch-icon-180x180.png
├── R
    ├── whirl-package.R
    ├── approvedpkgs.R
    ├── quarto.R
    ├── run_current_script.R
    ├── progress_bar.R
    ├── util_queue_summary.R
    ├── mdformats.R
    ├── read_glob.R
    ├── internal_run.R
    ├── status.R
    ├── normalize_with_base.R
    ├── renv.R
    ├── use_whirl.R
    ├── enrich_input.R
    ├── utils.R
    ├── render_summary.R
    ├── custom_logging.R
    ├── whirl-options.R
    ├── run.R
    ├── log.R
    ├── strace.R
    ├── whirl_queue.R
    └── biocompute.R
├── _pkgdown.yml
├── NAMESPACE
├── .Rbuildignore
├── cran-comments.md
├── whirl.Rproj
├── .gitignore
├── DESCRIPTION
├── .pre-commit-config.yaml
├── NEWS.md
├── README.Rmd
├── README.md
└── LICENSE.md


/inst/documents/_quarto.yml:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/.github/.gitignore:
--------------------------------------------------------------------------------
1 | *.html
2 | 


--------------------------------------------------------------------------------
/inst/documents/.gitignore:
--------------------------------------------------------------------------------
1 | /.quarto/
2 | 


--------------------------------------------------------------------------------
/vignettes/.gitignore:
--------------------------------------------------------------------------------
1 | *.html
2 | *.R
3 | 


--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
1 | tests/testthat/scripts/*.html linguist-generated


--------------------------------------------------------------------------------
/tests/testthat/scripts/py_success.py:
--------------------------------------------------------------------------------
1 | a = 2 + 2
2 | print(a)
3 | 


--------------------------------------------------------------------------------
/inst/use_whirl/_parametrics.yml:
--------------------------------------------------------------------------------
1 | ---
2 | paramA: "text parameter"
3 | paramB: 2
4 | 


--------------------------------------------------------------------------------
/tests/testthat/scripts/py_warning.py:
--------------------------------------------------------------------------------
1 | import warnings
2 | 
3 | warnings.warn("test warning")
4 | 


--------------------------------------------------------------------------------
/inst/.gitignore:
--------------------------------------------------------------------------------
1 | *.png
2 | *.md
3 | *.html
4 | *.json
5 | *_files/*
6 | !documents/title-block.html
7 | 


--------------------------------------------------------------------------------
/man/figures/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NovoNordisk-OpenSource/whirl/HEAD/man/figures/logo.png


--------------------------------------------------------------------------------
/inst/examples/error.R:
--------------------------------------------------------------------------------
1 | 
2 | # This script produces error for example purposes
3 | 
4 | stop("This is an error!")
5 | 


--------------------------------------------------------------------------------
/pkgdown/favicon/favicon.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NovoNordisk-OpenSource/whirl/HEAD/pkgdown/favicon/favicon.ico


--------------------------------------------------------------------------------
/tests/testthat/scripts/_biocompute_parametrics.yml:
--------------------------------------------------------------------------------
1 | ---
2 | #### Path ####
3 | outputFilePath: "a.txt"
4 | otherStuff: 2
5 | 


--------------------------------------------------------------------------------
/tests/testthat/scripts/error.R:
--------------------------------------------------------------------------------
1 | # This script produces error for testing purposes
2 | 
3 | stop("This is an error!")
4 | 


--------------------------------------------------------------------------------
/tests/testthat/scripts/_whirl_r_programs.yaml:
--------------------------------------------------------------------------------
1 | steps:
2 |   - name: "Run all R programs"
3 |     paths:
4 |       - "*.R"
5 | 


--------------------------------------------------------------------------------
/pkgdown/favicon/favicon-16x16.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NovoNordisk-OpenSource/whirl/HEAD/pkgdown/favicon/favicon-16x16.png


--------------------------------------------------------------------------------
/pkgdown/favicon/favicon-32x32.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NovoNordisk-OpenSource/whirl/HEAD/pkgdown/favicon/favicon-32x32.png


--------------------------------------------------------------------------------
/pkgdown/favicon/apple-touch-icon.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NovoNordisk-OpenSource/whirl/HEAD/pkgdown/favicon/apple-touch-icon.png


--------------------------------------------------------------------------------
/inst/documents/_environment:
--------------------------------------------------------------------------------
1 | # The _environment file is used to make sure the quarto processes
2 | # have the correct environment variables set
3 | 


--------------------------------------------------------------------------------
/pkgdown/favicon/apple-touch-icon-60x60.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NovoNordisk-OpenSource/whirl/HEAD/pkgdown/favicon/apple-touch-icon-60x60.png


--------------------------------------------------------------------------------
/pkgdown/favicon/apple-touch-icon-76x76.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NovoNordisk-OpenSource/whirl/HEAD/pkgdown/favicon/apple-touch-icon-76x76.png


--------------------------------------------------------------------------------
/tests/testthat/scripts/_whirl_expression.yaml:
--------------------------------------------------------------------------------
1 | steps:
2 |   - name: !expr "paste('Today is', Sys.Date())"
3 |     paths:
4 |       - "success.R"
5 | 


--------------------------------------------------------------------------------
/pkgdown/favicon/apple-touch-icon-120x120.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NovoNordisk-OpenSource/whirl/HEAD/pkgdown/favicon/apple-touch-icon-120x120.png


--------------------------------------------------------------------------------
/pkgdown/favicon/apple-touch-icon-152x152.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NovoNordisk-OpenSource/whirl/HEAD/pkgdown/favicon/apple-touch-icon-152x152.png


--------------------------------------------------------------------------------
/pkgdown/favicon/apple-touch-icon-180x180.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NovoNordisk-OpenSource/whirl/HEAD/pkgdown/favicon/apple-touch-icon-180x180.png


--------------------------------------------------------------------------------
/inst/examples/success.R:
--------------------------------------------------------------------------------
1 | 
2 | # This script produces no errors or warnings for example purposes
3 | 
4 | message("this script has no errors or warnings")
5 | 


--------------------------------------------------------------------------------
/tests/testthat/scripts/render_error.R:
--------------------------------------------------------------------------------
1 | # This script will always error in the logging process due to
2 | # Quarto params being deleted.
3 | rm(list = ls())
4 | 


--------------------------------------------------------------------------------
/tests/testthat/scripts/success.R:
--------------------------------------------------------------------------------
1 | # This script produces no errors or warnings for testing purposes
2 | 
3 | message("this script has no errors or warnings")
4 | 


--------------------------------------------------------------------------------
/inst/examples/_whirl.yaml:
--------------------------------------------------------------------------------
1 | steps:
2 |   - name: "First step"
3 |     paths:
4 |       - "success.R"
5 |   - name: "Second step"
6 |     paths:
7 |       - "warning.R"
8 |       - "error.R"
9 | 


--------------------------------------------------------------------------------
/tests/testthat/scripts/_whirl_unnamed.yaml:
--------------------------------------------------------------------------------
1 | steps:
2 |   - name: "Named step"
3 |     paths:
4 |       - "success.R"
5 |   - paths:
6 |       - "warning.R"
7 |   - paths:
8 |       - "error.R"
9 | 


--------------------------------------------------------------------------------
/tests/testthat/scripts/_whirl.yaml:
--------------------------------------------------------------------------------
1 | steps:
2 |   - name: "First step"
3 |     paths:
4 |       - "success.R"
5 |   - name: "Second step"
6 |     paths:
7 |       - "warning.R"
8 |       - "error.R"
9 | 


--------------------------------------------------------------------------------
/tests/testthat/scripts/_whirl_to_config.yaml:
--------------------------------------------------------------------------------
1 | steps:
2 |   - name: "Run _whirl.yaml"
3 |     paths:
4 |       - "_whirl.yaml"
5 |   - name: "Run _whirl_unnamed.yaml"
6 |     paths:
7 |       - "_whirl_unnamed.yaml"
8 | 


--------------------------------------------------------------------------------
/tests/testthat/scripts/biocompute.R:
--------------------------------------------------------------------------------
 1 | 
 2 | library(whirl)
 3 | 
 4 | log_read("my_project/params.yml")
 5 | 
 6 | log_read("my_data/data.rds")
 7 | 
 8 | log_write("my_output/output.txt")
 9 | 
10 | log_write("my_output/plot.png")
11 | 


--------------------------------------------------------------------------------
/R/whirl-package.R:
--------------------------------------------------------------------------------
 1 | ## usethis namespace: start
 2 | #' @importFrom dplyr .data
 3 | #' @importFrom reticulate py
 4 | #' @importFrom sessioninfo session_info
 5 | ## usethis namespace: end
 6 | NULL
 7 | 
 8 | #' @keywords internal
 9 | "_PACKAGE"
10 | 


--------------------------------------------------------------------------------
/tests/testthat/test-errors.R:
--------------------------------------------------------------------------------
 1 | test_that("quarto error", {
 2 |   skip_if_no_quarto()
 3 | 
 4 |   test_script("render_error.R") |>
 5 |     run(summary_file = NULL) |>
 6 |     expect_error(
 7 |       regexp = "object 'params' not found"
 8 |     )
 9 | })
10 | 


--------------------------------------------------------------------------------
/tests/testthat/scripts/py_error.py:
--------------------------------------------------------------------------------
1 | # mypy: disable-error-code="operator"
2 | 1 + "a"  # pyright: ignore [reportOperatorIssue, reportUnusedExpression]
3 | 
4 | raise TypeError("This is a type error for testing purposes")
5 | 
6 | raise Exception("Error also for testing")
7 | 


--------------------------------------------------------------------------------
/inst/documents/style.css:
--------------------------------------------------------------------------------
 1 | /* Container for Script section */
 2 | .script-container {
 3 |   background-color: #f8f9fa;
 4 |   padding: 20px;
 5 |   border-left: 4px solid #007bff;
 6 |   border-radius: 6px;
 7 |   margin: 15px 0;
 8 |   box-shadow: 0 2px 4px rgba(0,0,0,0.1);
 9 | }
10 | 


--------------------------------------------------------------------------------
/inst/rstudio/addins.dcf:
--------------------------------------------------------------------------------
 1 | Name: 📑 Run all
 2 | Description: Run all scripts in the project and generate HTML logs
 3 | Binding: run
 4 | Interactive: false
 5 | 
 6 | Name: 📝 Run current script
 7 | Description: Run the current script and produce HTML log
 8 | Binding: run_current_script
 9 | Interactive: false
10 | 


--------------------------------------------------------------------------------
/tests/testthat/_snaps/custom_logging.md:
--------------------------------------------------------------------------------
 1 | # stream to log file in a whirl context
 2 | 
 3 |     Code
 4 |       x
 5 |     Output
 6 |                        time   type        file
 7 |       1 2000-01-01 01:01:01   read   test_read
 8 |       2 2000-01-01 01:01:01  write  test_write
 9 |       3 2000-01-01 01:01:01 delete test_delete
10 | 
11 | 


--------------------------------------------------------------------------------
/inst/use_whirl/_whirl.yml:
--------------------------------------------------------------------------------
 1 | steps:
 2 |   - name: "Run single script"
 3 |     paths:
 4 |       - "path/to/script.R"
 5 |   - name: "Run multiple scripts in same step"
 6 |     paths:
 7 |       - "path/to/script_1.R"
 8 |       - "path/to/script_2.R"
 9 |   - name: "Run all R scripts in folder"
10 |     paths:
11 |       - "path/to/folder/*.R"
12 | 


--------------------------------------------------------------------------------
/inst/examples/warning.R:
--------------------------------------------------------------------------------
 1 | 
 2 | # This script produces warnings for example purposes
 3 | 
 4 | warning("this is a warning")
 5 | warning("this is a warning 2 ^^")
 6 | warning("this is a warning 3 ^^")
 7 | warning("this is a warning 4 ^^")
 8 | warning("this is a warning 5 ^^")
 9 | warning("this is a warning 6 ^^")
10 | warning("this is a warning 7 ^^")
11 | 


--------------------------------------------------------------------------------
/tests/testthat/_snaps/whirl_queue.md:
--------------------------------------------------------------------------------
 1 | # whirl_queue edge cases not covered in test-run etc.
 2 | 
 3 |     Code
 4 |       print(q)
 5 |     Output
 6 |       # A tibble: 1 x 6
 7 |            id tag   script    status  result log_dir
 8 |         <dbl> <chr> <chr>     <chr>   <list> <chr>  
 9 |       1     1 <NA>  skipped.R skipped <NULL> .      
10 | 
11 | 


--------------------------------------------------------------------------------
/tests/testthat/scripts/warning.R:
--------------------------------------------------------------------------------
 1 | # This script produces a warning for testing purposes
 2 | 
 3 | warning("this is a warning")
 4 | warning("this is a warning 2 ^^")
 5 | warning("this is a warning 3 ^^")
 6 | warning("this is a warning 4 ^^")
 7 | warning("this is a warning 5 ^^")
 8 | warning("this is a warning 6 ^^")
 9 | warning("this is a warning 7 ^^")
10 | 


--------------------------------------------------------------------------------
/inst/examples/prg1.R:
--------------------------------------------------------------------------------
 1 | #' Setup
 2 | 
 3 | library(dplyr)
 4 | library(ggplot2)
 5 | 
 6 | #' Prepare data
 7 | 
 8 | x <- mtcars |>
 9 |   as_tibble(rownames = "car")
10 | 
11 | print(x)
12 | 
13 | #' Create and save plot
14 | 
15 | ggplot(data = x) +
16 |   geom_point(mapping = aes(x = mpg, y = hp, size = wt, colour = as.factor(am)))
17 | 
18 | ggsave("plot1.png")
19 | 


--------------------------------------------------------------------------------
/_pkgdown.yml:
--------------------------------------------------------------------------------
 1 | url: https://novonordisk-opensource.github.io/whirl/
 2 | 
 3 | template:
 4 |   bootstrap: 5
 5 | 
 6 | reference:
 7 | - title: "Execute scripts"
 8 |   contents:
 9 |   - run
10 |   - write_biocompute
11 |   - use_whirl
12 |   - use_biocompute
13 |   - whirl-options
14 | 
15 | - title: "Log custom actions"
16 |   contents:
17 |   - starts_with("log_")
18 | 


--------------------------------------------------------------------------------
/tests/testthat/setup.R:
--------------------------------------------------------------------------------
 1 | ## whirl have to be installed for Quarto to use it
 2 | 
 3 | withr::local_envvar(
 4 |   R_USER_CACHE_DIR = tempfile(),
 5 |   .local_envir = teardown_env()
 6 | )
 7 | 
 8 | # Minimal prints to make it easier to read test output
 9 | 
10 | withr::local_options(
11 |   list(whirl.verbosity_level = "quiet"),
12 |   .local_envir = teardown_env()
13 | )
14 | 


--------------------------------------------------------------------------------
/NAMESPACE:
--------------------------------------------------------------------------------
 1 | # Generated by roxygen2: do not edit by hand
 2 | 
 3 | export(log_delete)
 4 | export(log_read)
 5 | export(log_write)
 6 | export(run)
 7 | export(use_biocompute)
 8 | export(use_whirl)
 9 | export(write_biocompute)
10 | importFrom(R6,R6Class)
11 | importFrom(callr,r_session)
12 | importFrom(dplyr,.data)
13 | importFrom(reticulate,py)
14 | importFrom(sessioninfo,session_info)
15 | 


--------------------------------------------------------------------------------
/tests/testthat/test-render_summary.R:
--------------------------------------------------------------------------------
 1 | test_that("warning when summary cant be created", {
 2 | 
 3 |   tempdir <- withr::local_tempdir()
 4 | 
 5 |   summary_file <- file.path(tempdir, "folder_not_exists", "summary.html")
 6 | 
 7 |   q <- whirl_queue$new()$skip("test.R")
 8 | 
 9 |   render_summary(input = q, summary_file = summary_file) |>
10 |     expect_error() |>
11 |     expect_warning()
12 | })
13 | 


--------------------------------------------------------------------------------
/tests/testthat/scripts/py_dependencies.py:
--------------------------------------------------------------------------------
 1 | import pandas as pd  # noqa: E501 # pylint: disable=import-error # pyright: ignore[reportMissingImports]
 2 | import numpy as np  # noqa: E501 # pylint: disable=import-error # pyright: ignore[reportMissingImports]
 3 | 
 4 | 
 5 | def main():
 6 |     df = pd.DataFrame({
 7 |         'A': [np.sum([1, 2])]
 8 |     })
 9 | 
10 |     print(df)
11 | 
12 | 
13 | if __name__ == "__main__":
14 |     main()
15 | 


--------------------------------------------------------------------------------
/tests/testthat.R:
--------------------------------------------------------------------------------
 1 | # This file is part of the standard setup for testthat.
 2 | # It is recommended that you do not modify it.
 3 | #
 4 | # Where should you do additional test configuration?
 5 | # Learn more about the roles of various files in:
 6 | # * https://r-pkgs.org/testing-design.html#sec-tests-files-overview
 7 | # * https://testthat.r-lib.org/articles/special-files.html
 8 | 
 9 | library(testthat)
10 | library(whirl)
11 | 
12 | test_check("whirl")
13 | 


--------------------------------------------------------------------------------
/.Rbuildignore:
--------------------------------------------------------------------------------
 1 | ^LICENSE\.md$
 2 | ^README\.Rmd$
 3 | ^\.Rproj\.user$
 4 | ^\.github$
 5 | ^\.lintr$
 6 | ^\.pre-commit-config\.yaml$
 7 | ^_pkgdown\.yml$
 8 | ^dev/
 9 | ^docs$
10 | ^inst/examples/*\\.html$
11 | ^inst/examples/*_files$
12 | ^inst/output$
13 | ^pkgdown$
14 | ^vignettes/articles$
15 | ^whirl.*\.tar\.gz$
16 | ^whirl.*\.tgz$
17 | ^whirl\.Rcheck$
18 | ^whirl\.Rproj$
19 | ^înst/WORDLIST$
20 | plot1.png
21 | summary.html
22 | ^cran-comments\.md$
23 | ^CRAN-SUBMISSION$
24 | 


--------------------------------------------------------------------------------
/cran-comments.md:
--------------------------------------------------------------------------------
 1 | ## R CMD check results
 2 | 
 3 | 0 errors | 0 warnings | 1 note
 4 | 
 5 | * Patch update improving the logs.
 6 | * Removed `verbosity_level` argument to `run()` since it is now completely controlled
 7 | by zephyr options (see `help("whirl-options")`). No reverse dependencies.
 8 | * [Check result errors](https://cran.r-project.org/web/checks/check_results_whirl.html)
 9 | should be fixed in [#197](https://github.com/NovoNordisk-OpenSource/whirl/pull/197)
10 | 


--------------------------------------------------------------------------------
/tests/testthat/scripts/packages.R:
--------------------------------------------------------------------------------
 1 | 
 2 | # This script produces no errors or warnings for example purposes
 3 | 
 4 | message("this script has no errors or warnings")
 5 | 
 6 | #Library calls
 7 | library(dplyr)
 8 | 
 9 | for (package in c("reticulate", "rlang")) {
10 |   library(package, character.only = TRUE)
11 | }
12 | 
13 | 
14 | #Load into namespace
15 | ggplot2::ggplot(mtcars, ggplot2::aes(mpg, cyl)) +
16 |   ggplot2::geom_point()
17 | 
18 | zephyr::msg("Test message with zephyr")
19 | 


--------------------------------------------------------------------------------
/tests/testthat/test-biocompute.R:
--------------------------------------------------------------------------------
 1 | test_that("Biocompute object created correctly", {
 2 |   skip_if_no_quarto()
 3 | 
 4 |   input_yml <- test_script("_whirl_biocompute.yaml")
 5 |   queue <- whirl::run(
 6 |     input = input_yml,
 7 |     out_formats = NULL,
 8 |     summary_file = NULL
 9 |   )
10 | 
11 |   bco_tmp <- withr::local_tempfile(fileext = ".json")
12 | 
13 |   bco <- write_biocompute(queue = queue, path = bco_tmp) |>
14 |     expect_no_condition()
15 | 
16 |   expect_snapshot(str(bco$io_domain))
17 | })
18 | 


--------------------------------------------------------------------------------
/whirl.Rproj:
--------------------------------------------------------------------------------
 1 | Version: 1.0
 2 | 
 3 | RestoreWorkspace: No
 4 | SaveWorkspace: No
 5 | AlwaysSaveHistory: Default
 6 | 
 7 | EnableCodeIndexing: Yes
 8 | UseSpacesForTab: Yes
 9 | NumSpacesForTab: 2
10 | Encoding: UTF-8
11 | 
12 | RnwWeave: Sweave
13 | LaTeX: pdfLaTeX
14 | 
15 | AutoAppendNewline: Yes
16 | StripTrailingWhitespace: Yes
17 | LineEndingConversion: Posix
18 | 
19 | BuildType: Package
20 | PackageUseDevtools: Yes
21 | PackageInstallArgs: --no-multiarch --with-keep.source
22 | PackageRoxygenize: rd,collate,namespace
23 | 


--------------------------------------------------------------------------------
/tests/testthat/test-internal_run.R:
--------------------------------------------------------------------------------
 1 | test_that("testing internal_run()", {
 2 |   skip_if_no_quarto()
 3 | 
 4 |   # A config file
 5 | 
 6 |   q <- whirl_queue$new(n_workers = 2)
 7 | 
 8 |   test_script("_whirl.yaml") |>
 9 |     internal_run(steps = NULL, level = 1, queue = q) |>
10 |     expect_no_error()
11 | 
12 |   # A config file calling another config file
13 | 
14 |   q <- whirl_queue$new()
15 | 
16 |   test_script("_whirl_to_config.yaml") |>
17 |     internal_run(steps = NULL, level = 1, queue = q) |>
18 |     expect_no_error()
19 | })
20 | 


--------------------------------------------------------------------------------
/tests/testthat/test-approvedpkgs.R:
--------------------------------------------------------------------------------
 1 | test_that("Approved packages", {
 2 |   check_approved("pkg@version") |>
 3 |     expect_type("logical") |>
 4 |     expect_length(1) |>
 5 |     is.na() |>
 6 |     expect_true()
 7 | 
 8 |   check_approved(character(), "pkg@version") |>
 9 |     expect_type("logical") |>
10 |     expect_length(0)
11 | 
12 |   check_approved(
13 |     used = c("pkg1@1.0.0", "pkg2@5.6.7"),
14 |     approved = c("pkg1@1.0.0", "pkg2@4.3.2")
15 |   ) |>
16 |     expect_type("logical") |>
17 |     expect_equal(c(TRUE, FALSE))
18 | })
19 | 


--------------------------------------------------------------------------------
/tests/testthat/test-read_glob.R:
--------------------------------------------------------------------------------
 1 | test_that("testing read_glob()", {
 2 |   # A single file
 3 |   test_script("success.R") |>
 4 |     read_glob() |>
 5 |     expect_equal(test_script("success.R"))
 6 | 
 7 |   # All R files in a directory
 8 |   test_script("") |>
 9 |     file.path("*.R") |>
10 |     read_glob() |>
11 |     expect_match("\\.R$") |>
12 |     length() |>
13 |     expect_gt(1)
14 | 
15 |   # Error when file does not exist
16 |   test_script("") |>
17 |     file.path("fake_program.R") |>
18 |     read_glob() |>
19 |     expect_message()
20 | })
21 | 


--------------------------------------------------------------------------------
/inst/documents/files.qmd:
--------------------------------------------------------------------------------
 1 | ---
 2 | ---
 3 | 
 4 | ```{r}
 5 | #| echo: false
 6 | no_files_if_null <- function(x) {
 7 |   if (!is.null(x) && nrow(x)) {
 8 |     return(x)
 9 |   }
10 | 
11 |   knitr::asis_output("No files")
12 | }
13 | ```
14 | 
15 | ## Input
16 | 
17 | ```{r}
18 | #| echo: false
19 | no_files_if_null(result$files$read)
20 | ```
21 | 
22 | ## Output
23 | 
24 | ```{r}
25 | #| echo: false
26 | no_files_if_null(result$files$write)
27 | ```
28 | 
29 | ## Removed
30 | 
31 | ```{r}
32 | #| echo: false
33 | no_files_if_null(result$files$delete)
34 | ```
35 | 


--------------------------------------------------------------------------------
/.github/pull_request_template.md:
--------------------------------------------------------------------------------
 1 | ## Summary
 2 | (Summary of made changes with explanation of what and why)
 3 | 
 4 | ## Changes Made
 5 | - Change 1
 6 | - ...
 7 | 
 8 | ## Testing
 9 | - Unit test for `tbl_cnt()` method
10 | - ...
11 | 
12 | ## Checklist
13 | - [ ] PR linked to issue descripting the bug or feature request being addressed
14 | - [ ] Code changes have been tested
15 | - [ ] Documentation has been updated, if applicable
16 | - [ ] All automated tests pass
17 | - [ ] Coding style and naming conventions have been followed
18 | - [ ] The PR is ready for review and merge
19 | 


--------------------------------------------------------------------------------
/inst/documents/python.qmd:
--------------------------------------------------------------------------------
 1 | ---
 2 | ---
 3 | 
 4 | :::{.callout-note collapse=false appearance="minimal"}
 5 | ## Python packages used directly
 6 | 
 7 | ```{r}
 8 | #| echo: false
 9 | result$session$python |> 
10 |     dplyr::filter(directly_used) |> 
11 |     format_approved()
12 | ```
13 | 
14 | :::
15 | 
16 | :::{.callout-note collapse=true appearance="minimal"}
17 | ## Python packages used indirectly
18 | 
19 | ```{r}
20 | #| echo: false
21 | result$session$python |> 
22 |     dplyr::filter(!.data$directly_used) |> 
23 |     dplyr::select("package", "version", "path")
24 | ```
25 | 
26 | :::
27 | 


--------------------------------------------------------------------------------
/tests/testthat/_snaps/examples.md:
--------------------------------------------------------------------------------
 1 | # All example scripts run with consistent output
 2 | 
 3 |     Code
 4 |       res
 5 |     Output
 6 |       # A tibble: 7 x 4
 7 |            id tag         script    status 
 8 |         <dbl> <chr>       <chr>     <chr>  
 9 |       1     1 First step  success.R success
10 |       2     2 Second step warning.R warning
11 |       3     3 Second step error.R   error  
12 |       4     4 Step 2      error.R   error  
13 |       5     5 Step 2      prg1.R    success
14 |       6     6 Step 2      success.R success
15 |       7     7 Step 2      warning.R warning
16 | 
17 | 


--------------------------------------------------------------------------------
/tests/testthat/scripts/_whirl_biocompute.yaml:
--------------------------------------------------------------------------------
 1 | biocompute:
 2 |   object_id: "urn:uuid:1234567890"
 3 |   spec_version: "https://w3id.org/ieee/ieee-2791-schema/2791object.json"
 4 |   etag: "c69asgs797agfka"
 5 |   usability: |
 6 |     This is an example of a biocompute project.
 7 |   extension: |
 8 |     example text
 9 | steps:
10 |   - name: "First step"
11 |     paths:
12 |       - "biocompute.R"
13 |     parameter_files:
14 |       - "_biocompute_parametrics.yml"
15 |   - name: "Second step"
16 |     paths:
17 |       - "success.R"
18 |     parameter_files:
19 |       - "_biocompute_parametrics.yml"
20 | 


--------------------------------------------------------------------------------
/man/use_whirl.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/use_whirl.R
 3 | \name{use_whirl}
 4 | \alias{use_whirl}
 5 | \title{Use whirl}
 6 | \usage{
 7 | use_whirl(config_file = "_whirl.yml")
 8 | }
 9 | \arguments{
10 | \item{config_file}{Path to the whirl config file, relative to the project}
11 | }
12 | \description{
13 | Utility function to setup execution with whirl in your project:
14 | \enumerate{
15 | \item Creates configuration file (default \verb{_whirl.yml})
16 | \item Updates \code{.gitignore} to not include log files
17 | }
18 | 
19 | See \code{vignette("whirl")} for how to specify paths inside the
20 | configuration file.
21 | }
22 | 


--------------------------------------------------------------------------------
/inst/documents/summary.qmd:
--------------------------------------------------------------------------------
 1 | ---
 2 | title: "Summary"
 3 | date: "`r Sys.time()`"
 4 | date-format: "YYYY-MM-DDTHH:mm:ss zzz"
 5 | params:
 6 |   summary_df: summary_df
 7 | format:
 8 |   html:
 9 |     embed-resources: true
10 |     standalone: false
11 |     toc: true
12 |     toc-depth: 2
13 |     toc-expand: true
14 |     smooth-scroll: true
15 | ---
16 | 
17 | ```{r}
18 | #| label: Setup
19 | #| include: false
20 | knitr::opts_chunk$set(
21 |   error = TRUE,
22 |   warning = TRUE,
23 |   message = TRUE,
24 |   echo = TRUE
25 | )
26 | ```
27 | 
28 | ```{r}
29 | #| label: Summary
30 | #| echo: false
31 | 
32 | my_df <- structure(params$summary_df, format = "html", class = "knitr_kable")
33 | my_df
34 | ```
35 | 


--------------------------------------------------------------------------------
/tests/testthat/test-use_whirl.R:
--------------------------------------------------------------------------------
 1 | test_that("use_whirl", {
 2 |   withr::with_tempdir({
 3 |     rlang::local_interactive(FALSE)
 4 | 
 5 |     usethis::create_project(path = ".") |>
 6 |       expect_message() |>
 7 |       suppressMessages()
 8 | 
 9 |     use_whirl() |>
10 |       expect_message() |>
11 |       suppressMessages()
12 | 
13 |     expect_true(file.exists("_whirl.yml"))
14 | 
15 |     expect_equal(
16 |       readLines("_whirl.yml"),
17 |       readLines(system.file("use_whirl/_whirl.yml", package = "whirl"))
18 |     )
19 | 
20 |     expect_true(file.exists(".gitignore"))
21 | 
22 |     expect_contains(
23 |       readLines(".gitignore"),
24 |       "*_log.(html|json|md)"
25 |     )
26 |   })
27 | })
28 | 


--------------------------------------------------------------------------------
/tests/testthat/test-util_queue_summary.R:
--------------------------------------------------------------------------------
 1 | test_that("fails with invalid input", {
 2 |   data.frame(dummy = "dummy") |>
 3 |     util_queue_summary() |>
 4 |     expect_error()
 5 | })
 6 | 
 7 | 
 8 | test_that("Summary tibble is created successfully", {
 9 |   skip_if_no_quarto()
10 |   skip_if_no_python()
11 |   skip_on_cran()
12 | 
13 |   q <- whirl_queue$new(n_workers = 2)
14 | 
15 |   test_script(c("success.R", "py_success.py")) |>
16 |     q$run()
17 | 
18 |   q$queue |>
19 |     util_queue_summary() |>
20 |     expect_s3_class("tbl_df") |>
21 |     expect_named(
22 |       c("Tag", "Directory", "Filename", "Status", "Hyperlink", "Information")
23 |     ) |>
24 |     nrow() |>
25 |     expect_equal(2)
26 | })
27 | 


--------------------------------------------------------------------------------
/tests/testthat/test-whirl_queue.R:
--------------------------------------------------------------------------------
 1 | test_that("whirl_queue edge cases not covered in test-run etc.", {
 2 |   q <- whirl_queue$new()
 3 | 
 4 |   q$skip("skipped.R")
 5 | 
 6 |   expect_equal(q$queue$script, "skipped.R")
 7 |   expect_equal(q$queue$status, "skipped")
 8 | 
 9 |   q |>
10 |     print() |>
11 |     expect_snapshot()
12 | 
13 |   q$push("a/b/new.R") |>
14 |     expect_error("Logs cannot be saved because \"a/b\" does not exist")
15 | 
16 |   q$push(c("a/new.R", "b/also_new.R")) |>
17 |     expect_error("Logs cannot be saved because \"a\" and \"b\" does not exist")
18 | 
19 |   whirl_queue$new(log_dir = "fake_folder")$push("fake_script.R") |>
20 |     expect_error("Logs cannot be saved because \"fake_folder\" does not exist")
21 | })
22 | 


--------------------------------------------------------------------------------
/inst/use_whirl/_biocompute.yml:
--------------------------------------------------------------------------------
 1 | biocompute:
 2 |   object_id: "your unique id"
 3 |   spec_version: "https://w3id.org/ieee/ieee-2791-schema/2791object.json"
 4 |   etag: "tag from biocompute platform"
 5 |   usability: |
 6 |     This is an example of a biocompute project.
 7 |   extension: |
 8 |     (optional) example text
 9 | 
10 | steps:
11 |   - name: "Run single script"
12 |     paths:
13 |       - "path/to/script.R"
14 |   - name: "Run multiple scripts in same step"
15 |     paths:
16 |       - "path/to/script_1.R"
17 |       - "path/to/script_2.R"
18 |   - name: "Run all R scripts in folder"
19 |     paths:
20 |       - "path/to/folder/*.R"
21 |   - name: "Define parameter file for biocompute"
22 |     paths:
23 |       - "path/to/script.R"
24 |     parameter_files:
25 |       - "_parametrics.yml"
26 | 


--------------------------------------------------------------------------------
/tests/testthat/_snaps/biocompute.md:
--------------------------------------------------------------------------------
 1 | # Biocompute object created correctly
 2 | 
 3 |     Code
 4 |       str(bco$io_domain)
 5 |     Output
 6 |       List of 2
 7 |        $ input_subdomain :List of 2
 8 |         ..$ :List of 1
 9 |         .. ..$ uri:List of 1
10 |         .. .. ..$ uri: chr "my_project/params.yml"
11 |         ..$ :List of 1
12 |         .. ..$ uri:List of 1
13 |         .. .. ..$ uri: chr "my_data/data.rds"
14 |        $ output_subdomain:List of 2
15 |         ..$ :List of 2
16 |         .. ..$ mediatype: chr "text/txt"
17 |         .. ..$ uri      :List of 2
18 |         .. .. ..$ filename: chr "output.txt"
19 |         .. .. ..$ uri     : chr "my_output/output.txt"
20 |         ..$ :List of 2
21 |         .. ..$ mediatype: chr " "
22 |         .. ..$ uri      :List of 2
23 |         .. .. ..$ filename: chr "plot.png"
24 |         .. .. ..$ uri     : chr "my_output/plot.png"
25 | 
26 | 


--------------------------------------------------------------------------------
/tests/testthat/test-progress_bar.R:
--------------------------------------------------------------------------------
 1 | test_that("progress bar is visible", {
 2 |   withr::local_options(
 3 |     .new = list(cli.dynamic = TRUE, whirl.verbosity_level = "verbose")
 4 |   )
 5 | 
 6 |   pb <- pb_start() |>
 7 |     expect_no_condition() |>
 8 |     expect_type("character")
 9 | 
10 |   queue <- data.frame(
11 |     script = "my_script.R",
12 |     status = "running"
13 |   )
14 | 
15 |   pb_update(id = pb, queue = queue) |>
16 |     expect_message()
17 | 
18 |   pb_done(id = pb) |>
19 |     expect_no_error()
20 | })
21 | 
22 | test_that("progress bar is not visible", {
23 |   pb <- pb_start() |>
24 |     expect_no_condition() |>
25 |     expect_null()
26 | 
27 |   queue <- data.frame(
28 |     script = "my_script.R",
29 |     status = "running"
30 |   )
31 | 
32 |   pb_update(id = pb, queue = queue) |>
33 |     expect_no_message()
34 | 
35 |   pb_done(id = pb) |>
36 |     expect_no_error()
37 | })
38 | 


--------------------------------------------------------------------------------
/tests/testthat/helper.R:
--------------------------------------------------------------------------------
 1 | # Helper function to select test scripts
 2 | 
 3 | test_script <- function(script) {
 4 |   script <- testthat::test_path("scripts", script) |>
 5 |     normalizePath(winslash = "/", mustWork = TRUE)
 6 |   return(script)
 7 | }
 8 | 
 9 | # Use to test quarto availability or version lower than required
10 | skip_if_no_quarto <- function(ver = NULL) {
11 |   skip_if(is.null(quarto::quarto_path()), message = "Quarto is not available")
12 |   skip_if(
13 |     condition = quarto::quarto_version() < ver,
14 |     message = sprintf(
15 |       fmt = "Version of quarto is lower than %s: %s.",
16 |       ver,
17 |       quarto::quarto_version()
18 |     )
19 |   )
20 | }
21 | 
22 | # Use to test if python is available for simple tests
23 | skip_if_no_python <- function() {
24 |   skip_if(
25 |     condition = !reticulate::py_available(initialize = TRUE),
26 |     message = "Python is not available"
27 |   )
28 | }
29 | 


--------------------------------------------------------------------------------
/tests/testthat/test-examples.R:
--------------------------------------------------------------------------------
 1 | test_that("All example scripts run with consistent output", {
 2 |   skip_if_no_quarto()
 3 | 
 4 |   tmpdir <- withr::local_tempdir()
 5 | 
 6 |   # Copy all example scripts to the temporary working directory
 7 | 
 8 |   system.file("examples", package = "whirl") |>
 9 |     list.files(full.names = TRUE) |>
10 |     file.copy(recursive = TRUE, to = tmpdir)
11 | 
12 |   res <- list(
13 |     list.files(tmpdir, pattern = "\\.(yaml|yml)$", full.names = TRUE) |>
14 |       as.list(),
15 |     list.files(tmpdir, pattern = "\\.(R|py)$", full.names = TRUE)
16 |   ) |>
17 |     run(summary_file = NULL)
18 | 
19 |   # Unify result to only be about the status of the script and without
20 |   # the full path to the script
21 | 
22 |   res$script <- basename(res$script)
23 |   res <- res[c("id", "tag", "script", "status")]
24 | 
25 |   # Check that the results now are consistent
26 |   expect_snapshot(res)
27 | })
28 | 


--------------------------------------------------------------------------------
/R/approvedpkgs.R:
--------------------------------------------------------------------------------
 1 | #' Check if only using approved packages
 2 | #' Both used and approved packages to be specified
 3 | #' as: {pkgname}${pkgversion}
 4 | #' @noRd
 5 | check_approved <- function(used, approved = NULL) {
 6 |   if (is.null(approved)) {
 7 |     return(NA)
 8 |   }
 9 | 
10 |   used <- package_spec(used)
11 |   names_used <- names(used)
12 |   approved <- package_spec(approved)
13 | 
14 |   ok <- logical(length = length(used))
15 | 
16 |   for (i in which(names_used %in% names(approved))) {
17 |     ok[[i]] <- used[[i]] == approved[[names_used[[i]]]]
18 |   }
19 | 
20 |   ok
21 | }
22 | 
23 | #' @noRd
24 | package_spec <- function(x) {
25 |   if (!length(x)) {
26 |     return(character())
27 |   }
28 | 
29 |   x <- strsplit(x = x, split = "@")
30 |   n <- vapply(X = x, FUN = \(x) x[[1]], FUN.VALUE = character(1))
31 |   v <- vapply(X = x, FUN = \(x) x[[2]], FUN.VALUE = character(1))
32 |   stats::setNames(object = v, nm = n)
33 | }
34 | 


--------------------------------------------------------------------------------
/.github/actions/setup/action.yaml:
--------------------------------------------------------------------------------
 1 | name: Specific setup for whirl
 2 | description: Installs Quarto and enables strace on Linux
 3 | runs:
 4 |   using: "composite"
 5 |   steps:
 6 |     - name: Allow strace to attach to a process
 7 |       if: runner.os == 'Linux'
 8 |       shell: bash
 9 |       run: |
10 |         echo 0 | sudo tee /proc/sys/kernel/yama/ptrace_scope
11 |         sudo setcap cap_sys_ptrace=eip /usr/bin/strace
12 | 
13 |     - name: Install Quarto
14 |       uses: quarto-dev/quarto-actions/setup@v2
15 | 
16 |     - name: (Linux) Install jupyter
17 |       if: runner.os == 'Linux'
18 |       shell: bash
19 |       run: python3 -m pip install jupyter
20 | 
21 |     - name: (macOS) Install jupyter via Homebrew
22 |       if: runner.os == 'macOS'
23 |       shell: bash
24 |       run: brew install jupyter
25 | 
26 |     - name: (Windows) Install jupyter
27 |       if: runner.os == 'Windows'
28 |       shell: bash
29 |       run: py -m pip install jupyter
30 | 


--------------------------------------------------------------------------------
/tests/testthat/test-renv.R:
--------------------------------------------------------------------------------
 1 | test_that("consistent output from renv help functions", {
 2 | 
 3 |   withr::local_options("usethis.quiet" = TRUE)
 4 | 
 5 |   tmpdir <- withr::local_tempdir()
 6 | 
 7 |   usethis::create_project(path = tmpdir, open = FALSE)
 8 | 
 9 |   usethis::local_project(tmpdir)
10 | 
11 |   status <- renv_status() |>
12 |     expect_s3_class(c("whirl_renv_status"))
13 | 
14 |   knit_print_whirl_renv_status(status) |>
15 |     as.character() |>
16 |     expect_equal("::: {.callout-warning}\n## renv not used\n:::")
17 | 
18 |   status$status$lockfile$Packages <- c("a", "b")
19 | 
20 |   knit_print_whirl_renv_status(status) |>
21 |     as.character() |>
22 |     expect_match(
23 |       "::: \\{.callout-important collapse=true\\}\n## renv out of sync"
24 |     )
25 | 
26 |   status$status$synchronized <- TRUE
27 | 
28 |   knit_print_whirl_renv_status(status) |>
29 |     as.character() |>
30 |     expect_match("::: \\{.callout-tip collapse=true\\}\n## renv synchronized")
31 | })
32 | 


--------------------------------------------------------------------------------
/R/quarto.R:
--------------------------------------------------------------------------------
 1 | #' Create a Quarto callout block with code
 2 | #'
 3 | #' See https://quarto.org/docs/authoring/callouts.html
 4 | #'
 5 | #' @param text description
 6 | #' @param title description
 7 | #' @param type description
 8 | #' @param collapse description
 9 | #' @noRd
10 | quarto_callout <- function(
11 |     text = NULL,
12 |     title = NULL,
13 |     type = c("note", "warning", "important", "tip", "caution"),
14 |     collapse = NULL) {
15 |   type <- rlang::arg_match(type)
16 |   if (!is.null(collapse)) {
17 |     collapse <- ifelse(collapse, "true", "false")
18 |   }
19 | 
20 |   c(
21 |     sprintf(
22 |       "::: {.callout-%s}",
23 |       ifelse(!is.null(collapse),
24 |         paste0(type, " collapse=", collapse),
25 |         type
26 |       )
27 |     ),
28 |     if (!is.null(title)) {
29 |       paste("##", title)
30 |     },
31 |     if (!is.null(text)) {
32 |       text
33 |     },
34 |     ":::"
35 |   ) |>
36 |     paste(collapse = "\n") |>
37 |     knitr::asis_output()
38 | }
39 | 


--------------------------------------------------------------------------------
/man/use_biocompute.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/use_whirl.R
 3 | \name{use_biocompute}
 4 | \alias{use_biocompute}
 5 | \title{Use whirl to create biocompute logs}
 6 | \usage{
 7 | use_biocompute(
 8 |   config_file = "_whirl.yml",
 9 |   parametrics_file = "_parametrics.yml"
10 | )
11 | }
12 | \arguments{
13 | \item{config_file}{Path to the whirl config file, relative to the project}
14 | 
15 | \item{parametrics_file}{Path to the biocompute parametrics file, relative to the project}
16 | }
17 | \description{
18 | Utility function to setup execution with whirl in your project suitable for
19 | creating biocompute logs with \code{write_biocompute()}:
20 | \enumerate{
21 | \item Creates configuration file (default \verb{_whirl.yml}) with default values for the \code{biocompute} metadata.
22 | \item Updates \code{.gitignore} to not include log files
23 | }
24 | 
25 | See \code{vignette("whirl")} for how to specify paths inside the
26 | configuration file.
27 | }
28 | 


--------------------------------------------------------------------------------
/R/run_current_script.R:
--------------------------------------------------------------------------------
 1 | #' RStudio IDE runner
 2 | #'
 3 | #' Easy to run and view log in RStudio. Also available as an addin. Takes
 4 | #' the active source document and [run()] it.
 5 | #'
 6 | #' @return Returns nothing. Run for side effects.
 7 | #' @seealso [run()]
 8 | #' @keywords intern
 9 | #' @noRd
10 | run_current_script <- function() {
11 |   stopifnot(
12 |     "rstudioapi package needed" = requireNamespace("rstudioapi", quietly = TRUE)
13 |   )
14 | 
15 |   # Run file
16 |   script_info <- rstudioapi::getSourceEditorContext()
17 |   file <- gsub(
18 |     paste0(normalizePath(getwd()), .Platform$file.sep),
19 |     "",
20 |     normalizePath(script_info$path)
21 |   )
22 | 
23 |   withr::local_options(list(whirl.verbosity_level = "minimal"))
24 | 
25 |   run_result <- run(input = list(list(names = basename(file), paths = file)))
26 | 
27 |   log_file <- run_result[["result"]][[1]][["log_details"]][["location"]][[1]]
28 |   if (file.exists(log_file)) {
29 |     rstudioapi::viewer(log_file)
30 |   }
31 | }
32 | 


--------------------------------------------------------------------------------
/inst/documents/title-block.html:
--------------------------------------------------------------------------------
 1 | <!doctype html>
 2 | <header id="title-block-header" class="quarto-title-block default">
 3 | <div class="quarto-title">
 4 | <h1 class="title">$title$</h1>
 5 | $if(subtitle)$
 6 | <p class="subtitle">$subtitle$</p>
 7 | $endif$
 8 | $for(author)$
 9 | <p class="author">$author$</p>
10 | $endfor$
11 | 
12 | $if(date)$
13 | <div>
14 |   <div class="quarto-title-meta-heading">Execution end time</div>
15 |   <div class="quarto-title-meta-contents">
16 |     <p class="date">$date$</p>
17 |   </div>
18 | </div>
19 | $endif$
20 | 
21 | $if(date-modified)$
22 | <div>
23 |   <div class="quarto-title-meta-heading">Execution time</div>
24 |   <div class="quarto-title-meta-contents">
25 |     <p class="date-modified">$date-modified$</p>
26 |   </div>
27 | </div>
28 | $endif$
29 | 
30 | $if(abstract)$
31 | <div>
32 |   <div class="quarto-title-meta-heading">Path to script</div>
33 |   <div class="quarto-title-meta-contents">
34 |     <p class="abstract">$abstract$</p>
35 |   </div>
36 | </div>
37 | $endif$
38 | 
39 | </div>
40 | </header>
41 | 


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/feature_request.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | name: Feature request
 3 | about: Suggest an idea for this project
 4 | title: "[FEAT] Feature name"
 5 | labels: ''
 6 | assignees: ''
 7 | ---
 8 | 
 9 | ## Feature Request
10 | 
11 | ### Description
12 | [Provide a clear and concise description of the feature you are requesting. What problem does it aim to solve?]
13 | 
14 | ### Proposed Solution
15 | [Outline a proposed solution or approach for implementing the feature. How do you envision the feature working?]
16 | 
17 | ### Use Case
18 | [Describe a specific use case or scenario where this feature would be beneficial or necessary.]
19 | 
20 | ### Additional Context
21 | [Add any additional context, information, or examples that support or clarify the feature request.]
22 | 
23 | ### Impact
24 | [Explain the potential impact of this feature on the package and its users. How would it improve the package or benefit the user community?]
25 | 
26 | ### Related Issues
27 | [Are there any related issues or pull requests that are relevant to this feature request?]
28 | 


--------------------------------------------------------------------------------
/.github/workflows/check_and_co.yaml:
--------------------------------------------------------------------------------
 1 | on:
 2 |   push:
 3 |     branches:
 4 |       - main
 5 |       - master
 6 |   pull_request:
 7 |     branches:
 8 |       - main
 9 |       - master
10 | permissions:
11 |   contents: write
12 |   pull-requests: write
13 | name: All actions
14 | jobs:
15 |   check-current-version:
16 |     name: Check current version
17 |     uses: >-
18 |       NovoNordisk-OpenSource/r.workflows/.github/workflows/check_current_version.yaml@main
19 |     with:
20 |       use_local_setup_action: true
21 |   pkgdown:
22 |     name: Pkgdown site
23 |     uses: NovoNordisk-OpenSource/r.workflows/.github/workflows/pkgdown.yaml@main
24 |     with:
25 |       use_local_setup_action: true
26 |   coverage:
27 |     name: Coverage report
28 |     uses: NovoNordisk-OpenSource/r.workflows/.github/workflows/coverage.yaml@main
29 |     secrets: inherit
30 |     with:
31 |       use_local_setup_action: true
32 |       use_codecov: true
33 |   megalinter:
34 |     name: Megalinter
35 |     uses: NovoNordisk-OpenSource/r.workflows/.github/workflows/megalinter.yaml@main
36 | 


--------------------------------------------------------------------------------
/R/progress_bar.R:
--------------------------------------------------------------------------------
 1 | #' Start a queue progress bar for a tag if verbose and possible
 2 | #' @noRd
 3 | pb_start <- function() {
 4 |   if (
 5 |     !cli::is_dynamic_tty() || zephyr::get_verbosity_level("whirl") != "verbose"
 6 |   ) {
 7 |     return(invisible())
 8 |   }
 9 | 
10 |   withr::local_options(
11 |     cli.progress_show_after = 0
12 |   )
13 | 
14 |   cli::cli_progress_bar(
15 |     type = "custom",
16 |     format = "{cli::pb_spin} Running {cli::pb_extra$running} [{cli::pb_elapsed}]",
17 |     extra = list(running = c()),
18 |     .auto_close = FALSE
19 |   )
20 | }
21 | 
22 | #' Easily update the progress bar for the queue
23 | #' @noRd
24 | pb_update <- function(id, queue) {
25 |   if (is.null(id)) {
26 |     return(invisible())
27 |   }
28 | 
29 |   cli::cli_progress_update(
30 |     extra = list(
31 |       running = basename(queue$script)[queue$status == "running"]
32 |     ),
33 |     id = id
34 |   )
35 | }
36 | 
37 | #' Convenience wrapper to end progress bar
38 | #' @noRd
39 | pb_done <- function(id) {
40 |   if (is.null(id)) {
41 |     return(invisible())
42 |   }
43 |   cli::cli_progress_done(id = id)
44 |   invisible()
45 | }
46 | 


--------------------------------------------------------------------------------
/R/util_queue_summary.R:
--------------------------------------------------------------------------------
 1 | #' Generate a summary tibble from a queue table
 2 | #'
 3 | #' This function takes a queue table as input and generates a summary tibble
 4 | #' with columns for Directory, Filename, Status, Hyperlink, and Information.
 5 | #'
 6 | #' @param queue_table The queue table containing the result data
 7 | #' @return A tibble summarizing the queue table data
 8 | #' @noRd
 9 | util_queue_summary <- function(queue_table) {
10 |   queue_table |>
11 |     dplyr::mutate(
12 |       Tag = .data$tag,
13 |       Directory = normalizePath(dirname(.data$script), winslash = "/"),
14 |       Filename = basename(.data$script),
15 |       Status = .data$status,
16 |       Hyperlink = vapply(
17 |         X = .data$result,
18 |         FUN = \(x) utils::head(x[["logs"]], 1),
19 |         FUN.VALUE = character(1)
20 |       ),
21 |       Information = vapply(
22 |         X = .data$result,
23 |         FUN = \(x) {
24 |           x[["status"]][c("errors", "warnings")] |>
25 |             unlist() |>
26 |             paste0(collapse = "<br>")
27 |         },
28 |         FUN.VALUE = character(1)
29 |       )
30 |     ) |>
31 |     dplyr::select("Tag", "Directory", "Filename", "Status", "Hyperlink", "Information")
32 | }
33 | 


--------------------------------------------------------------------------------
/tests/testthat/test-whirl_r_session.R:
--------------------------------------------------------------------------------
 1 | test_that("interactive whirl R session components not tested in run", {
 2 |   skip_if_no_quarto()
 3 |   p <- whirl_r_session$new()
 4 | 
 5 |   p$print() |>
 6 |     expect_message() |>
 7 |     suppressMessages()
 8 | 
 9 |   p$tmpdir |>
10 |     dir.exists() |>
11 |     expect_true()
12 | 
13 |   p$tmpdir |>
14 |     list.files() |>
15 |     sort() |>
16 |     expect_contains(c("dummy.qmd", "log.qmd", "summary.qmd"))
17 | 
18 |   p$call(func = Sys.sleep, args = list(time = 1)) # Sleep for 1 second
19 | 
20 |   status <- p$wait(timeout = 10)$check_status() # Timeout after 10 ms
21 |   expect_null(status) # Still running
22 | 
23 |   status <- p$wait()$check_status()
24 |   expect_equal(status$code, 200L) # Completed successfully
25 | 
26 |   p$call(func = \() 1 + "a") # Something with an error
27 |   expect_error(p$wait()$check_status())
28 | 
29 |   # Test temp dir is deleted correctly
30 |   dir <- p$tmpdir
31 |   rm(p)
32 |   gc()
33 | 
34 |   expect_false(dir.exists(dir))
35 | })
36 | 
37 | test_that("additional error testing", {
38 |   wrs_report_status(
39 |     status = "unknown",
40 |     script = "my_script.R",
41 |     logs = "my_log.html"
42 |   ) |>
43 |     expect_error()
44 | })
45 | 


--------------------------------------------------------------------------------
/R/mdformats.R:
--------------------------------------------------------------------------------
 1 | #' @noRd
 2 | mdformats <- function(script, log_html, mdfmt, self, out_dir) {
 3 |   newname <- gsub(
 4 |     pattern = "\\.[^\\.]*$",
 5 |     replacement = "",
 6 |     x = basename(script)
 7 |   )
 8 | 
 9 |   supported_formats <- list_pandoc_output_formats()
10 |   unsupported_formats <- setdiff(mdfmt, supported_formats)
11 |   if (any(!mdfmt %in% supported_formats)) {
12 |     cli::cli_abort(
13 |       "Output format{?s} {.code {unsupported_formats}} not supported by your pandoc installation"
14 |     )
15 |   }
16 | 
17 |   if (length(mdfmt) >= 1) {
18 |     newname <- paste0(newname, "_log_", mdfmt)
19 |   }
20 | 
21 |   newname <- paste0(newname, ".md")
22 | 
23 |   for (i in seq_along(newname)) {
24 |     knitr::pandoc(
25 |       input = log_html,
26 |       format = mdfmt[[i]],
27 |       ext = "md"
28 |     )
29 | 
30 |     file.copy(
31 |       from = file.path(self$tmpdir, "log.md"),
32 |       to = file.path(
33 |         out_dir,
34 |         newname[[i]]
35 |       ),
36 |       overwrite = TRUE
37 |     )
38 |   }
39 | 
40 |   return(invisible(newname))
41 | }
42 | 
43 | #' @noRd
44 | list_pandoc_output_formats <- function() {
45 |   system(command = "pandoc --list-output-formats", intern = TRUE)
46 | }
47 | 


--------------------------------------------------------------------------------
/R/read_glob.R:
--------------------------------------------------------------------------------
 1 | #' @title Fetch files if path uses regular expression
 2 | #' @param input A character vector with paths to files that should be executed.
 3 | #' @return A character vector with the paths to the files. If regexp have been
 4 | #'   used as input these will be solved to the actual files matching the
 5 | #'   criteria.
 6 | #' @noRd
 7 | 
 8 | read_glob <- function(input, root_dir) {
 9 |   files <- vector(
10 |     mode = "list",
11 |     length = length(input)
12 |   )
13 | 
14 |   for (i in seq_along(files)) {
15 |     if (length(input[[i]]) > 1 || is.list(input[[i]])) {
16 |       files[[i]] <- read_glob(
17 |         input = input[[i]],
18 |         root_dir = root_dir
19 |       )
20 |     } else {
21 |       files[[i]] <- input[[i]] |>
22 |         normalize_with_base(base = root_dir) |>
23 |         read_single_glob()
24 |     }
25 |   }
26 | 
27 |   if (is.list(input)) {
28 |     return(files)
29 |   }
30 | 
31 |   unlist(files, use.names = FALSE)
32 | }
33 | 
34 | #' @noRd
35 | read_single_glob <- function(x) {
36 |   if (file.exists(x)) {
37 |     return(x)
38 |   }
39 |   files <- Sys.glob(x)
40 |   if (length(files) == 0) {
41 |     cli::cli_alert_warning("No files or folders for this path {x}")
42 |   }
43 |   files
44 | }
45 | 


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/bug_report.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | name: Bug report
 3 | about: Create a report to help us improve
 4 | title: "[BUG] Bug name"
 5 | labels: ''
 6 | assignees: ''
 7 | ---
 8 | 
 9 | ## Bug Report
10 | 
11 | ### Description
12 | [Provide a clear and concise description of the bug. What behavior did you observe that you believe to be a bug?]
13 | 
14 | ### Reproducible Example
15 | [Include a minimal, complete, and verifiable example that demonstrates the bug. This could be a code snippet, dataset, or specific steps to reproduce the issue.]
16 | 
17 | ### Expected Behavior
18 | [Describe what you expected to happen when you encountered the bug.]
19 | 
20 | ### Actual Behavior
21 | [Explain what actually happened when you encountered the bug.]
22 | 
23 | ### Environment
24 | - R Version: [e.g., 4.5.1]
25 | - Package Version: [e.g., 1.2.3]
26 | - Operating System: [e.g., Windows 10, macOS 11.1]
27 | 
28 | ### Additional Context
29 | [Add any additional context, information, or examples that can help in understanding and reproducing the bug.]
30 | 
31 | ### Impact
32 | [Explain the impact or consequences of this bug. How does it affect the package's functionality or the user experience?]
33 | 
34 | ### Related Issues
35 | [Are there any related issues, pull requests, or discussions that are relevant to this bug report?]
36 | 


--------------------------------------------------------------------------------
/tests/testthat/test-mdformats.R:
--------------------------------------------------------------------------------
 1 | test_that("pandoc works", {
 2 |   skip_if_no_quarto()
 3 | 
 4 |   x <- whirl_r_session$new()
 5 | 
 6 |   file.copy(
 7 |     from = test_script("test-mdformats.html"),
 8 |     to = file.path(x$tmpdir, "log.html")
 9 |   ) |>
10 |     expect_true()
11 | 
12 |   tmpdir <- withr::local_tempdir()
13 | 
14 |   # Different pandoc installations support different formats
15 |   # for testing purposes we only use the supported ones here.
16 |   # Error handling tested in new test below.
17 |   mdfmt <- c("gfm", "commonmark", "markua") |>
18 |     intersect(list_pandoc_output_formats())
19 | 
20 |   mdformats(
21 |     script = "test1.R",
22 |     log_html = file.path(x$tmpdir, "log.html"),
23 |     mdfmt = mdfmt,
24 |     out_dir = tmpdir,
25 |     self = x
26 |   ) |>
27 |     suppressMessages()
28 | 
29 |   file.path(
30 |     tmpdir,
31 |     paste0("test1_log_", c("gfm", "commonmark", "markua"), ".md")
32 |   ) |>
33 |     file.exists() |>
34 |     all() |>
35 |     expect_true()
36 | })
37 | 
38 | test_that("gives error when format is not supported", {
39 |   skip_if_no_quarto()
40 | 
41 |   mdformats(
42 |     script = "script.R",
43 |     log_html = "script.html",
44 |     mdfmt = "fake_format",
45 |     out_dir = "."
46 |   ) |>
47 |     expect_error("not supported by your pandoc installation")
48 | })
49 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # History files
 2 | .Rhistory
 3 | .Rapp.history
 4 | 
 5 | # Session Data files
 6 | .RData
 7 | .RDataTmp
 8 | 
 9 | # User-specific files
10 | .Ruserdata
11 | 
12 | # Example code in package build process
13 | *-Ex.R
14 | dev/*
15 | 
16 | # Output files from R CMD build
17 | /*.tar.gz
18 | 
19 | # Output files from R CMD check
20 | /*.Rcheck/
21 | tests/testthat/*.png
22 | 
23 | # Output files from test coverage
24 | cobertura.xml
25 | 
26 | # RStudio files
27 | .Rproj.user/
28 | 
29 | # produced vignettes
30 | vignettes/*.html
31 | vignettes/*.pdf
32 | 
33 | # OAuth2 token, see https://github.com/hadley/httr/releases/tag/v0.3
34 | .httr-oauth
35 | 
36 | # knitr and R markdown default cache directories
37 | *_cache/
38 | /cache/
39 | 
40 | # Temporary files created by R markdown
41 | *.utf8.md
42 | *.knit.md
43 | 
44 | # R Environment Variables
45 | .Renviron
46 | 
47 | # pkgdown site
48 | docs/
49 | 
50 | # translation temp files
51 | po/*~
52 | 
53 | # RStudio Connect folder
54 | rsconnect/
55 | 
56 | # remove files for examples
57 | inst/examples/*_files
58 | inst/examples/*.html
59 | plot1.png
60 | from_python.txt
61 | summary.html
62 | *_log.html
63 | *_log.json
64 | 
65 | inst/doc
66 | whirl.Rcheck/
67 | whirl*.tar.gz
68 | whirl*.tgz
69 | CRAN-SUBMISSION
70 | bco.json
71 | *_log.(html|json|md)
72 | _parametrics.yml
73 | _whirl.yml
74 | .DS_Store
75 | 


--------------------------------------------------------------------------------
/man/custom_logging.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/custom_logging.R
 3 | \name{custom_logging}
 4 | \alias{custom_logging}
 5 | \alias{log_read}
 6 | \alias{log_write}
 7 | \alias{log_delete}
 8 | \title{Helper function to log custom messages}
 9 | \usage{
10 | log_read(file, log = Sys.getenv("WHIRL_LOG_MSG"))
11 | 
12 | log_write(file, log = Sys.getenv("WHIRL_LOG_MSG"))
13 | 
14 | log_delete(file, log = Sys.getenv("WHIRL_LOG_MSG"))
15 | }
16 | \arguments{
17 | \item{file}{\code{\link[=character]{character()}} description of the file that was read, written or
18 | deleted.}
19 | 
20 | \item{log}{\code{\link[=character]{character()}} path to the log file.}
21 | }
22 | \description{
23 | Useful for e.g. read and write operations on databases etc.
24 | that are not automatically captured.
25 | }
26 | \details{
27 | The default environment variable \code{WHIRL_LOG_MSG} is set in the session used
28 | to log scripts, and input is automatically captured in the resulting log.
29 | 
30 | If run outside of whirl, meaning when the above environment variable is
31 | unset, the operations are streamed to \code{stdout()}. By default the console.
32 | }
33 | \examples{
34 | # Stream logs to console since `WHIRL_LOG_MSG` is not set:
35 | log_read("my/folder/input.txt")
36 | log_write("my/folder/output.txt")
37 | log_delete("my/folder/old_output.txt")
38 | }
39 | 


--------------------------------------------------------------------------------
/tests/testthat/test-python.R:
--------------------------------------------------------------------------------
 1 | test_that("python dependencies found correctly", {
 2 |   skip_on_cran()
 3 |   skip_if_no_quarto()
 4 |   skip_if_no_python()
 5 |   skip_if_not_installed(pkg = "reticulate", minimum_version = "1.41.0")
 6 | 
 7 |   reticulate::py_require("pandas")
 8 |   reticulate::py_require("numpy")
 9 | 
10 |   res <- test_script(
11 |     script = c("py_success.py", "py_dependencies.py")
12 |   ) |>
13 |     run(summary_file = NULL) |>
14 |     expect_no_warning() |>
15 |     expect_no_error()
16 | 
17 |   res[["status"]] |>
18 |     expect_equal(c("success", "success"))
19 | 
20 |   info_py_success <- res[["result"]][[1]][["session"]]
21 | 
22 |   info_py_success$platform$setting |>
23 |     expect_contains("python")
24 | 
25 |   info_py_success$python$package[
26 |     info_py_success$python$directly_used
27 |   ] |>
28 |     expect_length(0)
29 | 
30 |   info_py_dependencies <- res[["result"]][[2]][["session"]]
31 | 
32 |   info_py_dependencies$platform$setting |>
33 |     expect_contains("python")
34 | 
35 |   info_py_dependencies$python$package[
36 |     info_py_dependencies$python$directly_used
37 |   ] |>
38 |     expect_length(2) |>
39 |     expect_contains(c("pandas", "numpy"))
40 | })
41 | 
42 | test_that("parse_pip_list() is consistent", {
43 |   skip_on_cran()
44 |   skip_if_no_python()
45 | 
46 |   system("python -m pip list -v", intern = TRUE) |>
47 |     parse_pip_list() |>
48 |     expect_s3_class("data.frame") |>
49 |     expect_named(c("package", "version", "path", "installer")) |>
50 |     nrow() |>
51 |     expect_gt(0)
52 | })
53 | 


--------------------------------------------------------------------------------
/tests/testthat/test-utils.R:
--------------------------------------------------------------------------------
 1 | test_that("get_file_ext()", {
 2 |   get_file_ext("file.txt") |>
 3 |     expect_equal("txt")
 4 | 
 5 |   get_file_ext("a/b/file.a") |>
 6 |     expect_equal("a")
 7 | 
 8 |   get_file_ext("a/b/file") |>
 9 |     expect_equal("")
10 | })
11 | 
12 | test_that("scale_to_percent()", {
13 |   scale_to_percent(0.5) |>
14 |     expect_equal("50.00%")
15 | 
16 |   scale_to_percent(0.55124316, 1) |>
17 |     expect_equal("55.1%")
18 | })
19 | 
20 | test_that("replace_na_with_last()", {
21 |   replace_na_with_last(1:5) |>
22 |     expect_equal(1:5)
23 | 
24 |   replace_na_with_last(c(1, NA, 3, NA, 5)) |>
25 |     expect_equal(c(1, 1, 3, 3, 5))
26 | 
27 |   replace_na_with_last(c(NA, NA, NA)) |>
28 |     expect_equal(c(NA, NA, NA))
29 | 
30 |   replace_na_with_last(c(NA, "a", "b", NA, "c")) |>
31 |     expect_equal(c(NA, "a", "b", "b", "c"))
32 | })
33 | 
34 | test_that("path_rel()", {
35 |   path_rel("a", "a") |>
36 |     expect_equal(".")
37 | 
38 |   path_rel("a", "a/b") |>
39 |     expect_equal("..")
40 | 
41 |   path_rel("a", "a/b/c") |>
42 |     expect_equal("../..")
43 | 
44 |   path_rel("a/b", "a") |>
45 |     expect_equal("b")
46 | 
47 |   path_rel("a/b/d", "a/b/c") |>
48 |     expect_equal("../d")
49 | })
50 | 
51 | test_that("create_cli_links()", {
52 |   text <- create_cli_links("mytext", "my_link") |>
53 |     expect_type("character")
54 | 
55 |   grep(pattern = "my_link", x = text) |>
56 |     expect_length(0)
57 | 
58 |   withr::with_options(
59 |     new = list(cli.dynamic = TRUE),
60 |     code = create_cli_links("mytext", "my_link") |>
61 |       expect_type("character") |>
62 |       expect_match("my_link")
63 |   )
64 | })
65 | 


--------------------------------------------------------------------------------
/man/whirl-package.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/whirl-package.R
 3 | \docType{package}
 4 | \name{whirl-package}
 5 | \alias{whirl}
 6 | \alias{whirl-package}
 7 | \title{whirl: Log Execution of Scripts}
 8 | \description{
 9 | \if{html}{\figure{logo.png}{options: style='float: right' alt='logo' width='120'}}
10 | 
11 | Logging of scripts suitable for clinical trials using 'Quarto' to create nice human readable logs. 'whirl' enables execution of scripts in batch, while simultaneously creating logs for the execution of each script, and providing an overview summary log of the entire batch execution.
12 | }
13 | \seealso{
14 | Useful links:
15 | \itemize{
16 |   \item \url{https://novonordisk-opensource.github.io/whirl/}
17 |   \item \url{https://github.com/novonordisk-opensource/whirl}
18 |   \item Report bugs at \url{https://github.com/NovoNordisk-OpenSource/whirl/issues}
19 | }
20 | 
21 | }
22 | \author{
23 | \strong{Maintainer}: Aksel Thomsen \email{oath@novonordisk.com}
24 | 
25 | Authors:
26 | \itemize{
27 |   \item Lovemore Gakava \email{lvgk@novonordisk.com}
28 |   \item Cervan Girard \email{cgid@novonordisk.com}
29 |   \item Kristian Troejelsgaard \email{ktqn@novonordisk.com}
30 |   \item Steffen Falgreen Larsen \email{sffl@novonordisk.com}
31 |   \item Vladimir Obucina \email{vlob@novonordisk.com}
32 |   \item Michael Svingel \email{wymz@novonordisk.com}
33 |   \item Skander Mulder \email{sktf@novonordisk.com}
34 |   \item Oliver Lundsgaard \email{ovlr@novonordisk.com}
35 | }
36 | 
37 | Other contributors:
38 | \itemize{
39 |   \item Novo Nordisk A/S [copyright holder]
40 | }
41 | 
42 | }
43 | \keyword{internal}
44 | 


--------------------------------------------------------------------------------
/inst/documents/python_modules.py:
--------------------------------------------------------------------------------
 1 | import json
 2 | import sys
 3 | import types
 4 | 
 5 | def get_loaded_packages():
 6 |     """Return all the packages that are currently loaded in the Python environment"""
 7 |     loaded = set(m.split('.')[0] for m in sys.modules if m and not m.startswith('_'))
 8 |     return loaded
 9 | 
10 | def get_namespaced_packages():
11 |     """Get all the stuff that exposed via namespacing (i.e. attached)"""
12 |     namespaced = set()
13 |     
14 |     # Use __main__.__dict__ to find all imported modules in the current script
15 |     # needed for things like reticulate
16 |     import __main__
17 |     if hasattr(__main__, '__dict__'):
18 |         globals_dict = __main__.__dict__
19 |     else:
20 |         frame = sys._getframe(1)
21 |         globals_dict = frame.f_globals
22 |     
23 |     for name, obj in globals_dict.items():
24 |         if isinstance(obj, types.ModuleType) and not name.startswith('_'):
25 |             module_name = getattr(obj, '__name__', name)
26 |             root_module = module_name.split('.')[0]
27 |             if root_module not in ['sys', 'json', 'types', 'builtins']:
28 |                 namespaced.add(root_module)
29 |     
30 |     return namespaced
31 | 
32 | def get_package_status(tmpfile=''):
33 |     """Get both loaded and namespaced packages as a named list"""
34 |     loaded = get_loaded_packages()
35 |     namespaced = get_namespaced_packages()
36 |     
37 |     status = {
38 |         'loaded': list(loaded),
39 |         'namespaced': list(namespaced)
40 |     }
41 |     
42 |     if tmpfile != '':
43 |         with open(tmpfile, 'w') as f:
44 |             json.dump(status, f, indent=2)
45 |     
46 |     return status
47 | 


--------------------------------------------------------------------------------
/tests/testthat/test-custom_logging.R:
--------------------------------------------------------------------------------
 1 | test_that("stream to console outside whirl context", {
 2 |   log_read("test_read") |>
 3 |     expect_output(
 4 |       regexp = "\\{\"time\":\".*\",\"type\":\"read\",\"file\":\"test_read\"\\}"
 5 |     )
 6 | 
 7 |   log_write("test_write") |>
 8 |     expect_output(
 9 |       regexp = "\\{\"time\":\".*\",\"type\":\"write\",\"file\":\"test_write\"\\}" # nolint: line_length_linter
10 |     )
11 | 
12 |   log_delete("test_delete") |>
13 |     expect_output(
14 |       regexp = "\\{\"time\":\".*\",\"type\":\"delete\",\"file\":\"test_delete\"\\}" # nolint: line_length_linter
15 |     )
16 | })
17 | 
18 | 
19 | test_that("stream to log file in a whirl context", {
20 |   tmp_log_file <- withr::local_tempfile(fileext = ".json")
21 | 
22 |   withr::with_envvar(
23 |     c(WHIRL_LOG_MSG = tmp_log_file),
24 |     {
25 |       log_read("test_read")
26 |       log_write("test_write")
27 |       log_delete("test_delete")
28 | 
29 |       x <- read_from_log()
30 | 
31 |       expect_equal(nrow(x), 3)
32 |       expect_equal(x$type, c("read", "write", "delete"))
33 |       expect_equal(x$file, c("test_read", "test_write", "test_delete"))
34 |     }
35 |   )
36 | 
37 |   split_log(x) |>
38 |     expect_length(3) |>
39 |     lapply(expect_s3_class, "data.frame") |>
40 |     vapply(\(x) x[["file"]], character(1)) |>
41 |     expect_equal(
42 |       c(read = "test_read", write = "test_write", delete = "test_delete")
43 |     )
44 | 
45 |   split_log(x[-2, ]) |>
46 |     expect_length(3) |>
47 |     lapply(expect_s3_class, "data.frame") |>
48 |     sapply(\(x) x[["file"]]) |>
49 |     unlist() |>
50 |     expect_equal(
51 |       c(read = "test_read", delete = "test_delete")
52 |     )
53 | 
54 |   x["time"] <- as.POSIXct("2000-01-01 01:01:01")
55 |   expect_snapshot(x)
56 | })
57 | 


--------------------------------------------------------------------------------
/vignettes/articles/example.Rmd:
--------------------------------------------------------------------------------
 1 | ---
 2 | title: "Log example"
 3 | ---
 4 | 
 5 | ```{r setup, include = FALSE}
 6 | knitr::opts_chunk$set(
 7 |   collapse = TRUE,
 8 |   comment = "#>"
 9 | )
10 | 
11 | # Use a temporary directory as working directory with the example program
12 | 
13 | wd <- normalizePath(".")
14 | tmp <- withr::local_tempdir()
15 | 
16 | system.file("examples/prg1.R", package = "whirl") |>
17 |   file.copy(to = file.path(tmp, "example.R"))
18 | 
19 | knitr::opts_knit$set(root.dir = tmp)
20 | ```
21 | 
22 | In this example we are going to execute the following script and create a log of it's execution:
23 | 
24 | `example.R:`
25 | ```{r, file = "example.R", eval = FALSE}
26 | ```
27 | 
28 | We are going to use the `run()` function to execute the script, and since this vignette
29 | is created on Linux we can use the `whirl.track_files` option to automatically track the used files: 
30 | 
31 | ```{r whirl-setup}
32 | library(whirl)
33 | 
34 | options(whirl.track_files = TRUE)
35 | options(whirl.verbosity_level = "minimal")
36 | ```
37 | 
38 | The `verbosity_level` is set to `minimal` for nicer printing in this vignette.
39 | Now we are ready to execute the script:
40 | 
41 | ```{r run}
42 | result <- run("example.R")
43 | 
44 | print(result)
45 | ```
46 | 
47 | The script is now executed and you can access the logs below:
48 | 
49 | ```{r copy, include=FALSE}
50 | # Also check if all outputs were created as expected
51 | withr::with_dir(tmp, {
52 |   articles_folder <- file.path(wd, "../../docs/articles")
53 | 
54 |   articles_folder |>
55 |     dir.exists() |>
56 |     stopifnot()
57 | 
58 |   c("summary.html", "example_log.html", "plot1.png") |>
59 |     lapply(file.copy, to = articles_folder, overwrite = TRUE) |>
60 |     unlist() |>
61 |     all() |>
62 |     stopifnot()
63 | })
64 | ```
65 | 
66 | * [View summary log](summary.html)
67 | * [View log for example.R](example_log.html)
68 | * [The saved plot1.png](plot1.png)
69 | 


--------------------------------------------------------------------------------
/R/internal_run.R:
--------------------------------------------------------------------------------
 1 | #' Sending the scripts to the whirl_r_queue for execution
 2 | #'
 3 | #' @param input A character vector of file path(s) to R, R Markdown, Quarto
 4 | #'   scripts, or files in a folder using regular expression, or to to a whirl
 5 | #'   config file. The input can also be structured in a list where each element
 6 | #'   will be executed in parallel.
 7 | #' @param steps An optional argument that can be used if only certain steps
 8 | #'   within a config files is to be executed. Should be equivalent to the names
 9 | #'   of the steps found in the config file. If kept as NULL (default) then all
10 | #'   steps listed in the config file will be executed.
11 | #' @param queue The whirl_r_queue that should execute the scripts
12 | #' @param level Depth of the recursive config calls.
13 | #' The initial call will have 1.
14 | #' @inheritParams options_params
15 | #' @return A tibble containing the execution results for all the scripts.
16 | #' @noRd
17 | internal_run <- function(input,
18 |                          steps,
19 |                          queue,
20 |                          level) {
21 |   # Enrich the input with "name" and "path" elements
22 |   enriched <- enrich_input(input, steps)
23 | 
24 |   # Loop over the elements
25 |   for (i in seq_along(enriched)) {
26 |     files <- enriched[[i]]$path
27 |     name <- enriched[[i]]$name
28 | 
29 |     # Messages
30 |     cli_level <- get(paste0("cli_h", min(level, 3)), envir = asNamespace("cli"))
31 |     zephyr::msg_verbose(message = name, msg_fun = cli_level)
32 | 
33 |     # If the step points to a config file then re-initiate internal_run()
34 |     if (any(grepl("yaml|yml", get_file_ext(files)))) {
35 |       internal_run(
36 |         input = files,
37 |         steps = steps,
38 |         queue = queue,
39 |         level = level + 1
40 |       )
41 |     } else {
42 |       # Execute the scripts
43 |       queue$run(scripts = files, tag = name)
44 |       zephyr::msg_verbose(message = "\n", msg_fun = cli::cli_verbatim)
45 |     }
46 |   }
47 | 
48 |   invisible(queue)
49 | }
50 | 


--------------------------------------------------------------------------------
/R/status.R:
--------------------------------------------------------------------------------
 1 | #' Get execution status
 2 | #'
 3 | #' Retrieves errors and warnings from the generated markdown file,
 4 | #' and derives the execution status.
 5 | #'
 6 | #' @noRd
 7 | 
 8 | get_status <- function(md, start) {
 9 |   x <- readChar(con = md, nchars = file.size(md)) |>
10 |     paste(collapse = "\n") |>
11 |     stringr::str_split("\n:::")
12 | 
13 |   x <- x[[1]]
14 | 
15 |   add_python <- x |>
16 |     stringr::str_detect("\\{.python .cell-code") |>
17 |     any()
18 | 
19 |   # Errors
20 | 
21 |   errors <- x |>
22 |     stringr::str_subset(
23 |       pattern = "^ *\\{\\.cell-output \\.cell-output-error\\}"
24 |     ) |>
25 |     stringr::str_remove_all("\\{[^\\}]*\\}") |>
26 |     stringr::str_squish()
27 | 
28 |   if (add_python) {
29 |     python_errors <- x |>
30 |       stringr::str_subset(pattern = "^ *\\{\\.cell-output") |>
31 |       stringr::str_remove_all("\\{[^\\}]*\\}") |>
32 |       stringr::str_squish() |>
33 |       stringr::str_subset("Error:")
34 | 
35 |     errors <- c(errors, python_errors)
36 |   }
37 | 
38 |   # Warnings
39 | 
40 |   warnings <- x |>
41 |     stringr::str_subset(
42 |       pattern = "^ *\\{\\.cell-output \\.cell-output-stderr\\}"
43 |     ) |>
44 |     stringr::str_remove_all("\\{[^\\}]*\\}") |>
45 |     stringr::str_squish() |>
46 |     stringr::str_subset(pattern = "^(W|w)arning")
47 | 
48 |   if (add_python) {
49 |     python_warnings <- x |>
50 |       stringr::str_subset(pattern = "^ *\\{\\.cell-output") |>
51 |       stringr::str_remove_all("\\{[^\\}]*\\}") |>
52 |       stringr::str_squish() |>
53 |       stringr::str_subset("Warning:")
54 | 
55 |     warnings <- c(warnings, python_warnings)
56 |   }
57 | 
58 |   # Status
59 | 
60 |   if (length(errors)) {
61 |     status <- "error"
62 |   } else if (length(warnings)) {
63 |     status <- "warning"
64 |   } else {
65 |     status <- "success"
66 |   }
67 | 
68 |   # Return list with status
69 | 
70 |   list(
71 |     message = status,
72 |     warnings = warnings,
73 |     errors = errors,
74 |     start = readRDS(start),
75 |     end = file.mtime(md)
76 |   )
77 | }
78 | 


--------------------------------------------------------------------------------
/R/normalize_with_base.R:
--------------------------------------------------------------------------------
 1 | #' Normalize a Path with Respect to a Base Directory
 2 | #'
 3 | #' This function normalizes a given path with respect to a specified base
 4 | #' directory.
 5 | #' If the path is relative, it combines the base directory and the path, then
 6 | #' normalizes the resulting path.
 7 | #' If the path is absolute or starts with `~`, it normalizes the path directly.
 8 | #' If no base directory is specified, the current working directory is used as
 9 | #' the base.
10 | #'
11 | #' @param path A character string representing the path to be normalized.
12 | #' Can be relative, absolute, or start with `~`.
13 | #' @param base A character string representing the base directory with respect
14 | #' to which the path should be normalized. The default is the current working
15 | #' directory (".").
16 | #' @return A character string representing the normalized path.
17 | #' @examplesIf FALSE
18 | #' base <- "/my/base/directory"
19 | #' relative_path <- "subdir/file.txt"
20 | #' normalized_path <- normalize_with_base(relative_path, base)
21 | #' print(normalized_path)
22 | #'
23 | #' # Using the current working directory as the base
24 | #' relative_path <- "subdir/file.txt"
25 | #' normalized_path <- normalize_with_base(relative_path)
26 | #' print(normalized_path)
27 | #'
28 | #' # Using an absolute path
29 | #' absolute_path <- "/another/directory/subdir/file.txt"
30 | #' normalized_path <- normalize_with_base(absolute_path)
31 | #' print(normalized_path)
32 | #'
33 | #' # Using a path starting with ~
34 | #' home_path <- "~/subdir/file.txt"
35 | #' normalized_path <- normalize_with_base(home_path)
36 | #' print(normalized_path)
37 | #' @noRd
38 | normalize_with_base <- function(path, base = ".") {
39 |   # Expand any ~ in the path to the full home directory path
40 |   path <- path.expand(path)
41 | 
42 |   # Check if the path is absolute
43 |   if (!grepl("^(/|[a-zA-Z]:)", path)) {
44 |     # Combine the base and the relative path if the path is not absolute
45 |     path <- file.path(base, path)
46 |   }
47 | 
48 |   # Normalize the path
49 |   normalized_path <- normalizePath(path, winslash = "/", mustWork = FALSE)
50 | 
51 |   return(normalized_path)
52 | }
53 | 


--------------------------------------------------------------------------------
/R/renv.R:
--------------------------------------------------------------------------------
 1 | #' Get renv status
 2 | #' @noRd
 3 | 
 4 | renv_status <- function() {
 5 |   rlang::check_installed("renv")
 6 | 
 7 |   msg <- utils::capture.output(status <- renv::status())
 8 | 
 9 |   structure(
10 |     list(message = msg, status = status),
11 |     class = c("whirl_renv_status", "list")
12 |   )
13 | }
14 | 
15 | #' @noRd
16 | 
17 | print.whirl_renv_status <- function(x, ...) {
18 |   x$message |>
19 |     cat(sep = "\n")
20 | 
21 |   return(invisible(x))
22 | }
23 | 
24 | #' @noRd
25 | 
26 | knit_print_whirl_renv_status <- function(x, ...) {
27 |   if (!length(x$status$lockfile$Packages)) {
28 |     renv_note <- "warning"
29 |     renv_title <- "renv not used"
30 |     renv_message <- NULL
31 |   } else if (x$status$synchronized) {
32 |     renv_note <- "tip"
33 |     renv_title <- "renv synchronized"
34 |     renv_message <- x$message
35 |   } else {
36 |     renv_note <- "important"
37 |     renv_title <- "renv out of sync"
38 |     renv_message <- x$message |>
39 |       renv_message_table() |>
40 |       renv_message_headers()
41 |   }
42 | 
43 |   quarto_callout(
44 |     text = renv_message,
45 |     title = renv_title,
46 |     type = renv_note,
47 |     collapse = if (!is.null(renv_message)) {
48 |       TRUE
49 |     } else {
50 |       NULL
51 |     }
52 |   )
53 | }
54 | 
55 | #' Format renv message with markdown table.
56 | #' Used when packages are in inconsistent state only.
57 | #' @noRd
58 | 
59 | renv_message_table <- function(renv_message) {
60 |   i <- grepl(pattern = "^ +", x = renv_message) |>
61 |     which()
62 | 
63 |   if (!length(i)) {
64 |     return(renv_message)
65 |   }
66 | 
67 |   renv_message[i] <- gsub(
68 |     pattern = "( |$)(?! )",
69 |     replacement = "|",
70 |     x = renv_message[i],
71 |     perl = TRUE
72 |   )
73 | 
74 |   j <- i[[1]]
75 | 
76 |   c(
77 |     utils::head(renv_message, j),
78 |     gsub(pattern = "[^|]", replacement = "-", x = renv_message[j]),
79 |     utils::tail(renv_message, -j)
80 |   )
81 | }
82 | 
83 | #' Bump renv status headers down to header 3
84 | #' @noRd
85 | 
86 | renv_message_headers <- function(renv_message) {
87 |   gsub(pattern = "^#", replacement = "###", x = renv_message)
88 | }
89 | 


--------------------------------------------------------------------------------
/DESCRIPTION:
--------------------------------------------------------------------------------
 1 | Package: whirl
 2 | Title: Log Execution of Scripts
 3 | Version: 0.3.1.9001
 4 | Authors@R: c(
 5 |     person("Aksel", "Thomsen", , "oath@novonordisk.com", role = c("aut", "cre")),
 6 |     person("Lovemore", "Gakava", , "lvgk@novonordisk.com", role = "aut"),
 7 |     person("Cervan", "Girard", , "cgid@novonordisk.com", role = "aut"),
 8 |     person("Kristian", "Troejelsgaard", , "ktqn@novonordisk.com", role = "aut"),
 9 |     person("Steffen Falgreen", "Larsen", , "sffl@novonordisk.com", role = "aut"),
10 |     person("Vladimir", "Obucina", , "vlob@novonordisk.com", role = "aut"),
11 |     person("Michael", "Svingel", , "wymz@novonordisk.com", role = "aut"),
12 |     person("Skander", "Mulder", , "sktf@novonordisk.com", role = "aut"),
13 |     person("Oliver", "Lundsgaard", , "ovlr@novonordisk.com", role = "aut"),
14 |     person("Novo Nordisk A/S", role = "cph")
15 |   )
16 | Description: Logging of scripts suitable for clinical trials using
17 |     'Quarto' to create nice human readable logs. 'whirl' enables
18 |     execution of scripts in batch, while simultaneously creating logs for
19 |     the execution of each script, and providing an overview summary log of
20 |     the entire batch execution.
21 | License: Apache License (>= 2)
22 | URL: https://novonordisk-opensource.github.io/whirl/,
23 |     https://github.com/novonordisk-opensource/whirl
24 | BugReports: https://github.com/NovoNordisk-OpenSource/whirl/issues
25 | Depends: 
26 |     R (>= 4.1)
27 | Imports: 
28 |     callr,
29 |     cli,
30 |     dplyr,
31 |     jsonlite,
32 |     kableExtra,
33 |     knitr,
34 |     purrr,
35 |     quarto,
36 |     R6 (>= 2.4.0),
37 |     reticulate,
38 |     rlang,
39 |     sessioninfo,
40 |     stringr,
41 |     tibble,
42 |     unglue,
43 |     utils,
44 |     withr,
45 |     yaml,
46 |     zephyr (>= 0.1.1)
47 | Suggests: 
48 |     ggplot2,
49 |     renv,
50 |     rstudioapi,
51 |     testthat (>= 3.0.0),
52 |     usethis
53 | VignetteBuilder: 
54 |     knitr
55 | Config/testthat/edition: 3
56 | Encoding: UTF-8
57 | Roxygen: list(markdown = TRUE)
58 | RoxygenNote: 7.3.2
59 | SystemRequirements: Quarto command line tool
60 |     (<https://github.com/quarto-dev/quarto-cli>).
61 | 


--------------------------------------------------------------------------------
/tests/testthat/test-enrich_input.R:
--------------------------------------------------------------------------------
 1 | test_that("Enrich input works as expected", {
 2 |   # Find all R programs
 3 | 
 4 |   enriched <- test_script("_whirl_r_programs.yaml") |>
 5 |     enrich_input() |>
 6 |     expect_type("list") |>
 7 |     expect_length(1)
 8 | 
 9 |   enriched[[1]] |>
10 |     expect_type("list") |>
11 |     expect_length(2) |>
12 |     expect_named(c("name", "paths")) |>
13 |     lapply(expect_type, "character") |>
14 |     invisible()
15 | 
16 |   # Unnamed steps
17 | 
18 |   test_script("_whirl_unnamed.yaml") |>
19 |     enrich_input() |>
20 |     expect_type("list") |>
21 |     expect_length(3) |>
22 |     vapply(FUN = \(x) x$name, FUN.VALUE = character(1)) |>
23 |     expect_equal(
24 |       c("Named step", "Step 2", "Step 3")
25 |     )
26 | 
27 |   # File input
28 | 
29 |   enriched <- test_script("success.R") |>
30 |     enrich_input() |>
31 |     expect_type("list") |>
32 |     expect_length(1)
33 | 
34 |   enriched[[1]] |>
35 |     expect_type("list") |>
36 |     expect_length(2) |>
37 |     expect_named(c("name", "paths")) |>
38 |     unlist() |>
39 |     expect_equal(
40 |       c(
41 |         name = "Step 1",
42 |         paths = test_script("success.R")
43 |       )
44 |     )
45 | 
46 |   # Pruning a config file
47 | 
48 |   test_script("_whirl.yaml") |>
49 |     enrich_input(steps = "Second step") |>
50 |     expect_type("list") |>
51 |     expect_length(1) |>
52 |     vapply(FUN = \(x) x$name, FUN.VALUE = character(1)) |>
53 |     expect_equal("Second step")
54 | 
55 |   test_script("_whirl.yaml") |>
56 |     enrich_input(steps = c("First step", "Second step")) |>
57 |     expect_type("list") |>
58 |     expect_length(2) |>
59 |     vapply(FUN = \(x) x$name, FUN.VALUE = character(1)) |>
60 |     expect_equal(c("First step", "Second step"))
61 | 
62 |   # Expected error when input is a directory
63 | 
64 |   test_script("") |>
65 |     enrich_input() |>
66 |     expect_error()
67 | 
68 |   # Evaluate expressions in yaml
69 | 
70 |   test_script("_whirl_expression.yaml") |>
71 |     enrich_input() |>
72 |     expect_type("list") |>
73 |     expect_length(1) |>
74 |     vapply(FUN = \(x) x$name, FUN.VALUE = character(1)) |>
75 |     expect_match(regexp = format(Sys.Date()))
76 | })
77 | 


--------------------------------------------------------------------------------
/inst/WORDLIST:
--------------------------------------------------------------------------------
  1 | addin
  2 | addins
  3 | Aksel
  4 | arning
  5 | asis
  6 | aut
  7 | autoupdate0
  8 | biocompute
  9 | Biocompute
 10 | Bioinformatics
 11 | callout
 12 | callr
 13 | Cervan
 14 | cgid
 15 | chdir
 16 | cli
 17 | colour
 18 | colnames
 19 | commonmark
 20 | compats
 21 | Compats
 22 | config
 23 | Config
 24 | cph
 25 | CRAN
 26 | cre
 27 | cyclocomp
 28 | dev
 29 | Devskim
 30 | dir
 31 | dontrun
 32 | dplyr
 33 | dset
 34 | emph
 35 | ENOENT
 36 | ENOTDIR
 37 | envir
 38 | ENXIO
 39 | Falgreen
 40 | fileext
 41 | Finalise
 42 | funct
 43 | Gakava
 44 | getenv
 45 | getframe
 46 | getOption
 47 | getwd
 48 | gfm
 49 | ggplot
 50 | ggsave
 51 | Girard
 52 | github
 53 | globbing
 54 | grepl
 55 | grpvar
 56 | gsub
 57 | GxP
 58 | hjust
 59 | https
 60 | httr
 61 | initialise
 62 | ìnst
 63 | io
 64 | jsonlite
 65 | kable
 66 | kableExtra
 67 | knitr
 68 | Kristian
 69 | ktqn
 70 | lapply
 71 | lintr
 72 | loadedpath
 73 | loadedversion
 74 | Lovemore
 75 | Lundsgaard
 76 | lvgk
 77 | markua
 78 | md
 79 | mdfmt
 80 | mdformats
 81 | mediatype
 82 | multiarch
 83 | namespacing
 84 | nchars
 85 | ncol
 86 | newname
 87 | nocov
 88 | nodename
 89 | Nordisk
 90 | notangle
 91 | Novo
 92 | novonordisk
 93 | NovoNordisk
 94 | nrow
 95 | numpy
 96 | Obucina
 97 | Oliver
 98 | ondiskversion
 99 | openat
100 | opensource
101 | OpenSource
102 | overwritable
103 | pandoc
104 | parametrics
105 | pkgdown
106 | pkgload
107 | pkgname
108 | pkgs
109 | PKGS
110 | pkgversion
111 | pmap
112 | Posix
113 | pre
114 | purrr
115 | Rbuildignore
116 | rdname
117 | README
118 | renv
119 | RENV
120 | Renviron
121 | reticulate
122 | rlang
123 | rlist
124 | rmarkdown
125 | roxygen
126 | Roxygen
127 | Roxygenize
128 | RoxygenNote
129 | Rproj
130 | rstudio
131 | rstudioapi
132 | seealso
133 | sessioninfo
134 | setenv
135 | sffl
136 | Skander
137 | skyblue
138 | Steffen
139 | stopifnot
140 | stringr
141 | succes
142 | succesfully
143 | Svingel
144 | Sweave
145 | Sys
146 | sysname
147 | testthat
148 | testuser
149 | Thomsen
150 | tibble
151 | tidyr
152 | tidyverse
153 | tmpdir
154 | tmpf
155 | Troejelsgaard
156 | unglue
157 | ungroup
158 | unlist
159 | unnest
160 | urandom
161 | usethis
162 | vapply
163 | vctr
164 | vctrs
165 | VignetteBuilder
166 | vjust
167 | vlob
168 | winslash
169 | withr
170 | WORDLIST
171 | yaml
172 | 
173 | 


--------------------------------------------------------------------------------
/R/use_whirl.R:
--------------------------------------------------------------------------------
 1 | #' Use whirl
 2 | #'
 3 | #' @description
 4 | #'
 5 | #' Utility function to setup execution with whirl in your project:
 6 | #'
 7 | #' 1. Creates configuration file (default `_whirl.yml`)
 8 | #' 1. Updates `.gitignore` to not include log files
 9 | #'
10 | #' See `vignette("whirl")` for how to specify paths inside the
11 | #' configuration file.
12 | #'
13 | #' @param config_file Path to the whirl config file, relative to the project
14 | #' @export
15 | 
16 | use_whirl <- function(config_file = "_whirl.yml") {
17 |   cli::cli_h1("Setup {.pkg whirl}")
18 | 
19 |   use_template("_whirl.yml", config_file = config_file)
20 | 
21 |   cli::cli_alert_info("Run project with {.run whirl::run(\"{config_file}\")}")
22 | 
23 |   cli::cli_h1("")
24 | 
25 |   return(invisible(config_file))
26 | }
27 | 
28 | #' Use whirl to create biocompute logs
29 | #'
30 | #' @description
31 | #'
32 | #' Utility function to setup execution with whirl in your project suitable for
33 | #' creating biocompute logs with `write_biocompute()`:
34 | #'
35 | #' 1. Creates configuration file (default `_whirl.yml`) with default values for the `biocompute` metadata.
36 | #' 1. Updates `.gitignore` to not include log files
37 | #'
38 | #' See `vignette("whirl")` for how to specify paths inside the
39 | #' configuration file.
40 | #'
41 | #' @param config_file Path to the whirl config file, relative to the project
42 | #' @param parametrics_file Path to the biocompute parametrics file, relative to the project
43 | #' @export
44 | 
45 | use_biocompute <- function(config_file = "_whirl.yml", parametrics_file = "_parametrics.yml") {
46 |   cli::cli_h1("Setup {.pkg whirl} and biocompute logs")
47 | 
48 |   use_template(template = "_biocompute.yml", config_file = config_file)
49 |   use_template(template = "_parametrics.yml", config_file = parametrics_file)
50 | 
51 |   cli::cli_alert_info(
52 |     "Run project with {.run whirl::run(\"{config_file}\")} and {.run whirl::write_biocompute()}"
53 |   )
54 | 
55 |   cli::cli_h1("")
56 | 
57 |   return(invisible(config_file))
58 | }
59 | 
60 | #' @noRd
61 | use_template <- function(template, config_file) {
62 |   rlang::check_installed("usethis")
63 | 
64 |   usethis::use_git_ignore(ignores = "*_log.(html|json|md)")
65 | 
66 |   config <- system.file("use_whirl", template, package = "whirl") |>
67 |     readLines()
68 | 
69 |   config_file_path <- usethis::proj_path(config_file)
70 |   usethis::write_over(path = config_file_path, lines = config)
71 |   usethis::edit_file(path = config_file_path)
72 | }
73 | 


--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
 1 | # All available hooks: https://pre-commit.com/hooks.html
 2 | # R specific hooks: https://github.com/lorenzwalthert/precommit
 3 | repos:
 4 | -   repo: https://github.com/lorenzwalthert/precommit
 5 |     rev: v0.4.3.9003
 6 |     hooks:
 7 |     # -   id: style-files
 8 |     #     args: [--style_pkg=styler, --style_fun=tidyverse_style]
 9 |     -   id: roxygenize
10 |         additional_dependencies:
11 |           - dplyr
12 |           - ggplot2
13 |           - httr
14 |           - jsonlite
15 |           - kableExtra
16 |           - quarto
17 |           - rmarkdown
18 |           - sessioninfo
19 |           - tibble
20 |           - tidyr
21 |           - unglue
22 |           - zephyr
23 |           - reticulate
24 |     -   id: use-tidy-description
25 |     -   id: spell-check
26 |         exclude: >
27 |           (?x)^(
28 |           .*\.[rR]|
29 |           .*\.feather|
30 |           .*\.jpeg|
31 |           .*\.pdf|
32 |           .*\.png|
33 |           .*\.py|
34 |           .*\.RData|
35 |           .*\.rds|
36 |           .*\.Rds|
37 |           .*\.Rproj|
38 |           .*\.sh|
39 |           (.*/|)\.gitignore|
40 |           (.*/|)\.gitlab-ci\.yml|
41 |           (.*/|)\.lintr|
42 |           (.*/|)\.pre-commit-.*|
43 |           (.*/|)\.Rbuildignore|
44 |           (.*/|)\.Renviron|
45 |           (.*/|)\.Rprofile|
46 |           (.*/|)\.travis\.yml|
47 |           (.*/|)appveyor\.yml|
48 |           (.*/|)NAMESPACE|
49 |           (.*/|)renv/settings\.dcf|
50 |           (.*/|)renv\.lock|
51 |           (.*/|)WORDLIST|
52 |           \.github/workflows/.*|
53 |           data/.*|
54 |           )$
55 |     # -   id: lintr
56 |     -   id: readme-rmd-rendered
57 |     -   id: parsable-R
58 |     -   id: no-browser-statement
59 |     # -   id: no-print-statement
60 |     -   id: no-debug-statement
61 |     -   id: deps-in-desc
62 |     -   id: pkgdown
63 | -   repo: https://github.com/pre-commit/pre-commit-hooks
64 |     rev: v5.0.0
65 |     hooks:
66 |     -   id: check-added-large-files
67 |         args: ['--maxkb=200']
68 |     -   id: file-contents-sorter
69 |         files: '^\.Rbuildignore$'
70 |     -   id: end-of-file-fixer
71 |         exclude: '\.Rd'
72 | -   repo: https://github.com/pre-commit-ci/pre-commit-ci-config
73 |     rev: v1.6.1
74 |     hooks:
75 |     # Only required when https://pre-commit.ci is used for config validation
76 |     -   id: check-pre-commit-ci-config
77 | -   repo: local
78 |     hooks:
79 |     -   id: forbid-to-commit
80 |         name: Don't commit common R artifacts
81 |         entry: Cannot commit .Rhistory, .RData, .Rds or .rds.
82 |         language: fail
83 |         files: '\.(Rhistory|RData|Rds|rds)$'
84 | ci:
85 |     autoupdate_schedule: monthly
86 |     skip: [pkgdown]


--------------------------------------------------------------------------------
/man/whirl-options-params.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/whirl-options.R
 3 | \name{whirl-options-params}
 4 | \alias{whirl-options-params}
 5 | \title{Internal parameters for reuse in functions}
 6 | \arguments{
 7 | \item{verbosity_level}{Verbosity level for functions in whirl.
 8 | See \link[zephyr:verbosity_level]{zephyr::verbosity_level} for details.. Default: \code{NA_character_}.}
 9 | 
10 | \item{out_formats}{Which log format(s) to produce. Possibilities are \code{html},
11 | \code{json}, and markdown formats: \code{gfm}, \code{commonmark}, and \code{markua}.. Default: \code{"html"}.}
12 | 
13 | \item{track_files}{Should files read and written be tracked?
14 | Currently only supported on Linux.. Default: \code{FALSE}.}
15 | 
16 | \item{check_renv}{Should the projects renv status be checked?. Default: \code{FALSE}.}
17 | 
18 | \item{track_files_discards}{List of file naming patterns not be tracked when track_files = TRUE. Default: \code{c("^/lib", "^/etc", "^/lib64", "^/usr", "^/var", "^/opt", "^/sys", "^/proc", "^/tmp", "^/null", "^/urandom", "^/.cache")}.}
19 | 
20 | \item{track_files_keep}{List of file naming patterns always to be tracked when
21 | track_files = TRUE. Default: \code{NULL}.}
22 | 
23 | \item{approved_packages}{List of approved R packages and their version in the format: \{name\}@\{version\}. Default: \code{NULL}.}
24 | 
25 | \item{approved_python_packages}{List of approved Python packages and their version in the format: \{name\}@\{version\}. Default: \code{NULL}.}
26 | 
27 | \item{n_workers}{Number of simultaneous workers used in the run function.
28 | A maximum of 128 workers is allowed.. Default: \code{1}.}
29 | 
30 | \item{log_dir}{The output directory of the log files. Default is the folder of
31 | the executed script. log_dir can be a path as a character or it can be a
32 | function that takes the script path as input and returns the log directory.
33 | For more information see the examples of \code{run()} or \code{vignette('whirl')}.. Default: \code{function (x)  dirname(x)}.}
34 | 
35 | \item{execute_dir}{The working directory of the process executing each script.
36 | Default us to execute R files from the working directory when calling \code{run()}
37 | and all other functions from the directory of the script. To change provide
38 | a character path (used for all scripts) or a function that takes the script
39 | as input and returns the execution directory.. Default: \code{NULL}.}
40 | 
41 | \item{wait_timeout}{Timeout for waiting for the R process from callr::r_session to
42 | start, in milliseconds.. Default: \code{9000}.}
43 | 
44 | \item{environment_secrets}{Secret environment variable patterns.
45 | Any variables matching will not be included in the logs.. Default: \code{c("BASH_FUNC", "_SSL_CERT", "_KEY", "_PAT", "_TOKEN")}.}
46 | }
47 | \description{
48 | Internal parameters for reuse in functions
49 | }
50 | \details{
51 | See \link{whirl-options} for more information.
52 | }
53 | \keyword{internal}
54 | 


--------------------------------------------------------------------------------
/R/enrich_input.R:
--------------------------------------------------------------------------------
 1 | #' Converting input to a standardized list with that contain step names and
 2 | #' files paths
 3 | #'
 4 | #' @param input Can be a vector, a list or a whirl config file.
 5 | #' @param steps A filter argument for selecting specific steps that should be
 6 | #'   executed
 7 | #' @return A list
 8 | #' @noRd
 9 | enrich_input <- function(
10 |   input, # nolint: cyclocomp_linter
11 |   steps = NULL
12 | ) {
13 |   # Characterize the input
14 |   is_config_file <-
15 |     !is.list(input) &&
16 |     any(grepl("yaml|yml", get_file_ext(input)))
17 |   is_character <- is.character(input)
18 | 
19 |   # Read yaml and extract list
20 |   if (is_config_file && length(input) == 1) {
21 |     root_dir <- dirname(input)
22 |     config_whirl <- yaml::read_yaml(file = input, eval.expr = TRUE)
23 |     got <- config_whirl$"steps"
24 |   } else {
25 |     root_dir <- getwd()
26 |   }
27 | 
28 |   # Convert vector to list
29 |   if (is_character && !is_config_file) {
30 |     got <- list(input)
31 |   }
32 | 
33 |   if (is.list(input)) {
34 |     got <- input
35 |   }
36 | 
37 |   names <- list()
38 |   paths <- list()
39 |   for (i in seq_along(got)) {
40 |     # Identify the step names - if none, then create a default name
41 |     check_name <- any(grepl("name", names(got[[i]])))
42 |     if (check_name) {
43 |       names[[i]] <- got[[i]][[which(grepl("name", names(got[[i]])))]]
44 |     } else {
45 |       names[[i]] <- paste0("Step ", i)
46 |     }
47 | 
48 |     # Identify the paths
49 |     check_path <- any(grepl("path", names(got[[i]])))
50 |     if (check_path) {
51 |       paths[[i]] <- got[[i]][[which(grepl("path", names(got[[i]])))]]
52 |     } else {
53 |       paths[[i]] <- got[[i]]
54 |     }
55 |   }
56 | 
57 |   # Normalizing the paths and read regexp
58 |   paths <- read_glob(
59 |     input = paths,
60 |     root_dir = root_dir
61 |   )
62 | 
63 |   # If input include one or more directories
64 |   paths_is_dir <- unlist(paths)
65 |   paths_is_dir <- paths_is_dir[dir.exists(paths_is_dir)]
66 |   if (length(paths_is_dir)) {
67 |     cli::cli_abort(
68 |       "The input argument in run() does not accept directories,
69 |         please specify which file(s) in {.val {paths_is_dir}} that should be executed"
70 |     )
71 |   }
72 | 
73 |   # Merge the names and paths into a list
74 |   out <- mapply(list, "name" = names, "paths" = paths, SIMPLIFY = FALSE)
75 | 
76 |   # Get the step names
77 |   step_names <- unlist(out)[grepl("name", names(unlist(out)))]
78 | 
79 |   # Prune the list when steps have been selected
80 |   if (!is.null(steps)) {
81 |     id <- which(step_names %in% steps)
82 |     # Update the vector of names
83 |     step_names <- step_names[id]
84 |     # Update the list
85 |     out <- out[id]
86 |   }
87 | 
88 |   # Output the steps that will be executed
89 |   message_ <- c(
90 |     "The following steps will be executed",
91 |     step_names |> rlang::set_names("*")
92 |   )
93 | 
94 |   zephyr::msg_verbose(message = message_, msg_fun = cli::cli_inform)
95 | 
96 |   invisible(out)
97 | }
98 | 


--------------------------------------------------------------------------------
/R/utils.R:
--------------------------------------------------------------------------------
  1 | # Getting file extension
  2 | get_file_ext <- function(file_paths) {
  3 |   vapply(
  4 |     X = file_paths,
  5 |     FUN = function(file_path) {
  6 |       file_name <- basename(file_path)
  7 |       file_parts <- strsplit(file_name, "\\.")[[1]]
  8 |       file_extension <- ifelse(
  9 |         length(file_parts) == 1,
 10 |         "",
 11 |         utils::tail(file_parts, 1)
 12 |       )
 13 |       return(file_extension)
 14 |     },
 15 |     FUN.VALUE = character(1),
 16 |     USE.NAMES = FALSE
 17 |   )
 18 | }
 19 | 
 20 | # Function to scale a numeric vector to percentage
 21 | scale_to_percent <- function(x, digits = 2) {
 22 |   percent_values <- x * 100
 23 |   formatted_percent_values <- sprintf(
 24 |     fmt = paste0("%.", digits, "f%%"),
 25 |     percent_values
 26 |   )
 27 |   return(formatted_percent_values)
 28 | }
 29 | 
 30 | # Function to replace NA values with the last non-NA value
 31 | replace_na_with_last <- function(x) {
 32 |   last_non_na <- NA
 33 |   for (i in seq_along(x)) {
 34 |     if (!is.na(x[i])) {
 35 |       last_non_na <- x[i]
 36 |     } else if (!is.na(last_non_na)) {
 37 |       x[i] <- last_non_na
 38 |     }
 39 |   }
 40 |   return(x)
 41 | }
 42 | 
 43 | # Function to get relative path
 44 | path_rel <- function(path, start = ".") {
 45 |   # Normalize the paths
 46 |   path <- normalizePath(path, winslash = "/", mustWork = FALSE)
 47 |   start <- normalizePath(start, winslash = "/", mustWork = FALSE)
 48 | 
 49 |   # Split the paths into components
 50 |   path_parts <- strsplit(path, "/")[[1]]
 51 |   start_parts <- strsplit(start, "/")[[1]]
 52 | 
 53 |   # Check if the paths are the same
 54 |   if (identical(path_parts, start_parts)) {
 55 |     return(".")
 56 |   }
 57 | 
 58 |   # Check if the start is a subdirectory of the path
 59 |   if (all(path_parts %in% start_parts)) {
 60 |     up_levels <- length(start_parts) - length(path_parts)
 61 |     relative_path <- c(rep("..", up_levels))
 62 |     return(paste(relative_path, collapse = "/"))
 63 |   }
 64 | 
 65 |   # Find the common prefix length
 66 |   common_length <- 0
 67 |   for (i in seq_along(start_parts)) {
 68 |     if (i > length(path_parts) || start_parts[i] != path_parts[i]) {
 69 |       break
 70 |     }
 71 |     common_length <- common_length + 1
 72 |   }
 73 | 
 74 |   # Compute the relative path
 75 |   up_levels <- length(start_parts) - common_length
 76 |   down_levels <- path_parts[(common_length + 1):length(path_parts)]
 77 | 
 78 |   relative_path <- c(rep("..", up_levels), down_levels)
 79 | 
 80 |   # Join the components into a single path
 81 |   relative_path <- paste(relative_path, collapse = "/")
 82 | 
 83 |   return(relative_path)
 84 | }
 85 | 
 86 | # Easily create cli links (only when possible)
 87 | create_cli_links <- function(text, href, .envir = parent.frame()) {
 88 |   if (!cli::is_dynamic_tty()) {
 89 |     return(
 90 |       cli::format_message(message = text, .envir = .envir)
 91 |     )
 92 |   }
 93 | 
 94 |   vapply(
 95 |     X = paste0("{.href [", text, "](file://", href, ")}"),
 96 |     FUN = cli::format_message,
 97 |     FUN.VALUE = character(1),
 98 |     .envir = .envir,
 99 |     USE.NAMES = FALSE
100 |   )
101 | }
102 | 


--------------------------------------------------------------------------------
/R/render_summary.R:
--------------------------------------------------------------------------------
  1 | #' Render data.frame into a summary.html file
  2 | #'
  3 | #' @param input The input data.frame that should be rendered into a summary.html
  4 | #' file
  5 | #' @param summary_file A character string specifying the path where the summary
  6 | #' HTML file should be saved. Defaults to `"summary.html"`.
  7 | #'
  8 | #' @return Takes a data.frame as input and returns a log in html format
  9 | #' @noRd
 10 | render_summary <- function(input, summary_file = "summary.html") {
 11 |   summary_qmd <- withr::local_tempfile(
 12 |     lines = readLines(system.file("documents/summary.qmd", package = "whirl")),
 13 |     fileext = ".qmd"
 14 |   )
 15 | 
 16 |   summary_log_html <- gsub(
 17 |     pattern = "qmd",
 18 |     replacement = "html",
 19 |     x = basename(summary_qmd)
 20 |   )
 21 | 
 22 |   summary_dir_f <- normalizePath(dirname(summary_file), winslash = "/")
 23 |   my_summaries <- knit_print_whirl_summary_info(input, summary_dir_f)
 24 |   withr::with_dir(
 25 |     tempdir(),
 26 |     {
 27 |       tryCatch(
 28 |         {
 29 |           quarto::quarto_render(
 30 |             input = summary_qmd,
 31 |             output_format = "html",
 32 |             execute_params = list(
 33 |               summary_df = my_summaries
 34 |             ),
 35 |             quiet = TRUE
 36 |           )
 37 |         },
 38 |         error = function(e) {
 39 |           cli::cli_abort(
 40 |             "Failed to render summary file {.file {summary_dir_f}}:
 41 |            {e$message}"
 42 |           )
 43 |         }
 44 |       )
 45 |     }
 46 |   )
 47 | 
 48 |   # Create requested outputs
 49 |   tryCatch(
 50 |     {
 51 |       file.copy(
 52 |         from = file.path(dirname(summary_qmd), summary_log_html),
 53 |         to = summary_file,
 54 |         overwrite = TRUE
 55 |       )
 56 |     },
 57 |     error = function(e) {
 58 |       warning("File copy failed: ", e$message)
 59 |       FALSE
 60 |     }
 61 |   )
 62 | }
 63 | 
 64 | #' @noRd
 65 | knit_print_whirl_summary_info <- function(x, path_rel_start, ...) {
 66 |   hold <- x |>
 67 |     data.frame(check.names = FALSE)
 68 | 
 69 |   row.names(hold) <- NULL
 70 |   ncols <- ncol(hold)
 71 | 
 72 |   if (grepl("rstudio_cloud", Sys.getenv("R_CONFIG_ACTIVE"))) {
 73 |     formatted <- file.path("/file_show?path=", hold$"Hyperlink")
 74 |   } else {
 75 |     formatted <- lapply(hold$"Hyperlink", path_rel, start = path_rel_start) |>
 76 |       unlist() |>
 77 |       file.path()
 78 |   }
 79 | 
 80 |   hold$Hyperlink <- paste0(sprintf(
 81 |     '<a href="%s" target="_blank">%s</a>',
 82 |     formatted,
 83 |     "HTML Log"
 84 |   ))
 85 | 
 86 |   knitr::kable(hold, format = "html", escape = FALSE) |>
 87 |     kableExtra::column_spec(
 88 |       1:ncols,
 89 |       background = ifelse(
 90 |         hold[["Status"]] == "error",
 91 |         "#fceeef",
 92 |         ifelse(
 93 |           hold[["Status"]] == "warning",
 94 |           "#fffaea",
 95 |           ifelse(
 96 |             hold[["Status"]] == "success",
 97 |             "#ebf5f1",
 98 |             ifelse(hold[["Status"]] == "skip", "#94CBFF", "white")
 99 |           )
100 |         )
101 |       )
102 |     ) |>
103 |     kableExtra::kable_styling(
104 |       bootstrap_options = "striped",
105 |       full_width = TRUE
106 |     )
107 | }
108 | 


--------------------------------------------------------------------------------
/R/custom_logging.R:
--------------------------------------------------------------------------------
  1 | #' Helper function to log custom messages
  2 | #'
  3 | #' Useful for e.g. read and write operations on databases etc.
  4 | #' that are not automatically captured.
  5 | #'
  6 | #' The default environment variable `WHIRL_LOG_MSG` is set in the session used
  7 | #' to log scripts, and input is automatically captured in the resulting log.
  8 | #'
  9 | #' If run outside of whirl, meaning when the above environment variable is
 10 | #' unset, the operations are streamed to `stdout()`. By default the console.
 11 | #'
 12 | #' @name custom_logging
 13 | #' @param file [character()] description of the file that was read, written or
 14 | #' deleted.
 15 | #' @param log [character()] path to the log file.
 16 | #' @examples
 17 | #' # Stream logs to console since `WHIRL_LOG_MSG` is not set:
 18 | #' log_read("my/folder/input.txt")
 19 | #' log_write("my/folder/output.txt")
 20 | #' log_delete("my/folder/old_output.txt")
 21 | NULL
 22 | 
 23 | #' @rdname custom_logging
 24 | #' @export
 25 | log_read <- function(file, log = Sys.getenv("WHIRL_LOG_MSG")) {
 26 |   write_to_log(file, "read", log)
 27 | }
 28 | 
 29 | #' @rdname custom_logging
 30 | #' @export
 31 | log_write <- function(file, log = Sys.getenv("WHIRL_LOG_MSG")) {
 32 |   write_to_log(file, "write", log)
 33 | }
 34 | 
 35 | #' @rdname custom_logging
 36 | #' @export
 37 | log_delete <- function(file, log = Sys.getenv("WHIRL_LOG_MSG")) {
 38 |   write_to_log(file, "delete", log)
 39 | }
 40 | 
 41 | #' @noRd
 42 | write_to_log <- function(
 43 |     file,
 44 |     type = c("read", "write", "delete"),
 45 |     log = Sys.getenv("WHIRL_LOG_MSG")) {
 46 |   type <- rlang::arg_match(type)
 47 |   stopifnot(rlang::is_string(file))
 48 |   stopifnot(rlang::is_string(log))
 49 | 
 50 |   x <- log_df(
 51 |     type = type,
 52 |     file = file
 53 |   )
 54 | 
 55 |   if (log == "") {
 56 |     jsonlite::stream_out(x = x, verbose = FALSE)
 57 |   } else {
 58 |     con <- file(description = log, open = "a")
 59 |     jsonlite::stream_out(x = x, con = con, verbose = FALSE)
 60 |     close(con)
 61 |   }
 62 | }
 63 | 
 64 | #' @noRd
 65 | read_from_log <- function(log = Sys.getenv("WHIRL_LOG_MSG"), track_files) {
 66 |   if (log == "" || !file.exists(log)) {
 67 |     if (!track_files) {
 68 |       return(NULL)
 69 |     }
 70 |     return(log_df())
 71 |   }
 72 | 
 73 |   con <- file(description = log, open = "r")
 74 |   log_info <- jsonlite::stream_in(con, verbose = FALSE)
 75 |   close(con)
 76 |   log_info$time <- as.POSIXct(log_info$time)
 77 |   return(log_info)
 78 | }
 79 | 
 80 | #' @noRd
 81 | log_df <- function(type = character(), file = character()) {
 82 |   data.frame(
 83 |     time = rep(x = Sys.time(), times = length(file)),
 84 |     type = type,
 85 |     file = file
 86 |   )
 87 | }
 88 | 
 89 | #' @noRd
 90 | split_log <- function(x, types = c("read", "write", "delete")) {
 91 |   if (is.null(x)) {
 92 |     return(NULL)
 93 |   }
 94 | 
 95 |   # Split in a tibble for each type of output
 96 |   x <- split(x[c("time", "file")], x$type)
 97 | 
 98 |   # Add empty table for types not reported
 99 |   out <- vector(mode = "list", length = length(types)) |>
100 |     rlang::set_names(types)
101 | 
102 |   out[names(x)] <- x
103 | 
104 |   i <- lapply(X = out, FUN = is.null) |>
105 |     unlist() |>
106 |     which()
107 | 
108 |   out[i] <- list(log_df()[c("time", "file")])
109 | 
110 |   lapply(X = out, FUN = `rownames<-`, value = NULL)
111 | }
112 | 


--------------------------------------------------------------------------------
/man/write_biocompute.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/biocompute.R
 3 | \name{write_biocompute}
 4 | \alias{write_biocompute}
 5 | \title{Create biocompute logs}
 6 | \usage{
 7 | write_biocompute(queue = run("_whirl.yml"), path = "bco.json", ...)
 8 | }
 9 | \arguments{
10 | \item{queue}{Result from \code{run()}.}
11 | 
12 | \item{path}{A character string specifying the file path to write BioCompute log to.}
13 | 
14 | \item{...}{Additional arguments parsed to \code{jsonlite::write_json()}. Note always uses \code{auto_unbox = TRUE}.}
15 | }
16 | \value{
17 | (\code{invisible}) \code{list} of the biocompute domains and their content.
18 | }
19 | \description{
20 | BioCompute is a standard for logs of programs for for Bioinformatics Computational Analyses.
21 | 
22 | The BioCompute object is a \code{json} log that can be created based on the output of \code{run()}.
23 | }
24 | \details{
25 | The object consists of the following domains:
26 | \itemize{
27 | \item \strong{Specifications}:
28 | \itemize{
29 | \item \emph{spec_version}: Version of BioCompute used (`https://w3id.org/biocompute/1.3.0/``)
30 | \item \emph{object_id}: Unique project id
31 | \item \emph{type}: Your project type
32 | \item \emph{etag}: Your \code{etag} id from the BioCompute Object Portal
33 | }
34 | \item \href{https://wiki.biocomputeobject.org/index.php?title=Provenance-domain}{Provenance Domain}
35 | \itemize{
36 | \item This is used to track the history of the BCO. Review and signatures go here.
37 | }
38 | \item \href{https://wiki.biocomputeobject.org/index.php?title=Usability-domain}{Usability Domain}
39 | \itemize{
40 | \item This is used to improve searchability by allowing a free-text description of the BCO.
41 | \item Provide external document.
42 | }
43 | \item \href{https://wiki.biocomputeobject.org/index.php?title=Extension-domain}{Extension Domain}
44 | \itemize{
45 | \item This is used to add any additional structured information that is not directly covered by the BCO.
46 | }
47 | \item \href{https://wiki.biocomputeobject.org/index.php?title=Description-domain}{Description Domain}
48 | \itemize{
49 | \item Contains a structured field for the description of external references, the pipeline steps,
50 | and the relationship of I/O objects.
51 | \item Provide external document.
52 | \item \strong{Note}: Use of \code{keywords} and \code{External_Reference} entries are not yet implemented.
53 | To use fill out the entries manually after creating the BioCompute object.`
54 | }
55 | \item \href{https://wiki.biocomputeobject.org/index.php?title=Execution-domain}{Execution Domain}
56 | \itemize{
57 | \item Contains fields for the execution of the BCO.
58 | \item \strong{Note}: Use of \code{external_data_endpoints} not implemented. Fill out manually afterwards if needed.
59 | }
60 | \item \href{https://wiki.biocomputeobject.org/index.php?title=Parametric-domain}{Parametric Domain}
61 | \itemize{
62 | \item Represents the list of parameters customizing the computational flow which can affect
63 | the output of the calculations.
64 | }
65 | \item \href{https://wiki.biocomputeobject.org/index.php?title=Iodomain}{IO Domain}
66 | \itemize{
67 | \item Represents the list of global input and output files created by the computational workflow.
68 | }
69 | \item \href{https://wiki.biocomputeobject.org/index.php?title=Error-domain}{Error Domain}
70 | \itemize{
71 | \item Defines the empirical and algorithmic limits and error sources of the BCO.
72 | \item \strong{Note}: Use of this domain is not clearly defined.
73 | It is therefore always left empty in the current implementation.
74 | If you want to add content do so manually after creating the BCO.
75 | }
76 | }
77 | 
78 | See the \href{https://www.biocomputeobject.org}{BioCompute Object Portal} and the
79 | \href{https://wiki.biocomputeobject.org/Main_Page}{BioCompute Objects Wiki} for more information.
80 | }
81 | 


--------------------------------------------------------------------------------
/R/whirl-options.R:
--------------------------------------------------------------------------------
  1 | #' @name whirl-options
  2 | #' @title Options for whirl
  3 | #' @description
  4 | #' `r zephyr::list_options(as = "markdown", .envir = "whirl")`
  5 | NULL
  6 | 
  7 | #' @title Internal parameters for reuse in functions
  8 | #' @name whirl-options-params
  9 | #' @eval zephyr::list_options(as = "params", .envir = "whirl")
 10 | #' @details
 11 | #' See [whirl-options] for more information.
 12 | #' @keywords internal
 13 | NULL
 14 | 
 15 | zephyr::create_option(
 16 |   name = "verbosity_level",
 17 |   default = NA_character_,
 18 |   description = "Verbosity level for functions in whirl.
 19 |   See [zephyr::verbosity_level] for details."
 20 | )
 21 | 
 22 | zephyr::create_option(
 23 |   name = "out_formats",
 24 |   default = "html",
 25 |   description = "Which log format(s) to produce. Possibilities are `html`,
 26 |   `json`, and markdown formats: `gfm`, `commonmark`, and `markua`."
 27 | )
 28 | 
 29 | zephyr::create_option(
 30 |   name = "track_files",
 31 |   default = FALSE,
 32 |   description = "Should files read and written be tracked?
 33 |   Currently only supported on Linux."
 34 | )
 35 | 
 36 | zephyr::create_option(
 37 |   name = "check_renv",
 38 |   default = FALSE,
 39 |   description = "Should the projects renv status be checked?"
 40 | )
 41 | 
 42 | zephyr::create_option(
 43 |   name = "track_files_discards",
 44 |   default = c(
 45 |     "^/lib",
 46 |     "^/etc",
 47 |     "^/lib64",
 48 |     "^/usr",
 49 |     "^/var",
 50 |     "^/opt",
 51 |     "^/sys",
 52 |     "^/proc",
 53 |     "^/tmp",
 54 |     "^/null",
 55 |     "^/urandom",
 56 |     "^/.cache"
 57 |   ),
 58 |   description = "List of file naming patterns not be tracked when track_files = TRUE"
 59 | )
 60 | 
 61 | zephyr::create_option(
 62 |   name = "track_files_keep",
 63 |   default = NULL,
 64 |   description = "List of file naming patterns always to be tracked when
 65 |   track_files = TRUE"
 66 | )
 67 | 
 68 | zephyr::create_option(
 69 |   name = "approved_packages",
 70 |   default = NULL,
 71 |   description = "List of approved R packages and their version in the format: \\{name\\}@\\{version\\}"
 72 | )
 73 | 
 74 | zephyr::create_option(
 75 |   name = "approved_python_packages",
 76 |   default = NULL,
 77 |   description = "List of approved Python packages and their version in the format: \\{name\\}@\\{version\\}"
 78 | )
 79 | 
 80 | zephyr::create_option(
 81 |   name = "n_workers",
 82 |   default = 1,
 83 |   description = "Number of simultaneous workers used in the run function.
 84 |   A maximum of 128 workers is allowed."
 85 | )
 86 | 
 87 | zephyr::create_option(
 88 |   name = "log_dir",
 89 |   default = \(x) dirname(x),
 90 |   description = "The output directory of the log files. Default is the folder of
 91 |   the executed script. log_dir can be a path as a character or it can be a
 92 |   function that takes the script path as input and returns the log directory.
 93 |   For more information see the examples of `run()` or `vignette('whirl')`."
 94 | )
 95 | 
 96 | zephyr::create_option(
 97 |   name = "execute_dir",
 98 |   default = NULL,
 99 |   description = "The working directory of the process executing each script.
100 |   Default us to execute R files from the working directory when calling `run()`
101 |    and all other functions from the directory of the script. To change provide
102 |   a character path (used for all scripts) or a function that takes the script
103 |   as input and returns the execution directory."
104 | )
105 | 
106 | zephyr::create_option(
107 |   name = "wait_timeout",
108 |   default = 9000,
109 |   description = "Timeout for waiting for the R process from callr::r_session to 
110 |   start, in milliseconds."
111 | )
112 | 
113 | zephyr::create_option(
114 |   name = "environment_secrets",
115 |   default = c(
116 |     "BASH_FUNC",
117 |     "_SSL_CERT",
118 |     "_KEY",
119 |     "_PAT",
120 |     "_TOKEN"
121 |   ),
122 |   description = "Secret environment variable patterns. 
123 |   Any variables matching will not be included in the logs."
124 | )
125 | 


--------------------------------------------------------------------------------
/inst/documents/dummy.qmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | params:
  3 |   script: 'dev'
  4 |   renv: TRUE
  5 |   tmpdir: "."
  6 |   converted_qmd: "temp_script.qmd"
  7 | ---
  8 | 
  9 | ```{r}
 10 | #| label: Setup
 11 | #| include: false
 12 | knitr::opts_chunk$set(
 13 |   error = TRUE,
 14 |   warning = TRUE,
 15 |   message = TRUE,
 16 |   echo = TRUE
 17 | )
 18 | 
 19 | file.path(params$tmpdir, "parent_options.rds") |>
 20 |   readRDS() |>
 21 |   options()
 22 | ```
 23 | 
 24 | ```{r}
 25 | #| label: Convert R script to qmd
 26 | #| eval: !expr base::grepl("\\.R$", params$script)
 27 | #| echo: false
 28 | # Conversion is done to avoid problems caused by the warnings thrown
 29 | # by knitr::knit(), see e.g. issue #151
 30 | knitr::spin(
 31 |   text = readLines(con = params$script, warn = FALSE),
 32 |   knit = FALSE,
 33 |   format = "qmd"
 34 | ) |>
 35 |   writeLines(
 36 |     con = file.path(params$tmpdir, params$converted_qmd)
 37 |   )
 38 | ```
 39 | 
 40 | ```{r}
 41 | #| label: Log R
 42 | #| child: !expr file.path(params$tmpdir, params$converted_qmd)
 43 | #| eval: !expr file.exists(file.path(params$tmpdir, params$converted_qmd))
 44 | #| include: !expr file.exists(file.path(params$tmpdir, params$converted_qmd))
 45 | ```
 46 | 
 47 | ```{r}
 48 | #| label: Log Quarto
 49 | #| child: !expr params$script
 50 | #| eval: !expr base::grepl("\\.qmd$", params$script)
 51 | #| include: !expr base::grepl("\\.qmd$", params$script)
 52 | ```
 53 | 
 54 | ```{r}
 55 | #| label: Log Rmarkdown
 56 | #| child: !expr params$script
 57 | #| eval: !expr base::grepl("\\.Rmd$", params$script)
 58 | #| include: !expr base::grepl("\\.Rmd$", params$script)
 59 | ```
 60 | 
 61 | ```{python}
 62 | #| label: Setup Python tracking
 63 | #| eval: !expr base::grepl("\\.py$", params$script)
 64 | #| include: false
 65 | #| error: false
 66 | import sys
 67 | sys.path.append(r.params["tmpdir"])
 68 | from python_modules import get_package_status
 69 | get_package_status(r.params["tmpdir"]+"/"+'py_old_status.json')
 70 | ```
 71 | 
 72 | ```{python}
 73 | #| label: Log Python
 74 | #| file: !expr params$script
 75 | #| eval: !expr base::grepl("\\.py$", params$script)
 76 | #| include: !expr base::grepl("\\.py$", params$script)
 77 | ```
 78 | 
 79 | ```{python}
 80 | #| label: Log Python modules
 81 | #| eval: !expr base::grepl("\\.py$", params$script)
 82 | #| include: false
 83 | #| error: false
 84 | get_package_status(r.params["tmpdir"]+"/"+'py_new_status.json')
 85 | ```
 86 | 
 87 | ```{r}
 88 | #| label: Python modules
 89 | #| eval: !expr base::grepl("\\.py$", params$script)
 90 | #| include: false
 91 | #| error: false
 92 | saveRDS(
 93 |   object = system('python -m pip list -v', intern = TRUE),
 94 |   file = file.path(params$tmpdir, "py_pip_list.rds")
 95 | )
 96 | ```
 97 | 
 98 | ```{r}
 99 | #| label: renv status
100 | #| include: false
101 | #| error: false
102 | if (is.character(params$renv) && params$renv == "yes" ||
103 |       is.logical(params$renv) && params$renv) {
104 |   saveRDS(
105 |     object = whirl:::renv_status(),
106 |     file = file.path(params$tmpdir, "renv_status.rds")
107 |   )
108 | }
109 | ```
110 | 
111 | ```{r}
112 | #| label: Log options and environment
113 | #| include: false
114 | #| error: false
115 | saveRDS(
116 |       object = options(),
117 |       file = file.path(params$tmpdir, "options.rds")
118 | )
119 | 
120 | saveRDS(
121 |   object = Sys.getenv(),
122 |   file = file.path(params$tmpdir, "environment.rds")
123 | )
124 | ```
125 | 
126 | ```{r}
127 | #| label: Session info
128 | #| include: false
129 | #| error: false
130 | info <- list( # Due to https://github.com/r-lib/sessioninfo/issues/96
131 |   platform = sessioninfo::session_info(info = "platform")[["platform"]],
132 |   packages = sessioninfo::session_info(info = "packages")[["packages"]],
133 |   python = sessioninfo::session_info(info = "python")[["python"]]
134 | )
135 | 
136 | if (!"quarto" %in% names(info$platform)) { # Due to older versions of sessioninfo
137 |   info$platform$quarto <- paste(quarto::quarto_version(), "@", quarto::quarto_path())
138 | }
139 | 
140 | saveRDS(
141 |   object = info,
142 |   file = file.path(params$tmpdir, "session_info.rds")
143 | )
144 | ```
145 | 


--------------------------------------------------------------------------------
/man/run.Rd:
--------------------------------------------------------------------------------
  1 | % Generated by roxygen2: do not edit by hand
  2 | % Please edit documentation in R/run.R
  3 | \name{run}
  4 | \alias{run}
  5 | \title{Execute single or multiple R, R Markdown, and Quarto scripts}
  6 | \usage{
  7 | run(
  8 |   input = "_whirl.yml",
  9 |   steps = NULL,
 10 |   summary_file = "summary.html",
 11 |   n_workers = zephyr::get_option("n_workers", "whirl"),
 12 |   check_renv = zephyr::get_option("check_renv", "whirl"),
 13 |   track_files = zephyr::get_option("track_files", "whirl"),
 14 |   out_formats = zephyr::get_option("out_formats", "whirl"),
 15 |   log_dir = zephyr::get_option("log_dir", "whirl")
 16 | )
 17 | }
 18 | \arguments{
 19 | \item{input}{A character vector of file path(s) to R, R Markdown, Quarto
 20 | scripts, or files in a folder using regular expression, or to to a whirl
 21 | config file. The input can also be structured in a list where each element
 22 | will be executed sequentially, while scripts within each element can be
 23 | executed in parallel.}
 24 | 
 25 | \item{steps}{An optional argument that can be used if only certain steps
 26 | within a config files (or list) is to be executed. Should be equivalent to
 27 | the names of the steps found in the config file. If kept as NULL (default)
 28 | then all steps listed in the config file will be executed.}
 29 | 
 30 | \item{summary_file}{A character string specifying the file path where the
 31 | summary log will be stored.}
 32 | 
 33 | \item{n_workers}{Number of simultaneous workers used in the run function.
 34 | A maximum of 128 workers is allowed.. Default: \code{1}.}
 35 | 
 36 | \item{check_renv}{Should the projects renv status be checked?. Default: \code{FALSE}.}
 37 | 
 38 | \item{track_files}{Should files read and written be tracked?
 39 | Currently only supported on Linux.. Default: \code{FALSE}.}
 40 | 
 41 | \item{out_formats}{Which log format(s) to produce. Possibilities are \code{html},
 42 | \code{json}, and markdown formats: \code{gfm}, \code{commonmark}, and \code{markua}.. Default: \code{"html"}.}
 43 | 
 44 | \item{log_dir}{The output directory of the log files. Default is the folder of
 45 | the executed script. log_dir can be a path as a character or it can be a
 46 | function that takes the script path as input and returns the log directory.
 47 | For more information see the examples of \code{run()} or \code{vignette('whirl')}.. Default: \code{function (x)  dirname(x)}.}
 48 | }
 49 | \value{
 50 | A tibble containing the execution results for all the scripts.
 51 | }
 52 | \description{
 53 | Executes and logs the execution of the scripts.
 54 | Logs for each script are stored in the same folder as the script.
 55 | 
 56 | The way the execution is logged is configurable through several options for
 57 | e.g. the verbosity of the logs.
 58 | See \link{whirl-options} on how to configure these.
 59 | }
 60 | \examples{
 61 | \dontshow{if (FALSE) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
 62 | # Copy example scripts:
 63 | file.copy(
 64 |   from = system.file("examples", c("success.R", "warning.R", "error.R"),
 65 |     package = "whirl"
 66 |   ),
 67 |   to = tempdir()
 68 | )
 69 | 
 70 | # Run a single script and create log:
 71 | run(file.path(tempdir(), "success.R"))
 72 | 
 73 | # Run several scripts in parallel on up to 2 workers:
 74 | run(
 75 |   input = file.path(tempdir(), c("success.R", "warning.R", "error.R")),
 76 |   n_workers = 2
 77 | )
 78 | 
 79 | # Run several scripts in two steps by providing them as list elements:
 80 | run(
 81 |   list(
 82 |     file.path(tempdir(), c("success.R", "warning.R")),
 83 |     file.path(tempdir(), "error.R")
 84 |   )
 85 | )
 86 | 
 87 | # Re-directing the logs to a sub-folder by utilizing the log_dir argument in
 88 | # run(). This will require that the sub-folder exists.
 89 | 
 90 | # Specifying the path using a manually defined character
 91 | run(file.path(tempdir(), "success.R"), log_dir = tempdir())
 92 | 
 93 | # Specifying the path with a generic function that can handle the scripts
 94 | # individually.
 95 | run(
 96 |   input = file.path(tempdir(), "success.R"),
 97 |   log_dir = function(x) {paste0(dirname(x), "/logs")}
 98 | )
 99 | \dontshow{\}) # examplesIf}
100 | }
101 | 


--------------------------------------------------------------------------------
/NEWS.md:
--------------------------------------------------------------------------------
 1 | # whirl dev
 2 | 
 3 | * Fixed bug where warnings were given when scripts are missing a final EOL (#206)
 4 | * Fixed bug so no warning is shown when saving `options()` to the temp folder used by whirl (#206)
 5 | * Fixed so Quarto is started with the right renv library paths when using renv (#215)
 6 | * Refactored log.qmd to remove read_log.
 7 | 
 8 | # whirl 0.3.1
 9 | 
10 | * Improved error handling when the log cannot be created.
11 | * Added new option `environment_secrets` to control which secret environment variables not to include in the log.
12 | * Improved progress bar of `run()` to show all currently running scripts.
13 | * Removed `verbosity_level` argument to `run()` since it is now completely controlled by zephyr options (see `help("whirl-options")`).
14 | * Fixed bug where a script would not execute if using `options(warn = 2)` (#151)
15 | * The log now distinguishes between directly and indirectly used packages, and visually highlights their approval status if a list of approved packages is provided.
16 | * Added check for approved Python packages with the `approved_python_packages` option similar to for R.
17 | * Improved how to find used Python packages, so it now only lists the packages actually used in the script.
18 | 
19 | # whirl 0.3.0
20 | 
21 | * Add `write_biocompute()` to create [BioCompute Objects](https://www.biocomputeobject.org/) containing the logs in a standardized JSON format.
22 | * Calling `run()` with `track_files = TRUE` now checks if strace can be attached to the process.
23 | * Improved json logs and similar returned output from running a script (`result` column in return from `run()`).
24 | * Simplified approved packages check. Now the user supplies a character vector of packages and versions specified as `{package}@{version}`.
25 | * Implement use of `tag` in the returned output. Each script is now tagged with the step name in the summary report.
26 | 
27 | # whirl 0.2.0
28 | 
29 | * Initial CRAN release.
30 | * Default `input` argument of `run()` set to "_whirl.yml".
31 | * Added RStudio addins for running all scripts and the active script.
32 | * Fix typo in `track_files_discards` option.
33 | * Increases unit test coverage and skips `run()` tests etc. when Quarto is not available.
34 | 
35 | # whirl 0.1.7
36 | * Enable redirection of logs through the `log_dir` argument in `run()`.
37 | * Changed the title on the individual logs to the script name and moved the path to a distinct section within the title-block.
38 | * Fixed a bug where the hyperlink in the summary files was not rendered correctly.
39 | * Fixed a bug where the installed python packages were not listed in the log.
40 | * Enable the use of R expressions in the yaml configuration file.
41 | * Enables the user to define the working directory for each script with the `execute_dir` option.
42 | 
43 | # whirl 0.1.6
44 | * Added support for logging of Python scripts with `run()`.
45 | * Improved unit tests for `run()`.
46 | * Fixing a bug where the queue was not returned correctly in some instances.
47 | * Switched to using `Sys.glob()` instead of `utils::glob2rx()`.
48 | 
49 | # whirl 0.1.4
50 | * Add `use_whirl()` utility function.
51 | 
52 | # whirl 0.1.3
53 | * Adding additional arguments to `run()` allowing the user to:
54 |   - control the verbosity level
55 |   - specify whether renv should be checked
56 |   - specify which files to track
57 |   - adjust the output format of the log files.
58 | 
59 | # whirl 0.1.1
60 | * Fix enabling rendering of md log formats("gfm", "commonmark", "markua").
61 | 
62 | # whirl 0.1.0
63 | * First version publicly available on GitHub.
64 | 
65 | # whirl 0.0.5
66 | * Updated documentation
67 | * README and vignettes are now ready for users.
68 | 
69 | # whirl 0.0.4
70 | * Adjusting `run()` to unify execution of scripts, lists of scripts, and configuration files.
71 | * Using multiple independent `callr::r_session` when executing several scripts.
72 | * Cleanup of namespace and exported functions.
73 | 
74 | # whirl 0.0.3
75 | * Initial version of `run()`.
76 | * Substituting spinner with progress bar when executing single scripts.
77 | 
78 | # whirl 0.0.2
79 | * Update so that the execution (including order of execution) can be controlled through a config file.
80 | 
81 | # whirl 0.0.1
82 | * First version of package.
83 | 


--------------------------------------------------------------------------------
/inst/documents/log.qmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: "`r basename(params$title)`"
  3 | date: "`r Sys.time()`"
  4 | date-format: "YYYY-MM-DDTHH:mm:ss zzz"
  5 | abstract: "`r params$title`"
  6 | template-partials:
  7 |   - title-block.html
  8 | params:
  9 |   result: NULL
 10 |   title: 'dev'
 11 |   tmpdir: '.'
 12 | format:
 13 |   html:
 14 |     embed-resources: true
 15 |     toc: true
 16 |     toc-depth: 2
 17 |     toc-expand: true
 18 |     smooth-scroll: true
 19 |     css: style.css
 20 | ---
 21 | 
 22 | # Summary
 23 | 
 24 | ```{r}
 25 | #| label: Setup
 26 | #| echo: false
 27 | 
 28 | file.path(params$tmpdir, "parent_options.rds") |>
 29 |   readRDS() |>
 30 |   options()
 31 | 
 32 | format_approved <- function(x) {
 33 |   if (all(is.na(x$approved))) {
 34 |     x$approved <- NULL
 35 |     return(x)
 36 |   }
 37 | 
 38 |   x$approved = ifelse(x$approved == TRUE, "\u2705 Yes", "\u274C No")
 39 | 
 40 |   x
 41 | }
 42 | 
 43 | knit_print.data.frame <- function(x, ...) {
 44 |   names(x) <- gsub(
 45 |     # Use space instead of . or _ in columns names
 46 |     pattern = "\\.|_",
 47 |     replacement = " ",
 48 |     x = names(x)
 49 |   )
 50 | 
 51 |   names(x) <- gsub(
 52 |     # Convert all column names to title case
 53 |     pattern = "\\b([[:alpha:]])([[:alpha:]]+)",
 54 |     replacement = "\\U\\1\\L\\2",
 55 |     x = names(x),
 56 |     perl = TRUE
 57 |   )
 58 | 
 59 |   for (i in which(vapply(X = x, FUN = is.logical, FUN.VALUE = logical(1)))) {
 60 |     x[[i]] <- ifelse(x[[i]], "Yes", "No") # Yes/No instead of TRUE/FALSE
 61 |   }
 62 | 
 63 |   x |>
 64 |     knitr::kable(format = "html") |>
 65 |     kableExtra::kable_styling(
 66 |       bootstrap_options = "striped",
 67 |       full_width = TRUE
 68 |     ) |>
 69 |     knitr::knit_print()
 70 | }
 71 | 
 72 | registerS3method(
 73 |   "knit_print",
 74 |   "data.frame",
 75 |   knit_print.data.frame,
 76 |   envir = asNamespace("knitr")
 77 | )
 78 | ```
 79 | 
 80 | ```{r}
 81 | #| label: Status
 82 | #| include: false
 83 | result <- readRDS(params$result)
 84 | ```
 85 | 
 86 | ```{r}
 87 | #| label: Report status
 88 | #| echo: false
 89 | status <- result$status
 90 | 
 91 | status_txt <- status[c("error", "warning")]
 92 | status_txt <- status_txt[lapply(status_txt, length) > 0] |>
 93 |   lapply(\(x) paste("*", x, collapse = "\n")) |>
 94 |   unlist()
 95 | 
 96 | whirl:::quarto_callout(
 97 |   text = status_txt,
 98 |   title = status$message,
 99 |   type = switch(
100 |     status$message,
101 |     "error" = "important",
102 |     "warning" = "warning",
103 |     "success" = "tip"
104 |   ),
105 |   collapse = status$message != "success"
106 | )
107 | ```
108 | 
109 | ```{r}
110 | #| label: Renv
111 | #| echo: false
112 | #| eval: !expr file.exists(file.path(params$tmpdir, "renv_status.rds"))
113 | params$tmpdir |>
114 |   file.path("renv_status.rds") |>
115 |   readRDS() |>
116 |   whirl:::knit_print_whirl_renv_status()
117 | ```
118 | 
119 | ```{r}
120 | #| label: Used files
121 | #| child: !expr file.path(params$tmpdir, "files.qmd")
122 | #| eval: !expr (!is.null(result$files))
123 | ```
124 | 
125 | # Script
126 | ::: {.script-container}
127 | 
128 | ```{r}
129 | #| child: !expr file.path(params$tmpdir, "dummy.md")
130 | ```
131 | 
132 | :::
133 | 
134 | # Session info
135 | 
136 | ```{r}
137 | #| label: Session info settings
138 | #| include: false
139 | knitr::opts_chunk$set(
140 |   error = FALSE,
141 |   warning = FALSE,
142 |   message = FALSE,
143 |   echo = FALSE
144 | )
145 | ```
146 | 
147 | :::{.callout-note collapse=false appearance="minimal"}
148 | ## Platform
149 | 
150 | ```{r}
151 | #| label: Platform
152 | result$session$platform
153 | ```
154 | 
155 | :::
156 | 
157 | ```{r}
158 | #| label: Python
159 | #| child: !expr file.path(params$tmpdir, "python.qmd")
160 | #| eval: !expr grepl("\\.py$", params$title)
161 | ```
162 | 
163 | :::{.callout-note collapse=false appearance="minimal"}
164 | ## R packages used directly
165 | 
166 | ```{r}
167 | #| label: R packages used directly
168 | result$session$R |>
169 |   dplyr::filter(directly_used) |>
170 |   dplyr::select("package", "version", "approved", "source") |>
171 |   format_approved()
172 | ```
173 | 
174 | :::
175 | 
176 | :::{.callout-note collapse=true appearance="minimal"}
177 | ## R packages used indirectly
178 | 
179 | ```{r}
180 | #| label: R packages used indirectly
181 | result$session$R |>
182 |   dplyr::filter(!directly_used) |>
183 |   dplyr::select("package", "version", "source")
184 | ```
185 | 
186 | :::
187 | 
188 | :::{.callout-note collapse=true appearance="minimal"}
189 | ## Environment variables
190 | 
191 | ```{r}
192 | #| label: Environment
193 | result$session$environment
194 | ```
195 | 
196 | :::
197 | 
198 | :::{.callout-note collapse=true appearance="minimal"}
199 | ## Options
200 | 
201 | ```{r}
202 | #| label: Options
203 | result$session$options
204 | ```
205 | 
206 | :::
207 | 


--------------------------------------------------------------------------------
/man/whirl-options.Rd:
--------------------------------------------------------------------------------
  1 | % Generated by roxygen2: do not edit by hand
  2 | % Please edit documentation in R/whirl-options.R
  3 | \name{whirl-options}
  4 | \alias{whirl-options}
  5 | \title{Options for whirl}
  6 | \description{
  7 | \subsection{verbosity_level}{
  8 | 
  9 | Verbosity level for functions in whirl.
 10 | See \link[zephyr:verbosity_level]{zephyr::verbosity_level} for details.
 11 | \itemize{
 12 | \item Default: \code{NA_character_}
 13 | \item Option: \code{whirl.verbosity_level}
 14 | \item Environment: \code{R_WHIRL_VERBOSITY_LEVEL}
 15 | }
 16 | }
 17 | 
 18 | \subsection{out_formats}{
 19 | 
 20 | Which log format(s) to produce. Possibilities are \code{html},
 21 | \code{json}, and markdown formats: \code{gfm}, \code{commonmark}, and \code{markua}.
 22 | \itemize{
 23 | \item Default: \code{"html"}
 24 | \item Option: \code{whirl.out_formats}
 25 | \item Environment: \code{R_WHIRL_OUT_FORMATS}
 26 | }
 27 | }
 28 | 
 29 | \subsection{track_files}{
 30 | 
 31 | Should files read and written be tracked?
 32 | Currently only supported on Linux.
 33 | \itemize{
 34 | \item Default: \code{FALSE}
 35 | \item Option: \code{whirl.track_files}
 36 | \item Environment: \code{R_WHIRL_TRACK_FILES}
 37 | }
 38 | }
 39 | 
 40 | \subsection{check_renv}{
 41 | 
 42 | Should the projects renv status be checked?
 43 | \itemize{
 44 | \item Default: \code{FALSE}
 45 | \item Option: \code{whirl.check_renv}
 46 | \item Environment: \code{R_WHIRL_CHECK_RENV}
 47 | }
 48 | }
 49 | 
 50 | \subsection{track_files_discards}{
 51 | 
 52 | List of file naming patterns not be tracked when track_files = TRUE
 53 | \itemize{
 54 | \item Default: \code{c("^/lib", "^/etc", "^/lib64", "^/usr", "^/var", "^/opt", "^/sys", "^/proc", "^/tmp", "^/null", "^/urandom", "^/.cache")}
 55 | \item Option: \code{whirl.track_files_discards}
 56 | \item Environment: \code{R_WHIRL_TRACK_FILES_DISCARDS}
 57 | }
 58 | }
 59 | 
 60 | \subsection{track_files_keep}{
 61 | 
 62 | List of file naming patterns always to be tracked when
 63 | track_files = TRUE
 64 | \itemize{
 65 | \item Default: \code{NULL}
 66 | \item Option: \code{whirl.track_files_keep}
 67 | \item Environment: \code{R_WHIRL_TRACK_FILES_KEEP}
 68 | }
 69 | }
 70 | 
 71 | \subsection{approved_packages}{
 72 | 
 73 | List of approved R packages and their version in the format: \{name\}@\{version\}
 74 | \itemize{
 75 | \item Default: \code{NULL}
 76 | \item Option: \code{whirl.approved_packages}
 77 | \item Environment: \code{R_WHIRL_APPROVED_PACKAGES}
 78 | }
 79 | }
 80 | 
 81 | \subsection{approved_python_packages}{
 82 | 
 83 | List of approved Python packages and their version in the format: \{name\}@\{version\}
 84 | \itemize{
 85 | \item Default: \code{NULL}
 86 | \item Option: \code{whirl.approved_python_packages}
 87 | \item Environment: \code{R_WHIRL_APPROVED_PYTHON_PACKAGES}
 88 | }
 89 | }
 90 | 
 91 | \subsection{n_workers}{
 92 | 
 93 | Number of simultaneous workers used in the run function.
 94 | A maximum of 128 workers is allowed.
 95 | \itemize{
 96 | \item Default: \code{1}
 97 | \item Option: \code{whirl.n_workers}
 98 | \item Environment: \code{R_WHIRL_N_WORKERS}
 99 | }
100 | }
101 | 
102 | \subsection{log_dir}{
103 | 
104 | The output directory of the log files. Default is the folder of
105 | the executed script. log_dir can be a path as a character or it can be a
106 | function that takes the script path as input and returns the log directory.
107 | For more information see the examples of \code{run()} or \code{vignette('whirl')}.
108 | \itemize{
109 | \item Default: \code{function (x)  dirname(x)}
110 | \item Option: \code{whirl.log_dir}
111 | \item Environment: \code{R_WHIRL_LOG_DIR}
112 | }
113 | }
114 | 
115 | \subsection{execute_dir}{
116 | 
117 | The working directory of the process executing each script.
118 | Default us to execute R files from the working directory when calling \code{run()}
119 | and all other functions from the directory of the script. To change provide
120 | a character path (used for all scripts) or a function that takes the script
121 | as input and returns the execution directory.
122 | \itemize{
123 | \item Default: \code{NULL}
124 | \item Option: \code{whirl.execute_dir}
125 | \item Environment: \code{R_WHIRL_EXECUTE_DIR}
126 | }
127 | }
128 | 
129 | \subsection{wait_timeout}{
130 | 
131 | Timeout for waiting for the R process from callr::r_session to
132 | start, in milliseconds.
133 | \itemize{
134 | \item Default: \code{9000}
135 | \item Option: \code{whirl.wait_timeout}
136 | \item Environment: \code{R_WHIRL_WAIT_TIMEOUT}
137 | }
138 | }
139 | 
140 | \subsection{environment_secrets}{
141 | 
142 | Secret environment variable patterns.
143 | Any variables matching will not be included in the logs.
144 | \itemize{
145 | \item Default: \code{c("BASH_FUNC", "_SSL_CERT", "_KEY", "_PAT", "_TOKEN")}
146 | \item Option: \code{whirl.environment_secrets}
147 | \item Environment: \code{R_WHIRL_ENVIRONMENT_SECRETS}
148 | }
149 | }
150 | }
151 | 


--------------------------------------------------------------------------------
/tests/testthat/test-run.R:
--------------------------------------------------------------------------------
  1 | expect_single_script <- function(res) {
  2 |   res[["status"]] |>
  3 |     testthat::expect_equal("success")
  4 | 
  5 |   res[["result"]][[1]] |>
  6 |     names() |>
  7 |     testthat::expect_equal(c("script", "status", "files", "session", "logs"))
  8 | 
  9 |   return(invisible(res))
 10 | }
 11 | 
 12 | test_that("Run single R script", {
 13 |   skip_if_no_quarto()
 14 |   res <- test_script("success.R") |>
 15 |     run(summary_file = NULL) |>
 16 |     expect_no_warning() |>
 17 |     expect_no_error()
 18 | 
 19 |   expect_single_script(res)
 20 | 
 21 |   withr::with_options(
 22 |     new = list(whirl.verbosity_level = "verbose"),
 23 |     code = {
 24 |       test_script("success.R") |>
 25 |         run(summary_file = NULL) |>
 26 |         expect_message() |>
 27 |         suppressMessages()
 28 |     }
 29 |   )
 30 | })
 31 | 
 32 | test_that("Run single python script", {
 33 |   skip_on_cran()
 34 |   skip_if_no_quarto()
 35 |   skip_if_no_python()
 36 |   res <- test_script("py_success.py") |>
 37 |     run(summary_file = NULL) |>
 38 |     expect_no_warning() |>
 39 |     expect_no_error()
 40 | 
 41 |   expect_single_script(res)
 42 | })
 43 | 
 44 | expect_multiple_scripts <- function(res) {
 45 |   res[["status"]] |>
 46 |     testthat::expect_equal(c("success", "warning", "error"))
 47 | 
 48 |   res[["result"]][[1]][["status"]][c("errors", "warnings")] |>
 49 |     lapply(\(x) length(x) > 0) |>
 50 |     unlist() |>
 51 |     testthat::expect_equal(c(FALSE, FALSE), ignore_attr = TRUE)
 52 | 
 53 |   res[["result"]][[2]][["status"]][c("errors", "warnings")] |>
 54 |     lapply(\(x) length(x) > 0) |>
 55 |     unlist() |>
 56 |     testthat::expect_equal(c(FALSE, TRUE), ignore_attr = TRUE)
 57 | 
 58 |   res[["result"]][[3]][["status"]][c("errors", "warnings")] |>
 59 |     lapply(\(x) length(x) > 0) |>
 60 |     unlist() |>
 61 |     testthat::expect_equal(c(TRUE, FALSE), ignore_attr = TRUE)
 62 | 
 63 |   return(invisible(res))
 64 | }
 65 | 
 66 | test_that("Run multiple R scripts", {
 67 |   skip_if_no_quarto()
 68 |   res <- test_script(c("success.R", "warning.R", "error.R")) |>
 69 |     run(n_workers = 2) |>
 70 |     expect_no_error()
 71 | 
 72 |   expect_multiple_scripts(res)
 73 | })
 74 | 
 75 | test_that("Run multiple python scripts", {
 76 |   skip_on_cran()
 77 |   skip_if_no_quarto()
 78 |   skip_if_no_python()
 79 |   res <- test_script(c("py_success.py", "py_warning.py", "py_error.py")) |>
 80 |     run(n_workers = 2) |>
 81 |     expect_no_error()
 82 | 
 83 |   expect_multiple_scripts(res)
 84 | })
 85 | 
 86 | test_that("Run yaml config file", {
 87 |   skip_on_cran()
 88 |   skip_if_no_quarto()
 89 |   res <- test_script("_whirl.yaml") |>
 90 |     run(summary_file = NULL, n_workers = 2) |>
 91 |     expect_no_error()
 92 | })
 93 | 
 94 | test_that("Change the log_dir to a path", {
 95 |   skip_on_cran()
 96 |   skip_if_no_quarto()
 97 |   # Custom path
 98 |   custom_path <- withr::local_tempdir()
 99 | 
100 |   # Execute run() with log_dir = custom path
101 |   res <- test_script("success.R") |>
102 |     run(summary_file = NULL, log_dir = custom_path) |>
103 |     expect_no_error()
104 | 
105 |   # Check if the log file is created in the custom path
106 |   file.path(custom_path, "success_log.html") |>
107 |     file.exists() |>
108 |     expect_true()
109 | })
110 | 
111 | test_that("Change the log_dir with a function", {
112 |   skip_on_cran()
113 |   skip_if_no_quarto()
114 |   # Custom path and copy script
115 |   custom_path <- withr::local_tempdir()
116 |   dir.create(file.path(custom_path, "logs"))
117 |   file.copy(from = test_script("warning.R"), to = custom_path) |>
118 |     expect_true()
119 | 
120 |   # Execute run() with log_dir as a function
121 |   res <- file.path(custom_path, "warning.R") |>
122 |     run(summary_file = NULL, log_dir = function(x) {
123 |       paste0(dirname(x), "/logs")
124 |     }) |>
125 |     expect_no_error()
126 | 
127 |   # Check if the log file is created in the correct folder
128 |   file.path(custom_path, "logs", "warning_log.html") |>
129 |     file.exists() |>
130 |     expect_true()
131 | })
132 | 
133 | test_that("Change the execute_dir to a path", {
134 |   skip_on_cran()
135 |   skip_if_no_quarto()
136 |   custom_path <- withr::local_tempdir()
137 |   withr::local_options(whirl.execute_dir = custom_path)
138 | 
139 |   test_script("success.R") |>
140 |     run(summary_file = NULL) |>
141 |     expect_no_error()
142 | 
143 |   withr::local_options(whirl.execute_dir = "this/path/does/not/exist")
144 | 
145 |   test_script("success.R") |>
146 |     run(summary_file = NULL) |>
147 |     expect_error()
148 | })
149 | 
150 | test_that("Change the execute_dir to a function", {
151 |   skip_on_cran()
152 |   skip_if_no_quarto()
153 |   withr::local_options(whirl.execute_dir = \(x) dirname(x))
154 | 
155 |   test_script("success.R") |>
156 |     run(summary_file = NULL) |>
157 |     expect_no_error()
158 | })
159 | 


--------------------------------------------------------------------------------
/R/run.R:
--------------------------------------------------------------------------------
  1 | #' Execute single or multiple R, R Markdown, and Quarto scripts
  2 | #'
  3 | #' @description
  4 | #' Executes and logs the execution of the scripts.
  5 | #' Logs for each script are stored in the same folder as the script.
  6 | #'
  7 | #' The way the execution is logged is configurable through several options for
  8 | #' e.g. the verbosity of the logs.
  9 | #' See [whirl-options] on how to configure these.
 10 | #'
 11 | #' @param input  A character vector of file path(s) to R, R Markdown, Quarto
 12 | #'   scripts, or files in a folder using regular expression, or to to a whirl
 13 | #'   config file. The input can also be structured in a list where each element
 14 | #'   will be executed sequentially, while scripts within each element can be
 15 | #'   executed in parallel.
 16 | #' @param steps An optional argument that can be used if only certain steps
 17 | #'   within a config files (or list) is to be executed. Should be equivalent to
 18 | #'   the names of the steps found in the config file. If kept as NULL (default)
 19 | #'   then all steps listed in the config file will be executed.
 20 | #' @param summary_file A character string specifying the file path where the
 21 | #'   summary log will be stored.
 22 | #' @inheritParams whirl-options-params
 23 | #' @return A tibble containing the execution results for all the scripts.
 24 | #'
 25 | #' @examplesIf FALSE
 26 | #' # Copy example scripts:
 27 | #' file.copy(
 28 | #'   from = system.file("examples", c("success.R", "warning.R", "error.R"),
 29 | #'     package = "whirl"
 30 | #'   ),
 31 | #'   to = tempdir()
 32 | #' )
 33 | #'
 34 | #' # Run a single script and create log:
 35 | #' run(file.path(tempdir(), "success.R"))
 36 | #'
 37 | #' # Run several scripts in parallel on up to 2 workers:
 38 | #' run(
 39 | #'   input = file.path(tempdir(), c("success.R", "warning.R", "error.R")),
 40 | #'   n_workers = 2
 41 | #' )
 42 | #'
 43 | #' # Run several scripts in two steps by providing them as list elements:
 44 | #' run(
 45 | #'   list(
 46 | #'     file.path(tempdir(), c("success.R", "warning.R")),
 47 | #'     file.path(tempdir(), "error.R")
 48 | #'   )
 49 | #' )
 50 | #'
 51 | #' # Re-directing the logs to a sub-folder by utilizing the log_dir argument in
 52 | #' # run(). This will require that the sub-folder exists.
 53 | #'
 54 | #' # Specifying the path using a manually defined character
 55 | #' run(file.path(tempdir(), "success.R"), log_dir = tempdir())
 56 | #'
 57 | #' # Specifying the path with a generic function that can handle the scripts
 58 | #' # individually.
 59 | #' run(
 60 | #'   input = file.path(tempdir(), "success.R"),
 61 | #'   log_dir = function(x) {paste0(dirname(x), "/logs")}
 62 | #' )
 63 | #'
 64 | #' @export
 65 | run <- function(
 66 |   input = "_whirl.yml",
 67 |   steps = NULL,
 68 |   summary_file = "summary.html",
 69 |   n_workers = zephyr::get_option("n_workers", "whirl"),
 70 |   check_renv = zephyr::get_option("check_renv", "whirl"),
 71 |   track_files = zephyr::get_option("track_files", "whirl"),
 72 |   out_formats = zephyr::get_option("out_formats", "whirl"),
 73 |   log_dir = zephyr::get_option("log_dir", "whirl")
 74 | ) {
 75 |   # Additional Settings
 76 |   track_files_discards <- zephyr::get_option("track_files_discards") |>
 77 |     c(.libPaths()) # Don't track the library paths
 78 |   track_files_keep <- zephyr::get_option("track_files_keep")
 79 | 
 80 |   # Check suggest imports if they are needed
 81 |   if (check_renv) {
 82 |     rlang::check_installed("renv")
 83 |   }
 84 | 
 85 |   # Message when initiating
 86 |   d <- NULL
 87 |   zephyr::msg_verbose(
 88 |     message = "Executing scripts and generating logs",
 89 |     theme = list(
 90 |       rule = list(color = "skyblue3", "line-type" = "double")
 91 |     ),
 92 |     msg_fun = \(message, theme, .envir) {
 93 |       d <<- cli::cli_div(theme = theme, .auto_close = FALSE)
 94 |       cli::cli_rule(message, .envir = .envir)
 95 |     }
 96 |   )
 97 | 
 98 |   # Message when ending
 99 |   on.exit({
100 |     zephyr::msg_verbose(
101 |       message = "End of process",
102 |       div = d,
103 |       msg_fun = \(message, div, .envir) {
104 |         cli::cli_rule(message, .envir = .envir)
105 |         cli::cli_end(div)
106 |       }
107 |     )
108 |   })
109 | 
110 |   # Constrain the number of workers
111 |   n_workers <- min(128, n_workers)
112 | 
113 |   zephyr::msg_verbose(
114 |     message = "Executing scripts in parallel using {n_workers} worker(s)"
115 |   )
116 | 
117 |   # Initiating the queue
118 |   queue <- whirl_queue$new(
119 |     n_workers = n_workers,
120 |     check_renv = check_renv,
121 |     track_files = track_files,
122 |     out_formats = out_formats,
123 |     track_files_discards = track_files_discards,
124 |     track_files_keep = track_files_keep,
125 |     log_dir = log_dir
126 |   )
127 | 
128 |   result <- internal_run(
129 |     input = input,
130 |     steps = steps,
131 |     queue = queue,
132 |     level = 1
133 |   )
134 | 
135 |   # Create the summary log if required
136 |   if (!is.null(summary_file)) {
137 |     summary_tibble <- util_queue_summary(result$queue)
138 |     render_summary(input = summary_tibble, summary_file = summary_file)
139 |   }
140 | 
141 |   queue <- result$queue
142 |   attr(x = queue, which = "whirl_input") <- input
143 | 
144 |   invisible(queue)
145 | }
146 | 


--------------------------------------------------------------------------------
/README.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | output: github_document
  3 | ---
  4 | 
  5 | <!-- README.md is generated from README.Rmd. Please edit that file -->
  6 | 
  7 | ```{r setup, include = FALSE}
  8 | knitr::opts_chunk$set(
  9 |   collapse = TRUE,
 10 |   comment = "#>",
 11 |   fig.path = "man/figures/README-",
 12 |   out.width = "100%"
 13 | )
 14 | 
 15 | # Minimal printing for shorter readme
 16 | options(whirl.verbosity_level = "minimal")
 17 | 
 18 | # Use a temporary directory as working directory with all examples available
 19 | tmp <- withr::local_tempdir()
 20 | 
 21 | system.file("examples", package = "whirl") |>
 22 |   list.files(full.names = TRUE) |>
 23 |   file.copy(to = tmp)
 24 | 
 25 | knitr::opts_knit$set(root.dir = tmp)
 26 | ```
 27 | 
 28 | # whirl
 29 | 
 30 | <!-- badges: start -->
 31 | 
 32 | [![Checks](https://github.com/NovoNordisk-OpenSource/whirl/actions/workflows/check_and_co.yaml/badge.svg)](https://github.com/NovoNordisk-OpenSource/whirl/actions/workflows/check_and_co.yaml)
 33 | 
 34 | [![Codecov test coverage](https://codecov.io/gh/NovoNordisk-OpenSource/whirl/graph/badge.svg)](https://app.codecov.io/gh/NovoNordisk-OpenSource/whirl)
 35 | 
 36 | [![CRAN status](https://www.r-pkg.org/badges/version/whirl)](https://CRAN.R-project.org/package=whirl)
 37 | 
 38 | [<img src="http://pharmaverse.org/shields/whirl.svg">](https://pharmaverse.org)
 39 | 
 40 | <!-- badges: end -->
 41 | 
 42 | ##  Overview
 43 | 
 44 | The whirl package provide functionalities for executing scripts in batch and simultaneously getting a log from the individual executions. 
 45 | A log from script execution is in many pharmaceutical companies a GxP requirement, and the whirl package honors this requirement by generating a log that, among other things, contains information about:
 46 | 
 47 | * Status (did the script run with any error or warnings)
 48 | * The actual code itself
 49 | * Date and time of execution
 50 | * The environment the script was executed under (session info)
 51 | * Information about packages versions that was utilized
 52 | * Environmental variables  
 53 | 
 54 | And all this is wrapped into a nicely formatted html document that is easy to navigate. 
 55 | 
 56 | ## Installation
 57 | 
 58 | ```{r, eval=FALSE}
 59 | # Install the released version from CRAN:
 60 | install.packages("whirl")
 61 | # Install the development version from GitHub:
 62 | pak::pak("NovoNordisk-OpenSource/whirl")
 63 | ```
 64 | 
 65 | ## Usage
 66 | 
 67 | The main function in the whirl package is `run()` which takes an `input` argument
 68 | that defines the scripts to be executed.
 69 | 
 70 | The simplest way is to provide the path to a single script:
 71 | 
 72 | ```{r ex-script}
 73 | library(whirl)
 74 | 
 75 | run("success.R")
 76 | ```
 77 | 
 78 | It is also possible to run several scripts simultaneously:
 79 | 
 80 | ```{r ex-scripts}
 81 | result <- run(c("success.R", "warning.R"), n_workers = 2)
 82 | ```
 83 | 
 84 | Here we are specifying that `run()` can use up to two simultaneous workers to execute the scripts,
 85 | meaning that they will be executed in parallel.
 86 | 
 87 | When using `run()` the following files are created:
 88 | 
 89 | 1. Creates a log in the same directory as the script with the names `{script_name}_log.html`. See [example_log.html](https://novonordisk-opensource.github.io/whirl/articles/example_log.html) for an example of a simple log.
 90 | 1. Creates a summary log with the overall status of each script. Default path is `summary.html`. See [summary.html](https://novonordisk-opensource.github.io/whirl/articles/summary.html) for an example of a summary of the same log as above.
 91 | 
 92 | Apart from this the function also returns a `tibble` with the status of the script execution similar to the content of the summary above:
 93 | 
 94 | ```{r ex-return}
 95 | print(result)
 96 | ```
 97 | 
 98 | ## Config files
 99 | 
100 | `run()` also supports running scripts in several sequential steps. 
101 | This setup is very useful when your projects have several steps that depends on each others output, and thereby need to be executed in a specific order.
102 | The best way to implement this in your project is use a configuration file for whirl. 
103 | The configuration file is a `yaml` file that specifies each steps:
104 | 
105 | `_whirl.yaml:`
106 | ```yaml
107 | `r paste(readLines("_whirl.yaml"), collapse = "\n")```
108 | ```
109 | 
110 | Here we are specifying that in the first step we run `succes.R`. And then when this step has been completed we continue to running 
111 | the scripts in the second steps. 
112 | 
113 | ```{r ex-run-config}
114 | result <- run("_whirl.yaml", n_workers = 2)
115 | ```
116 | 
117 | ```{r, ex-return-config}
118 | print(result)
119 | ```
120 | 
121 | ## Useful links
122 | 
123 | For more information about how to customize the the execution and the logging for your needs see the following:
124 | 
125 | * `run()`: For further information on how to call it.
126 | * `vignette("whirl")`: For a more in depth explanation, and more advanced usage.
127 | * `vignette("articles/example")`: With a simple example, including the created log.
128 | * `help("whirl-options")`: On how to change the default behavior of whirl.
129 | * [NovoNordisk-OpenSource/R-packages](https://novonordisk-opensource.github.io/R-packages/) for an overview of connector and other R packages published by Novo Nordisk.
130 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | 
  2 | <!-- README.md is generated from README.Rmd. Please edit that file -->
  3 | 
  4 | # whirl
  5 | 
  6 | <!-- badges: start -->
  7 | 
  8 | [![Checks](https://github.com/NovoNordisk-OpenSource/whirl/actions/workflows/check_and_co.yaml/badge.svg)](https://github.com/NovoNordisk-OpenSource/whirl/actions/workflows/check_and_co.yaml)
  9 | 
 10 | [![Codecov test
 11 | coverage](https://codecov.io/gh/NovoNordisk-OpenSource/whirl/graph/badge.svg)](https://app.codecov.io/gh/NovoNordisk-OpenSource/whirl)
 12 | 
 13 | [![CRAN
 14 | status](https://www.r-pkg.org/badges/version/whirl)](https://CRAN.R-project.org/package=whirl)
 15 | 
 16 | [<img src="http://pharmaverse.org/shields/whirl.svg">](https://pharmaverse.org)
 17 | 
 18 | <!-- badges: end -->
 19 | 
 20 | ## Overview
 21 | 
 22 | The whirl package provide functionalities for executing scripts in batch
 23 | and simultaneously getting a log from the individual executions. A log
 24 | from script execution is in many pharmaceutical companies a GxP
 25 | requirement, and the whirl package honors this requirement by generating
 26 | a log that, among other things, contains information about:
 27 | 
 28 | - Status (did the script run with any error or warnings)
 29 | - The actual code itself
 30 | - Date and time of execution
 31 | - The environment the script was executed under (session info)
 32 | - Information about packages versions that was utilized
 33 | - Environmental variables
 34 | 
 35 | And all this is wrapped into a nicely formatted html document that is
 36 | easy to navigate.
 37 | 
 38 | ## Installation
 39 | 
 40 | ``` r
 41 | # Install the released version from CRAN:
 42 | install.packages("whirl")
 43 | # Install the development version from GitHub:
 44 | pak::pak("NovoNordisk-OpenSource/whirl")
 45 | ```
 46 | 
 47 | ## Usage
 48 | 
 49 | The main function in the whirl package is `run()` which takes an `input`
 50 | argument that defines the scripts to be executed.
 51 | 
 52 | The simplest way is to provide the path to a single script:
 53 | 
 54 | ``` r
 55 | library(whirl)
 56 | 
 57 | run("success.R")
 58 | #> ✔ success.R: Completed succesfully. See log html.
 59 | ```
 60 | 
 61 | It is also possible to run several scripts simultaneously:
 62 | 
 63 | ``` r
 64 | result <- run(c("success.R", "warning.R"), n_workers = 2)
 65 | #> ✔ success.R: Completed succesfully. See log html.
 66 | #> ! warning.R: Completed with warnings. See log html.
 67 | ```
 68 | 
 69 | Here we are specifying that `run()` can use up to two simultaneous
 70 | workers to execute the scripts, meaning that they will be executed in
 71 | parallel.
 72 | 
 73 | When using `run()` the following files are created:
 74 | 
 75 | 1.  Creates a log in the same directory as the script with the names
 76 |     `{script_name}_log.html`. See
 77 |     [example_log.html](https://novonordisk-opensource.github.io/whirl/articles/example_log.html)
 78 |     for an example of a simple log.
 79 | 2.  Creates a summary log with the overall status of each script.
 80 |     Default path is `summary.html`. See
 81 |     [summary.html](https://novonordisk-opensource.github.io/whirl/articles/summary.html)
 82 |     for an example of a summary of the same log as above.
 83 | 
 84 | Apart from this the function also returns a `tibble` with the status of
 85 | the script execution similar to the content of the summary above:
 86 | 
 87 | ``` r
 88 | print(result)
 89 | #> # A tibble: 2 × 6
 90 | #>      id tag    script                                status result       log_dir
 91 | #>   <dbl> <chr>  <chr>                                 <chr>  <list>       <chr>  
 92 | #> 1     1 Step 1 /private/var/folders/fx/71by3f551qzb… succe… <named list> /priva…
 93 | #> 2     2 Step 1 /private/var/folders/fx/71by3f551qzb… warni… <named list> /priva…
 94 | ```
 95 | 
 96 | ## Config files
 97 | 
 98 | `run()` also supports running scripts in several sequential steps. This
 99 | setup is very useful when your projects have several steps that depends
100 | on each others output, and thereby need to be executed in a specific
101 | order. The best way to implement this in your project is use a
102 | configuration file for whirl. The configuration file is a `yaml` file
103 | that specifies each steps:
104 | 
105 | `_whirl.yaml:`
106 | 
107 | ``` yaml
108 | steps:
109 |   - name: "First step"
110 |     paths:
111 |       - "success.R"
112 |   - name: "Second step"
113 |     paths:
114 |       - "warning.R"
115 |       - "error.R"``
116 | ```
117 | 
118 | Here we are specifying that in the first step we run `succes.R`. And
119 | then when this step has been completed we continue to running the
120 | scripts in the second steps.
121 | 
122 | ``` r
123 | result <- run("_whirl.yaml", n_workers = 2)
124 | #> ✔ success.R: Completed succesfully. See log html.
125 | #> ! warning.R: Completed with warnings. See log html.
126 | #> ✖ error.R: Completed with errors. See log html.
127 | ```
128 | 
129 | ``` r
130 | print(result)
131 | #> # A tibble: 3 × 6
132 | #>      id tag         script                           status result       log_dir
133 | #>   <dbl> <chr>       <chr>                            <chr>  <list>       <chr>  
134 | #> 1     1 First step  /private/var/folders/fx/71by3f5… succe… <named list> /priva…
135 | #> 2     2 Second step /private/var/folders/fx/71by3f5… warni… <named list> /priva…
136 | #> 3     3 Second step /private/var/folders/fx/71by3f5… error  <named list> /priva…
137 | ```
138 | 
139 | ## Useful links
140 | 
141 | For more information about how to customize the the execution and the
142 | logging for your needs see the following:
143 | 
144 | - `run()`: For further information on how to call it.
145 | - `vignette("whirl")`: For a more in depth explanation, and more
146 |   advanced usage.
147 | - `vignette("articles/example")`: With a simple example, including the
148 |   created log.
149 | - `help("whirl-options")`: On how to change the default behavior of
150 |   whirl.
151 | - [NovoNordisk-OpenSource/R-packages](https://novonordisk-opensource.github.io/R-packages/)
152 |   for an overview of connector and other R packages published by Novo
153 |   Nordisk.
154 | 


--------------------------------------------------------------------------------
/R/log.R:
--------------------------------------------------------------------------------
  1 | #' Helper function to read in all temporary information to be used in the log
  2 | #' @noRd
  3 | read_info <- function(
  4 |   script,
  5 |   md,
  6 |   start,
  7 |   log,
  8 |   pkgs_used,
  9 |   session,
 10 |   environment,
 11 |   options,
 12 |   python_pip_list = NULL,
 13 |   python_new_status = NULL,
 14 |   python_old_status = NULL,
 15 |   track_files = FALSE
 16 | ) {
 17 |   info <- list(
 18 |     script = readRDS(script),
 19 |     status = get_status(md = md, start = start),
 20 |     files = log |>
 21 |       read_from_log(track_files = track_files) |>
 22 |       split_log(),
 23 |     session = read_session_info(
 24 |       file = session,
 25 |       pkgs_used = pkgs_used
 26 |     )
 27 |   )
 28 | 
 29 |   info$session$environment <- read_environment(environment)
 30 |   info$session$options <- read_options(options)
 31 | 
 32 |   if (!is.null(python_pip_list) && file.exists(python_pip_list)) {
 33 |     info$session$platform <- info$session$platform |>
 34 |       dplyr::bind_rows(
 35 |         data.frame(
 36 |           setting = "python",
 37 |           value = python_version()
 38 |         )
 39 |       )
 40 | 
 41 |     info$session$python <- read_python(
 42 |       old_status = python_old_status,
 43 |       new_status = python_new_status,
 44 |       pip_list = python_pip_list
 45 |     )
 46 | 
 47 |     info$session <-
 48 |       info$session[c("platform", "R", "python", "environment", "options")]
 49 |   }
 50 | 
 51 |   return(info)
 52 | }
 53 | 
 54 | #' Read and format session info output from `sessioninfo::session_info()`
 55 | #' @noRd
 56 | read_session_info <- function(file, pkgs_used) {
 57 |   info <- readRDS(file)
 58 |   pkgs_used <- readRDS(pkgs_used)
 59 | 
 60 |   platform <- info[["platform"]] |>
 61 |     unlist() |>
 62 |     tibble::enframe(name = "setting", value = "value")
 63 | 
 64 |   r_packages <- info[["packages"]] |>
 65 |     tibble::as_tibble() |>
 66 |     dplyr::mutate(
 67 |       package = .data$package,
 68 |       version = .data$loadedversion,
 69 |       attached = .data$attached,
 70 |       path = .data$loadedpath,
 71 |       date = vapply(
 72 |         X = .data$package,
 73 |         FUN = utils::packageDate,
 74 |         FUN.VALUE = Sys.Date(),
 75 |         USE.NAMES = FALSE
 76 |       ) |>
 77 |         as.Date(),
 78 |       source = source,
 79 |       url = vapply(
 80 |         X = .data$package,
 81 |         FUN = \(x) {
 82 |           utils::packageDescription(x)[["URL"]] |>
 83 |             dplyr::coalesce(NA_character_)
 84 |         },
 85 |         FUN.VALUE = character(1),
 86 |         USE.NAMES = FALSE
 87 |       ),
 88 |       directly_used = .data$package %in% pkgs_used[["Package"]],
 89 |       approved = check_approved(
 90 |         used = paste(.data$package, .data$version, sep = "@"),
 91 |         approved = zephyr::get_option("approved_packages")
 92 |       ),
 93 |     ) |>
 94 |     dplyr::select(
 95 |       "package",
 96 |       "version",
 97 |       "attached",
 98 |       "directly_used",
 99 |       "approved",
100 |       "path",
101 |       "date",
102 |       "source",
103 |       "url"
104 |     )
105 | 
106 |   list(
107 |     platform = platform,
108 |     R = r_packages
109 |   )
110 | }
111 | 
112 | #' Read and format list of environment variables from `Sys.getenv()`
113 | #' @noRd
114 | read_environment <- function(file) {
115 |   readRDS(file) |>
116 |     as.list() |>
117 |     unlist(recursive = FALSE) |>
118 |     tibble::enframe(name = "variable", value = "value") |>
119 |     dplyr::filter(
120 |       stringr::str_detect(
121 |         string = .data$variable,
122 |         pattern = paste0(
123 |           zephyr::get_option("environment_secrets", "whirl"),
124 |           collapse = "|"
125 |         ),
126 |         negate = TRUE
127 |       )
128 |     )
129 | }
130 | 
131 | #' Read and format options output from `options()`
132 | #' @noRd
133 | read_options <- function(file) {
134 |   readRDS(file) |>
135 |     tibble::enframe(name = "option", value = "value") |>
136 |     dplyr::filter(
137 |       !.data$option %in% "rl_word_breaks" # Removed due to breaking tables
138 |     )
139 | }
140 | 
141 | #' Retrieve python version and path
142 | #' @noRd
143 | python_version <- function() {
144 |   reticulate::py_config()[["version"]] |>
145 |     as.character() |>
146 |     paste("@", reticulate::py_config()[["python"]])
147 | }
148 | 
149 | #' Read and format python packages information from a JSON file
150 | #' JSON files created in `inst/documents/python_modules.py`.
151 | #' Pip list is created in `ìnst/documents/dummy.qmd`.
152 | #' @noRd
153 | read_python <- function(old_status, new_status, pip_list) {
154 |   old <- old_status |>
155 |     jsonlite::read_json() |>
156 |     lapply(FUN = unlist, use.names = FALSE)
157 | 
158 |   new <- new_status |>
159 |     jsonlite::read_json() |>
160 |     lapply(FUN = unlist, use.names = FALSE)
161 | 
162 |   pip <- pip_list |>
163 |     readRDS() |>
164 |     parse_pip_list()
165 | 
166 |   if (!nrow(pip)) {
167 |     return(
168 |       tibble::tibble(
169 |         package = character(0),
170 |         version = character(0),
171 |         directly_used = logical(0),
172 |         approved = logical(0),
173 |         path = character(0)
174 |       )
175 |     )
176 |   }
177 | 
178 |   pip |>
179 |     dplyr::filter(
180 |       .data$package %in%
181 |         c(
182 |           setdiff(new$namespaced, old$namespaced),
183 |           setdiff(new$loaded, old$loaded)
184 |         )
185 |     ) |>
186 |     dplyr::mutate(
187 |       directly_used = .data$package %in% new$namespaced,
188 |       approved = check_approved(
189 |         used = paste(.data$package, .data$version, sep = "@"),
190 |         approved = zephyr::get_option("approved_python_packages")
191 |       )
192 |     ) |>
193 |     dplyr::select("package", "version", "directly_used", "approved", "path")
194 | }
195 | 
196 | #' @noRd
197 | parse_pip_list <- function(x) {
198 |   x <- utils::read.table(text = x)
199 | 
200 |   nm <- tolower(x[1, ])
201 |   nm[which(nm == "location")] <- "path"
202 | 
203 |   names(x) <- nm
204 | 
205 |   x |>
206 |     utils::tail(-2) |>
207 |     tibble::as_tibble()
208 | }
209 | 


--------------------------------------------------------------------------------
/vignettes/whirl.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: "Execute Scripts"
  3 | output: rmarkdown::html_vignette
  4 | vignette: >
  5 |   %\VignetteIndexEntry{Execute Scripts}
  6 |   %\VignetteEngine{knitr::rmarkdown_notangle}
  7 |   %\VignetteEncoding{UTF-8}
  8 | ---
  9 | 
 10 | ```{r, include = FALSE}
 11 | knitr::opts_chunk$set(
 12 |   collapse = TRUE,
 13 |   comment = "#>",
 14 |   eval = FALSE
 15 | )
 16 | ```
 17 | 
 18 | # Introduction
 19 | 
 20 | The whirl package provides functionalities for executing scripts in batch while simultaneously getting logs from the individual executions. In the following sections, we will go through how to use the package.
 21 | 
 22 | # Ways to call the `run()` function
 23 | 
 24 | ## Single and multiple files, and wild cards
 25 | 
 26 | The `input` argument in the `run()` function can in the most simple case point to a single file for which an execution and log-generation is required. 
 27 | 
 28 | ```{r}
 29 | library(whirl)
 30 | run(input = "path/to/script.R")
 31 | ```
 32 | 
 33 | The `input` argument can also point to multiple files in a directory using wildcard (or globbing) patterns. 
 34 | In this case, all scripts in the directory will be executed in parallel and a log will be generated for each script.
 35 | The number of workers can be specified through the `n_workers` argument (default is set to one).
 36 | A summary log file will also be provided as a fast way to get an overview of the execution. 
 37 | The location of the summary file can be controlled with the `summary_file` argument.
 38 | 
 39 | ```{r}
 40 | # Execution of all R files in a specific directory
 41 | run(
 42 |   input = "path/to/directory/*.R",
 43 |   n_workers = 4,
 44 |   summary_file = "path/to/summary"
 45 | )
 46 | 
 47 | # Execution of all R files starting with "2_" in a specific directory
 48 | run(
 49 |   input = "path/to/directory/2_*.R",
 50 |   n_workers = 8,
 51 |   summary_file = "path/to/summary"
 52 | )
 53 | ```
 54 | 
 55 | More information on how the wildcards are interpreted see `Sys.glob()`.
 56 | 
 57 | It is also possible to provide a character vector of several paths (either single files or glob expression) that should be executed. Note that whenever the `input` argument in is supplied with a character vector (e.g. `c("path/to/script1.R", "path/to/script2.R")`) it assumes that these can be executed independently and in parallel. If the elements needs to be executed sequentially this can be achieved by using a `list()` instead (see below).
 58 | 
 59 | ## Using `list()` as input
 60 | 
 61 | If the scripts have to be executed in a specific order, the `input` argument can be supplied as a list. The scripts will then be executed in the order they are listed in the list, with scripts listed in the same element being executed in parallel (if `n_workers` > 1). 
 62 | 
 63 | ```{r}
 64 | # In the below example, script1.R and script2.R will be executed in parallel
 65 | run(
 66 |   input = c(
 67 |     "path/to/script1.R",
 68 |     "path/to/script2.R"
 69 |   ),
 70 |   n_workers = 2
 71 | )
 72 | 
 73 | # In the below example, script1.R and script2.R will be executed in parallel,
 74 | # and all R files in the directory will subsequently be executed in parallel
 75 | run(
 76 |   input = list(
 77 |     c("path/to/script1.R", "path/to/script2.R"),
 78 |     "path/to/directory/*.R"
 79 |   ),
 80 |   n_workers = 2
 81 | )
 82 | 
 83 | # In the below example, script1.R and script2.R will be executed in parallel,
 84 | # and subsequently script3.R and script4.R will be executed in parallel
 85 | run(
 86 |   input = list(
 87 |     c("path/to/script1.R", "path/to/script2.R"),
 88 |     c("path/to/script3.R", "path/to/script4.R")
 89 |   ),
 90 |   n_workers = 2
 91 | )
 92 | ```
 93 | 
 94 | The list can also be supplied with names list elements. 
 95 | This can be useful during execution as some of these 'name' will be printed to the console.
 96 | 
 97 | E.g.
 98 | ```{r}
 99 | run(
100 |   input = list(
101 |     list(
102 |       name = "Step 1",
103 |       paths = c("path/to/script1.R", "path/to/script2.R")
104 |     ),
105 |     list(
106 |       name = "Step 2",
107 |       paths = c("path/to/script3.R", "path/to/script4.R")
108 |     )
109 |   ),
110 |   n_workers = 2
111 | )
112 | ```
113 | 
114 | ## Using a configuration file as input
115 | 
116 | The execution order can also be pre-specified in a configuration file (config file for short). 
117 | The config file could have the following structure. 
118 | 
119 | ```yaml
120 | steps:
121 |   - name: "Step 1"    
122 |     paths:    
123 |       - "path/to/script1.R"
124 |       - "path/to/script2.R"
125 |   - name: "Step 2"  
126 |     paths:    
127 |       - "path/to/script3.R"
128 |       - "path/to/script4.R"
129 | ```
130 | 
131 | In this case, the `input` argument in the `run()` function should point to the config file. 
132 | Assuming the config file is called `config.yaml`, the execution can be initiated as follows:
133 | 
134 | ```{r}
135 | run(input = "path/to/config.yaml", n_workers = 4)
136 | ```
137 | 
138 | Each steps in the config file will be executed independently while scrips within each step will be executed in parallel using the number of workers specified with the `n_workers` argument.
139 | 
140 | # Adjusting the log directory
141 | ## How to use the `log_dir` argument to specify where to store the logs
142 | When executing `run()` the default is to store logs in the directory where the individual scripts are located. For example, if we apply `run()` to a a vector of scripts with the following paths `c(path/to/dir1/script1.R, path/to/dir2/script2.R)`, the log of script1.R and script2.R will be stored in **path/to/dir1** and **path/to/dir2**, respectively.
143 | 
144 | If the logs should be stored in a different directory, the `log_dir` argument can be used. 
145 | This argument can be supplied with a character string or a function. Note that in either case the directory that `log_dir` points to must exist before the execution is initiated.  
146 | 
147 | If the `log_dir` is supplied with a character pointing to a specific path the call could look like:
148 | 
149 | ```{r}
150 | run(input = "path/to/script.R", log_dir = "path/to/logs")
151 | ```
152 | 
153 | In this example the log of script.R will be stored in **path/to/logs**.  
154 | Note that if multiple scripts are executed and `log_dir` is a character to a path, then every log will be redirected to the same directory - in this case **path/to/logs**.
155 | 
156 | 
157 | If a more dynamic approach is needed the `log_dir` argument can also be supplied with a function that will be applied to the individual path of every script.  
158 | For example, if multiple script are executed and the logs needs to be stored in a sub-folder within the script directories this could be achieved by:
159 | 
160 | ```{r}
161 | run(
162 |   input = c("path/to/dir1/script1.R", "path/to/dir2/script2.R"),
163 |   log_dir = function(x) {
164 |     paste0(dirname(x), "/logs")
165 |   }
166 | )
167 | ```
168 | 
169 | In this example the log of script1.R will be stored in **path/to/dir1/logs** and the log of script2.R will be stored in **path/to/dir2/logs**.
170 | 
171 | Note that **x** refer to the path of the script that is being executed. 
172 | 


--------------------------------------------------------------------------------
/R/strace.R:
--------------------------------------------------------------------------------
  1 | #' Start strace
  2 | #' @param pid [integer] process id to attach strace onto
  3 | #' @param file [character] path to the file in which to store the strace log
  4 | #' @noRd
  5 | 
  6 | start_strace <- function(pid, file) {
  7 |   # Check OS first
  8 |   os_type <- Sys.info()["sysname"]
  9 |   if (os_type != "Linux") {
 10 |     cli::cli_abort(paste("strace does not support", os_type))
 11 |   }
 12 | 
 13 |   # Just construct and run the command directly
 14 |   tryCatch(
 15 |     expr = {
 16 |       cmd <- sprintf(
 17 |         "strace -f -q -ttt -T -e trace=all -s 256 -o %s -p %s -y",
 18 |         file,
 19 |         pid
 20 |       )
 21 |       system(cmd, wait = FALSE)
 22 |     },
 23 |     error = function(e) {
 24 |       if (grepl("permission denied|not permitted", tolower(e$message))) {
 25 |         cli::cli_abort(
 26 |           "strace cannot attach to process. This may be due to permission errors"
 27 |         )
 28 |       }
 29 | 
 30 |       if (
 31 |         grepl("not found|no such file|command not found", tolower(e$message))
 32 |       ) {
 33 |         cli::cli_abort("strace is not installed or not found in PATH")
 34 |       }
 35 | 
 36 |       cli::cli_abort(paste("Error:", e$message))
 37 |     }
 38 |   )
 39 | }
 40 | 
 41 | #' Get strace info ready for reporting
 42 | #'
 43 | #' @param path [character] path to the strace log
 44 | #' @param p_wd [character] path to the working directory used for the process
 45 | #' tracked in strace
 46 | #' @param strace_discards [character] keywords to use to discard files from
 47 | #' the info
 48 | #' @param types [character] which element(s) to report in the info. If not
 49 | #' found in strace, a dummy `data.frame` is inserted.
 50 | #' @return [list] of `data.frame`(s) of the relevant files for each type of info
 51 | #' @noRd
 52 | 
 53 | read_strace_info <- function(
 54 |   path,
 55 |   p_wd = dirname(path),
 56 |   strace_discards = character(),
 57 |   strace_keep = character(),
 58 |   types = c("read", "write", "delete")
 59 | ) {
 60 |   strace <- path |>
 61 |     read_strace(p_wd = p_wd) |>
 62 |     refine_strace(strace_discards = strace_discards, strace_keep = strace_keep)
 63 | 
 64 |   class(strace) <- c("whirl_log_info", class(strace))
 65 | 
 66 |   # Split in a tibble for each type of output
 67 | 
 68 |   strace <- split(strace[c("time", "file")], strace$type)
 69 | 
 70 |   # Add empty table for types not reported
 71 | 
 72 |   out <- vector(mode = "list", length = length(types)) |>
 73 |     rlang::set_names(types)
 74 | 
 75 |   out[names(strace)] <- strace
 76 | 
 77 |   i <- lapply(X = out, FUN = is.null) |>
 78 |     unlist() |>
 79 |     which()
 80 |   dummy <- tibble::tibble(file = "No files")
 81 |   class(dummy) <- c("whirl_log_info", class(dummy))
 82 |   out[i] <- list(dummy)
 83 | 
 84 |   return(out)
 85 | }
 86 | 
 87 | #' Read strace file
 88 | #'
 89 | #' @param path [character] path to the strace log
 90 | #' @param p_wd [character] path to the working directory used for the process
 91 | #' tracked in strace
 92 | #' @return [data.frame] with strace information where all files are reported
 93 | #' with their full path
 94 | #' @noRd
 95 | 
 96 | read_strace <- function(path, p_wd) {
 97 |   # Early return if file does not exist
 98 | 
 99 |   if (!file.exists(path)) {
100 |     return(
101 |       tibble::tibble(
102 |         seq = integer(),
103 |         time = as.POSIXct(character()),
104 |         file = character(),
105 |         type = character()
106 |       )
107 |     )
108 |   }
109 | 
110 |   # Read strace log
111 | 
112 |   strace <- readLines(con = path, warn = FALSE) |>
113 |     stringr::str_squish() |>
114 |     stringr::str_subset("openat|unlink|chdir") |>
115 |     stringr::str_subset(
116 |       pattern = "ENOENT \\(No such file or directory\\)|ENXIO \\(No such device or address\\)| ENOTDIR \\(Not a directory\\)", # nolint: line_length_linter
117 |       negate = TRUE
118 |     ) |>
119 |     stringr::str_subset(
120 |       "<unfinished \\.{3}>|<\\.{3} [a-zA-Z]+ resumed>",
121 |       negate = TRUE
122 |     )
123 | 
124 |   # Early return if no information
125 | 
126 |   if (length(strace) == 0) {
127 |     return(
128 |       tibble::tibble(
129 |         seq = integer(),
130 |         time = as.POSIXct(character()),
131 |         file = character(),
132 |         type = character()
133 |       )
134 |     )
135 |   }
136 | 
137 |   # Otherwise extract information
138 | 
139 |   strace_df <- strace |>
140 |     unglue::unglue_data(
141 |       patterns = list(
142 |         "{pid} {time} {funct}({keyword}<{dir}>, \"{path}\", {action}, {access}) = {result}<{result_dir}> <{duration}>", # nolint: line_length_linter
143 |         "{pid} {time} {funct}({keyword}<{dir}>, \"{path}\", {action}) = {result}<{result_dir}> <{duration}>", # nolint: line_length_linter
144 |         "{pid} {time} {funct}({keyword}<{dir}>, \"{path}\", {action}) = {result} <{duration}>", # nolint: line_length_linter
145 |         "{pid} {time} {funct}(\"{path}\") = {result} <{duration}>" # nolint: line_length_linter
146 |       )
147 |     ) |>
148 |     tibble::as_tibble() |>
149 |     dplyr::mutate(
150 |       seq = dplyr::row_number(),
151 |       pid = as.numeric(.data$pid),
152 |       time = as.POSIXct(as.numeric(.data$time), origin = "1970-01-01"),
153 |       result = as.numeric(.data$result),
154 |       duration = as.numeric(.data$duration),
155 |       type = dplyr::case_when(
156 |         .data$funct == "chdir" ~ "chdir",
157 |         stringr::str_detect(.data$funct, "unlink") ~ "delete",
158 |         .data$funct == "openat" &
159 |           stringr::str_detect(.data$action, "O_DIRECTORY") ~
160 |           "lookup",
161 |         .data$funct == "openat" & is.na(.data$access) ~ "read",
162 |         .data$funct == "openat" & !is.na(.data$access) ~ "write",
163 |       ),
164 |       wd = dplyr::case_when(
165 |         .data$type == "chdir" & .data$path == "." ~ p_wd,
166 |         .data$type == "chdir" ~ .data$path
167 |       ) |>
168 |         replace_na_with_last() |>
169 |         dplyr::coalesce(p_wd),
170 |       dir = dplyr::coalesce(.data$dir, .data$wd)
171 |     )
172 | 
173 |   # Full paths etc
174 | 
175 |   strace_df |>
176 |     dplyr::mutate(
177 |       file = dplyr::if_else(
178 |         stringr::str_detect(string = .data$path, pattern = "^/", negate = TRUE),
179 |         file.path(.data$dir, .data$path),
180 |         .data$path
181 |       )
182 |     ) |>
183 |     dplyr::filter(.data$type %in% c("read", "write", "delete")) |>
184 |     dplyr::select("seq", "time", "file", "type")
185 | }
186 | 
187 | #' refine strace output
188 | #'
189 | #' @param strace_df [data.frame] with strace information as returned from
190 | #' `read_strace`
191 | #' @param strace_discards [character] keywords to use to discard files from the
192 | #' info
193 | #' @return [data.frame] with strace information where discarded and duplicate
194 | #' files are removed
195 | #' @noRd
196 | 
197 | refine_strace <- function(
198 |   strace_df,
199 |   strace_discards = character(),
200 |   strace_keep = character()
201 | ) {
202 |   # Remove discards if provided
203 | 
204 |   if (length(strace_discards) && length(strace_keep)) {
205 |     strace_df <- strace_df |>
206 |       dplyr::filter(
207 |         stringr::str_detect(
208 |           string = .data$file,
209 |           pattern = paste0(strace_discards, collapse = "|"),
210 |           negate = TRUE
211 |         ) |
212 |           stringr::str_detect(
213 |             string = .data$file,
214 |             pattern = paste0(strace_keep, collapse = "|")
215 |           )
216 |       )
217 |   } else if (length(strace_discards)) {
218 |     strace_df <- strace_df |>
219 |       dplyr::filter(
220 |         stringr::str_detect(
221 |           string = .data$file,
222 |           pattern = paste0(strace_discards, collapse = "|"),
223 |           negate = TRUE
224 |         )
225 |       )
226 |   }
227 | 
228 |   # Derive net status (clean duplicate entries)
229 | 
230 |   strace_df |>
231 |     dplyr::filter(
232 |       # First read or write
233 |       .data$type %in%
234 |         c("read", "write") &
235 |         !duplicated(strace_df[c("file", "type")]) |
236 |         # Last delete
237 |         .data$type %in%
238 |           c("delete") &
239 |           !duplicated(strace_df[c("file", "type")], fromLast = TRUE)
240 |     ) |>
241 |     dplyr::group_by(.data$file) |>
242 |     dplyr::arrange(.data$file, .data$seq) |>
243 |     dplyr::filter(
244 |       # Remove reads from  a file created earlier
245 |       .data$type == "read" &
246 |         !cumsum(.data$type == "write") |
247 |         # Remove write when the file is deleted afterwards
248 |         .data$type == "write" & !cumsum(rev(.data$type) == "delete") |
249 |         # Remove delete when the file was created earlier, and not read before
250 |         # that creation
251 |         .data$type == "delete" &
252 |           (
253 |             !cumsum(.data$type == "write") |
254 |               utils::head(.data$type, 1) == "read"
255 |           )
256 |     ) |>
257 |     dplyr::ungroup() |>
258 |     dplyr::arrange(.data$seq, .data$file) |>
259 |     dplyr::select("time", "file", "type")
260 | }
261 | 


--------------------------------------------------------------------------------
/tests/testthat/test-strace.R:
--------------------------------------------------------------------------------
  1 | strace_info <- function(path = "strace.log") {
  2 |   read_strace_info(
  3 |     path = path,
  4 |     p_wd = getwd(),
  5 |     strace_discards = zephyr::get_option("track_files_discards", "whirl"),
  6 |     strace_keep = getwd()
  7 |   )
  8 | }
  9 | 
 10 | wait_for_condition <-  function(check_fn, timeout = 2, interval = 0.1, error_msg = NULL) {
 11 |   start_time <- Sys.time()
 12 | 
 13 |   while (as.numeric(Sys.time() - start_time) < timeout) {
 14 |     tryCatch({
 15 |       if (check_fn()) {
 16 |         return(TRUE)
 17 |       }
 18 |     }, error = function(e) {
 19 |       # Continue waiting if check_fn() fails
 20 |     })
 21 | 
 22 |     Sys.sleep(interval)
 23 |   }
 24 | 
 25 |   # If we get here, the condition was never met
 26 |   if (is.null(error_msg)) {
 27 |     error_msg <- paste("Condition not met within", timeout, "seconds")
 28 |   }
 29 | 
 30 |   stop(error_msg, call. = FALSE)
 31 | }
 32 | 
 33 | check_strace_pattern <-  function(pattern, operation, path = "strace.log") {
 34 |   function() {
 35 |     test <- strace_info(path = path)
 36 |     any(grepl(x = test[[operation]]$file, pattern = pattern))
 37 |   }
 38 | }
 39 | 
 40 | 
 41 | test_that("strace works", {
 42 |   skip_on_cran()
 43 |   skip_on_os(c("windows", "mac", "solaris"))
 44 | 
 45 |   withr::with_tempdir(
 46 |     code = {
 47 |       cat("this is a dummy file to check strace", file = "dummy.txt")
 48 | 
 49 |       p <- callr::r_session$new()
 50 |       start_strace(pid = p$get_pid(), file = file.path(getwd(), "strace.log"))
 51 | 
 52 |       # Wait for strace to initialize
 53 |       wait_for_condition(
 54 |         check_fn = function() file.exists("strace.log"),
 55 |         error_msg = "strace log file was not created"
 56 |       )
 57 | 
 58 |       # Test operations
 59 |       p$run(\() saveRDS(object = mtcars, file = "mtcars.rds"))
 60 |       wait_for_condition(
 61 |         check_fn = check_strace_pattern("mtcars.rds", "write"),
 62 |         error_msg = "mtcars.rds write operation not detected"
 63 |       ) |> testthat::expect_true()
 64 | 
 65 |       p$run(\() readLines("dummy.txt"))
 66 |       wait_for_condition(
 67 |         check_fn = check_strace_pattern("dummy.txt", "read"),
 68 |         error_msg = "dummy.txt read operation not detected"
 69 |       ) |> testthat::expect_true()
 70 | 
 71 |       p$run(\() file.remove("dummy.txt"))
 72 |       wait_for_condition(
 73 |         check_fn = check_strace_pattern("dummy.txt", "delete"),
 74 |         error_msg = "dummy.txt delete operation not detected"
 75 |       ) |> testthat::expect_true()
 76 | 
 77 |       p$kill()
 78 |     }
 79 |   )
 80 | })
 81 | test_that("strace fails gracefully OS error handling", {
 82 |   skip_on_os("linux")
 83 | 
 84 |   # Get current OS
 85 |   os_type <- Sys.info()["sysname"]
 86 | 
 87 |   # Test for unsupported OS (Windows or Darwin)
 88 |   expect_error(
 89 |     start_strace(1234, "output.txt"),
 90 |     paste("strace does not support", os_type)
 91 |   )
 92 | })
 93 | 
 94 | test_that("strace fails gracefully with mocking error handling", {
 95 |   # Define test cases with OS names and expected outcomes
 96 |   test_cases <- list(
 97 |     list(
 98 |       os = "Windows",
 99 |       expected = "error",
100 |       message = "strace does not support Windows",
101 |       system_mock = NULL # Not needed for error cases
102 |     ),
103 |     list(
104 |       os = "Darwin",
105 |       expected = "error",
106 |       message = "strace does not support Darwin",
107 |       system_mock = NULL # Not needed for error cases
108 |     ),
109 |     list(
110 |       os = "unknownOS",
111 |       expected = "error",
112 |       message = "strace does not support unknownOS",
113 |       system_mock = NULL # Not needed for error cases
114 |     ),
115 |     list(
116 |       os = "Linux",
117 |       expected = "success",
118 |       message = NULL,
119 |       system_mock = function(...) 0 # Return success (0) for system call
120 |     )
121 |   )
122 | 
123 |   for (case in test_cases) {
124 |     # Set up OS mock for this test case
125 |     local_mocked_bindings(
126 |       Sys.info = function() {
127 |         c(
128 |           sysname = case$os,
129 |           release = "10.0",
130 |           version = "10.0",
131 |           nodename = paste0("test-", tolower(case$os)),
132 |           machine = "x86_64",
133 |           login = "testuser",
134 |           user = "testuser",
135 |           effective_user = "testuser"
136 |         )
137 |       },
138 |       .package = "base"
139 |     )
140 | 
141 |     # Set up system mock if provided (for Linux case)
142 |     if (!is.null(case$system_mock)) {
143 |       local_mocked_bindings(
144 |         system = case$system_mock,
145 |         .package = "base"
146 |       )
147 |     }
148 | 
149 |     # Check expected outcome
150 |     if (case$expected == "error") {
151 |       expect_error(
152 |         start_strace(1234, "output.txt"),
153 |         case$message
154 |       )
155 |     } else if (case$expected == "success") {
156 |       expect_no_error(start_strace(1234, "output.txt"))
157 |     }
158 |   }
159 | })
160 | 
161 | test_that("strace fails during execution handling", {
162 |   # Mock Sys.info to return Linux
163 |   local_mocked_bindings(
164 |     Sys.info = function() {
165 |       c(
166 |         sysname = "Linux",
167 |         release = "5.4.0",
168 |         version = "5.4.0-generic",
169 |         nodename = "test-linux",
170 |         machine = "x86_64",
171 |         login = "testuser",
172 |         user = "testuser",
173 |         effective_user = "testuser"
174 |       )
175 |     },
176 |     .package = "base"
177 |   )
178 | 
179 |   # Define test cases: error message and expected error
180 |   test_cases <- list(
181 |     list(
182 |       error = "Operation not permitted",
183 |       expected = "strace cannot attach to process. This may be due to permission errors"
184 |     ),
185 |     list(
186 |       error = "command not found",
187 |       expected = "strace is not installed or not found in PATH"
188 |     ),
189 |     list(
190 |       error = "random_not_defined_error12345",
191 |       expected = "random_not_defined_error12345" # Original error should be propagated
192 |     )
193 |   )
194 | 
195 |   # Loop through each test case
196 |   for (case in test_cases) {
197 |     # Mock system to throw the specific error
198 |     local_mocked_bindings(
199 |       system = function(...) {
200 |         stop(case$error)
201 |       },
202 |       .package = "base"
203 |     )
204 | 
205 |     # Test the error handling
206 |     expect_error(
207 |       start_strace(1234, "output.txt"),
208 |       case$expected
209 |     )
210 |   }
211 | })
212 | 
213 | 
214 | test_that("refine_strace filters out discarded files", {
215 |   withr::with_tempdir(
216 |     code = {
217 |       # Test data with required seq column
218 |       test_df <- tibble::tibble(
219 |         seq = c(1, 2),
220 |         time = as.POSIXct(c("2023-01-01 10:00:01", "2023-01-01 10:00:02")),
221 |         file = c("/tmp/cache.txt", "/home/important.R"),
222 |         type = c("read", "write")
223 |       )
224 | 
225 |       # Test: only discards provided (triggers the else if branch)
226 |       result <- refine_strace(test_df,
227 |                               strace_discards = c("tmp"),
228 |                               strace_keep = character())
229 | 
230 |       expect_equal(result$file, "/home/important.R")
231 |       expect_equal(nrow(result), 1)
232 |     }
233 |   )
234 | })
235 | 
236 | test_that("read_strace returns empty tibble when file does not exist", {
237 |   withr::with_tempdir(
238 |     code = {
239 |       # Test with non-existent file path
240 |       result <- read_strace("nonexistent_file.log", getwd())
241 | 
242 |       # Verify empty tibble structure
243 |       expect_s3_class(result, "tbl_df")
244 |       expect_named(result, c("seq", "time", "file", "type"))
245 |       expect_equal(nrow(result), 0)
246 |       expect_equal(ncol(result), 4)
247 | 
248 |       # Verify column classes
249 |       expect_type(result$seq, "integer")
250 |       expect_s3_class(result$time, "POSIXct")
251 |       expect_type(result$file, "character")
252 |       expect_type(result$type, "character")
253 |     }
254 |   )
255 | })
256 | 
257 | test_that("read_strace returns empty tibble when file has no relevant content", {
258 |   withr::with_tempdir(
259 |     code = {
260 |       # Create a file with content that gets filtered out (no openat|unlink|chdir)
261 |       cat("some irrelevant log content\nanother line without the required patterns\nyet another line",
262 |           file = "empty_strace.log")
263 | 
264 |       strace_result <- read_strace("empty_strace.log", getwd())
265 | 
266 |       # Test the returned data frame structure
267 |       expect_true(tibble::is_tibble(strace_result))
268 |       expect_identical(colnames(strace_result), c("seq", "time", "file", "type"))
269 |       expect_true(nrow(strace_result) == 0)
270 |       expect_true(ncol(strace_result) == 4)
271 | 
272 |       # Test column data types
273 |       expect_true(is.integer(strace_result$seq))
274 |       expect_true(inherits(strace_result$time, "POSIXct"))
275 |       expect_true(is.character(strace_result$file))
276 |       expect_true(is.character(strace_result$type))
277 |     }
278 |   )
279 | })
280 | 


--------------------------------------------------------------------------------
/R/whirl_queue.R:
--------------------------------------------------------------------------------
  1 | #' Queue for continuous execution and logging of scripts
  2 | #' @description
  3 | #' Implementation of a queue for supporting the continuous execution and logging
  4 | #' of several scripts.
  5 | #' The queue can be used interactively, but is mainly designed to be the
  6 | #' internal backbone of the `run()` function.
  7 | #' When a queue has several workers, pushed scripts will be run in parallel.
  8 | #' @importFrom R6 R6Class
  9 | #' @noRd
 10 | 
 11 | whirl_queue <- R6::R6Class(
 12 |   classname = "whirl_queue",
 13 |   public = list(
 14 |     #' @inheritParams options_params
 15 |     #' @description Initialize the new whirl_queue
 16 |     #' @return A [whirl_queue] object
 17 |     initialize = \(
 18 |       # jscpd:ignore-start
 19 |       n_workers = zephyr::get_option("n_workers", "whirl"),
 20 |       check_renv = zephyr::get_option("check_renv", "whirl"),
 21 |       track_files = zephyr::get_option("track_files", "whirl"),
 22 |       out_formats = zephyr::get_option("out_formats", "whirl"),
 23 |       track_files_discards = zephyr::get_option(
 24 |         "track_files_discards",
 25 |         "whirl"
 26 |       ),
 27 |       track_files_keep = zephyr::get_option("track_files_keep", "whirl"),
 28 |       log_dir = zephyr::get_option("log_dir", "whirl")
 29 |       # jscpd:ignore-end
 30 |     ) {
 31 |       wq_initialise(
 32 |         self,
 33 |         private,
 34 |         n_workers,
 35 |         check_renv,
 36 |         track_files,
 37 |         out_formats,
 38 |         track_files_discards,
 39 |         track_files_keep,
 40 |         log_dir
 41 |       )
 42 |     },
 43 | 
 44 |     #' @description Push scripts to the queue
 45 |     #' @param scripts [character] Full paths for the scripts to be executed
 46 |     #' @param tag (optional) [character] Tag for the scripts to include in
 47 |     #' the queue
 48 |     #' @return [invisible] self
 49 |     push = \(scripts, tag = NA_character_) {
 50 |       wq_push(self, private, scripts, tag)
 51 |     },
 52 | 
 53 |     #' @description Push scripts in the queue without executing them.
 54 |     #' Utility to include skipped scripts in the final queue.
 55 |     #' @param scripts [character] Full paths for the scripts to be executed
 56 |     #' @param tag (optional) [character] Tag for the scripts to include in
 57 |     #' the queue
 58 |     #' @return [invisible] self
 59 |     skip = \(scripts, tag = NA_character_) {
 60 |       wq_skip(self, private, scripts, tag)
 61 |     },
 62 | 
 63 |     #' @description Poll the queue and start next steps if needed
 64 |     #' @param timeout [numeric] The timeout in milliseconds.
 65 |     #' Note it is only implemented approximately if more than one script is
 66 |     #' running simultaneously.
 67 |     #' @return [character] Status of all scripts queue
 68 |     poll = \(timeout) {
 69 |       wq_poll(self, private, timeout)
 70 |     },
 71 | 
 72 |     #' @description Wait for the queue to complete
 73 |     #' @param timeout [numeric] The timeout in milliseconds
 74 |     #' @return [invisible] self
 75 |     wait = \(timeout = -1) {
 76 |       wq_wait(self, private, timeout)
 77 |     },
 78 | 
 79 |     #' @description Run scripts using the queue.
 80 |     #' This is a wrapper around calling both push() and wait().
 81 |     #' @param scripts [character] with full paths for the scripts to be executed
 82 |     #' @param tag (optional) [character] Tag for the scripts to include in
 83 |     #' the queue
 84 |     #' @return [invisible] self
 85 |     run = \(scripts, tag = NA_character_) {
 86 |       wq_run(scripts, tag, self, private)
 87 |     },
 88 | 
 89 |     #' @description Print method displaying the current status of the queue
 90 |     #' @return [invisible] self
 91 |     print = \() {
 92 |       print(self$queue)
 93 |       return(invisible(self))
 94 |     }
 95 |   ),
 96 |   active = list(
 97 |     #' @field queue [tibble] Current status of the queue
 98 |     queue = \() {
 99 |       private$.queue
100 |     },
101 | 
102 |     #' @field workers [tibble] Current status of the workers
103 |     workers = \() {
104 |       private$.workers
105 |     },
106 | 
107 |     #' @field available_workers [integer] Which workers are available
108 |     available_workers = \() {
109 |       which(!self$workers$active)
110 |     },
111 | 
112 |     #' @field next_ids [integer] Which scripts are next in the queue
113 |     next_ids = \() {
114 |       self$queue$id[self$queue$status == "waiting"] |>
115 |         utils::head(length(self$available_workers))
116 |     },
117 | 
118 |     #' @field next_workers [integer] Which workers are next to be started
119 |     next_workers = \() {
120 |       self$available_workers |>
121 |         utils::head(length(self$next_ids))
122 |     }
123 |   ),
124 |   private = list(
125 |     .queue = NULL,
126 |     .workers = NULL,
127 |     .n_workers = NULL,
128 |     check_renv = NULL,
129 |     track_files = NULL,
130 |     out_formats = NULL,
131 |     track_files_discards = NULL,
132 |     track_files_keep = NULL,
133 |     log_dir = NULL,
134 |     progress_bar = NULL
135 |   )
136 | )
137 | 
138 | wq_initialise <- function(
139 |   self,
140 |   private,
141 |   n_workers,
142 |   check_renv,
143 |   track_files,
144 |   out_formats,
145 |   track_files_discards,
146 |   track_files_keep,
147 |   log_dir
148 | ) {
149 |   private$check_renv <- check_renv
150 |   private$track_files <- track_files
151 |   private$out_formats <- out_formats
152 |   private$track_files_discards <- track_files_discards
153 |   private$track_files_keep <- track_files_keep
154 |   private$log_dir <- log_dir
155 | 
156 |   private$.queue <- tibble::tibble(
157 |     id = numeric(),
158 |     tag = character(),
159 |     script = character(),
160 |     status = character(),
161 |     result = list(),
162 |     log_dir = character()
163 |   )
164 | 
165 |   private$.workers <- tibble::tibble(
166 |     id = seq_len(n_workers),
167 |     session = vector(mode = "list", length = n_workers),
168 |     id_script = numeric(n_workers),
169 |     step = numeric(n_workers),
170 |     active = FALSE
171 |   )
172 | }
173 | 
174 | wq_add_queue <- function(self, private, scripts, tag, status) {
175 |   # Adding the log directory to the queue
176 |   if (is.character(private$log_dir)) {
177 |     # Check if the directory exists
178 |     if (!file.exists(private$log_dir)) {
179 |       cli::cli_abort(
180 |         "Logs cannot be saved because {.val {private$log_dir}} does not exist"
181 |       )
182 |     }
183 |     folder <- file.path(private$log_dir)
184 |   } else {
185 |     folder <- private$log_dir(scripts)
186 |     # Check if the directory exists
187 |     unique_folders <- unique(folder)
188 |     if (any(!file.exists(unique_folders))) {
189 |       missing <- unique_folders[!file.exists(unique_folders)] # nolint: object_usage_linter
190 |       cli::cli_abort(
191 |         "Logs cannot be saved because {.val {missing}} does not exist"
192 |       )
193 |     }
194 |   }
195 | 
196 |   private$.queue <- self$queue |>
197 |     tibble::add_row(
198 |       id = nrow(self$queue) + seq_along(scripts),
199 |       tag = tag,
200 |       script = scripts,
201 |       status = status,
202 |       log_dir = folder
203 |     )
204 |   return(invisible(self))
205 | }
206 | 
207 | wq_push <- function(self, private, scripts, tag) {
208 |   wq_add_queue(self, private, scripts, tag, status = "waiting")
209 | }
210 | 
211 | wq_skip <- function(self, private, scripts, tag) {
212 |   wq_add_queue(self, private, scripts, tag, status = "skipped")
213 | }
214 | 
215 | wq_poll <- function(
216 |   self,
217 |   private,
218 |   timeout
219 | ) {
220 |   # Start new sessions if there are available workers and waiting scripts in
221 |   # the queue
222 | 
223 |   if (length(self$next_ids)) {
224 |     nid <- self$next_ids
225 |     wid <- self$next_workers
226 |     private$.workers[["session"]][wid] <- replicate(
227 |       n = length(wid),
228 |       expr = whirl_r_session$new(
229 |         check_renv = private$check_renv,
230 |         track_files = private$track_files,
231 |         out_formats = private$out_formats,
232 |         track_files_discards = private$track_files_discards,
233 |         track_files_keep = private$track_files_keep,
234 |         log_dir = private$log_dir
235 |       ),
236 |       simplify = FALSE
237 |     )
238 |     private$.workers[wid, "id_script"] <- nid
239 |     private$.workers[wid, "active"] <- TRUE
240 |     private$.queue[nid, "status"] <- "running"
241 |   }
242 | 
243 |   # Check for active sessions that are idle and start the next step if needed
244 |   # When completed the session is stopped and the status in the queue is updated
245 | 
246 |   i_active <- which(private$.workers$active)
247 |   i_timeout <- round(timeout / length(i_active))
248 |   for (i in i_active) {
249 |     p <- private$.workers$session[[i]]$poll(timeout = i_timeout)
250 |     if (p == "ready") {
251 |       private$.workers$session[[i]]$check_status()
252 |     }
253 |     if (private$.workers$session[[i]]$get_state() == "idle") {
254 |       wq_next_step(self, private, i)
255 |     }
256 |   }
257 | 
258 |   return(self$queue$status)
259 | }
260 | 
261 | wq_wait <- function(self, private, timeout) {
262 |   start <- Sys.time()
263 |   timeout <- timeout / 1000 # Convert to secs
264 |   go <- TRUE
265 |   while (go) {
266 |     pb_update(id = private$progress_bar, queue = self$queue)
267 |     self$poll(50)
268 |     go <- any(self$queue$status %in% c("waiting", "running"))
269 |     if (timeout >= 0 && difftime(Sys.time(), start, units = "secs") > timeout) {
270 |       break
271 |     }
272 |   }
273 |   return(invisible(self))
274 | }
275 | 
276 | wq_next_step <- function(self, private, wid) {
277 |   private$.workers$step[[wid]] <- private$.workers$step[[wid]] + 1
278 |   id_script <- private$.workers$id_script[[wid]]
279 |   session <- private$.workers$session[[wid]]
280 | 
281 |   switch(
282 |     EXPR = private$.workers$step[[wid]],
283 | 
284 |     # Step 1: Log script
285 |     "1" = {
286 |       script <- private$.queue$script[[id_script]]
287 |       session$log_script(script)
288 |     },
289 |     # Step 2: Create log
290 |     "2" = {
291 |       session$create_log()
292 |     },
293 |     # Step 3: Finish log and create outputs
294 |     "3" = {
295 |       private$.queue$result[[id_script]] <-
296 |         session$log_finish(
297 |           out_dir = private$.queue$log_dir[[id_script]],
298 |           format = private$out_formats
299 |         )
300 |       # fmt: skip
301 |       private$.queue$status[[id_script]] <-
302 |         private$.queue$result[[id_script]]$status$message
303 | 
304 |       private$.workers$session[wid] <- list(NULL)
305 |       private$.workers$active[[wid]] <- FALSE
306 |       private$.workers$id_script[[wid]] <- 0
307 |       private$.workers$step[[wid]] <- 0
308 |     }
309 |   )
310 | 
311 |   return(invisible(wid))
312 | }
313 | 
314 | wq_run <- function(scripts, tag, self, private) {
315 |   private$progress_bar <- pb_start()
316 |   on.exit({
317 |     private$progress_bar <- pb_done(id = private$progress_bar)
318 |     gc() # finalizes used whirl_r_sessions - cleanup temp folders
319 |   })
320 |   self$push(scripts = scripts, tag = tag)$wait()
321 | }
322 | 


--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
  1 | Apache License
  2 | ==============
  3 | 
  4 | _Version 2.0, January 2004_
  5 | _&lt;<http://www.apache.org/licenses/>&gt;_
  6 | 
  7 | ### Terms and Conditions for use, reproduction, and distribution
  8 | 
  9 | #### 1. Definitions
 10 | 
 11 | “License” shall mean the terms and conditions for use, reproduction, and
 12 | distribution as defined by Sections 1 through 9 of this document.
 13 | 
 14 | “Licensor” shall mean the copyright owner or entity authorized by the copyright
 15 | owner that is granting the License.
 16 | 
 17 | “Legal Entity” shall mean the union of the acting entity and all other entities
 18 | that control, are controlled by, or are under common control with that entity.
 19 | For the purposes of this definition, “control” means **(i)** the power, direct or
 20 | indirect, to cause the direction or management of such entity, whether by
 21 | contract or otherwise, or **(ii)** ownership of fifty percent (50%) or more of the
 22 | outstanding shares, or **(iii)** beneficial ownership of such entity.
 23 | 
 24 | “You” (or “Your”) shall mean an individual or Legal Entity exercising
 25 | permissions granted by this License.
 26 | 
 27 | “Source” form shall mean the preferred form for making modifications, including
 28 | but not limited to software source code, documentation source, and configuration
 29 | files.
 30 | 
 31 | “Object” form shall mean any form resulting from mechanical transformation or
 32 | translation of a Source form, including but not limited to compiled object code,
 33 | generated documentation, and conversions to other media types.
 34 | 
 35 | “Work” shall mean the work of authorship, whether in Source or Object form, made
 36 | available under the License, as indicated by a copyright notice that is included
 37 | in or attached to the work (an example is provided in the Appendix below).
 38 | 
 39 | “Derivative Works” shall mean any work, whether in Source or Object form, that
 40 | is based on (or derived from) the Work and for which the editorial revisions,
 41 | annotations, elaborations, or other modifications represent, as a whole, an
 42 | original work of authorship. For the purposes of this License, Derivative Works
 43 | shall not include works that remain separable from, or merely link (or bind by
 44 | name) to the interfaces of, the Work and Derivative Works thereof.
 45 | 
 46 | “Contribution” shall mean any work of authorship, including the original version
 47 | of the Work and any modifications or additions to that Work or Derivative Works
 48 | thereof, that is intentionally submitted to Licensor for inclusion in the Work
 49 | by the copyright owner or by an individual or Legal Entity authorized to submit
 50 | on behalf of the copyright owner. For the purposes of this definition,
 51 | “submitted” means any form of electronic, verbal, or written communication sent
 52 | to the Licensor or its representatives, including but not limited to
 53 | communication on electronic mailing lists, source code control systems, and
 54 | issue tracking systems that are managed by, or on behalf of, the Licensor for
 55 | the purpose of discussing and improving the Work, but excluding communication
 56 | that is conspicuously marked or otherwise designated in writing by the copyright
 57 | owner as “Not a Contribution.”
 58 | 
 59 | “Contributor” shall mean Licensor and any individual or Legal Entity on behalf
 60 | of whom a Contribution has been received by Licensor and subsequently
 61 | incorporated within the Work.
 62 | 
 63 | #### 2. Grant of Copyright License
 64 | 
 65 | Subject to the terms and conditions of this License, each Contributor hereby
 66 | grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free,
 67 | irrevocable copyright license to reproduce, prepare Derivative Works of,
 68 | publicly display, publicly perform, sublicense, and distribute the Work and such
 69 | Derivative Works in Source or Object form.
 70 | 
 71 | #### 3. Grant of Patent License
 72 | 
 73 | Subject to the terms and conditions of this License, each Contributor hereby
 74 | grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free,
 75 | irrevocable (except as stated in this section) patent license to make, have
 76 | made, use, offer to sell, sell, import, and otherwise transfer the Work, where
 77 | such license applies only to those patent claims licensable by such Contributor
 78 | that are necessarily infringed by their Contribution(s) alone or by combination
 79 | of their Contribution(s) with the Work to which such Contribution(s) was
 80 | submitted. If You institute patent litigation against any entity (including a
 81 | cross-claim or counterclaim in a lawsuit) alleging that the Work or a
 82 | Contribution incorporated within the Work constitutes direct or contributory
 83 | patent infringement, then any patent licenses granted to You under this License
 84 | for that Work shall terminate as of the date such litigation is filed.
 85 | 
 86 | #### 4. Redistribution
 87 | 
 88 | You may reproduce and distribute copies of the Work or Derivative Works thereof
 89 | in any medium, with or without modifications, and in Source or Object form,
 90 | provided that You meet the following conditions:
 91 | 
 92 | * **(a)** You must give any other recipients of the Work or Derivative Works a copy of
 93 | this License; and
 94 | * **(b)** You must cause any modified files to carry prominent notices stating that You
 95 | changed the files; and
 96 | * **(c)** You must retain, in the Source form of any Derivative Works that You distribute,
 97 | all copyright, patent, trademark, and attribution notices from the Source form
 98 | of the Work, excluding those notices that do not pertain to any part of the
 99 | Derivative Works; and
100 | * **(d)** If the Work includes a “NOTICE” text file as part of its distribution, then any
101 | Derivative Works that You distribute must include a readable copy of the
102 | attribution notices contained within such NOTICE file, excluding those notices
103 | that do not pertain to any part of the Derivative Works, in at least one of the
104 | following places: within a NOTICE text file distributed as part of the
105 | Derivative Works; within the Source form or documentation, if provided along
106 | with the Derivative Works; or, within a display generated by the Derivative
107 | Works, if and wherever such third-party notices normally appear. The contents of
108 | the NOTICE file are for informational purposes only and do not modify the
109 | License. You may add Your own attribution notices within Derivative Works that
110 | You distribute, alongside or as an addendum to the NOTICE text from the Work,
111 | provided that such additional attribution notices cannot be construed as
112 | modifying the License.
113 | 
114 | You may add Your own copyright statement to Your modifications and may provide
115 | additional or different license terms and conditions for use, reproduction, or
116 | distribution of Your modifications, or for any such Derivative Works as a whole,
117 | provided Your use, reproduction, and distribution of the Work otherwise complies
118 | with the conditions stated in this License.
119 | 
120 | #### 5. Submission of Contributions
121 | 
122 | Unless You explicitly state otherwise, any Contribution intentionally submitted
123 | for inclusion in the Work by You to the Licensor shall be under the terms and
124 | conditions of this License, without any additional terms or conditions.
125 | Notwithstanding the above, nothing herein shall supersede or modify the terms of
126 | any separate license agreement you may have executed with Licensor regarding
127 | such Contributions.
128 | 
129 | #### 6. Trademarks
130 | 
131 | This License does not grant permission to use the trade names, trademarks,
132 | service marks, or product names of the Licensor, except as required for
133 | reasonable and customary use in describing the origin of the Work and
134 | reproducing the content of the NOTICE file.
135 | 
136 | #### 7. Disclaimer of Warranty
137 | 
138 | Unless required by applicable law or agreed to in writing, Licensor provides the
139 | Work (and each Contributor provides its Contributions) on an “AS IS” BASIS,
140 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied,
141 | including, without limitation, any warranties or conditions of TITLE,
142 | NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are
143 | solely responsible for determining the appropriateness of using or
144 | redistributing the Work and assume any risks associated with Your exercise of
145 | permissions under this License.
146 | 
147 | #### 8. Limitation of Liability
148 | 
149 | In no event and under no legal theory, whether in tort (including negligence),
150 | contract, or otherwise, unless required by applicable law (such as deliberate
151 | and grossly negligent acts) or agreed to in writing, shall any Contributor be
152 | liable to You for damages, including any direct, indirect, special, incidental,
153 | or consequential damages of any character arising as a result of this License or
154 | out of the use or inability to use the Work (including but not limited to
155 | damages for loss of goodwill, work stoppage, computer failure or malfunction, or
156 | any and all other commercial damages or losses), even if such Contributor has
157 | been advised of the possibility of such damages.
158 | 
159 | #### 9. Accepting Warranty or Additional Liability
160 | 
161 | While redistributing the Work or Derivative Works thereof, You may choose to
162 | offer, and charge a fee for, acceptance of support, warranty, indemnity, or
163 | other liability obligations and/or rights consistent with this License. However,
164 | in accepting such obligations, You may act only on Your own behalf and on Your
165 | sole responsibility, not on behalf of any other Contributor, and only if You
166 | agree to indemnify, defend, and hold each Contributor harmless for any liability
167 | incurred by, or claims asserted against, such Contributor by reason of your
168 | accepting any such warranty or additional liability.
169 | 
170 | _END OF TERMS AND CONDITIONS_
171 | 
172 | ### APPENDIX: How to apply the Apache License to your work
173 | 
174 | To apply the Apache License to your work, attach the following boilerplate
175 | notice, with the fields enclosed by brackets `[]` replaced with your own
176 | identifying information. (Don't include the brackets!) The text should be
177 | enclosed in the appropriate comment syntax for the file format. We also
178 | recommend that a file or class name and description of purpose be included on
179 | the same “printed page” as the copyright notice for easier identification within
180 | third-party archives.
181 | 
182 |     Copyright 2025 Novo Nordisk A/S, Danish company registration no. 24256790
183 | 
184 |     Licensed under the Apache License, Version 2.0 (the "License");
185 |     you may not use this file except in compliance with the License.
186 |     You may obtain a copy of the License at
187 | 
188 |       http://www.apache.org/licenses/LICENSE-2.0
189 | 
190 |     Unless required by applicable law or agreed to in writing, software
191 |     distributed under the License is distributed on an "AS IS" BASIS,
192 |     WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
193 |     See the License for the specific language governing permissions and
194 |     limitations under the License.
195 | 


--------------------------------------------------------------------------------
/R/biocompute.R:
--------------------------------------------------------------------------------
  1 | #' Create biocompute logs
  2 | #'
  3 | #' @description
  4 | #' BioCompute is a standard for logs of programs for for Bioinformatics Computational Analyses.
  5 | #'
  6 | #' The BioCompute object is a `json` log that can be created based on the output of `run()`.
  7 | #'
  8 | #' @details
  9 | #' The object consists of the following domains:
 10 | #'
 11 | #' * **Specifications**:
 12 | #'   * *spec_version*: Version of BioCompute used (`https://w3id.org/biocompute/1.3.0/``)
 13 | #'   * *object_id*: Unique project id
 14 | #'   * *type*: Your project type
 15 | #'   * *etag*: Your `etag` id from the BioCompute Object Portal
 16 | #'
 17 | #' * [Provenance Domain](https://wiki.biocomputeobject.org/index.php?title=Provenance-domain)
 18 | #'   * This is used to track the history of the BCO. Review and signatures go here.
 19 | #'
 20 | #' * [Usability Domain](https://wiki.biocomputeobject.org/index.php?title=Usability-domain)
 21 | #'   * This is used to improve searchability by allowing a free-text description of the BCO.
 22 | #'   * Provide external document.
 23 | #'
 24 | #' * [Extension Domain](https://wiki.biocomputeobject.org/index.php?title=Extension-domain)
 25 | #'   * This is used to add any additional structured information that is not directly covered by the BCO.
 26 | #'
 27 | #' * [Description Domain](https://wiki.biocomputeobject.org/index.php?title=Description-domain)
 28 | #'   * Contains a structured field for the description of external references, the pipeline steps,
 29 | #' and the relationship of I/O objects.
 30 | #'   * Provide external document.
 31 | #'   * **Note**: Use of `keywords` and `External_Reference` entries are not yet implemented.
 32 | #' To use fill out the entries manually after creating the BioCompute object.`
 33 | #'
 34 | #' * [Execution Domain](https://wiki.biocomputeobject.org/index.php?title=Execution-domain)
 35 | #'   * Contains fields for the execution of the BCO.
 36 | #'   * **Note**: Use of `external_data_endpoints` not implemented. Fill out manually afterwards if needed.
 37 | #'
 38 | #'* [Parametric Domain](https://wiki.biocomputeobject.org/index.php?title=Parametric-domain)
 39 | #'   * Represents the list of parameters customizing the computational flow which can affect
 40 | #' the output of the calculations.
 41 | #'
 42 | #' * [IO Domain](https://wiki.biocomputeobject.org/index.php?title=Iodomain)
 43 | #'   * Represents the list of global input and output files created by the computational workflow.
 44 | #'
 45 | #' * [Error Domain](https://wiki.biocomputeobject.org/index.php?title=Error-domain)
 46 | #'   * Defines the empirical and algorithmic limits and error sources of the BCO.
 47 | #'   * **Note**: Use of this domain is not clearly defined.
 48 | #' It is therefore always left empty in the current implementation.
 49 | #' If you want to add content do so manually after creating the BCO.
 50 | #'
 51 | #' See the [BioCompute Object Portal](https://www.biocomputeobject.org) and the
 52 | #' [BioCompute Objects Wiki](https://wiki.biocomputeobject.org/Main_Page) for more information.
 53 | #'
 54 | #' @param queue Result from `run()`.
 55 | #' @param path A character string specifying the file path to write BioCompute log to.
 56 | #' @param ... Additional arguments parsed to `jsonlite::write_json()`. Note always uses `auto_unbox = TRUE`.
 57 | #' @return (`invisible`) `list` of the biocompute domains and their content.
 58 | #' @export
 59 | write_biocompute <- function(
 60 |   queue = run("_whirl.yml"),
 61 |   path = "bco.json",
 62 |   ...
 63 | ) {
 64 |   config <- attr(queue, "whirl_input")
 65 | 
 66 |   if (is.null(config)) {
 67 |     cli::cli_abort("The `queue` must be created with `whirl::run()`")
 68 |   } else if (
 69 |     !rlang::is_string(config) ||
 70 |       !get_file_ext(config) %in% c("yml", "yaml") ||
 71 |       is.null(yaml::read_yaml(config)[["biocompute"]])
 72 |   ) {
 73 |     cli::cli_abort(
 74 |       "Input to `run()` must be a path to a yaml file with a biocompute entry. See `?use_biocompute()`."
 75 |     )
 76 |   }
 77 | 
 78 |   bco <- create_biocompute(queue = queue, config = config)
 79 | 
 80 |   jsonlite::write_json(x = bco, path = path, auto_unbox = TRUE, ...)
 81 | 
 82 |   invisible(bco)
 83 | }
 84 | 
 85 | #' @noRd
 86 | create_biocompute <- function(queue, config) {
 87 |   metadata <- yaml::read_yaml(config)
 88 | 
 89 |   list(
 90 |     object_id = metadata[["biocompute"]][["object_id"]] |>
 91 |       get_single_unique(),
 92 |     spec_version = metadata[["biocompute"]][["spec_version"]] |>
 93 |       get_single_unique(),
 94 |     etag = metadata[["biocompute"]][["etag"]] |>
 95 |       get_single_unique(),
 96 |     provenance_domain = NULL,
 97 |     usability_domain = metadata[["biocompute"]][["usability"]] |>
 98 |       get_single_unique(),
 99 |     extension_domain = metadata[["biocompute"]][["extension"]] |>
100 |       get_single_unique(),
101 |     description_domain = create_description_domain(queue),
102 |     execution_domain = create_execution_domain(queue),
103 |     parametric_domain = create_parametrics_domain(metadata, dirname(config)),
104 |     io_domain = create_io_domain(queue),
105 |     error_domain = list(
106 |       algorithmic_error = NULL,
107 |       empirical_error = NULL
108 |     )
109 |   )
110 | }
111 | 
112 | # DESCRIPTION DOMAIN
113 | #' @noRd
114 | create_description_domain <- function(queue) {
115 |   pipeline_steps <- queue |>
116 |     dplyr::mutate(
117 |       name = .data$script |>
118 |         basename() |>
119 |         sub(pattern = "\\.\\w+$", replacement = "") |>
120 |         gsub(pattern = "[-_]", replacement = " "),
121 |       step_number = .data$id,
122 |       version = .data$result |>
123 |         purrr::map_chr(c("script", "md5sum")),  # Devskim: ignore DS126858
124 |       description = .data$tag,
125 |       prerequisite = .data$result |>
126 |         purrr::map(c("session", "R")) |>
127 |         purrr::map(.f = \(x) {
128 |           x |>
129 |             dplyr::mutate(
130 |               name = paste(
131 |                 "R package:",
132 |                 .data$package,
133 |                 "- version:",
134 |                 .data$version
135 |               ),
136 |               uri = .data$url
137 |             ) |>
138 |             dplyr::select("name", "uri") |>
139 |             purrr::pmap(.f = list)
140 |         }),
141 |       input_list = .data$result |>
142 |         purrr::map(c("files", "read")) |>
143 |         purrr::map(.f = bco_file_format),
144 |       output_list = .data$result |>
145 |         purrr::map(c("files", "write")) |>
146 |         purrr::map(.f = bco_file_format)
147 |     ) |>
148 |     dplyr::select(
149 |       "name",
150 |       "step_number",
151 |       "version",
152 |       "description",
153 |       "prerequisite",
154 |       "input_list",
155 |       "output_list"
156 |     )
157 | 
158 |   description_domain <- list(
159 |     keywords = list(),
160 |     External_Reference = list(),
161 |     pipeline_steps = purrr::pmap(.l = pipeline_steps, .f = list)
162 |   )
163 | 
164 |   return(description_domain)
165 | }
166 | 
167 | #' @noRd
168 | bco_file_format <- function(x) {
169 |   if (is.null(x)) {
170 |     return(
171 |       dplyr::tibble(
172 |         filename = character(0),
173 |         uri = character(0),
174 |         access_time = character(0)
175 |       )
176 |     )
177 |   }
178 |   x |>
179 |     dplyr::mutate(
180 |       filename = basename(.data$file),
181 |       uri = .data$file,
182 |       access_time = format(.data$time, format = "%Y-%m-%d %H:%M:%S %Z")
183 |     ) |>
184 |     dplyr::select("filename", "uri", "access_time") |>
185 |     purrr::pmap(.f = list)
186 | }
187 | 
188 | #' @noRd
189 | get_single_unique <- function(x) {
190 |   x <- x |>
191 |     unlist() |>
192 |     unique()
193 | 
194 |   stopifnot(length(x) == 1)
195 | 
196 |   return(x)
197 | }
198 | 
199 | #' @noRd
200 | get_unique_values <- function(x) {
201 |   split(x, names(x)) |>
202 |     lapply(\(x) {
203 |       x |>
204 |         unlist() |>
205 |         unique() |>
206 |         paste(collapse = ";")
207 |     })
208 | }
209 | 
210 | #' @noRd
211 | create_execution_domain <- function(queue) {
212 |   envvars <- queue$result |>
213 |     purrr::map(c("session", "environment")) |>
214 |     purrr::list_rbind() |>
215 |     dplyr::distinct() |>
216 |     dplyr::arrange("variable")
217 | 
218 |   platform <- queue$result |>
219 |     purrr::map(c("session", "platform")) |>
220 |     purrr::list_rbind() |>
221 |     dplyr::distinct() |>
222 |     dplyr::filter(.data$setting %in% c("version", "pandoc", "quarto"))
223 | 
224 |   platform <- split(x = platform$value, f = platform$setting)
225 | 
226 |   software_prerequisites <- list(
227 |     list(
228 |       name = "R",
229 |       version = sub(
230 |         pattern = "R version ([0-9]+\\.[0-9]+\\.[0-9]+).*",
231 |         replacement = "\\1",
232 |         x = get_single_unique(platform$version)
233 |       ),
234 |       URI = "https://www.r-project.org/"
235 |     ),
236 |     list(
237 |       name = "quarto",
238 |       version = get_single_unique(platform$quarto),
239 |       URI = "https://quarto.org"
240 |     ),
241 |     list(
242 |       name = "pandoc",
243 |       version = get_single_unique(platform$pandoc),
244 |       URI = "https://pandoc.org/"
245 |     )
246 |   )
247 | 
248 |   packages <- queue$result |>
249 |     purrr::map(c("session", "R")) |>
250 |     purrr::list_rbind() |>
251 |     dplyr::distinct() |>
252 |     dplyr::arrange(.data$package) |>
253 |     dplyr::mutate(
254 |       name = .data$package,
255 |       version = .data$version,
256 |       uri = .data$url
257 |     ) |>
258 |     dplyr::select("name", "version", "uri") |>
259 |     purrr::pmap(list)
260 | 
261 |   software_prerequisites <- append(software_prerequisites, packages)
262 | 
263 |   execution_domain <- list(
264 |     script = queue$script,
265 |     script_driver = get_single_unique(platform$version),
266 |     software_prerequisites = software_prerequisites,
267 |     external_data_endpoints = list(),
268 |     environment_variables = stats::setNames(envvars$value, envvars$variable) |>
269 |       as.list() |>
270 |       get_unique_values()
271 |   )
272 | 
273 |   return(execution_domain)
274 | }
275 | 
276 | #' @noRd
277 | create_parametrics_domain <- function(config, base_path) {
278 |   parametric_domain <- list()
279 |   step_number <- 0
280 |   for (step in config$steps) {
281 |     if (!("parameter_files" %in% names(step))) {
282 |       step_number <- step_number + 1
283 |       next
284 |     }
285 | 
286 |     for (parameter_file in step$parameter_files) {
287 |       parameters <- yaml::read_yaml(normalize_with_base(
288 |         parameter_file,
289 |         base_path
290 |       ))
291 | 
292 |       parametric_domain <- append(
293 |         parametric_domain,
294 |         purrr::map2(
295 |           parameters,
296 |           names(parameters),
297 |           \(x, y) list(param = y, value = x, step = step_number)
298 |         ) |>
299 |           unname()
300 |       )
301 |     }
302 |     step_number <- step_number + 1
303 |   }
304 |   return(parametric_domain)
305 | }
306 | 
307 | #' @noRd
308 | bco_create_outputs <- function(files) {
309 |   entry <- vector(mode = "list", length = length(files))
310 |   for (i in seq_along(entry)) {
311 |     entry[[i]]$mediatype <- if (grepl("\\.html$", files[[i]])) {
312 |       "text/html"
313 |     } else if (grepl("\\.zip$", files[[i]])) {
314 |       "application/zip"
315 |     } else if (grepl("\\.csv$", files[[i]])) {
316 |       "text/csv"
317 |     } else if (grepl("\\.txt$", files[[i]])) {
318 |       "text/txt"
319 |     } else {
320 |       " "
321 |     }
322 | 
323 |     entry[[i]]$uri <- list(
324 |       filename = basename(files[[i]]),
325 |       uri = files[[i]]
326 |     )
327 |   }
328 | 
329 |   return(entry)
330 | }
331 | 
332 | #' @noRd
333 | bco_create_inputs <- function(files) {
334 |   entry <- vector(mode = "list", length = length(files))
335 |   for (i in seq_along(entry)) {
336 |     entry[[i]]$uri <- list(
337 |       uri = files[[i]]
338 |     )
339 |   }
340 | 
341 |   return(entry)
342 | }
343 | 
344 | #' @noRd
345 | create_io_domain <- function(queue) {
346 |   input <- queue$result |>
347 |     purrr::map(c("files", "read", "file")) |>
348 |     unlist() |>
349 |     unique() |>
350 |     bco_create_inputs()
351 | 
352 |   output <- queue$result |>
353 |     purrr::map(c("files", "write", "file")) |>
354 |     unlist() |>
355 |     unique() |>
356 |     bco_create_outputs()
357 | 
358 |   return(list(
359 |     input_subdomain = input,
360 |     output_subdomain = output
361 |   ))
362 | }
363 | 


--------------------------------------------------------------------------------