├── inst ├── documents │ ├── _quarto.yml │ ├── .gitignore │ ├── _environment │ ├── style.css │ ├── files.qmd │ ├── python.qmd │ ├── summary.qmd │ ├── title-block.html │ ├── python_modules.py │ ├── dummy.qmd │ └── log.qmd ├── use_whirl │ ├── _parametrics.yml │ ├── _whirl.yml │ └── _biocompute.yml ├── .gitignore ├── examples │ ├── error.R │ ├── success.R │ ├── _whirl.yaml │ ├── warning.R │ └── prg1.R ├── rstudio │ └── addins.dcf └── WORDLIST ├── .github ├── .gitignore ├── pull_request_template.md ├── actions │ └── setup │ │ └── action.yaml ├── ISSUE_TEMPLATE │ ├── feature_request.md │ └── bug_report.md └── workflows │ └── check_and_co.yaml ├── vignettes ├── .gitignore ├── articles │ └── example.Rmd └── whirl.Rmd ├── .gitattributes ├── tests ├── testthat │ ├── scripts │ │ ├── py_success.py │ │ ├── py_warning.py │ │ ├── _biocompute_parametrics.yml │ │ ├── error.R │ │ ├── _whirl_r_programs.yaml │ │ ├── _whirl_expression.yaml │ │ ├── render_error.R │ │ ├── success.R │ │ ├── _whirl_unnamed.yaml │ │ ├── _whirl.yaml │ │ ├── _whirl_to_config.yaml │ │ ├── biocompute.R │ │ ├── py_error.py │ │ ├── warning.R │ │ ├── py_dependencies.py │ │ ├── packages.R │ │ └── _whirl_biocompute.yaml │ ├── test-errors.R │ ├── _snaps │ │ ├── custom_logging.md │ │ ├── whirl_queue.md │ │ ├── examples.md │ │ └── biocompute.md │ ├── setup.R │ ├── test-render_summary.R │ ├── test-biocompute.R │ ├── test-internal_run.R │ ├── test-approvedpkgs.R │ ├── test-read_glob.R │ ├── test-use_whirl.R │ ├── test-util_queue_summary.R │ ├── test-whirl_queue.R │ ├── test-progress_bar.R │ ├── helper.R │ ├── test-examples.R │ ├── test-renv.R │ ├── test-whirl_r_session.R │ ├── test-mdformats.R │ ├── test-python.R │ ├── test-utils.R │ ├── test-custom_logging.R │ ├── test-enrich_input.R │ ├── test-run.R │ └── test-strace.R └── testthat.R ├── man ├── figures │ └── logo.png ├── use_whirl.Rd ├── use_biocompute.Rd ├── custom_logging.Rd ├── whirl-package.Rd ├── whirl-options-params.Rd ├── write_biocompute.Rd ├── run.Rd └── whirl-options.Rd ├── pkgdown └── favicon │ ├── favicon.ico │ ├── favicon-16x16.png │ ├── favicon-32x32.png │ ├── apple-touch-icon.png │ ├── apple-touch-icon-60x60.png │ ├── apple-touch-icon-76x76.png │ ├── apple-touch-icon-120x120.png │ ├── apple-touch-icon-152x152.png │ └── apple-touch-icon-180x180.png ├── R ├── whirl-package.R ├── approvedpkgs.R ├── quarto.R ├── run_current_script.R ├── progress_bar.R ├── util_queue_summary.R ├── mdformats.R ├── read_glob.R ├── internal_run.R ├── status.R ├── normalize_with_base.R ├── renv.R ├── use_whirl.R ├── enrich_input.R ├── utils.R ├── render_summary.R ├── custom_logging.R ├── whirl-options.R ├── run.R ├── log.R ├── strace.R ├── whirl_queue.R └── biocompute.R ├── _pkgdown.yml ├── NAMESPACE ├── .Rbuildignore ├── cran-comments.md ├── whirl.Rproj ├── .gitignore ├── DESCRIPTION ├── .pre-commit-config.yaml ├── NEWS.md ├── README.Rmd ├── README.md └── LICENSE.md /inst/documents/_quarto.yml: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.github/.gitignore: -------------------------------------------------------------------------------- 1 | *.html 2 | -------------------------------------------------------------------------------- /inst/documents/.gitignore: -------------------------------------------------------------------------------- 1 | /.quarto/ 2 | -------------------------------------------------------------------------------- /vignettes/.gitignore: -------------------------------------------------------------------------------- 1 | *.html 2 | *.R 3 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | tests/testthat/scripts/*.html linguist-generated -------------------------------------------------------------------------------- /tests/testthat/scripts/py_success.py: -------------------------------------------------------------------------------- 1 | a = 2 + 2 2 | print(a) 3 | -------------------------------------------------------------------------------- /inst/use_whirl/_parametrics.yml: -------------------------------------------------------------------------------- 1 | --- 2 | paramA: "text parameter" 3 | paramB: 2 4 | -------------------------------------------------------------------------------- /tests/testthat/scripts/py_warning.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | 3 | warnings.warn("test warning") 4 | -------------------------------------------------------------------------------- /inst/.gitignore: -------------------------------------------------------------------------------- 1 | *.png 2 | *.md 3 | *.html 4 | *.json 5 | *_files/* 6 | !documents/title-block.html 7 | -------------------------------------------------------------------------------- /man/figures/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NovoNordisk-OpenSource/whirl/HEAD/man/figures/logo.png -------------------------------------------------------------------------------- /inst/examples/error.R: -------------------------------------------------------------------------------- 1 | 2 | # This script produces error for example purposes 3 | 4 | stop("This is an error!") 5 | -------------------------------------------------------------------------------- /pkgdown/favicon/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NovoNordisk-OpenSource/whirl/HEAD/pkgdown/favicon/favicon.ico -------------------------------------------------------------------------------- /tests/testthat/scripts/_biocompute_parametrics.yml: -------------------------------------------------------------------------------- 1 | --- 2 | #### Path #### 3 | outputFilePath: "a.txt" 4 | otherStuff: 2 5 | -------------------------------------------------------------------------------- /tests/testthat/scripts/error.R: -------------------------------------------------------------------------------- 1 | # This script produces error for testing purposes 2 | 3 | stop("This is an error!") 4 | -------------------------------------------------------------------------------- /tests/testthat/scripts/_whirl_r_programs.yaml: -------------------------------------------------------------------------------- 1 | steps: 2 | - name: "Run all R programs" 3 | paths: 4 | - "*.R" 5 | -------------------------------------------------------------------------------- /pkgdown/favicon/favicon-16x16.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NovoNordisk-OpenSource/whirl/HEAD/pkgdown/favicon/favicon-16x16.png -------------------------------------------------------------------------------- /pkgdown/favicon/favicon-32x32.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NovoNordisk-OpenSource/whirl/HEAD/pkgdown/favicon/favicon-32x32.png -------------------------------------------------------------------------------- /pkgdown/favicon/apple-touch-icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NovoNordisk-OpenSource/whirl/HEAD/pkgdown/favicon/apple-touch-icon.png -------------------------------------------------------------------------------- /inst/documents/_environment: -------------------------------------------------------------------------------- 1 | # The _environment file is used to make sure the quarto processes 2 | # have the correct environment variables set 3 | -------------------------------------------------------------------------------- /pkgdown/favicon/apple-touch-icon-60x60.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NovoNordisk-OpenSource/whirl/HEAD/pkgdown/favicon/apple-touch-icon-60x60.png -------------------------------------------------------------------------------- /pkgdown/favicon/apple-touch-icon-76x76.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NovoNordisk-OpenSource/whirl/HEAD/pkgdown/favicon/apple-touch-icon-76x76.png -------------------------------------------------------------------------------- /tests/testthat/scripts/_whirl_expression.yaml: -------------------------------------------------------------------------------- 1 | steps: 2 | - name: !expr "paste('Today is', Sys.Date())" 3 | paths: 4 | - "success.R" 5 | -------------------------------------------------------------------------------- /pkgdown/favicon/apple-touch-icon-120x120.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NovoNordisk-OpenSource/whirl/HEAD/pkgdown/favicon/apple-touch-icon-120x120.png -------------------------------------------------------------------------------- /pkgdown/favicon/apple-touch-icon-152x152.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NovoNordisk-OpenSource/whirl/HEAD/pkgdown/favicon/apple-touch-icon-152x152.png -------------------------------------------------------------------------------- /pkgdown/favicon/apple-touch-icon-180x180.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NovoNordisk-OpenSource/whirl/HEAD/pkgdown/favicon/apple-touch-icon-180x180.png -------------------------------------------------------------------------------- /inst/examples/success.R: -------------------------------------------------------------------------------- 1 | 2 | # This script produces no errors or warnings for example purposes 3 | 4 | message("this script has no errors or warnings") 5 | -------------------------------------------------------------------------------- /tests/testthat/scripts/render_error.R: -------------------------------------------------------------------------------- 1 | # This script will always error in the logging process due to 2 | # Quarto params being deleted. 3 | rm(list = ls()) 4 | -------------------------------------------------------------------------------- /tests/testthat/scripts/success.R: -------------------------------------------------------------------------------- 1 | # This script produces no errors or warnings for testing purposes 2 | 3 | message("this script has no errors or warnings") 4 | -------------------------------------------------------------------------------- /inst/examples/_whirl.yaml: -------------------------------------------------------------------------------- 1 | steps: 2 | - name: "First step" 3 | paths: 4 | - "success.R" 5 | - name: "Second step" 6 | paths: 7 | - "warning.R" 8 | - "error.R" 9 | -------------------------------------------------------------------------------- /tests/testthat/scripts/_whirl_unnamed.yaml: -------------------------------------------------------------------------------- 1 | steps: 2 | - name: "Named step" 3 | paths: 4 | - "success.R" 5 | - paths: 6 | - "warning.R" 7 | - paths: 8 | - "error.R" 9 | -------------------------------------------------------------------------------- /tests/testthat/scripts/_whirl.yaml: -------------------------------------------------------------------------------- 1 | steps: 2 | - name: "First step" 3 | paths: 4 | - "success.R" 5 | - name: "Second step" 6 | paths: 7 | - "warning.R" 8 | - "error.R" 9 | -------------------------------------------------------------------------------- /tests/testthat/scripts/_whirl_to_config.yaml: -------------------------------------------------------------------------------- 1 | steps: 2 | - name: "Run _whirl.yaml" 3 | paths: 4 | - "_whirl.yaml" 5 | - name: "Run _whirl_unnamed.yaml" 6 | paths: 7 | - "_whirl_unnamed.yaml" 8 | -------------------------------------------------------------------------------- /tests/testthat/scripts/biocompute.R: -------------------------------------------------------------------------------- 1 | 2 | library(whirl) 3 | 4 | log_read("my_project/params.yml") 5 | 6 | log_read("my_data/data.rds") 7 | 8 | log_write("my_output/output.txt") 9 | 10 | log_write("my_output/plot.png") 11 | -------------------------------------------------------------------------------- /R/whirl-package.R: -------------------------------------------------------------------------------- 1 | ## usethis namespace: start 2 | #' @importFrom dplyr .data 3 | #' @importFrom reticulate py 4 | #' @importFrom sessioninfo session_info 5 | ## usethis namespace: end 6 | NULL 7 | 8 | #' @keywords internal 9 | "_PACKAGE" 10 | -------------------------------------------------------------------------------- /tests/testthat/test-errors.R: -------------------------------------------------------------------------------- 1 | test_that("quarto error", { 2 | skip_if_no_quarto() 3 | 4 | test_script("render_error.R") |> 5 | run(summary_file = NULL) |> 6 | expect_error( 7 | regexp = "object 'params' not found" 8 | ) 9 | }) 10 | -------------------------------------------------------------------------------- /tests/testthat/scripts/py_error.py: -------------------------------------------------------------------------------- 1 | # mypy: disable-error-code="operator" 2 | 1 + "a" # pyright: ignore [reportOperatorIssue, reportUnusedExpression] 3 | 4 | raise TypeError("This is a type error for testing purposes") 5 | 6 | raise Exception("Error also for testing") 7 | -------------------------------------------------------------------------------- /inst/documents/style.css: -------------------------------------------------------------------------------- 1 | /* Container for Script section */ 2 | .script-container { 3 | background-color: #f8f9fa; 4 | padding: 20px; 5 | border-left: 4px solid #007bff; 6 | border-radius: 6px; 7 | margin: 15px 0; 8 | box-shadow: 0 2px 4px rgba(0,0,0,0.1); 9 | } 10 | -------------------------------------------------------------------------------- /inst/rstudio/addins.dcf: -------------------------------------------------------------------------------- 1 | Name: 📑 Run all 2 | Description: Run all scripts in the project and generate HTML logs 3 | Binding: run 4 | Interactive: false 5 | 6 | Name: 📝 Run current script 7 | Description: Run the current script and produce HTML log 8 | Binding: run_current_script 9 | Interactive: false 10 | -------------------------------------------------------------------------------- /tests/testthat/_snaps/custom_logging.md: -------------------------------------------------------------------------------- 1 | # stream to log file in a whirl context 2 | 3 | Code 4 | x 5 | Output 6 | time type file 7 | 1 2000-01-01 01:01:01 read test_read 8 | 2 2000-01-01 01:01:01 write test_write 9 | 3 2000-01-01 01:01:01 delete test_delete 10 | 11 | -------------------------------------------------------------------------------- /inst/use_whirl/_whirl.yml: -------------------------------------------------------------------------------- 1 | steps: 2 | - name: "Run single script" 3 | paths: 4 | - "path/to/script.R" 5 | - name: "Run multiple scripts in same step" 6 | paths: 7 | - "path/to/script_1.R" 8 | - "path/to/script_2.R" 9 | - name: "Run all R scripts in folder" 10 | paths: 11 | - "path/to/folder/*.R" 12 | -------------------------------------------------------------------------------- /inst/examples/warning.R: -------------------------------------------------------------------------------- 1 | 2 | # This script produces warnings for example purposes 3 | 4 | warning("this is a warning") 5 | warning("this is a warning 2 ^^") 6 | warning("this is a warning 3 ^^") 7 | warning("this is a warning 4 ^^") 8 | warning("this is a warning 5 ^^") 9 | warning("this is a warning 6 ^^") 10 | warning("this is a warning 7 ^^") 11 | -------------------------------------------------------------------------------- /tests/testthat/_snaps/whirl_queue.md: -------------------------------------------------------------------------------- 1 | # whirl_queue edge cases not covered in test-run etc. 2 | 3 | Code 4 | print(q) 5 | Output 6 | # A tibble: 1 x 6 7 | id tag script status result log_dir 8 | 9 | 1 1 skipped.R skipped . 10 | 11 | -------------------------------------------------------------------------------- /tests/testthat/scripts/warning.R: -------------------------------------------------------------------------------- 1 | # This script produces a warning for testing purposes 2 | 3 | warning("this is a warning") 4 | warning("this is a warning 2 ^^") 5 | warning("this is a warning 3 ^^") 6 | warning("this is a warning 4 ^^") 7 | warning("this is a warning 5 ^^") 8 | warning("this is a warning 6 ^^") 9 | warning("this is a warning 7 ^^") 10 | -------------------------------------------------------------------------------- /inst/examples/prg1.R: -------------------------------------------------------------------------------- 1 | #' Setup 2 | 3 | library(dplyr) 4 | library(ggplot2) 5 | 6 | #' Prepare data 7 | 8 | x <- mtcars |> 9 | as_tibble(rownames = "car") 10 | 11 | print(x) 12 | 13 | #' Create and save plot 14 | 15 | ggplot(data = x) + 16 | geom_point(mapping = aes(x = mpg, y = hp, size = wt, colour = as.factor(am))) 17 | 18 | ggsave("plot1.png") 19 | -------------------------------------------------------------------------------- /_pkgdown.yml: -------------------------------------------------------------------------------- 1 | url: https://novonordisk-opensource.github.io/whirl/ 2 | 3 | template: 4 | bootstrap: 5 5 | 6 | reference: 7 | - title: "Execute scripts" 8 | contents: 9 | - run 10 | - write_biocompute 11 | - use_whirl 12 | - use_biocompute 13 | - whirl-options 14 | 15 | - title: "Log custom actions" 16 | contents: 17 | - starts_with("log_") 18 | -------------------------------------------------------------------------------- /tests/testthat/setup.R: -------------------------------------------------------------------------------- 1 | ## whirl have to be installed for Quarto to use it 2 | 3 | withr::local_envvar( 4 | R_USER_CACHE_DIR = tempfile(), 5 | .local_envir = teardown_env() 6 | ) 7 | 8 | # Minimal prints to make it easier to read test output 9 | 10 | withr::local_options( 11 | list(whirl.verbosity_level = "quiet"), 12 | .local_envir = teardown_env() 13 | ) 14 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | export(log_delete) 4 | export(log_read) 5 | export(log_write) 6 | export(run) 7 | export(use_biocompute) 8 | export(use_whirl) 9 | export(write_biocompute) 10 | importFrom(R6,R6Class) 11 | importFrom(callr,r_session) 12 | importFrom(dplyr,.data) 13 | importFrom(reticulate,py) 14 | importFrom(sessioninfo,session_info) 15 | -------------------------------------------------------------------------------- /tests/testthat/test-render_summary.R: -------------------------------------------------------------------------------- 1 | test_that("warning when summary cant be created", { 2 | 3 | tempdir <- withr::local_tempdir() 4 | 5 | summary_file <- file.path(tempdir, "folder_not_exists", "summary.html") 6 | 7 | q <- whirl_queue$new()$skip("test.R") 8 | 9 | render_summary(input = q, summary_file = summary_file) |> 10 | expect_error() |> 11 | expect_warning() 12 | }) 13 | -------------------------------------------------------------------------------- /tests/testthat/scripts/py_dependencies.py: -------------------------------------------------------------------------------- 1 | import pandas as pd # noqa: E501 # pylint: disable=import-error # pyright: ignore[reportMissingImports] 2 | import numpy as np # noqa: E501 # pylint: disable=import-error # pyright: ignore[reportMissingImports] 3 | 4 | 5 | def main(): 6 | df = pd.DataFrame({ 7 | 'A': [np.sum([1, 2])] 8 | }) 9 | 10 | print(df) 11 | 12 | 13 | if __name__ == "__main__": 14 | main() 15 | -------------------------------------------------------------------------------- /tests/testthat.R: -------------------------------------------------------------------------------- 1 | # This file is part of the standard setup for testthat. 2 | # It is recommended that you do not modify it. 3 | # 4 | # Where should you do additional test configuration? 5 | # Learn more about the roles of various files in: 6 | # * https://r-pkgs.org/testing-design.html#sec-tests-files-overview 7 | # * https://testthat.r-lib.org/articles/special-files.html 8 | 9 | library(testthat) 10 | library(whirl) 11 | 12 | test_check("whirl") 13 | -------------------------------------------------------------------------------- /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^LICENSE\.md$ 2 | ^README\.Rmd$ 3 | ^\.Rproj\.user$ 4 | ^\.github$ 5 | ^\.lintr$ 6 | ^\.pre-commit-config\.yaml$ 7 | ^_pkgdown\.yml$ 8 | ^dev/ 9 | ^docs$ 10 | ^inst/examples/*\\.html$ 11 | ^inst/examples/*_files$ 12 | ^inst/output$ 13 | ^pkgdown$ 14 | ^vignettes/articles$ 15 | ^whirl.*\.tar\.gz$ 16 | ^whirl.*\.tgz$ 17 | ^whirl\.Rcheck$ 18 | ^whirl\.Rproj$ 19 | ^înst/WORDLIST$ 20 | plot1.png 21 | summary.html 22 | ^cran-comments\.md$ 23 | ^CRAN-SUBMISSION$ 24 | -------------------------------------------------------------------------------- /cran-comments.md: -------------------------------------------------------------------------------- 1 | ## R CMD check results 2 | 3 | 0 errors | 0 warnings | 1 note 4 | 5 | * Patch update improving the logs. 6 | * Removed `verbosity_level` argument to `run()` since it is now completely controlled 7 | by zephyr options (see `help("whirl-options")`). No reverse dependencies. 8 | * [Check result errors](https://cran.r-project.org/web/checks/check_results_whirl.html) 9 | should be fixed in [#197](https://github.com/NovoNordisk-OpenSource/whirl/pull/197) 10 | -------------------------------------------------------------------------------- /tests/testthat/scripts/packages.R: -------------------------------------------------------------------------------- 1 | 2 | # This script produces no errors or warnings for example purposes 3 | 4 | message("this script has no errors or warnings") 5 | 6 | #Library calls 7 | library(dplyr) 8 | 9 | for (package in c("reticulate", "rlang")) { 10 | library(package, character.only = TRUE) 11 | } 12 | 13 | 14 | #Load into namespace 15 | ggplot2::ggplot(mtcars, ggplot2::aes(mpg, cyl)) + 16 | ggplot2::geom_point() 17 | 18 | zephyr::msg("Test message with zephyr") 19 | -------------------------------------------------------------------------------- /tests/testthat/test-biocompute.R: -------------------------------------------------------------------------------- 1 | test_that("Biocompute object created correctly", { 2 | skip_if_no_quarto() 3 | 4 | input_yml <- test_script("_whirl_biocompute.yaml") 5 | queue <- whirl::run( 6 | input = input_yml, 7 | out_formats = NULL, 8 | summary_file = NULL 9 | ) 10 | 11 | bco_tmp <- withr::local_tempfile(fileext = ".json") 12 | 13 | bco <- write_biocompute(queue = queue, path = bco_tmp) |> 14 | expect_no_condition() 15 | 16 | expect_snapshot(str(bco$io_domain)) 17 | }) 18 | -------------------------------------------------------------------------------- /whirl.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: No 4 | SaveWorkspace: No 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | 15 | AutoAppendNewline: Yes 16 | StripTrailingWhitespace: Yes 17 | LineEndingConversion: Posix 18 | 19 | BuildType: Package 20 | PackageUseDevtools: Yes 21 | PackageInstallArgs: --no-multiarch --with-keep.source 22 | PackageRoxygenize: rd,collate,namespace 23 | -------------------------------------------------------------------------------- /tests/testthat/test-internal_run.R: -------------------------------------------------------------------------------- 1 | test_that("testing internal_run()", { 2 | skip_if_no_quarto() 3 | 4 | # A config file 5 | 6 | q <- whirl_queue$new(n_workers = 2) 7 | 8 | test_script("_whirl.yaml") |> 9 | internal_run(steps = NULL, level = 1, queue = q) |> 10 | expect_no_error() 11 | 12 | # A config file calling another config file 13 | 14 | q <- whirl_queue$new() 15 | 16 | test_script("_whirl_to_config.yaml") |> 17 | internal_run(steps = NULL, level = 1, queue = q) |> 18 | expect_no_error() 19 | }) 20 | -------------------------------------------------------------------------------- /tests/testthat/test-approvedpkgs.R: -------------------------------------------------------------------------------- 1 | test_that("Approved packages", { 2 | check_approved("pkg@version") |> 3 | expect_type("logical") |> 4 | expect_length(1) |> 5 | is.na() |> 6 | expect_true() 7 | 8 | check_approved(character(), "pkg@version") |> 9 | expect_type("logical") |> 10 | expect_length(0) 11 | 12 | check_approved( 13 | used = c("pkg1@1.0.0", "pkg2@5.6.7"), 14 | approved = c("pkg1@1.0.0", "pkg2@4.3.2") 15 | ) |> 16 | expect_type("logical") |> 17 | expect_equal(c(TRUE, FALSE)) 18 | }) 19 | -------------------------------------------------------------------------------- /tests/testthat/test-read_glob.R: -------------------------------------------------------------------------------- 1 | test_that("testing read_glob()", { 2 | # A single file 3 | test_script("success.R") |> 4 | read_glob() |> 5 | expect_equal(test_script("success.R")) 6 | 7 | # All R files in a directory 8 | test_script("") |> 9 | file.path("*.R") |> 10 | read_glob() |> 11 | expect_match("\\.R$") |> 12 | length() |> 13 | expect_gt(1) 14 | 15 | # Error when file does not exist 16 | test_script("") |> 17 | file.path("fake_program.R") |> 18 | read_glob() |> 19 | expect_message() 20 | }) 21 | -------------------------------------------------------------------------------- /inst/documents/files.qmd: -------------------------------------------------------------------------------- 1 | --- 2 | --- 3 | 4 | ```{r} 5 | #| echo: false 6 | no_files_if_null <- function(x) { 7 | if (!is.null(x) && nrow(x)) { 8 | return(x) 9 | } 10 | 11 | knitr::asis_output("No files") 12 | } 13 | ``` 14 | 15 | ## Input 16 | 17 | ```{r} 18 | #| echo: false 19 | no_files_if_null(result$files$read) 20 | ``` 21 | 22 | ## Output 23 | 24 | ```{r} 25 | #| echo: false 26 | no_files_if_null(result$files$write) 27 | ``` 28 | 29 | ## Removed 30 | 31 | ```{r} 32 | #| echo: false 33 | no_files_if_null(result$files$delete) 34 | ``` 35 | -------------------------------------------------------------------------------- /.github/pull_request_template.md: -------------------------------------------------------------------------------- 1 | ## Summary 2 | (Summary of made changes with explanation of what and why) 3 | 4 | ## Changes Made 5 | - Change 1 6 | - ... 7 | 8 | ## Testing 9 | - Unit test for `tbl_cnt()` method 10 | - ... 11 | 12 | ## Checklist 13 | - [ ] PR linked to issue descripting the bug or feature request being addressed 14 | - [ ] Code changes have been tested 15 | - [ ] Documentation has been updated, if applicable 16 | - [ ] All automated tests pass 17 | - [ ] Coding style and naming conventions have been followed 18 | - [ ] The PR is ready for review and merge 19 | -------------------------------------------------------------------------------- /inst/documents/python.qmd: -------------------------------------------------------------------------------- 1 | --- 2 | --- 3 | 4 | :::{.callout-note collapse=false appearance="minimal"} 5 | ## Python packages used directly 6 | 7 | ```{r} 8 | #| echo: false 9 | result$session$python |> 10 | dplyr::filter(directly_used) |> 11 | format_approved() 12 | ``` 13 | 14 | ::: 15 | 16 | :::{.callout-note collapse=true appearance="minimal"} 17 | ## Python packages used indirectly 18 | 19 | ```{r} 20 | #| echo: false 21 | result$session$python |> 22 | dplyr::filter(!.data$directly_used) |> 23 | dplyr::select("package", "version", "path") 24 | ``` 25 | 26 | ::: 27 | -------------------------------------------------------------------------------- /tests/testthat/_snaps/examples.md: -------------------------------------------------------------------------------- 1 | # All example scripts run with consistent output 2 | 3 | Code 4 | res 5 | Output 6 | # A tibble: 7 x 4 7 | id tag script status 8 | 9 | 1 1 First step success.R success 10 | 2 2 Second step warning.R warning 11 | 3 3 Second step error.R error 12 | 4 4 Step 2 error.R error 13 | 5 5 Step 2 prg1.R success 14 | 6 6 Step 2 success.R success 15 | 7 7 Step 2 warning.R warning 16 | 17 | -------------------------------------------------------------------------------- /tests/testthat/scripts/_whirl_biocompute.yaml: -------------------------------------------------------------------------------- 1 | biocompute: 2 | object_id: "urn:uuid:1234567890" 3 | spec_version: "https://w3id.org/ieee/ieee-2791-schema/2791object.json" 4 | etag: "c69asgs797agfka" 5 | usability: | 6 | This is an example of a biocompute project. 7 | extension: | 8 | example text 9 | steps: 10 | - name: "First step" 11 | paths: 12 | - "biocompute.R" 13 | parameter_files: 14 | - "_biocompute_parametrics.yml" 15 | - name: "Second step" 16 | paths: 17 | - "success.R" 18 | parameter_files: 19 | - "_biocompute_parametrics.yml" 20 | -------------------------------------------------------------------------------- /man/use_whirl.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/use_whirl.R 3 | \name{use_whirl} 4 | \alias{use_whirl} 5 | \title{Use whirl} 6 | \usage{ 7 | use_whirl(config_file = "_whirl.yml") 8 | } 9 | \arguments{ 10 | \item{config_file}{Path to the whirl config file, relative to the project} 11 | } 12 | \description{ 13 | Utility function to setup execution with whirl in your project: 14 | \enumerate{ 15 | \item Creates configuration file (default \verb{_whirl.yml}) 16 | \item Updates \code{.gitignore} to not include log files 17 | } 18 | 19 | See \code{vignette("whirl")} for how to specify paths inside the 20 | configuration file. 21 | } 22 | -------------------------------------------------------------------------------- /inst/documents/summary.qmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Summary" 3 | date: "`r Sys.time()`" 4 | date-format: "YYYY-MM-DDTHH:mm:ss zzz" 5 | params: 6 | summary_df: summary_df 7 | format: 8 | html: 9 | embed-resources: true 10 | standalone: false 11 | toc: true 12 | toc-depth: 2 13 | toc-expand: true 14 | smooth-scroll: true 15 | --- 16 | 17 | ```{r} 18 | #| label: Setup 19 | #| include: false 20 | knitr::opts_chunk$set( 21 | error = TRUE, 22 | warning = TRUE, 23 | message = TRUE, 24 | echo = TRUE 25 | ) 26 | ``` 27 | 28 | ```{r} 29 | #| label: Summary 30 | #| echo: false 31 | 32 | my_df <- structure(params$summary_df, format = "html", class = "knitr_kable") 33 | my_df 34 | ``` 35 | -------------------------------------------------------------------------------- /tests/testthat/test-use_whirl.R: -------------------------------------------------------------------------------- 1 | test_that("use_whirl", { 2 | withr::with_tempdir({ 3 | rlang::local_interactive(FALSE) 4 | 5 | usethis::create_project(path = ".") |> 6 | expect_message() |> 7 | suppressMessages() 8 | 9 | use_whirl() |> 10 | expect_message() |> 11 | suppressMessages() 12 | 13 | expect_true(file.exists("_whirl.yml")) 14 | 15 | expect_equal( 16 | readLines("_whirl.yml"), 17 | readLines(system.file("use_whirl/_whirl.yml", package = "whirl")) 18 | ) 19 | 20 | expect_true(file.exists(".gitignore")) 21 | 22 | expect_contains( 23 | readLines(".gitignore"), 24 | "*_log.(html|json|md)" 25 | ) 26 | }) 27 | }) 28 | -------------------------------------------------------------------------------- /tests/testthat/test-util_queue_summary.R: -------------------------------------------------------------------------------- 1 | test_that("fails with invalid input", { 2 | data.frame(dummy = "dummy") |> 3 | util_queue_summary() |> 4 | expect_error() 5 | }) 6 | 7 | 8 | test_that("Summary tibble is created successfully", { 9 | skip_if_no_quarto() 10 | skip_if_no_python() 11 | skip_on_cran() 12 | 13 | q <- whirl_queue$new(n_workers = 2) 14 | 15 | test_script(c("success.R", "py_success.py")) |> 16 | q$run() 17 | 18 | q$queue |> 19 | util_queue_summary() |> 20 | expect_s3_class("tbl_df") |> 21 | expect_named( 22 | c("Tag", "Directory", "Filename", "Status", "Hyperlink", "Information") 23 | ) |> 24 | nrow() |> 25 | expect_equal(2) 26 | }) 27 | -------------------------------------------------------------------------------- /tests/testthat/test-whirl_queue.R: -------------------------------------------------------------------------------- 1 | test_that("whirl_queue edge cases not covered in test-run etc.", { 2 | q <- whirl_queue$new() 3 | 4 | q$skip("skipped.R") 5 | 6 | expect_equal(q$queue$script, "skipped.R") 7 | expect_equal(q$queue$status, "skipped") 8 | 9 | q |> 10 | print() |> 11 | expect_snapshot() 12 | 13 | q$push("a/b/new.R") |> 14 | expect_error("Logs cannot be saved because \"a/b\" does not exist") 15 | 16 | q$push(c("a/new.R", "b/also_new.R")) |> 17 | expect_error("Logs cannot be saved because \"a\" and \"b\" does not exist") 18 | 19 | whirl_queue$new(log_dir = "fake_folder")$push("fake_script.R") |> 20 | expect_error("Logs cannot be saved because \"fake_folder\" does not exist") 21 | }) 22 | -------------------------------------------------------------------------------- /inst/use_whirl/_biocompute.yml: -------------------------------------------------------------------------------- 1 | biocompute: 2 | object_id: "your unique id" 3 | spec_version: "https://w3id.org/ieee/ieee-2791-schema/2791object.json" 4 | etag: "tag from biocompute platform" 5 | usability: | 6 | This is an example of a biocompute project. 7 | extension: | 8 | (optional) example text 9 | 10 | steps: 11 | - name: "Run single script" 12 | paths: 13 | - "path/to/script.R" 14 | - name: "Run multiple scripts in same step" 15 | paths: 16 | - "path/to/script_1.R" 17 | - "path/to/script_2.R" 18 | - name: "Run all R scripts in folder" 19 | paths: 20 | - "path/to/folder/*.R" 21 | - name: "Define parameter file for biocompute" 22 | paths: 23 | - "path/to/script.R" 24 | parameter_files: 25 | - "_parametrics.yml" 26 | -------------------------------------------------------------------------------- /tests/testthat/_snaps/biocompute.md: -------------------------------------------------------------------------------- 1 | # Biocompute object created correctly 2 | 3 | Code 4 | str(bco$io_domain) 5 | Output 6 | List of 2 7 | $ input_subdomain :List of 2 8 | ..$ :List of 1 9 | .. ..$ uri:List of 1 10 | .. .. ..$ uri: chr "my_project/params.yml" 11 | ..$ :List of 1 12 | .. ..$ uri:List of 1 13 | .. .. ..$ uri: chr "my_data/data.rds" 14 | $ output_subdomain:List of 2 15 | ..$ :List of 2 16 | .. ..$ mediatype: chr "text/txt" 17 | .. ..$ uri :List of 2 18 | .. .. ..$ filename: chr "output.txt" 19 | .. .. ..$ uri : chr "my_output/output.txt" 20 | ..$ :List of 2 21 | .. ..$ mediatype: chr " " 22 | .. ..$ uri :List of 2 23 | .. .. ..$ filename: chr "plot.png" 24 | .. .. ..$ uri : chr "my_output/plot.png" 25 | 26 | -------------------------------------------------------------------------------- /tests/testthat/test-progress_bar.R: -------------------------------------------------------------------------------- 1 | test_that("progress bar is visible", { 2 | withr::local_options( 3 | .new = list(cli.dynamic = TRUE, whirl.verbosity_level = "verbose") 4 | ) 5 | 6 | pb <- pb_start() |> 7 | expect_no_condition() |> 8 | expect_type("character") 9 | 10 | queue <- data.frame( 11 | script = "my_script.R", 12 | status = "running" 13 | ) 14 | 15 | pb_update(id = pb, queue = queue) |> 16 | expect_message() 17 | 18 | pb_done(id = pb) |> 19 | expect_no_error() 20 | }) 21 | 22 | test_that("progress bar is not visible", { 23 | pb <- pb_start() |> 24 | expect_no_condition() |> 25 | expect_null() 26 | 27 | queue <- data.frame( 28 | script = "my_script.R", 29 | status = "running" 30 | ) 31 | 32 | pb_update(id = pb, queue = queue) |> 33 | expect_no_message() 34 | 35 | pb_done(id = pb) |> 36 | expect_no_error() 37 | }) 38 | -------------------------------------------------------------------------------- /tests/testthat/helper.R: -------------------------------------------------------------------------------- 1 | # Helper function to select test scripts 2 | 3 | test_script <- function(script) { 4 | script <- testthat::test_path("scripts", script) |> 5 | normalizePath(winslash = "/", mustWork = TRUE) 6 | return(script) 7 | } 8 | 9 | # Use to test quarto availability or version lower than required 10 | skip_if_no_quarto <- function(ver = NULL) { 11 | skip_if(is.null(quarto::quarto_path()), message = "Quarto is not available") 12 | skip_if( 13 | condition = quarto::quarto_version() < ver, 14 | message = sprintf( 15 | fmt = "Version of quarto is lower than %s: %s.", 16 | ver, 17 | quarto::quarto_version() 18 | ) 19 | ) 20 | } 21 | 22 | # Use to test if python is available for simple tests 23 | skip_if_no_python <- function() { 24 | skip_if( 25 | condition = !reticulate::py_available(initialize = TRUE), 26 | message = "Python is not available" 27 | ) 28 | } 29 | -------------------------------------------------------------------------------- /tests/testthat/test-examples.R: -------------------------------------------------------------------------------- 1 | test_that("All example scripts run with consistent output", { 2 | skip_if_no_quarto() 3 | 4 | tmpdir <- withr::local_tempdir() 5 | 6 | # Copy all example scripts to the temporary working directory 7 | 8 | system.file("examples", package = "whirl") |> 9 | list.files(full.names = TRUE) |> 10 | file.copy(recursive = TRUE, to = tmpdir) 11 | 12 | res <- list( 13 | list.files(tmpdir, pattern = "\\.(yaml|yml)$", full.names = TRUE) |> 14 | as.list(), 15 | list.files(tmpdir, pattern = "\\.(R|py)$", full.names = TRUE) 16 | ) |> 17 | run(summary_file = NULL) 18 | 19 | # Unify result to only be about the status of the script and without 20 | # the full path to the script 21 | 22 | res$script <- basename(res$script) 23 | res <- res[c("id", "tag", "script", "status")] 24 | 25 | # Check that the results now are consistent 26 | expect_snapshot(res) 27 | }) 28 | -------------------------------------------------------------------------------- /R/approvedpkgs.R: -------------------------------------------------------------------------------- 1 | #' Check if only using approved packages 2 | #' Both used and approved packages to be specified 3 | #' as: {pkgname}${pkgversion} 4 | #' @noRd 5 | check_approved <- function(used, approved = NULL) { 6 | if (is.null(approved)) { 7 | return(NA) 8 | } 9 | 10 | used <- package_spec(used) 11 | names_used <- names(used) 12 | approved <- package_spec(approved) 13 | 14 | ok <- logical(length = length(used)) 15 | 16 | for (i in which(names_used %in% names(approved))) { 17 | ok[[i]] <- used[[i]] == approved[[names_used[[i]]]] 18 | } 19 | 20 | ok 21 | } 22 | 23 | #' @noRd 24 | package_spec <- function(x) { 25 | if (!length(x)) { 26 | return(character()) 27 | } 28 | 29 | x <- strsplit(x = x, split = "@") 30 | n <- vapply(X = x, FUN = \(x) x[[1]], FUN.VALUE = character(1)) 31 | v <- vapply(X = x, FUN = \(x) x[[2]], FUN.VALUE = character(1)) 32 | stats::setNames(object = v, nm = n) 33 | } 34 | -------------------------------------------------------------------------------- /.github/actions/setup/action.yaml: -------------------------------------------------------------------------------- 1 | name: Specific setup for whirl 2 | description: Installs Quarto and enables strace on Linux 3 | runs: 4 | using: "composite" 5 | steps: 6 | - name: Allow strace to attach to a process 7 | if: runner.os == 'Linux' 8 | shell: bash 9 | run: | 10 | echo 0 | sudo tee /proc/sys/kernel/yama/ptrace_scope 11 | sudo setcap cap_sys_ptrace=eip /usr/bin/strace 12 | 13 | - name: Install Quarto 14 | uses: quarto-dev/quarto-actions/setup@v2 15 | 16 | - name: (Linux) Install jupyter 17 | if: runner.os == 'Linux' 18 | shell: bash 19 | run: python3 -m pip install jupyter 20 | 21 | - name: (macOS) Install jupyter via Homebrew 22 | if: runner.os == 'macOS' 23 | shell: bash 24 | run: brew install jupyter 25 | 26 | - name: (Windows) Install jupyter 27 | if: runner.os == 'Windows' 28 | shell: bash 29 | run: py -m pip install jupyter 30 | -------------------------------------------------------------------------------- /tests/testthat/test-renv.R: -------------------------------------------------------------------------------- 1 | test_that("consistent output from renv help functions", { 2 | 3 | withr::local_options("usethis.quiet" = TRUE) 4 | 5 | tmpdir <- withr::local_tempdir() 6 | 7 | usethis::create_project(path = tmpdir, open = FALSE) 8 | 9 | usethis::local_project(tmpdir) 10 | 11 | status <- renv_status() |> 12 | expect_s3_class(c("whirl_renv_status")) 13 | 14 | knit_print_whirl_renv_status(status) |> 15 | as.character() |> 16 | expect_equal("::: {.callout-warning}\n## renv not used\n:::") 17 | 18 | status$status$lockfile$Packages <- c("a", "b") 19 | 20 | knit_print_whirl_renv_status(status) |> 21 | as.character() |> 22 | expect_match( 23 | "::: \\{.callout-important collapse=true\\}\n## renv out of sync" 24 | ) 25 | 26 | status$status$synchronized <- TRUE 27 | 28 | knit_print_whirl_renv_status(status) |> 29 | as.character() |> 30 | expect_match("::: \\{.callout-tip collapse=true\\}\n## renv synchronized") 31 | }) 32 | -------------------------------------------------------------------------------- /R/quarto.R: -------------------------------------------------------------------------------- 1 | #' Create a Quarto callout block with code 2 | #' 3 | #' See https://quarto.org/docs/authoring/callouts.html 4 | #' 5 | #' @param text description 6 | #' @param title description 7 | #' @param type description 8 | #' @param collapse description 9 | #' @noRd 10 | quarto_callout <- function( 11 | text = NULL, 12 | title = NULL, 13 | type = c("note", "warning", "important", "tip", "caution"), 14 | collapse = NULL) { 15 | type <- rlang::arg_match(type) 16 | if (!is.null(collapse)) { 17 | collapse <- ifelse(collapse, "true", "false") 18 | } 19 | 20 | c( 21 | sprintf( 22 | "::: {.callout-%s}", 23 | ifelse(!is.null(collapse), 24 | paste0(type, " collapse=", collapse), 25 | type 26 | ) 27 | ), 28 | if (!is.null(title)) { 29 | paste("##", title) 30 | }, 31 | if (!is.null(text)) { 32 | text 33 | }, 34 | ":::" 35 | ) |> 36 | paste(collapse = "\n") |> 37 | knitr::asis_output() 38 | } 39 | -------------------------------------------------------------------------------- /man/use_biocompute.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/use_whirl.R 3 | \name{use_biocompute} 4 | \alias{use_biocompute} 5 | \title{Use whirl to create biocompute logs} 6 | \usage{ 7 | use_biocompute( 8 | config_file = "_whirl.yml", 9 | parametrics_file = "_parametrics.yml" 10 | ) 11 | } 12 | \arguments{ 13 | \item{config_file}{Path to the whirl config file, relative to the project} 14 | 15 | \item{parametrics_file}{Path to the biocompute parametrics file, relative to the project} 16 | } 17 | \description{ 18 | Utility function to setup execution with whirl in your project suitable for 19 | creating biocompute logs with \code{write_biocompute()}: 20 | \enumerate{ 21 | \item Creates configuration file (default \verb{_whirl.yml}) with default values for the \code{biocompute} metadata. 22 | \item Updates \code{.gitignore} to not include log files 23 | } 24 | 25 | See \code{vignette("whirl")} for how to specify paths inside the 26 | configuration file. 27 | } 28 | -------------------------------------------------------------------------------- /R/run_current_script.R: -------------------------------------------------------------------------------- 1 | #' RStudio IDE runner 2 | #' 3 | #' Easy to run and view log in RStudio. Also available as an addin. Takes 4 | #' the active source document and [run()] it. 5 | #' 6 | #' @return Returns nothing. Run for side effects. 7 | #' @seealso [run()] 8 | #' @keywords intern 9 | #' @noRd 10 | run_current_script <- function() { 11 | stopifnot( 12 | "rstudioapi package needed" = requireNamespace("rstudioapi", quietly = TRUE) 13 | ) 14 | 15 | # Run file 16 | script_info <- rstudioapi::getSourceEditorContext() 17 | file <- gsub( 18 | paste0(normalizePath(getwd()), .Platform$file.sep), 19 | "", 20 | normalizePath(script_info$path) 21 | ) 22 | 23 | withr::local_options(list(whirl.verbosity_level = "minimal")) 24 | 25 | run_result <- run(input = list(list(names = basename(file), paths = file))) 26 | 27 | log_file <- run_result[["result"]][[1]][["log_details"]][["location"]][[1]] 28 | if (file.exists(log_file)) { 29 | rstudioapi::viewer(log_file) 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /inst/documents/title-block.html: -------------------------------------------------------------------------------- 1 | 2 |
3 |
4 |

$title$

5 | $if(subtitle)$ 6 |

$subtitle$

7 | $endif$ 8 | $for(author)$ 9 |

$author$

10 | $endfor$ 11 | 12 | $if(date)$ 13 |
14 |
Execution end time
15 |
16 |

$date$

17 |
18 |
19 | $endif$ 20 | 21 | $if(date-modified)$ 22 |
23 |
Execution time
24 |
25 |

$date-modified$

26 |
27 |
28 | $endif$ 29 | 30 | $if(abstract)$ 31 |
32 |
Path to script
33 |
34 |

$abstract$

35 |
36 |
37 | $endif$ 38 | 39 |
40 |
41 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | title: "[FEAT] Feature name" 5 | labels: '' 6 | assignees: '' 7 | --- 8 | 9 | ## Feature Request 10 | 11 | ### Description 12 | [Provide a clear and concise description of the feature you are requesting. What problem does it aim to solve?] 13 | 14 | ### Proposed Solution 15 | [Outline a proposed solution or approach for implementing the feature. How do you envision the feature working?] 16 | 17 | ### Use Case 18 | [Describe a specific use case or scenario where this feature would be beneficial or necessary.] 19 | 20 | ### Additional Context 21 | [Add any additional context, information, or examples that support or clarify the feature request.] 22 | 23 | ### Impact 24 | [Explain the potential impact of this feature on the package and its users. How would it improve the package or benefit the user community?] 25 | 26 | ### Related Issues 27 | [Are there any related issues or pull requests that are relevant to this feature request?] 28 | -------------------------------------------------------------------------------- /.github/workflows/check_and_co.yaml: -------------------------------------------------------------------------------- 1 | on: 2 | push: 3 | branches: 4 | - main 5 | - master 6 | pull_request: 7 | branches: 8 | - main 9 | - master 10 | permissions: 11 | contents: write 12 | pull-requests: write 13 | name: All actions 14 | jobs: 15 | check-current-version: 16 | name: Check current version 17 | uses: >- 18 | NovoNordisk-OpenSource/r.workflows/.github/workflows/check_current_version.yaml@main 19 | with: 20 | use_local_setup_action: true 21 | pkgdown: 22 | name: Pkgdown site 23 | uses: NovoNordisk-OpenSource/r.workflows/.github/workflows/pkgdown.yaml@main 24 | with: 25 | use_local_setup_action: true 26 | coverage: 27 | name: Coverage report 28 | uses: NovoNordisk-OpenSource/r.workflows/.github/workflows/coverage.yaml@main 29 | secrets: inherit 30 | with: 31 | use_local_setup_action: true 32 | use_codecov: true 33 | megalinter: 34 | name: Megalinter 35 | uses: NovoNordisk-OpenSource/r.workflows/.github/workflows/megalinter.yaml@main 36 | -------------------------------------------------------------------------------- /R/progress_bar.R: -------------------------------------------------------------------------------- 1 | #' Start a queue progress bar for a tag if verbose and possible 2 | #' @noRd 3 | pb_start <- function() { 4 | if ( 5 | !cli::is_dynamic_tty() || zephyr::get_verbosity_level("whirl") != "verbose" 6 | ) { 7 | return(invisible()) 8 | } 9 | 10 | withr::local_options( 11 | cli.progress_show_after = 0 12 | ) 13 | 14 | cli::cli_progress_bar( 15 | type = "custom", 16 | format = "{cli::pb_spin} Running {cli::pb_extra$running} [{cli::pb_elapsed}]", 17 | extra = list(running = c()), 18 | .auto_close = FALSE 19 | ) 20 | } 21 | 22 | #' Easily update the progress bar for the queue 23 | #' @noRd 24 | pb_update <- function(id, queue) { 25 | if (is.null(id)) { 26 | return(invisible()) 27 | } 28 | 29 | cli::cli_progress_update( 30 | extra = list( 31 | running = basename(queue$script)[queue$status == "running"] 32 | ), 33 | id = id 34 | ) 35 | } 36 | 37 | #' Convenience wrapper to end progress bar 38 | #' @noRd 39 | pb_done <- function(id) { 40 | if (is.null(id)) { 41 | return(invisible()) 42 | } 43 | cli::cli_progress_done(id = id) 44 | invisible() 45 | } 46 | -------------------------------------------------------------------------------- /R/util_queue_summary.R: -------------------------------------------------------------------------------- 1 | #' Generate a summary tibble from a queue table 2 | #' 3 | #' This function takes a queue table as input and generates a summary tibble 4 | #' with columns for Directory, Filename, Status, Hyperlink, and Information. 5 | #' 6 | #' @param queue_table The queue table containing the result data 7 | #' @return A tibble summarizing the queue table data 8 | #' @noRd 9 | util_queue_summary <- function(queue_table) { 10 | queue_table |> 11 | dplyr::mutate( 12 | Tag = .data$tag, 13 | Directory = normalizePath(dirname(.data$script), winslash = "/"), 14 | Filename = basename(.data$script), 15 | Status = .data$status, 16 | Hyperlink = vapply( 17 | X = .data$result, 18 | FUN = \(x) utils::head(x[["logs"]], 1), 19 | FUN.VALUE = character(1) 20 | ), 21 | Information = vapply( 22 | X = .data$result, 23 | FUN = \(x) { 24 | x[["status"]][c("errors", "warnings")] |> 25 | unlist() |> 26 | paste0(collapse = "
") 27 | }, 28 | FUN.VALUE = character(1) 29 | ) 30 | ) |> 31 | dplyr::select("Tag", "Directory", "Filename", "Status", "Hyperlink", "Information") 32 | } 33 | -------------------------------------------------------------------------------- /tests/testthat/test-whirl_r_session.R: -------------------------------------------------------------------------------- 1 | test_that("interactive whirl R session components not tested in run", { 2 | skip_if_no_quarto() 3 | p <- whirl_r_session$new() 4 | 5 | p$print() |> 6 | expect_message() |> 7 | suppressMessages() 8 | 9 | p$tmpdir |> 10 | dir.exists() |> 11 | expect_true() 12 | 13 | p$tmpdir |> 14 | list.files() |> 15 | sort() |> 16 | expect_contains(c("dummy.qmd", "log.qmd", "summary.qmd")) 17 | 18 | p$call(func = Sys.sleep, args = list(time = 1)) # Sleep for 1 second 19 | 20 | status <- p$wait(timeout = 10)$check_status() # Timeout after 10 ms 21 | expect_null(status) # Still running 22 | 23 | status <- p$wait()$check_status() 24 | expect_equal(status$code, 200L) # Completed successfully 25 | 26 | p$call(func = \() 1 + "a") # Something with an error 27 | expect_error(p$wait()$check_status()) 28 | 29 | # Test temp dir is deleted correctly 30 | dir <- p$tmpdir 31 | rm(p) 32 | gc() 33 | 34 | expect_false(dir.exists(dir)) 35 | }) 36 | 37 | test_that("additional error testing", { 38 | wrs_report_status( 39 | status = "unknown", 40 | script = "my_script.R", 41 | logs = "my_log.html" 42 | ) |> 43 | expect_error() 44 | }) 45 | -------------------------------------------------------------------------------- /R/mdformats.R: -------------------------------------------------------------------------------- 1 | #' @noRd 2 | mdformats <- function(script, log_html, mdfmt, self, out_dir) { 3 | newname <- gsub( 4 | pattern = "\\.[^\\.]*$", 5 | replacement = "", 6 | x = basename(script) 7 | ) 8 | 9 | supported_formats <- list_pandoc_output_formats() 10 | unsupported_formats <- setdiff(mdfmt, supported_formats) 11 | if (any(!mdfmt %in% supported_formats)) { 12 | cli::cli_abort( 13 | "Output format{?s} {.code {unsupported_formats}} not supported by your pandoc installation" 14 | ) 15 | } 16 | 17 | if (length(mdfmt) >= 1) { 18 | newname <- paste0(newname, "_log_", mdfmt) 19 | } 20 | 21 | newname <- paste0(newname, ".md") 22 | 23 | for (i in seq_along(newname)) { 24 | knitr::pandoc( 25 | input = log_html, 26 | format = mdfmt[[i]], 27 | ext = "md" 28 | ) 29 | 30 | file.copy( 31 | from = file.path(self$tmpdir, "log.md"), 32 | to = file.path( 33 | out_dir, 34 | newname[[i]] 35 | ), 36 | overwrite = TRUE 37 | ) 38 | } 39 | 40 | return(invisible(newname)) 41 | } 42 | 43 | #' @noRd 44 | list_pandoc_output_formats <- function() { 45 | system(command = "pandoc --list-output-formats", intern = TRUE) 46 | } 47 | -------------------------------------------------------------------------------- /R/read_glob.R: -------------------------------------------------------------------------------- 1 | #' @title Fetch files if path uses regular expression 2 | #' @param input A character vector with paths to files that should be executed. 3 | #' @return A character vector with the paths to the files. If regexp have been 4 | #' used as input these will be solved to the actual files matching the 5 | #' criteria. 6 | #' @noRd 7 | 8 | read_glob <- function(input, root_dir) { 9 | files <- vector( 10 | mode = "list", 11 | length = length(input) 12 | ) 13 | 14 | for (i in seq_along(files)) { 15 | if (length(input[[i]]) > 1 || is.list(input[[i]])) { 16 | files[[i]] <- read_glob( 17 | input = input[[i]], 18 | root_dir = root_dir 19 | ) 20 | } else { 21 | files[[i]] <- input[[i]] |> 22 | normalize_with_base(base = root_dir) |> 23 | read_single_glob() 24 | } 25 | } 26 | 27 | if (is.list(input)) { 28 | return(files) 29 | } 30 | 31 | unlist(files, use.names = FALSE) 32 | } 33 | 34 | #' @noRd 35 | read_single_glob <- function(x) { 36 | if (file.exists(x)) { 37 | return(x) 38 | } 39 | files <- Sys.glob(x) 40 | if (length(files) == 0) { 41 | cli::cli_alert_warning("No files or folders for this path {x}") 42 | } 43 | files 44 | } 45 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: "[BUG] Bug name" 5 | labels: '' 6 | assignees: '' 7 | --- 8 | 9 | ## Bug Report 10 | 11 | ### Description 12 | [Provide a clear and concise description of the bug. What behavior did you observe that you believe to be a bug?] 13 | 14 | ### Reproducible Example 15 | [Include a minimal, complete, and verifiable example that demonstrates the bug. This could be a code snippet, dataset, or specific steps to reproduce the issue.] 16 | 17 | ### Expected Behavior 18 | [Describe what you expected to happen when you encountered the bug.] 19 | 20 | ### Actual Behavior 21 | [Explain what actually happened when you encountered the bug.] 22 | 23 | ### Environment 24 | - R Version: [e.g., 4.5.1] 25 | - Package Version: [e.g., 1.2.3] 26 | - Operating System: [e.g., Windows 10, macOS 11.1] 27 | 28 | ### Additional Context 29 | [Add any additional context, information, or examples that can help in understanding and reproducing the bug.] 30 | 31 | ### Impact 32 | [Explain the impact or consequences of this bug. How does it affect the package's functionality or the user experience?] 33 | 34 | ### Related Issues 35 | [Are there any related issues, pull requests, or discussions that are relevant to this bug report?] 36 | -------------------------------------------------------------------------------- /tests/testthat/test-mdformats.R: -------------------------------------------------------------------------------- 1 | test_that("pandoc works", { 2 | skip_if_no_quarto() 3 | 4 | x <- whirl_r_session$new() 5 | 6 | file.copy( 7 | from = test_script("test-mdformats.html"), 8 | to = file.path(x$tmpdir, "log.html") 9 | ) |> 10 | expect_true() 11 | 12 | tmpdir <- withr::local_tempdir() 13 | 14 | # Different pandoc installations support different formats 15 | # for testing purposes we only use the supported ones here. 16 | # Error handling tested in new test below. 17 | mdfmt <- c("gfm", "commonmark", "markua") |> 18 | intersect(list_pandoc_output_formats()) 19 | 20 | mdformats( 21 | script = "test1.R", 22 | log_html = file.path(x$tmpdir, "log.html"), 23 | mdfmt = mdfmt, 24 | out_dir = tmpdir, 25 | self = x 26 | ) |> 27 | suppressMessages() 28 | 29 | file.path( 30 | tmpdir, 31 | paste0("test1_log_", c("gfm", "commonmark", "markua"), ".md") 32 | ) |> 33 | file.exists() |> 34 | all() |> 35 | expect_true() 36 | }) 37 | 38 | test_that("gives error when format is not supported", { 39 | skip_if_no_quarto() 40 | 41 | mdformats( 42 | script = "script.R", 43 | log_html = "script.html", 44 | mdfmt = "fake_format", 45 | out_dir = "." 46 | ) |> 47 | expect_error("not supported by your pandoc installation") 48 | }) 49 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # History files 2 | .Rhistory 3 | .Rapp.history 4 | 5 | # Session Data files 6 | .RData 7 | .RDataTmp 8 | 9 | # User-specific files 10 | .Ruserdata 11 | 12 | # Example code in package build process 13 | *-Ex.R 14 | dev/* 15 | 16 | # Output files from R CMD build 17 | /*.tar.gz 18 | 19 | # Output files from R CMD check 20 | /*.Rcheck/ 21 | tests/testthat/*.png 22 | 23 | # Output files from test coverage 24 | cobertura.xml 25 | 26 | # RStudio files 27 | .Rproj.user/ 28 | 29 | # produced vignettes 30 | vignettes/*.html 31 | vignettes/*.pdf 32 | 33 | # OAuth2 token, see https://github.com/hadley/httr/releases/tag/v0.3 34 | .httr-oauth 35 | 36 | # knitr and R markdown default cache directories 37 | *_cache/ 38 | /cache/ 39 | 40 | # Temporary files created by R markdown 41 | *.utf8.md 42 | *.knit.md 43 | 44 | # R Environment Variables 45 | .Renviron 46 | 47 | # pkgdown site 48 | docs/ 49 | 50 | # translation temp files 51 | po/*~ 52 | 53 | # RStudio Connect folder 54 | rsconnect/ 55 | 56 | # remove files for examples 57 | inst/examples/*_files 58 | inst/examples/*.html 59 | plot1.png 60 | from_python.txt 61 | summary.html 62 | *_log.html 63 | *_log.json 64 | 65 | inst/doc 66 | whirl.Rcheck/ 67 | whirl*.tar.gz 68 | whirl*.tgz 69 | CRAN-SUBMISSION 70 | bco.json 71 | *_log.(html|json|md) 72 | _parametrics.yml 73 | _whirl.yml 74 | .DS_Store 75 | -------------------------------------------------------------------------------- /man/custom_logging.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/custom_logging.R 3 | \name{custom_logging} 4 | \alias{custom_logging} 5 | \alias{log_read} 6 | \alias{log_write} 7 | \alias{log_delete} 8 | \title{Helper function to log custom messages} 9 | \usage{ 10 | log_read(file, log = Sys.getenv("WHIRL_LOG_MSG")) 11 | 12 | log_write(file, log = Sys.getenv("WHIRL_LOG_MSG")) 13 | 14 | log_delete(file, log = Sys.getenv("WHIRL_LOG_MSG")) 15 | } 16 | \arguments{ 17 | \item{file}{\code{\link[=character]{character()}} description of the file that was read, written or 18 | deleted.} 19 | 20 | \item{log}{\code{\link[=character]{character()}} path to the log file.} 21 | } 22 | \description{ 23 | Useful for e.g. read and write operations on databases etc. 24 | that are not automatically captured. 25 | } 26 | \details{ 27 | The default environment variable \code{WHIRL_LOG_MSG} is set in the session used 28 | to log scripts, and input is automatically captured in the resulting log. 29 | 30 | If run outside of whirl, meaning when the above environment variable is 31 | unset, the operations are streamed to \code{stdout()}. By default the console. 32 | } 33 | \examples{ 34 | # Stream logs to console since `WHIRL_LOG_MSG` is not set: 35 | log_read("my/folder/input.txt") 36 | log_write("my/folder/output.txt") 37 | log_delete("my/folder/old_output.txt") 38 | } 39 | -------------------------------------------------------------------------------- /tests/testthat/test-python.R: -------------------------------------------------------------------------------- 1 | test_that("python dependencies found correctly", { 2 | skip_on_cran() 3 | skip_if_no_quarto() 4 | skip_if_no_python() 5 | skip_if_not_installed(pkg = "reticulate", minimum_version = "1.41.0") 6 | 7 | reticulate::py_require("pandas") 8 | reticulate::py_require("numpy") 9 | 10 | res <- test_script( 11 | script = c("py_success.py", "py_dependencies.py") 12 | ) |> 13 | run(summary_file = NULL) |> 14 | expect_no_warning() |> 15 | expect_no_error() 16 | 17 | res[["status"]] |> 18 | expect_equal(c("success", "success")) 19 | 20 | info_py_success <- res[["result"]][[1]][["session"]] 21 | 22 | info_py_success$platform$setting |> 23 | expect_contains("python") 24 | 25 | info_py_success$python$package[ 26 | info_py_success$python$directly_used 27 | ] |> 28 | expect_length(0) 29 | 30 | info_py_dependencies <- res[["result"]][[2]][["session"]] 31 | 32 | info_py_dependencies$platform$setting |> 33 | expect_contains("python") 34 | 35 | info_py_dependencies$python$package[ 36 | info_py_dependencies$python$directly_used 37 | ] |> 38 | expect_length(2) |> 39 | expect_contains(c("pandas", "numpy")) 40 | }) 41 | 42 | test_that("parse_pip_list() is consistent", { 43 | skip_on_cran() 44 | skip_if_no_python() 45 | 46 | system("python -m pip list -v", intern = TRUE) |> 47 | parse_pip_list() |> 48 | expect_s3_class("data.frame") |> 49 | expect_named(c("package", "version", "path", "installer")) |> 50 | nrow() |> 51 | expect_gt(0) 52 | }) 53 | -------------------------------------------------------------------------------- /tests/testthat/test-utils.R: -------------------------------------------------------------------------------- 1 | test_that("get_file_ext()", { 2 | get_file_ext("file.txt") |> 3 | expect_equal("txt") 4 | 5 | get_file_ext("a/b/file.a") |> 6 | expect_equal("a") 7 | 8 | get_file_ext("a/b/file") |> 9 | expect_equal("") 10 | }) 11 | 12 | test_that("scale_to_percent()", { 13 | scale_to_percent(0.5) |> 14 | expect_equal("50.00%") 15 | 16 | scale_to_percent(0.55124316, 1) |> 17 | expect_equal("55.1%") 18 | }) 19 | 20 | test_that("replace_na_with_last()", { 21 | replace_na_with_last(1:5) |> 22 | expect_equal(1:5) 23 | 24 | replace_na_with_last(c(1, NA, 3, NA, 5)) |> 25 | expect_equal(c(1, 1, 3, 3, 5)) 26 | 27 | replace_na_with_last(c(NA, NA, NA)) |> 28 | expect_equal(c(NA, NA, NA)) 29 | 30 | replace_na_with_last(c(NA, "a", "b", NA, "c")) |> 31 | expect_equal(c(NA, "a", "b", "b", "c")) 32 | }) 33 | 34 | test_that("path_rel()", { 35 | path_rel("a", "a") |> 36 | expect_equal(".") 37 | 38 | path_rel("a", "a/b") |> 39 | expect_equal("..") 40 | 41 | path_rel("a", "a/b/c") |> 42 | expect_equal("../..") 43 | 44 | path_rel("a/b", "a") |> 45 | expect_equal("b") 46 | 47 | path_rel("a/b/d", "a/b/c") |> 48 | expect_equal("../d") 49 | }) 50 | 51 | test_that("create_cli_links()", { 52 | text <- create_cli_links("mytext", "my_link") |> 53 | expect_type("character") 54 | 55 | grep(pattern = "my_link", x = text) |> 56 | expect_length(0) 57 | 58 | withr::with_options( 59 | new = list(cli.dynamic = TRUE), 60 | code = create_cli_links("mytext", "my_link") |> 61 | expect_type("character") |> 62 | expect_match("my_link") 63 | ) 64 | }) 65 | -------------------------------------------------------------------------------- /man/whirl-package.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/whirl-package.R 3 | \docType{package} 4 | \name{whirl-package} 5 | \alias{whirl} 6 | \alias{whirl-package} 7 | \title{whirl: Log Execution of Scripts} 8 | \description{ 9 | \if{html}{\figure{logo.png}{options: style='float: right' alt='logo' width='120'}} 10 | 11 | Logging of scripts suitable for clinical trials using 'Quarto' to create nice human readable logs. 'whirl' enables execution of scripts in batch, while simultaneously creating logs for the execution of each script, and providing an overview summary log of the entire batch execution. 12 | } 13 | \seealso{ 14 | Useful links: 15 | \itemize{ 16 | \item \url{https://novonordisk-opensource.github.io/whirl/} 17 | \item \url{https://github.com/novonordisk-opensource/whirl} 18 | \item Report bugs at \url{https://github.com/NovoNordisk-OpenSource/whirl/issues} 19 | } 20 | 21 | } 22 | \author{ 23 | \strong{Maintainer}: Aksel Thomsen \email{oath@novonordisk.com} 24 | 25 | Authors: 26 | \itemize{ 27 | \item Lovemore Gakava \email{lvgk@novonordisk.com} 28 | \item Cervan Girard \email{cgid@novonordisk.com} 29 | \item Kristian Troejelsgaard \email{ktqn@novonordisk.com} 30 | \item Steffen Falgreen Larsen \email{sffl@novonordisk.com} 31 | \item Vladimir Obucina \email{vlob@novonordisk.com} 32 | \item Michael Svingel \email{wymz@novonordisk.com} 33 | \item Skander Mulder \email{sktf@novonordisk.com} 34 | \item Oliver Lundsgaard \email{ovlr@novonordisk.com} 35 | } 36 | 37 | Other contributors: 38 | \itemize{ 39 | \item Novo Nordisk A/S [copyright holder] 40 | } 41 | 42 | } 43 | \keyword{internal} 44 | -------------------------------------------------------------------------------- /inst/documents/python_modules.py: -------------------------------------------------------------------------------- 1 | import json 2 | import sys 3 | import types 4 | 5 | def get_loaded_packages(): 6 | """Return all the packages that are currently loaded in the Python environment""" 7 | loaded = set(m.split('.')[0] for m in sys.modules if m and not m.startswith('_')) 8 | return loaded 9 | 10 | def get_namespaced_packages(): 11 | """Get all the stuff that exposed via namespacing (i.e. attached)""" 12 | namespaced = set() 13 | 14 | # Use __main__.__dict__ to find all imported modules in the current script 15 | # needed for things like reticulate 16 | import __main__ 17 | if hasattr(__main__, '__dict__'): 18 | globals_dict = __main__.__dict__ 19 | else: 20 | frame = sys._getframe(1) 21 | globals_dict = frame.f_globals 22 | 23 | for name, obj in globals_dict.items(): 24 | if isinstance(obj, types.ModuleType) and not name.startswith('_'): 25 | module_name = getattr(obj, '__name__', name) 26 | root_module = module_name.split('.')[0] 27 | if root_module not in ['sys', 'json', 'types', 'builtins']: 28 | namespaced.add(root_module) 29 | 30 | return namespaced 31 | 32 | def get_package_status(tmpfile=''): 33 | """Get both loaded and namespaced packages as a named list""" 34 | loaded = get_loaded_packages() 35 | namespaced = get_namespaced_packages() 36 | 37 | status = { 38 | 'loaded': list(loaded), 39 | 'namespaced': list(namespaced) 40 | } 41 | 42 | if tmpfile != '': 43 | with open(tmpfile, 'w') as f: 44 | json.dump(status, f, indent=2) 45 | 46 | return status 47 | -------------------------------------------------------------------------------- /tests/testthat/test-custom_logging.R: -------------------------------------------------------------------------------- 1 | test_that("stream to console outside whirl context", { 2 | log_read("test_read") |> 3 | expect_output( 4 | regexp = "\\{\"time\":\".*\",\"type\":\"read\",\"file\":\"test_read\"\\}" 5 | ) 6 | 7 | log_write("test_write") |> 8 | expect_output( 9 | regexp = "\\{\"time\":\".*\",\"type\":\"write\",\"file\":\"test_write\"\\}" # nolint: line_length_linter 10 | ) 11 | 12 | log_delete("test_delete") |> 13 | expect_output( 14 | regexp = "\\{\"time\":\".*\",\"type\":\"delete\",\"file\":\"test_delete\"\\}" # nolint: line_length_linter 15 | ) 16 | }) 17 | 18 | 19 | test_that("stream to log file in a whirl context", { 20 | tmp_log_file <- withr::local_tempfile(fileext = ".json") 21 | 22 | withr::with_envvar( 23 | c(WHIRL_LOG_MSG = tmp_log_file), 24 | { 25 | log_read("test_read") 26 | log_write("test_write") 27 | log_delete("test_delete") 28 | 29 | x <- read_from_log() 30 | 31 | expect_equal(nrow(x), 3) 32 | expect_equal(x$type, c("read", "write", "delete")) 33 | expect_equal(x$file, c("test_read", "test_write", "test_delete")) 34 | } 35 | ) 36 | 37 | split_log(x) |> 38 | expect_length(3) |> 39 | lapply(expect_s3_class, "data.frame") |> 40 | vapply(\(x) x[["file"]], character(1)) |> 41 | expect_equal( 42 | c(read = "test_read", write = "test_write", delete = "test_delete") 43 | ) 44 | 45 | split_log(x[-2, ]) |> 46 | expect_length(3) |> 47 | lapply(expect_s3_class, "data.frame") |> 48 | sapply(\(x) x[["file"]]) |> 49 | unlist() |> 50 | expect_equal( 51 | c(read = "test_read", delete = "test_delete") 52 | ) 53 | 54 | x["time"] <- as.POSIXct("2000-01-01 01:01:01") 55 | expect_snapshot(x) 56 | }) 57 | -------------------------------------------------------------------------------- /vignettes/articles/example.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Log example" 3 | --- 4 | 5 | ```{r setup, include = FALSE} 6 | knitr::opts_chunk$set( 7 | collapse = TRUE, 8 | comment = "#>" 9 | ) 10 | 11 | # Use a temporary directory as working directory with the example program 12 | 13 | wd <- normalizePath(".") 14 | tmp <- withr::local_tempdir() 15 | 16 | system.file("examples/prg1.R", package = "whirl") |> 17 | file.copy(to = file.path(tmp, "example.R")) 18 | 19 | knitr::opts_knit$set(root.dir = tmp) 20 | ``` 21 | 22 | In this example we are going to execute the following script and create a log of it's execution: 23 | 24 | `example.R:` 25 | ```{r, file = "example.R", eval = FALSE} 26 | ``` 27 | 28 | We are going to use the `run()` function to execute the script, and since this vignette 29 | is created on Linux we can use the `whirl.track_files` option to automatically track the used files: 30 | 31 | ```{r whirl-setup} 32 | library(whirl) 33 | 34 | options(whirl.track_files = TRUE) 35 | options(whirl.verbosity_level = "minimal") 36 | ``` 37 | 38 | The `verbosity_level` is set to `minimal` for nicer printing in this vignette. 39 | Now we are ready to execute the script: 40 | 41 | ```{r run} 42 | result <- run("example.R") 43 | 44 | print(result) 45 | ``` 46 | 47 | The script is now executed and you can access the logs below: 48 | 49 | ```{r copy, include=FALSE} 50 | # Also check if all outputs were created as expected 51 | withr::with_dir(tmp, { 52 | articles_folder <- file.path(wd, "../../docs/articles") 53 | 54 | articles_folder |> 55 | dir.exists() |> 56 | stopifnot() 57 | 58 | c("summary.html", "example_log.html", "plot1.png") |> 59 | lapply(file.copy, to = articles_folder, overwrite = TRUE) |> 60 | unlist() |> 61 | all() |> 62 | stopifnot() 63 | }) 64 | ``` 65 | 66 | * [View summary log](summary.html) 67 | * [View log for example.R](example_log.html) 68 | * [The saved plot1.png](plot1.png) 69 | -------------------------------------------------------------------------------- /R/internal_run.R: -------------------------------------------------------------------------------- 1 | #' Sending the scripts to the whirl_r_queue for execution 2 | #' 3 | #' @param input A character vector of file path(s) to R, R Markdown, Quarto 4 | #' scripts, or files in a folder using regular expression, or to to a whirl 5 | #' config file. The input can also be structured in a list where each element 6 | #' will be executed in parallel. 7 | #' @param steps An optional argument that can be used if only certain steps 8 | #' within a config files is to be executed. Should be equivalent to the names 9 | #' of the steps found in the config file. If kept as NULL (default) then all 10 | #' steps listed in the config file will be executed. 11 | #' @param queue The whirl_r_queue that should execute the scripts 12 | #' @param level Depth of the recursive config calls. 13 | #' The initial call will have 1. 14 | #' @inheritParams options_params 15 | #' @return A tibble containing the execution results for all the scripts. 16 | #' @noRd 17 | internal_run <- function(input, 18 | steps, 19 | queue, 20 | level) { 21 | # Enrich the input with "name" and "path" elements 22 | enriched <- enrich_input(input, steps) 23 | 24 | # Loop over the elements 25 | for (i in seq_along(enriched)) { 26 | files <- enriched[[i]]$path 27 | name <- enriched[[i]]$name 28 | 29 | # Messages 30 | cli_level <- get(paste0("cli_h", min(level, 3)), envir = asNamespace("cli")) 31 | zephyr::msg_verbose(message = name, msg_fun = cli_level) 32 | 33 | # If the step points to a config file then re-initiate internal_run() 34 | if (any(grepl("yaml|yml", get_file_ext(files)))) { 35 | internal_run( 36 | input = files, 37 | steps = steps, 38 | queue = queue, 39 | level = level + 1 40 | ) 41 | } else { 42 | # Execute the scripts 43 | queue$run(scripts = files, tag = name) 44 | zephyr::msg_verbose(message = "\n", msg_fun = cli::cli_verbatim) 45 | } 46 | } 47 | 48 | invisible(queue) 49 | } 50 | -------------------------------------------------------------------------------- /R/status.R: -------------------------------------------------------------------------------- 1 | #' Get execution status 2 | #' 3 | #' Retrieves errors and warnings from the generated markdown file, 4 | #' and derives the execution status. 5 | #' 6 | #' @noRd 7 | 8 | get_status <- function(md, start) { 9 | x <- readChar(con = md, nchars = file.size(md)) |> 10 | paste(collapse = "\n") |> 11 | stringr::str_split("\n:::") 12 | 13 | x <- x[[1]] 14 | 15 | add_python <- x |> 16 | stringr::str_detect("\\{.python .cell-code") |> 17 | any() 18 | 19 | # Errors 20 | 21 | errors <- x |> 22 | stringr::str_subset( 23 | pattern = "^ *\\{\\.cell-output \\.cell-output-error\\}" 24 | ) |> 25 | stringr::str_remove_all("\\{[^\\}]*\\}") |> 26 | stringr::str_squish() 27 | 28 | if (add_python) { 29 | python_errors <- x |> 30 | stringr::str_subset(pattern = "^ *\\{\\.cell-output") |> 31 | stringr::str_remove_all("\\{[^\\}]*\\}") |> 32 | stringr::str_squish() |> 33 | stringr::str_subset("Error:") 34 | 35 | errors <- c(errors, python_errors) 36 | } 37 | 38 | # Warnings 39 | 40 | warnings <- x |> 41 | stringr::str_subset( 42 | pattern = "^ *\\{\\.cell-output \\.cell-output-stderr\\}" 43 | ) |> 44 | stringr::str_remove_all("\\{[^\\}]*\\}") |> 45 | stringr::str_squish() |> 46 | stringr::str_subset(pattern = "^(W|w)arning") 47 | 48 | if (add_python) { 49 | python_warnings <- x |> 50 | stringr::str_subset(pattern = "^ *\\{\\.cell-output") |> 51 | stringr::str_remove_all("\\{[^\\}]*\\}") |> 52 | stringr::str_squish() |> 53 | stringr::str_subset("Warning:") 54 | 55 | warnings <- c(warnings, python_warnings) 56 | } 57 | 58 | # Status 59 | 60 | if (length(errors)) { 61 | status <- "error" 62 | } else if (length(warnings)) { 63 | status <- "warning" 64 | } else { 65 | status <- "success" 66 | } 67 | 68 | # Return list with status 69 | 70 | list( 71 | message = status, 72 | warnings = warnings, 73 | errors = errors, 74 | start = readRDS(start), 75 | end = file.mtime(md) 76 | ) 77 | } 78 | -------------------------------------------------------------------------------- /R/normalize_with_base.R: -------------------------------------------------------------------------------- 1 | #' Normalize a Path with Respect to a Base Directory 2 | #' 3 | #' This function normalizes a given path with respect to a specified base 4 | #' directory. 5 | #' If the path is relative, it combines the base directory and the path, then 6 | #' normalizes the resulting path. 7 | #' If the path is absolute or starts with `~`, it normalizes the path directly. 8 | #' If no base directory is specified, the current working directory is used as 9 | #' the base. 10 | #' 11 | #' @param path A character string representing the path to be normalized. 12 | #' Can be relative, absolute, or start with `~`. 13 | #' @param base A character string representing the base directory with respect 14 | #' to which the path should be normalized. The default is the current working 15 | #' directory ("."). 16 | #' @return A character string representing the normalized path. 17 | #' @examplesIf FALSE 18 | #' base <- "/my/base/directory" 19 | #' relative_path <- "subdir/file.txt" 20 | #' normalized_path <- normalize_with_base(relative_path, base) 21 | #' print(normalized_path) 22 | #' 23 | #' # Using the current working directory as the base 24 | #' relative_path <- "subdir/file.txt" 25 | #' normalized_path <- normalize_with_base(relative_path) 26 | #' print(normalized_path) 27 | #' 28 | #' # Using an absolute path 29 | #' absolute_path <- "/another/directory/subdir/file.txt" 30 | #' normalized_path <- normalize_with_base(absolute_path) 31 | #' print(normalized_path) 32 | #' 33 | #' # Using a path starting with ~ 34 | #' home_path <- "~/subdir/file.txt" 35 | #' normalized_path <- normalize_with_base(home_path) 36 | #' print(normalized_path) 37 | #' @noRd 38 | normalize_with_base <- function(path, base = ".") { 39 | # Expand any ~ in the path to the full home directory path 40 | path <- path.expand(path) 41 | 42 | # Check if the path is absolute 43 | if (!grepl("^(/|[a-zA-Z]:)", path)) { 44 | # Combine the base and the relative path if the path is not absolute 45 | path <- file.path(base, path) 46 | } 47 | 48 | # Normalize the path 49 | normalized_path <- normalizePath(path, winslash = "/", mustWork = FALSE) 50 | 51 | return(normalized_path) 52 | } 53 | -------------------------------------------------------------------------------- /R/renv.R: -------------------------------------------------------------------------------- 1 | #' Get renv status 2 | #' @noRd 3 | 4 | renv_status <- function() { 5 | rlang::check_installed("renv") 6 | 7 | msg <- utils::capture.output(status <- renv::status()) 8 | 9 | structure( 10 | list(message = msg, status = status), 11 | class = c("whirl_renv_status", "list") 12 | ) 13 | } 14 | 15 | #' @noRd 16 | 17 | print.whirl_renv_status <- function(x, ...) { 18 | x$message |> 19 | cat(sep = "\n") 20 | 21 | return(invisible(x)) 22 | } 23 | 24 | #' @noRd 25 | 26 | knit_print_whirl_renv_status <- function(x, ...) { 27 | if (!length(x$status$lockfile$Packages)) { 28 | renv_note <- "warning" 29 | renv_title <- "renv not used" 30 | renv_message <- NULL 31 | } else if (x$status$synchronized) { 32 | renv_note <- "tip" 33 | renv_title <- "renv synchronized" 34 | renv_message <- x$message 35 | } else { 36 | renv_note <- "important" 37 | renv_title <- "renv out of sync" 38 | renv_message <- x$message |> 39 | renv_message_table() |> 40 | renv_message_headers() 41 | } 42 | 43 | quarto_callout( 44 | text = renv_message, 45 | title = renv_title, 46 | type = renv_note, 47 | collapse = if (!is.null(renv_message)) { 48 | TRUE 49 | } else { 50 | NULL 51 | } 52 | ) 53 | } 54 | 55 | #' Format renv message with markdown table. 56 | #' Used when packages are in inconsistent state only. 57 | #' @noRd 58 | 59 | renv_message_table <- function(renv_message) { 60 | i <- grepl(pattern = "^ +", x = renv_message) |> 61 | which() 62 | 63 | if (!length(i)) { 64 | return(renv_message) 65 | } 66 | 67 | renv_message[i] <- gsub( 68 | pattern = "( |$)(?! )", 69 | replacement = "|", 70 | x = renv_message[i], 71 | perl = TRUE 72 | ) 73 | 74 | j <- i[[1]] 75 | 76 | c( 77 | utils::head(renv_message, j), 78 | gsub(pattern = "[^|]", replacement = "-", x = renv_message[j]), 79 | utils::tail(renv_message, -j) 80 | ) 81 | } 82 | 83 | #' Bump renv status headers down to header 3 84 | #' @noRd 85 | 86 | renv_message_headers <- function(renv_message) { 87 | gsub(pattern = "^#", replacement = "###", x = renv_message) 88 | } 89 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: whirl 2 | Title: Log Execution of Scripts 3 | Version: 0.3.1.9001 4 | Authors@R: c( 5 | person("Aksel", "Thomsen", , "oath@novonordisk.com", role = c("aut", "cre")), 6 | person("Lovemore", "Gakava", , "lvgk@novonordisk.com", role = "aut"), 7 | person("Cervan", "Girard", , "cgid@novonordisk.com", role = "aut"), 8 | person("Kristian", "Troejelsgaard", , "ktqn@novonordisk.com", role = "aut"), 9 | person("Steffen Falgreen", "Larsen", , "sffl@novonordisk.com", role = "aut"), 10 | person("Vladimir", "Obucina", , "vlob@novonordisk.com", role = "aut"), 11 | person("Michael", "Svingel", , "wymz@novonordisk.com", role = "aut"), 12 | person("Skander", "Mulder", , "sktf@novonordisk.com", role = "aut"), 13 | person("Oliver", "Lundsgaard", , "ovlr@novonordisk.com", role = "aut"), 14 | person("Novo Nordisk A/S", role = "cph") 15 | ) 16 | Description: Logging of scripts suitable for clinical trials using 17 | 'Quarto' to create nice human readable logs. 'whirl' enables 18 | execution of scripts in batch, while simultaneously creating logs for 19 | the execution of each script, and providing an overview summary log of 20 | the entire batch execution. 21 | License: Apache License (>= 2) 22 | URL: https://novonordisk-opensource.github.io/whirl/, 23 | https://github.com/novonordisk-opensource/whirl 24 | BugReports: https://github.com/NovoNordisk-OpenSource/whirl/issues 25 | Depends: 26 | R (>= 4.1) 27 | Imports: 28 | callr, 29 | cli, 30 | dplyr, 31 | jsonlite, 32 | kableExtra, 33 | knitr, 34 | purrr, 35 | quarto, 36 | R6 (>= 2.4.0), 37 | reticulate, 38 | rlang, 39 | sessioninfo, 40 | stringr, 41 | tibble, 42 | unglue, 43 | utils, 44 | withr, 45 | yaml, 46 | zephyr (>= 0.1.1) 47 | Suggests: 48 | ggplot2, 49 | renv, 50 | rstudioapi, 51 | testthat (>= 3.0.0), 52 | usethis 53 | VignetteBuilder: 54 | knitr 55 | Config/testthat/edition: 3 56 | Encoding: UTF-8 57 | Roxygen: list(markdown = TRUE) 58 | RoxygenNote: 7.3.2 59 | SystemRequirements: Quarto command line tool 60 | (). 61 | -------------------------------------------------------------------------------- /tests/testthat/test-enrich_input.R: -------------------------------------------------------------------------------- 1 | test_that("Enrich input works as expected", { 2 | # Find all R programs 3 | 4 | enriched <- test_script("_whirl_r_programs.yaml") |> 5 | enrich_input() |> 6 | expect_type("list") |> 7 | expect_length(1) 8 | 9 | enriched[[1]] |> 10 | expect_type("list") |> 11 | expect_length(2) |> 12 | expect_named(c("name", "paths")) |> 13 | lapply(expect_type, "character") |> 14 | invisible() 15 | 16 | # Unnamed steps 17 | 18 | test_script("_whirl_unnamed.yaml") |> 19 | enrich_input() |> 20 | expect_type("list") |> 21 | expect_length(3) |> 22 | vapply(FUN = \(x) x$name, FUN.VALUE = character(1)) |> 23 | expect_equal( 24 | c("Named step", "Step 2", "Step 3") 25 | ) 26 | 27 | # File input 28 | 29 | enriched <- test_script("success.R") |> 30 | enrich_input() |> 31 | expect_type("list") |> 32 | expect_length(1) 33 | 34 | enriched[[1]] |> 35 | expect_type("list") |> 36 | expect_length(2) |> 37 | expect_named(c("name", "paths")) |> 38 | unlist() |> 39 | expect_equal( 40 | c( 41 | name = "Step 1", 42 | paths = test_script("success.R") 43 | ) 44 | ) 45 | 46 | # Pruning a config file 47 | 48 | test_script("_whirl.yaml") |> 49 | enrich_input(steps = "Second step") |> 50 | expect_type("list") |> 51 | expect_length(1) |> 52 | vapply(FUN = \(x) x$name, FUN.VALUE = character(1)) |> 53 | expect_equal("Second step") 54 | 55 | test_script("_whirl.yaml") |> 56 | enrich_input(steps = c("First step", "Second step")) |> 57 | expect_type("list") |> 58 | expect_length(2) |> 59 | vapply(FUN = \(x) x$name, FUN.VALUE = character(1)) |> 60 | expect_equal(c("First step", "Second step")) 61 | 62 | # Expected error when input is a directory 63 | 64 | test_script("") |> 65 | enrich_input() |> 66 | expect_error() 67 | 68 | # Evaluate expressions in yaml 69 | 70 | test_script("_whirl_expression.yaml") |> 71 | enrich_input() |> 72 | expect_type("list") |> 73 | expect_length(1) |> 74 | vapply(FUN = \(x) x$name, FUN.VALUE = character(1)) |> 75 | expect_match(regexp = format(Sys.Date())) 76 | }) 77 | -------------------------------------------------------------------------------- /inst/WORDLIST: -------------------------------------------------------------------------------- 1 | addin 2 | addins 3 | Aksel 4 | arning 5 | asis 6 | aut 7 | autoupdate0 8 | biocompute 9 | Biocompute 10 | Bioinformatics 11 | callout 12 | callr 13 | Cervan 14 | cgid 15 | chdir 16 | cli 17 | colour 18 | colnames 19 | commonmark 20 | compats 21 | Compats 22 | config 23 | Config 24 | cph 25 | CRAN 26 | cre 27 | cyclocomp 28 | dev 29 | Devskim 30 | dir 31 | dontrun 32 | dplyr 33 | dset 34 | emph 35 | ENOENT 36 | ENOTDIR 37 | envir 38 | ENXIO 39 | Falgreen 40 | fileext 41 | Finalise 42 | funct 43 | Gakava 44 | getenv 45 | getframe 46 | getOption 47 | getwd 48 | gfm 49 | ggplot 50 | ggsave 51 | Girard 52 | github 53 | globbing 54 | grepl 55 | grpvar 56 | gsub 57 | GxP 58 | hjust 59 | https 60 | httr 61 | initialise 62 | ìnst 63 | io 64 | jsonlite 65 | kable 66 | kableExtra 67 | knitr 68 | Kristian 69 | ktqn 70 | lapply 71 | lintr 72 | loadedpath 73 | loadedversion 74 | Lovemore 75 | Lundsgaard 76 | lvgk 77 | markua 78 | md 79 | mdfmt 80 | mdformats 81 | mediatype 82 | multiarch 83 | namespacing 84 | nchars 85 | ncol 86 | newname 87 | nocov 88 | nodename 89 | Nordisk 90 | notangle 91 | Novo 92 | novonordisk 93 | NovoNordisk 94 | nrow 95 | numpy 96 | Obucina 97 | Oliver 98 | ondiskversion 99 | openat 100 | opensource 101 | OpenSource 102 | overwritable 103 | pandoc 104 | parametrics 105 | pkgdown 106 | pkgload 107 | pkgname 108 | pkgs 109 | PKGS 110 | pkgversion 111 | pmap 112 | Posix 113 | pre 114 | purrr 115 | Rbuildignore 116 | rdname 117 | README 118 | renv 119 | RENV 120 | Renviron 121 | reticulate 122 | rlang 123 | rlist 124 | rmarkdown 125 | roxygen 126 | Roxygen 127 | Roxygenize 128 | RoxygenNote 129 | Rproj 130 | rstudio 131 | rstudioapi 132 | seealso 133 | sessioninfo 134 | setenv 135 | sffl 136 | Skander 137 | skyblue 138 | Steffen 139 | stopifnot 140 | stringr 141 | succes 142 | succesfully 143 | Svingel 144 | Sweave 145 | Sys 146 | sysname 147 | testthat 148 | testuser 149 | Thomsen 150 | tibble 151 | tidyr 152 | tidyverse 153 | tmpdir 154 | tmpf 155 | Troejelsgaard 156 | unglue 157 | ungroup 158 | unlist 159 | unnest 160 | urandom 161 | usethis 162 | vapply 163 | vctr 164 | vctrs 165 | VignetteBuilder 166 | vjust 167 | vlob 168 | winslash 169 | withr 170 | WORDLIST 171 | yaml 172 | 173 | -------------------------------------------------------------------------------- /R/use_whirl.R: -------------------------------------------------------------------------------- 1 | #' Use whirl 2 | #' 3 | #' @description 4 | #' 5 | #' Utility function to setup execution with whirl in your project: 6 | #' 7 | #' 1. Creates configuration file (default `_whirl.yml`) 8 | #' 1. Updates `.gitignore` to not include log files 9 | #' 10 | #' See `vignette("whirl")` for how to specify paths inside the 11 | #' configuration file. 12 | #' 13 | #' @param config_file Path to the whirl config file, relative to the project 14 | #' @export 15 | 16 | use_whirl <- function(config_file = "_whirl.yml") { 17 | cli::cli_h1("Setup {.pkg whirl}") 18 | 19 | use_template("_whirl.yml", config_file = config_file) 20 | 21 | cli::cli_alert_info("Run project with {.run whirl::run(\"{config_file}\")}") 22 | 23 | cli::cli_h1("") 24 | 25 | return(invisible(config_file)) 26 | } 27 | 28 | #' Use whirl to create biocompute logs 29 | #' 30 | #' @description 31 | #' 32 | #' Utility function to setup execution with whirl in your project suitable for 33 | #' creating biocompute logs with `write_biocompute()`: 34 | #' 35 | #' 1. Creates configuration file (default `_whirl.yml`) with default values for the `biocompute` metadata. 36 | #' 1. Updates `.gitignore` to not include log files 37 | #' 38 | #' See `vignette("whirl")` for how to specify paths inside the 39 | #' configuration file. 40 | #' 41 | #' @param config_file Path to the whirl config file, relative to the project 42 | #' @param parametrics_file Path to the biocompute parametrics file, relative to the project 43 | #' @export 44 | 45 | use_biocompute <- function(config_file = "_whirl.yml", parametrics_file = "_parametrics.yml") { 46 | cli::cli_h1("Setup {.pkg whirl} and biocompute logs") 47 | 48 | use_template(template = "_biocompute.yml", config_file = config_file) 49 | use_template(template = "_parametrics.yml", config_file = parametrics_file) 50 | 51 | cli::cli_alert_info( 52 | "Run project with {.run whirl::run(\"{config_file}\")} and {.run whirl::write_biocompute()}" 53 | ) 54 | 55 | cli::cli_h1("") 56 | 57 | return(invisible(config_file)) 58 | } 59 | 60 | #' @noRd 61 | use_template <- function(template, config_file) { 62 | rlang::check_installed("usethis") 63 | 64 | usethis::use_git_ignore(ignores = "*_log.(html|json|md)") 65 | 66 | config <- system.file("use_whirl", template, package = "whirl") |> 67 | readLines() 68 | 69 | config_file_path <- usethis::proj_path(config_file) 70 | usethis::write_over(path = config_file_path, lines = config) 71 | usethis::edit_file(path = config_file_path) 72 | } 73 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | # All available hooks: https://pre-commit.com/hooks.html 2 | # R specific hooks: https://github.com/lorenzwalthert/precommit 3 | repos: 4 | - repo: https://github.com/lorenzwalthert/precommit 5 | rev: v0.4.3.9003 6 | hooks: 7 | # - id: style-files 8 | # args: [--style_pkg=styler, --style_fun=tidyverse_style] 9 | - id: roxygenize 10 | additional_dependencies: 11 | - dplyr 12 | - ggplot2 13 | - httr 14 | - jsonlite 15 | - kableExtra 16 | - quarto 17 | - rmarkdown 18 | - sessioninfo 19 | - tibble 20 | - tidyr 21 | - unglue 22 | - zephyr 23 | - reticulate 24 | - id: use-tidy-description 25 | - id: spell-check 26 | exclude: > 27 | (?x)^( 28 | .*\.[rR]| 29 | .*\.feather| 30 | .*\.jpeg| 31 | .*\.pdf| 32 | .*\.png| 33 | .*\.py| 34 | .*\.RData| 35 | .*\.rds| 36 | .*\.Rds| 37 | .*\.Rproj| 38 | .*\.sh| 39 | (.*/|)\.gitignore| 40 | (.*/|)\.gitlab-ci\.yml| 41 | (.*/|)\.lintr| 42 | (.*/|)\.pre-commit-.*| 43 | (.*/|)\.Rbuildignore| 44 | (.*/|)\.Renviron| 45 | (.*/|)\.Rprofile| 46 | (.*/|)\.travis\.yml| 47 | (.*/|)appveyor\.yml| 48 | (.*/|)NAMESPACE| 49 | (.*/|)renv/settings\.dcf| 50 | (.*/|)renv\.lock| 51 | (.*/|)WORDLIST| 52 | \.github/workflows/.*| 53 | data/.*| 54 | )$ 55 | # - id: lintr 56 | - id: readme-rmd-rendered 57 | - id: parsable-R 58 | - id: no-browser-statement 59 | # - id: no-print-statement 60 | - id: no-debug-statement 61 | - id: deps-in-desc 62 | - id: pkgdown 63 | - repo: https://github.com/pre-commit/pre-commit-hooks 64 | rev: v5.0.0 65 | hooks: 66 | - id: check-added-large-files 67 | args: ['--maxkb=200'] 68 | - id: file-contents-sorter 69 | files: '^\.Rbuildignore$' 70 | - id: end-of-file-fixer 71 | exclude: '\.Rd' 72 | - repo: https://github.com/pre-commit-ci/pre-commit-ci-config 73 | rev: v1.6.1 74 | hooks: 75 | # Only required when https://pre-commit.ci is used for config validation 76 | - id: check-pre-commit-ci-config 77 | - repo: local 78 | hooks: 79 | - id: forbid-to-commit 80 | name: Don't commit common R artifacts 81 | entry: Cannot commit .Rhistory, .RData, .Rds or .rds. 82 | language: fail 83 | files: '\.(Rhistory|RData|Rds|rds)$' 84 | ci: 85 | autoupdate_schedule: monthly 86 | skip: [pkgdown] -------------------------------------------------------------------------------- /man/whirl-options-params.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/whirl-options.R 3 | \name{whirl-options-params} 4 | \alias{whirl-options-params} 5 | \title{Internal parameters for reuse in functions} 6 | \arguments{ 7 | \item{verbosity_level}{Verbosity level for functions in whirl. 8 | See \link[zephyr:verbosity_level]{zephyr::verbosity_level} for details.. Default: \code{NA_character_}.} 9 | 10 | \item{out_formats}{Which log format(s) to produce. Possibilities are \code{html}, 11 | \code{json}, and markdown formats: \code{gfm}, \code{commonmark}, and \code{markua}.. Default: \code{"html"}.} 12 | 13 | \item{track_files}{Should files read and written be tracked? 14 | Currently only supported on Linux.. Default: \code{FALSE}.} 15 | 16 | \item{check_renv}{Should the projects renv status be checked?. Default: \code{FALSE}.} 17 | 18 | \item{track_files_discards}{List of file naming patterns not be tracked when track_files = TRUE. Default: \code{c("^/lib", "^/etc", "^/lib64", "^/usr", "^/var", "^/opt", "^/sys", "^/proc", "^/tmp", "^/null", "^/urandom", "^/.cache")}.} 19 | 20 | \item{track_files_keep}{List of file naming patterns always to be tracked when 21 | track_files = TRUE. Default: \code{NULL}.} 22 | 23 | \item{approved_packages}{List of approved R packages and their version in the format: \{name\}@\{version\}. Default: \code{NULL}.} 24 | 25 | \item{approved_python_packages}{List of approved Python packages and their version in the format: \{name\}@\{version\}. Default: \code{NULL}.} 26 | 27 | \item{n_workers}{Number of simultaneous workers used in the run function. 28 | A maximum of 128 workers is allowed.. Default: \code{1}.} 29 | 30 | \item{log_dir}{The output directory of the log files. Default is the folder of 31 | the executed script. log_dir can be a path as a character or it can be a 32 | function that takes the script path as input and returns the log directory. 33 | For more information see the examples of \code{run()} or \code{vignette('whirl')}.. Default: \code{function (x) dirname(x)}.} 34 | 35 | \item{execute_dir}{The working directory of the process executing each script. 36 | Default us to execute R files from the working directory when calling \code{run()} 37 | and all other functions from the directory of the script. To change provide 38 | a character path (used for all scripts) or a function that takes the script 39 | as input and returns the execution directory.. Default: \code{NULL}.} 40 | 41 | \item{wait_timeout}{Timeout for waiting for the R process from callr::r_session to 42 | start, in milliseconds.. Default: \code{9000}.} 43 | 44 | \item{environment_secrets}{Secret environment variable patterns. 45 | Any variables matching will not be included in the logs.. Default: \code{c("BASH_FUNC", "_SSL_CERT", "_KEY", "_PAT", "_TOKEN")}.} 46 | } 47 | \description{ 48 | Internal parameters for reuse in functions 49 | } 50 | \details{ 51 | See \link{whirl-options} for more information. 52 | } 53 | \keyword{internal} 54 | -------------------------------------------------------------------------------- /R/enrich_input.R: -------------------------------------------------------------------------------- 1 | #' Converting input to a standardized list with that contain step names and 2 | #' files paths 3 | #' 4 | #' @param input Can be a vector, a list or a whirl config file. 5 | #' @param steps A filter argument for selecting specific steps that should be 6 | #' executed 7 | #' @return A list 8 | #' @noRd 9 | enrich_input <- function( 10 | input, # nolint: cyclocomp_linter 11 | steps = NULL 12 | ) { 13 | # Characterize the input 14 | is_config_file <- 15 | !is.list(input) && 16 | any(grepl("yaml|yml", get_file_ext(input))) 17 | is_character <- is.character(input) 18 | 19 | # Read yaml and extract list 20 | if (is_config_file && length(input) == 1) { 21 | root_dir <- dirname(input) 22 | config_whirl <- yaml::read_yaml(file = input, eval.expr = TRUE) 23 | got <- config_whirl$"steps" 24 | } else { 25 | root_dir <- getwd() 26 | } 27 | 28 | # Convert vector to list 29 | if (is_character && !is_config_file) { 30 | got <- list(input) 31 | } 32 | 33 | if (is.list(input)) { 34 | got <- input 35 | } 36 | 37 | names <- list() 38 | paths <- list() 39 | for (i in seq_along(got)) { 40 | # Identify the step names - if none, then create a default name 41 | check_name <- any(grepl("name", names(got[[i]]))) 42 | if (check_name) { 43 | names[[i]] <- got[[i]][[which(grepl("name", names(got[[i]])))]] 44 | } else { 45 | names[[i]] <- paste0("Step ", i) 46 | } 47 | 48 | # Identify the paths 49 | check_path <- any(grepl("path", names(got[[i]]))) 50 | if (check_path) { 51 | paths[[i]] <- got[[i]][[which(grepl("path", names(got[[i]])))]] 52 | } else { 53 | paths[[i]] <- got[[i]] 54 | } 55 | } 56 | 57 | # Normalizing the paths and read regexp 58 | paths <- read_glob( 59 | input = paths, 60 | root_dir = root_dir 61 | ) 62 | 63 | # If input include one or more directories 64 | paths_is_dir <- unlist(paths) 65 | paths_is_dir <- paths_is_dir[dir.exists(paths_is_dir)] 66 | if (length(paths_is_dir)) { 67 | cli::cli_abort( 68 | "The input argument in run() does not accept directories, 69 | please specify which file(s) in {.val {paths_is_dir}} that should be executed" 70 | ) 71 | } 72 | 73 | # Merge the names and paths into a list 74 | out <- mapply(list, "name" = names, "paths" = paths, SIMPLIFY = FALSE) 75 | 76 | # Get the step names 77 | step_names <- unlist(out)[grepl("name", names(unlist(out)))] 78 | 79 | # Prune the list when steps have been selected 80 | if (!is.null(steps)) { 81 | id <- which(step_names %in% steps) 82 | # Update the vector of names 83 | step_names <- step_names[id] 84 | # Update the list 85 | out <- out[id] 86 | } 87 | 88 | # Output the steps that will be executed 89 | message_ <- c( 90 | "The following steps will be executed", 91 | step_names |> rlang::set_names("*") 92 | ) 93 | 94 | zephyr::msg_verbose(message = message_, msg_fun = cli::cli_inform) 95 | 96 | invisible(out) 97 | } 98 | -------------------------------------------------------------------------------- /R/utils.R: -------------------------------------------------------------------------------- 1 | # Getting file extension 2 | get_file_ext <- function(file_paths) { 3 | vapply( 4 | X = file_paths, 5 | FUN = function(file_path) { 6 | file_name <- basename(file_path) 7 | file_parts <- strsplit(file_name, "\\.")[[1]] 8 | file_extension <- ifelse( 9 | length(file_parts) == 1, 10 | "", 11 | utils::tail(file_parts, 1) 12 | ) 13 | return(file_extension) 14 | }, 15 | FUN.VALUE = character(1), 16 | USE.NAMES = FALSE 17 | ) 18 | } 19 | 20 | # Function to scale a numeric vector to percentage 21 | scale_to_percent <- function(x, digits = 2) { 22 | percent_values <- x * 100 23 | formatted_percent_values <- sprintf( 24 | fmt = paste0("%.", digits, "f%%"), 25 | percent_values 26 | ) 27 | return(formatted_percent_values) 28 | } 29 | 30 | # Function to replace NA values with the last non-NA value 31 | replace_na_with_last <- function(x) { 32 | last_non_na <- NA 33 | for (i in seq_along(x)) { 34 | if (!is.na(x[i])) { 35 | last_non_na <- x[i] 36 | } else if (!is.na(last_non_na)) { 37 | x[i] <- last_non_na 38 | } 39 | } 40 | return(x) 41 | } 42 | 43 | # Function to get relative path 44 | path_rel <- function(path, start = ".") { 45 | # Normalize the paths 46 | path <- normalizePath(path, winslash = "/", mustWork = FALSE) 47 | start <- normalizePath(start, winslash = "/", mustWork = FALSE) 48 | 49 | # Split the paths into components 50 | path_parts <- strsplit(path, "/")[[1]] 51 | start_parts <- strsplit(start, "/")[[1]] 52 | 53 | # Check if the paths are the same 54 | if (identical(path_parts, start_parts)) { 55 | return(".") 56 | } 57 | 58 | # Check if the start is a subdirectory of the path 59 | if (all(path_parts %in% start_parts)) { 60 | up_levels <- length(start_parts) - length(path_parts) 61 | relative_path <- c(rep("..", up_levels)) 62 | return(paste(relative_path, collapse = "/")) 63 | } 64 | 65 | # Find the common prefix length 66 | common_length <- 0 67 | for (i in seq_along(start_parts)) { 68 | if (i > length(path_parts) || start_parts[i] != path_parts[i]) { 69 | break 70 | } 71 | common_length <- common_length + 1 72 | } 73 | 74 | # Compute the relative path 75 | up_levels <- length(start_parts) - common_length 76 | down_levels <- path_parts[(common_length + 1):length(path_parts)] 77 | 78 | relative_path <- c(rep("..", up_levels), down_levels) 79 | 80 | # Join the components into a single path 81 | relative_path <- paste(relative_path, collapse = "/") 82 | 83 | return(relative_path) 84 | } 85 | 86 | # Easily create cli links (only when possible) 87 | create_cli_links <- function(text, href, .envir = parent.frame()) { 88 | if (!cli::is_dynamic_tty()) { 89 | return( 90 | cli::format_message(message = text, .envir = .envir) 91 | ) 92 | } 93 | 94 | vapply( 95 | X = paste0("{.href [", text, "](file://", href, ")}"), 96 | FUN = cli::format_message, 97 | FUN.VALUE = character(1), 98 | .envir = .envir, 99 | USE.NAMES = FALSE 100 | ) 101 | } 102 | -------------------------------------------------------------------------------- /R/render_summary.R: -------------------------------------------------------------------------------- 1 | #' Render data.frame into a summary.html file 2 | #' 3 | #' @param input The input data.frame that should be rendered into a summary.html 4 | #' file 5 | #' @param summary_file A character string specifying the path where the summary 6 | #' HTML file should be saved. Defaults to `"summary.html"`. 7 | #' 8 | #' @return Takes a data.frame as input and returns a log in html format 9 | #' @noRd 10 | render_summary <- function(input, summary_file = "summary.html") { 11 | summary_qmd <- withr::local_tempfile( 12 | lines = readLines(system.file("documents/summary.qmd", package = "whirl")), 13 | fileext = ".qmd" 14 | ) 15 | 16 | summary_log_html <- gsub( 17 | pattern = "qmd", 18 | replacement = "html", 19 | x = basename(summary_qmd) 20 | ) 21 | 22 | summary_dir_f <- normalizePath(dirname(summary_file), winslash = "/") 23 | my_summaries <- knit_print_whirl_summary_info(input, summary_dir_f) 24 | withr::with_dir( 25 | tempdir(), 26 | { 27 | tryCatch( 28 | { 29 | quarto::quarto_render( 30 | input = summary_qmd, 31 | output_format = "html", 32 | execute_params = list( 33 | summary_df = my_summaries 34 | ), 35 | quiet = TRUE 36 | ) 37 | }, 38 | error = function(e) { 39 | cli::cli_abort( 40 | "Failed to render summary file {.file {summary_dir_f}}: 41 | {e$message}" 42 | ) 43 | } 44 | ) 45 | } 46 | ) 47 | 48 | # Create requested outputs 49 | tryCatch( 50 | { 51 | file.copy( 52 | from = file.path(dirname(summary_qmd), summary_log_html), 53 | to = summary_file, 54 | overwrite = TRUE 55 | ) 56 | }, 57 | error = function(e) { 58 | warning("File copy failed: ", e$message) 59 | FALSE 60 | } 61 | ) 62 | } 63 | 64 | #' @noRd 65 | knit_print_whirl_summary_info <- function(x, path_rel_start, ...) { 66 | hold <- x |> 67 | data.frame(check.names = FALSE) 68 | 69 | row.names(hold) <- NULL 70 | ncols <- ncol(hold) 71 | 72 | if (grepl("rstudio_cloud", Sys.getenv("R_CONFIG_ACTIVE"))) { 73 | formatted <- file.path("/file_show?path=", hold$"Hyperlink") 74 | } else { 75 | formatted <- lapply(hold$"Hyperlink", path_rel, start = path_rel_start) |> 76 | unlist() |> 77 | file.path() 78 | } 79 | 80 | hold$Hyperlink <- paste0(sprintf( 81 | '%s', 82 | formatted, 83 | "HTML Log" 84 | )) 85 | 86 | knitr::kable(hold, format = "html", escape = FALSE) |> 87 | kableExtra::column_spec( 88 | 1:ncols, 89 | background = ifelse( 90 | hold[["Status"]] == "error", 91 | "#fceeef", 92 | ifelse( 93 | hold[["Status"]] == "warning", 94 | "#fffaea", 95 | ifelse( 96 | hold[["Status"]] == "success", 97 | "#ebf5f1", 98 | ifelse(hold[["Status"]] == "skip", "#94CBFF", "white") 99 | ) 100 | ) 101 | ) 102 | ) |> 103 | kableExtra::kable_styling( 104 | bootstrap_options = "striped", 105 | full_width = TRUE 106 | ) 107 | } 108 | -------------------------------------------------------------------------------- /R/custom_logging.R: -------------------------------------------------------------------------------- 1 | #' Helper function to log custom messages 2 | #' 3 | #' Useful for e.g. read and write operations on databases etc. 4 | #' that are not automatically captured. 5 | #' 6 | #' The default environment variable `WHIRL_LOG_MSG` is set in the session used 7 | #' to log scripts, and input is automatically captured in the resulting log. 8 | #' 9 | #' If run outside of whirl, meaning when the above environment variable is 10 | #' unset, the operations are streamed to `stdout()`. By default the console. 11 | #' 12 | #' @name custom_logging 13 | #' @param file [character()] description of the file that was read, written or 14 | #' deleted. 15 | #' @param log [character()] path to the log file. 16 | #' @examples 17 | #' # Stream logs to console since `WHIRL_LOG_MSG` is not set: 18 | #' log_read("my/folder/input.txt") 19 | #' log_write("my/folder/output.txt") 20 | #' log_delete("my/folder/old_output.txt") 21 | NULL 22 | 23 | #' @rdname custom_logging 24 | #' @export 25 | log_read <- function(file, log = Sys.getenv("WHIRL_LOG_MSG")) { 26 | write_to_log(file, "read", log) 27 | } 28 | 29 | #' @rdname custom_logging 30 | #' @export 31 | log_write <- function(file, log = Sys.getenv("WHIRL_LOG_MSG")) { 32 | write_to_log(file, "write", log) 33 | } 34 | 35 | #' @rdname custom_logging 36 | #' @export 37 | log_delete <- function(file, log = Sys.getenv("WHIRL_LOG_MSG")) { 38 | write_to_log(file, "delete", log) 39 | } 40 | 41 | #' @noRd 42 | write_to_log <- function( 43 | file, 44 | type = c("read", "write", "delete"), 45 | log = Sys.getenv("WHIRL_LOG_MSG")) { 46 | type <- rlang::arg_match(type) 47 | stopifnot(rlang::is_string(file)) 48 | stopifnot(rlang::is_string(log)) 49 | 50 | x <- log_df( 51 | type = type, 52 | file = file 53 | ) 54 | 55 | if (log == "") { 56 | jsonlite::stream_out(x = x, verbose = FALSE) 57 | } else { 58 | con <- file(description = log, open = "a") 59 | jsonlite::stream_out(x = x, con = con, verbose = FALSE) 60 | close(con) 61 | } 62 | } 63 | 64 | #' @noRd 65 | read_from_log <- function(log = Sys.getenv("WHIRL_LOG_MSG"), track_files) { 66 | if (log == "" || !file.exists(log)) { 67 | if (!track_files) { 68 | return(NULL) 69 | } 70 | return(log_df()) 71 | } 72 | 73 | con <- file(description = log, open = "r") 74 | log_info <- jsonlite::stream_in(con, verbose = FALSE) 75 | close(con) 76 | log_info$time <- as.POSIXct(log_info$time) 77 | return(log_info) 78 | } 79 | 80 | #' @noRd 81 | log_df <- function(type = character(), file = character()) { 82 | data.frame( 83 | time = rep(x = Sys.time(), times = length(file)), 84 | type = type, 85 | file = file 86 | ) 87 | } 88 | 89 | #' @noRd 90 | split_log <- function(x, types = c("read", "write", "delete")) { 91 | if (is.null(x)) { 92 | return(NULL) 93 | } 94 | 95 | # Split in a tibble for each type of output 96 | x <- split(x[c("time", "file")], x$type) 97 | 98 | # Add empty table for types not reported 99 | out <- vector(mode = "list", length = length(types)) |> 100 | rlang::set_names(types) 101 | 102 | out[names(x)] <- x 103 | 104 | i <- lapply(X = out, FUN = is.null) |> 105 | unlist() |> 106 | which() 107 | 108 | out[i] <- list(log_df()[c("time", "file")]) 109 | 110 | lapply(X = out, FUN = `rownames<-`, value = NULL) 111 | } 112 | -------------------------------------------------------------------------------- /man/write_biocompute.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/biocompute.R 3 | \name{write_biocompute} 4 | \alias{write_biocompute} 5 | \title{Create biocompute logs} 6 | \usage{ 7 | write_biocompute(queue = run("_whirl.yml"), path = "bco.json", ...) 8 | } 9 | \arguments{ 10 | \item{queue}{Result from \code{run()}.} 11 | 12 | \item{path}{A character string specifying the file path to write BioCompute log to.} 13 | 14 | \item{...}{Additional arguments parsed to \code{jsonlite::write_json()}. Note always uses \code{auto_unbox = TRUE}.} 15 | } 16 | \value{ 17 | (\code{invisible}) \code{list} of the biocompute domains and their content. 18 | } 19 | \description{ 20 | BioCompute is a standard for logs of programs for for Bioinformatics Computational Analyses. 21 | 22 | The BioCompute object is a \code{json} log that can be created based on the output of \code{run()}. 23 | } 24 | \details{ 25 | The object consists of the following domains: 26 | \itemize{ 27 | \item \strong{Specifications}: 28 | \itemize{ 29 | \item \emph{spec_version}: Version of BioCompute used (`https://w3id.org/biocompute/1.3.0/``) 30 | \item \emph{object_id}: Unique project id 31 | \item \emph{type}: Your project type 32 | \item \emph{etag}: Your \code{etag} id from the BioCompute Object Portal 33 | } 34 | \item \href{https://wiki.biocomputeobject.org/index.php?title=Provenance-domain}{Provenance Domain} 35 | \itemize{ 36 | \item This is used to track the history of the BCO. Review and signatures go here. 37 | } 38 | \item \href{https://wiki.biocomputeobject.org/index.php?title=Usability-domain}{Usability Domain} 39 | \itemize{ 40 | \item This is used to improve searchability by allowing a free-text description of the BCO. 41 | \item Provide external document. 42 | } 43 | \item \href{https://wiki.biocomputeobject.org/index.php?title=Extension-domain}{Extension Domain} 44 | \itemize{ 45 | \item This is used to add any additional structured information that is not directly covered by the BCO. 46 | } 47 | \item \href{https://wiki.biocomputeobject.org/index.php?title=Description-domain}{Description Domain} 48 | \itemize{ 49 | \item Contains a structured field for the description of external references, the pipeline steps, 50 | and the relationship of I/O objects. 51 | \item Provide external document. 52 | \item \strong{Note}: Use of \code{keywords} and \code{External_Reference} entries are not yet implemented. 53 | To use fill out the entries manually after creating the BioCompute object.` 54 | } 55 | \item \href{https://wiki.biocomputeobject.org/index.php?title=Execution-domain}{Execution Domain} 56 | \itemize{ 57 | \item Contains fields for the execution of the BCO. 58 | \item \strong{Note}: Use of \code{external_data_endpoints} not implemented. Fill out manually afterwards if needed. 59 | } 60 | \item \href{https://wiki.biocomputeobject.org/index.php?title=Parametric-domain}{Parametric Domain} 61 | \itemize{ 62 | \item Represents the list of parameters customizing the computational flow which can affect 63 | the output of the calculations. 64 | } 65 | \item \href{https://wiki.biocomputeobject.org/index.php?title=Iodomain}{IO Domain} 66 | \itemize{ 67 | \item Represents the list of global input and output files created by the computational workflow. 68 | } 69 | \item \href{https://wiki.biocomputeobject.org/index.php?title=Error-domain}{Error Domain} 70 | \itemize{ 71 | \item Defines the empirical and algorithmic limits and error sources of the BCO. 72 | \item \strong{Note}: Use of this domain is not clearly defined. 73 | It is therefore always left empty in the current implementation. 74 | If you want to add content do so manually after creating the BCO. 75 | } 76 | } 77 | 78 | See the \href{https://www.biocomputeobject.org}{BioCompute Object Portal} and the 79 | \href{https://wiki.biocomputeobject.org/Main_Page}{BioCompute Objects Wiki} for more information. 80 | } 81 | -------------------------------------------------------------------------------- /R/whirl-options.R: -------------------------------------------------------------------------------- 1 | #' @name whirl-options 2 | #' @title Options for whirl 3 | #' @description 4 | #' `r zephyr::list_options(as = "markdown", .envir = "whirl")` 5 | NULL 6 | 7 | #' @title Internal parameters for reuse in functions 8 | #' @name whirl-options-params 9 | #' @eval zephyr::list_options(as = "params", .envir = "whirl") 10 | #' @details 11 | #' See [whirl-options] for more information. 12 | #' @keywords internal 13 | NULL 14 | 15 | zephyr::create_option( 16 | name = "verbosity_level", 17 | default = NA_character_, 18 | description = "Verbosity level for functions in whirl. 19 | See [zephyr::verbosity_level] for details." 20 | ) 21 | 22 | zephyr::create_option( 23 | name = "out_formats", 24 | default = "html", 25 | description = "Which log format(s) to produce. Possibilities are `html`, 26 | `json`, and markdown formats: `gfm`, `commonmark`, and `markua`." 27 | ) 28 | 29 | zephyr::create_option( 30 | name = "track_files", 31 | default = FALSE, 32 | description = "Should files read and written be tracked? 33 | Currently only supported on Linux." 34 | ) 35 | 36 | zephyr::create_option( 37 | name = "check_renv", 38 | default = FALSE, 39 | description = "Should the projects renv status be checked?" 40 | ) 41 | 42 | zephyr::create_option( 43 | name = "track_files_discards", 44 | default = c( 45 | "^/lib", 46 | "^/etc", 47 | "^/lib64", 48 | "^/usr", 49 | "^/var", 50 | "^/opt", 51 | "^/sys", 52 | "^/proc", 53 | "^/tmp", 54 | "^/null", 55 | "^/urandom", 56 | "^/.cache" 57 | ), 58 | description = "List of file naming patterns not be tracked when track_files = TRUE" 59 | ) 60 | 61 | zephyr::create_option( 62 | name = "track_files_keep", 63 | default = NULL, 64 | description = "List of file naming patterns always to be tracked when 65 | track_files = TRUE" 66 | ) 67 | 68 | zephyr::create_option( 69 | name = "approved_packages", 70 | default = NULL, 71 | description = "List of approved R packages and their version in the format: \\{name\\}@\\{version\\}" 72 | ) 73 | 74 | zephyr::create_option( 75 | name = "approved_python_packages", 76 | default = NULL, 77 | description = "List of approved Python packages and their version in the format: \\{name\\}@\\{version\\}" 78 | ) 79 | 80 | zephyr::create_option( 81 | name = "n_workers", 82 | default = 1, 83 | description = "Number of simultaneous workers used in the run function. 84 | A maximum of 128 workers is allowed." 85 | ) 86 | 87 | zephyr::create_option( 88 | name = "log_dir", 89 | default = \(x) dirname(x), 90 | description = "The output directory of the log files. Default is the folder of 91 | the executed script. log_dir can be a path as a character or it can be a 92 | function that takes the script path as input and returns the log directory. 93 | For more information see the examples of `run()` or `vignette('whirl')`." 94 | ) 95 | 96 | zephyr::create_option( 97 | name = "execute_dir", 98 | default = NULL, 99 | description = "The working directory of the process executing each script. 100 | Default us to execute R files from the working directory when calling `run()` 101 | and all other functions from the directory of the script. To change provide 102 | a character path (used for all scripts) or a function that takes the script 103 | as input and returns the execution directory." 104 | ) 105 | 106 | zephyr::create_option( 107 | name = "wait_timeout", 108 | default = 9000, 109 | description = "Timeout for waiting for the R process from callr::r_session to 110 | start, in milliseconds." 111 | ) 112 | 113 | zephyr::create_option( 114 | name = "environment_secrets", 115 | default = c( 116 | "BASH_FUNC", 117 | "_SSL_CERT", 118 | "_KEY", 119 | "_PAT", 120 | "_TOKEN" 121 | ), 122 | description = "Secret environment variable patterns. 123 | Any variables matching will not be included in the logs." 124 | ) 125 | -------------------------------------------------------------------------------- /inst/documents/dummy.qmd: -------------------------------------------------------------------------------- 1 | --- 2 | params: 3 | script: 'dev' 4 | renv: TRUE 5 | tmpdir: "." 6 | converted_qmd: "temp_script.qmd" 7 | --- 8 | 9 | ```{r} 10 | #| label: Setup 11 | #| include: false 12 | knitr::opts_chunk$set( 13 | error = TRUE, 14 | warning = TRUE, 15 | message = TRUE, 16 | echo = TRUE 17 | ) 18 | 19 | file.path(params$tmpdir, "parent_options.rds") |> 20 | readRDS() |> 21 | options() 22 | ``` 23 | 24 | ```{r} 25 | #| label: Convert R script to qmd 26 | #| eval: !expr base::grepl("\\.R$", params$script) 27 | #| echo: false 28 | # Conversion is done to avoid problems caused by the warnings thrown 29 | # by knitr::knit(), see e.g. issue #151 30 | knitr::spin( 31 | text = readLines(con = params$script, warn = FALSE), 32 | knit = FALSE, 33 | format = "qmd" 34 | ) |> 35 | writeLines( 36 | con = file.path(params$tmpdir, params$converted_qmd) 37 | ) 38 | ``` 39 | 40 | ```{r} 41 | #| label: Log R 42 | #| child: !expr file.path(params$tmpdir, params$converted_qmd) 43 | #| eval: !expr file.exists(file.path(params$tmpdir, params$converted_qmd)) 44 | #| include: !expr file.exists(file.path(params$tmpdir, params$converted_qmd)) 45 | ``` 46 | 47 | ```{r} 48 | #| label: Log Quarto 49 | #| child: !expr params$script 50 | #| eval: !expr base::grepl("\\.qmd$", params$script) 51 | #| include: !expr base::grepl("\\.qmd$", params$script) 52 | ``` 53 | 54 | ```{r} 55 | #| label: Log Rmarkdown 56 | #| child: !expr params$script 57 | #| eval: !expr base::grepl("\\.Rmd$", params$script) 58 | #| include: !expr base::grepl("\\.Rmd$", params$script) 59 | ``` 60 | 61 | ```{python} 62 | #| label: Setup Python tracking 63 | #| eval: !expr base::grepl("\\.py$", params$script) 64 | #| include: false 65 | #| error: false 66 | import sys 67 | sys.path.append(r.params["tmpdir"]) 68 | from python_modules import get_package_status 69 | get_package_status(r.params["tmpdir"]+"/"+'py_old_status.json') 70 | ``` 71 | 72 | ```{python} 73 | #| label: Log Python 74 | #| file: !expr params$script 75 | #| eval: !expr base::grepl("\\.py$", params$script) 76 | #| include: !expr base::grepl("\\.py$", params$script) 77 | ``` 78 | 79 | ```{python} 80 | #| label: Log Python modules 81 | #| eval: !expr base::grepl("\\.py$", params$script) 82 | #| include: false 83 | #| error: false 84 | get_package_status(r.params["tmpdir"]+"/"+'py_new_status.json') 85 | ``` 86 | 87 | ```{r} 88 | #| label: Python modules 89 | #| eval: !expr base::grepl("\\.py$", params$script) 90 | #| include: false 91 | #| error: false 92 | saveRDS( 93 | object = system('python -m pip list -v', intern = TRUE), 94 | file = file.path(params$tmpdir, "py_pip_list.rds") 95 | ) 96 | ``` 97 | 98 | ```{r} 99 | #| label: renv status 100 | #| include: false 101 | #| error: false 102 | if (is.character(params$renv) && params$renv == "yes" || 103 | is.logical(params$renv) && params$renv) { 104 | saveRDS( 105 | object = whirl:::renv_status(), 106 | file = file.path(params$tmpdir, "renv_status.rds") 107 | ) 108 | } 109 | ``` 110 | 111 | ```{r} 112 | #| label: Log options and environment 113 | #| include: false 114 | #| error: false 115 | saveRDS( 116 | object = options(), 117 | file = file.path(params$tmpdir, "options.rds") 118 | ) 119 | 120 | saveRDS( 121 | object = Sys.getenv(), 122 | file = file.path(params$tmpdir, "environment.rds") 123 | ) 124 | ``` 125 | 126 | ```{r} 127 | #| label: Session info 128 | #| include: false 129 | #| error: false 130 | info <- list( # Due to https://github.com/r-lib/sessioninfo/issues/96 131 | platform = sessioninfo::session_info(info = "platform")[["platform"]], 132 | packages = sessioninfo::session_info(info = "packages")[["packages"]], 133 | python = sessioninfo::session_info(info = "python")[["python"]] 134 | ) 135 | 136 | if (!"quarto" %in% names(info$platform)) { # Due to older versions of sessioninfo 137 | info$platform$quarto <- paste(quarto::quarto_version(), "@", quarto::quarto_path()) 138 | } 139 | 140 | saveRDS( 141 | object = info, 142 | file = file.path(params$tmpdir, "session_info.rds") 143 | ) 144 | ``` 145 | -------------------------------------------------------------------------------- /man/run.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/run.R 3 | \name{run} 4 | \alias{run} 5 | \title{Execute single or multiple R, R Markdown, and Quarto scripts} 6 | \usage{ 7 | run( 8 | input = "_whirl.yml", 9 | steps = NULL, 10 | summary_file = "summary.html", 11 | n_workers = zephyr::get_option("n_workers", "whirl"), 12 | check_renv = zephyr::get_option("check_renv", "whirl"), 13 | track_files = zephyr::get_option("track_files", "whirl"), 14 | out_formats = zephyr::get_option("out_formats", "whirl"), 15 | log_dir = zephyr::get_option("log_dir", "whirl") 16 | ) 17 | } 18 | \arguments{ 19 | \item{input}{A character vector of file path(s) to R, R Markdown, Quarto 20 | scripts, or files in a folder using regular expression, or to to a whirl 21 | config file. The input can also be structured in a list where each element 22 | will be executed sequentially, while scripts within each element can be 23 | executed in parallel.} 24 | 25 | \item{steps}{An optional argument that can be used if only certain steps 26 | within a config files (or list) is to be executed. Should be equivalent to 27 | the names of the steps found in the config file. If kept as NULL (default) 28 | then all steps listed in the config file will be executed.} 29 | 30 | \item{summary_file}{A character string specifying the file path where the 31 | summary log will be stored.} 32 | 33 | \item{n_workers}{Number of simultaneous workers used in the run function. 34 | A maximum of 128 workers is allowed.. Default: \code{1}.} 35 | 36 | \item{check_renv}{Should the projects renv status be checked?. Default: \code{FALSE}.} 37 | 38 | \item{track_files}{Should files read and written be tracked? 39 | Currently only supported on Linux.. Default: \code{FALSE}.} 40 | 41 | \item{out_formats}{Which log format(s) to produce. Possibilities are \code{html}, 42 | \code{json}, and markdown formats: \code{gfm}, \code{commonmark}, and \code{markua}.. Default: \code{"html"}.} 43 | 44 | \item{log_dir}{The output directory of the log files. Default is the folder of 45 | the executed script. log_dir can be a path as a character or it can be a 46 | function that takes the script path as input and returns the log directory. 47 | For more information see the examples of \code{run()} or \code{vignette('whirl')}.. Default: \code{function (x) dirname(x)}.} 48 | } 49 | \value{ 50 | A tibble containing the execution results for all the scripts. 51 | } 52 | \description{ 53 | Executes and logs the execution of the scripts. 54 | Logs for each script are stored in the same folder as the script. 55 | 56 | The way the execution is logged is configurable through several options for 57 | e.g. the verbosity of the logs. 58 | See \link{whirl-options} on how to configure these. 59 | } 60 | \examples{ 61 | \dontshow{if (FALSE) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} 62 | # Copy example scripts: 63 | file.copy( 64 | from = system.file("examples", c("success.R", "warning.R", "error.R"), 65 | package = "whirl" 66 | ), 67 | to = tempdir() 68 | ) 69 | 70 | # Run a single script and create log: 71 | run(file.path(tempdir(), "success.R")) 72 | 73 | # Run several scripts in parallel on up to 2 workers: 74 | run( 75 | input = file.path(tempdir(), c("success.R", "warning.R", "error.R")), 76 | n_workers = 2 77 | ) 78 | 79 | # Run several scripts in two steps by providing them as list elements: 80 | run( 81 | list( 82 | file.path(tempdir(), c("success.R", "warning.R")), 83 | file.path(tempdir(), "error.R") 84 | ) 85 | ) 86 | 87 | # Re-directing the logs to a sub-folder by utilizing the log_dir argument in 88 | # run(). This will require that the sub-folder exists. 89 | 90 | # Specifying the path using a manually defined character 91 | run(file.path(tempdir(), "success.R"), log_dir = tempdir()) 92 | 93 | # Specifying the path with a generic function that can handle the scripts 94 | # individually. 95 | run( 96 | input = file.path(tempdir(), "success.R"), 97 | log_dir = function(x) {paste0(dirname(x), "/logs")} 98 | ) 99 | \dontshow{\}) # examplesIf} 100 | } 101 | -------------------------------------------------------------------------------- /NEWS.md: -------------------------------------------------------------------------------- 1 | # whirl dev 2 | 3 | * Fixed bug where warnings were given when scripts are missing a final EOL (#206) 4 | * Fixed bug so no warning is shown when saving `options()` to the temp folder used by whirl (#206) 5 | * Fixed so Quarto is started with the right renv library paths when using renv (#215) 6 | * Refactored log.qmd to remove read_log. 7 | 8 | # whirl 0.3.1 9 | 10 | * Improved error handling when the log cannot be created. 11 | * Added new option `environment_secrets` to control which secret environment variables not to include in the log. 12 | * Improved progress bar of `run()` to show all currently running scripts. 13 | * Removed `verbosity_level` argument to `run()` since it is now completely controlled by zephyr options (see `help("whirl-options")`). 14 | * Fixed bug where a script would not execute if using `options(warn = 2)` (#151) 15 | * The log now distinguishes between directly and indirectly used packages, and visually highlights their approval status if a list of approved packages is provided. 16 | * Added check for approved Python packages with the `approved_python_packages` option similar to for R. 17 | * Improved how to find used Python packages, so it now only lists the packages actually used in the script. 18 | 19 | # whirl 0.3.0 20 | 21 | * Add `write_biocompute()` to create [BioCompute Objects](https://www.biocomputeobject.org/) containing the logs in a standardized JSON format. 22 | * Calling `run()` with `track_files = TRUE` now checks if strace can be attached to the process. 23 | * Improved json logs and similar returned output from running a script (`result` column in return from `run()`). 24 | * Simplified approved packages check. Now the user supplies a character vector of packages and versions specified as `{package}@{version}`. 25 | * Implement use of `tag` in the returned output. Each script is now tagged with the step name in the summary report. 26 | 27 | # whirl 0.2.0 28 | 29 | * Initial CRAN release. 30 | * Default `input` argument of `run()` set to "_whirl.yml". 31 | * Added RStudio addins for running all scripts and the active script. 32 | * Fix typo in `track_files_discards` option. 33 | * Increases unit test coverage and skips `run()` tests etc. when Quarto is not available. 34 | 35 | # whirl 0.1.7 36 | * Enable redirection of logs through the `log_dir` argument in `run()`. 37 | * Changed the title on the individual logs to the script name and moved the path to a distinct section within the title-block. 38 | * Fixed a bug where the hyperlink in the summary files was not rendered correctly. 39 | * Fixed a bug where the installed python packages were not listed in the log. 40 | * Enable the use of R expressions in the yaml configuration file. 41 | * Enables the user to define the working directory for each script with the `execute_dir` option. 42 | 43 | # whirl 0.1.6 44 | * Added support for logging of Python scripts with `run()`. 45 | * Improved unit tests for `run()`. 46 | * Fixing a bug where the queue was not returned correctly in some instances. 47 | * Switched to using `Sys.glob()` instead of `utils::glob2rx()`. 48 | 49 | # whirl 0.1.4 50 | * Add `use_whirl()` utility function. 51 | 52 | # whirl 0.1.3 53 | * Adding additional arguments to `run()` allowing the user to: 54 | - control the verbosity level 55 | - specify whether renv should be checked 56 | - specify which files to track 57 | - adjust the output format of the log files. 58 | 59 | # whirl 0.1.1 60 | * Fix enabling rendering of md log formats("gfm", "commonmark", "markua"). 61 | 62 | # whirl 0.1.0 63 | * First version publicly available on GitHub. 64 | 65 | # whirl 0.0.5 66 | * Updated documentation 67 | * README and vignettes are now ready for users. 68 | 69 | # whirl 0.0.4 70 | * Adjusting `run()` to unify execution of scripts, lists of scripts, and configuration files. 71 | * Using multiple independent `callr::r_session` when executing several scripts. 72 | * Cleanup of namespace and exported functions. 73 | 74 | # whirl 0.0.3 75 | * Initial version of `run()`. 76 | * Substituting spinner with progress bar when executing single scripts. 77 | 78 | # whirl 0.0.2 79 | * Update so that the execution (including order of execution) can be controlled through a config file. 80 | 81 | # whirl 0.0.1 82 | * First version of package. 83 | -------------------------------------------------------------------------------- /inst/documents/log.qmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "`r basename(params$title)`" 3 | date: "`r Sys.time()`" 4 | date-format: "YYYY-MM-DDTHH:mm:ss zzz" 5 | abstract: "`r params$title`" 6 | template-partials: 7 | - title-block.html 8 | params: 9 | result: NULL 10 | title: 'dev' 11 | tmpdir: '.' 12 | format: 13 | html: 14 | embed-resources: true 15 | toc: true 16 | toc-depth: 2 17 | toc-expand: true 18 | smooth-scroll: true 19 | css: style.css 20 | --- 21 | 22 | # Summary 23 | 24 | ```{r} 25 | #| label: Setup 26 | #| echo: false 27 | 28 | file.path(params$tmpdir, "parent_options.rds") |> 29 | readRDS() |> 30 | options() 31 | 32 | format_approved <- function(x) { 33 | if (all(is.na(x$approved))) { 34 | x$approved <- NULL 35 | return(x) 36 | } 37 | 38 | x$approved = ifelse(x$approved == TRUE, "\u2705 Yes", "\u274C No") 39 | 40 | x 41 | } 42 | 43 | knit_print.data.frame <- function(x, ...) { 44 | names(x) <- gsub( 45 | # Use space instead of . or _ in columns names 46 | pattern = "\\.|_", 47 | replacement = " ", 48 | x = names(x) 49 | ) 50 | 51 | names(x) <- gsub( 52 | # Convert all column names to title case 53 | pattern = "\\b([[:alpha:]])([[:alpha:]]+)", 54 | replacement = "\\U\\1\\L\\2", 55 | x = names(x), 56 | perl = TRUE 57 | ) 58 | 59 | for (i in which(vapply(X = x, FUN = is.logical, FUN.VALUE = logical(1)))) { 60 | x[[i]] <- ifelse(x[[i]], "Yes", "No") # Yes/No instead of TRUE/FALSE 61 | } 62 | 63 | x |> 64 | knitr::kable(format = "html") |> 65 | kableExtra::kable_styling( 66 | bootstrap_options = "striped", 67 | full_width = TRUE 68 | ) |> 69 | knitr::knit_print() 70 | } 71 | 72 | registerS3method( 73 | "knit_print", 74 | "data.frame", 75 | knit_print.data.frame, 76 | envir = asNamespace("knitr") 77 | ) 78 | ``` 79 | 80 | ```{r} 81 | #| label: Status 82 | #| include: false 83 | result <- readRDS(params$result) 84 | ``` 85 | 86 | ```{r} 87 | #| label: Report status 88 | #| echo: false 89 | status <- result$status 90 | 91 | status_txt <- status[c("error", "warning")] 92 | status_txt <- status_txt[lapply(status_txt, length) > 0] |> 93 | lapply(\(x) paste("*", x, collapse = "\n")) |> 94 | unlist() 95 | 96 | whirl:::quarto_callout( 97 | text = status_txt, 98 | title = status$message, 99 | type = switch( 100 | status$message, 101 | "error" = "important", 102 | "warning" = "warning", 103 | "success" = "tip" 104 | ), 105 | collapse = status$message != "success" 106 | ) 107 | ``` 108 | 109 | ```{r} 110 | #| label: Renv 111 | #| echo: false 112 | #| eval: !expr file.exists(file.path(params$tmpdir, "renv_status.rds")) 113 | params$tmpdir |> 114 | file.path("renv_status.rds") |> 115 | readRDS() |> 116 | whirl:::knit_print_whirl_renv_status() 117 | ``` 118 | 119 | ```{r} 120 | #| label: Used files 121 | #| child: !expr file.path(params$tmpdir, "files.qmd") 122 | #| eval: !expr (!is.null(result$files)) 123 | ``` 124 | 125 | # Script 126 | ::: {.script-container} 127 | 128 | ```{r} 129 | #| child: !expr file.path(params$tmpdir, "dummy.md") 130 | ``` 131 | 132 | ::: 133 | 134 | # Session info 135 | 136 | ```{r} 137 | #| label: Session info settings 138 | #| include: false 139 | knitr::opts_chunk$set( 140 | error = FALSE, 141 | warning = FALSE, 142 | message = FALSE, 143 | echo = FALSE 144 | ) 145 | ``` 146 | 147 | :::{.callout-note collapse=false appearance="minimal"} 148 | ## Platform 149 | 150 | ```{r} 151 | #| label: Platform 152 | result$session$platform 153 | ``` 154 | 155 | ::: 156 | 157 | ```{r} 158 | #| label: Python 159 | #| child: !expr file.path(params$tmpdir, "python.qmd") 160 | #| eval: !expr grepl("\\.py$", params$title) 161 | ``` 162 | 163 | :::{.callout-note collapse=false appearance="minimal"} 164 | ## R packages used directly 165 | 166 | ```{r} 167 | #| label: R packages used directly 168 | result$session$R |> 169 | dplyr::filter(directly_used) |> 170 | dplyr::select("package", "version", "approved", "source") |> 171 | format_approved() 172 | ``` 173 | 174 | ::: 175 | 176 | :::{.callout-note collapse=true appearance="minimal"} 177 | ## R packages used indirectly 178 | 179 | ```{r} 180 | #| label: R packages used indirectly 181 | result$session$R |> 182 | dplyr::filter(!directly_used) |> 183 | dplyr::select("package", "version", "source") 184 | ``` 185 | 186 | ::: 187 | 188 | :::{.callout-note collapse=true appearance="minimal"} 189 | ## Environment variables 190 | 191 | ```{r} 192 | #| label: Environment 193 | result$session$environment 194 | ``` 195 | 196 | ::: 197 | 198 | :::{.callout-note collapse=true appearance="minimal"} 199 | ## Options 200 | 201 | ```{r} 202 | #| label: Options 203 | result$session$options 204 | ``` 205 | 206 | ::: 207 | -------------------------------------------------------------------------------- /man/whirl-options.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/whirl-options.R 3 | \name{whirl-options} 4 | \alias{whirl-options} 5 | \title{Options for whirl} 6 | \description{ 7 | \subsection{verbosity_level}{ 8 | 9 | Verbosity level for functions in whirl. 10 | See \link[zephyr:verbosity_level]{zephyr::verbosity_level} for details. 11 | \itemize{ 12 | \item Default: \code{NA_character_} 13 | \item Option: \code{whirl.verbosity_level} 14 | \item Environment: \code{R_WHIRL_VERBOSITY_LEVEL} 15 | } 16 | } 17 | 18 | \subsection{out_formats}{ 19 | 20 | Which log format(s) to produce. Possibilities are \code{html}, 21 | \code{json}, and markdown formats: \code{gfm}, \code{commonmark}, and \code{markua}. 22 | \itemize{ 23 | \item Default: \code{"html"} 24 | \item Option: \code{whirl.out_formats} 25 | \item Environment: \code{R_WHIRL_OUT_FORMATS} 26 | } 27 | } 28 | 29 | \subsection{track_files}{ 30 | 31 | Should files read and written be tracked? 32 | Currently only supported on Linux. 33 | \itemize{ 34 | \item Default: \code{FALSE} 35 | \item Option: \code{whirl.track_files} 36 | \item Environment: \code{R_WHIRL_TRACK_FILES} 37 | } 38 | } 39 | 40 | \subsection{check_renv}{ 41 | 42 | Should the projects renv status be checked? 43 | \itemize{ 44 | \item Default: \code{FALSE} 45 | \item Option: \code{whirl.check_renv} 46 | \item Environment: \code{R_WHIRL_CHECK_RENV} 47 | } 48 | } 49 | 50 | \subsection{track_files_discards}{ 51 | 52 | List of file naming patterns not be tracked when track_files = TRUE 53 | \itemize{ 54 | \item Default: \code{c("^/lib", "^/etc", "^/lib64", "^/usr", "^/var", "^/opt", "^/sys", "^/proc", "^/tmp", "^/null", "^/urandom", "^/.cache")} 55 | \item Option: \code{whirl.track_files_discards} 56 | \item Environment: \code{R_WHIRL_TRACK_FILES_DISCARDS} 57 | } 58 | } 59 | 60 | \subsection{track_files_keep}{ 61 | 62 | List of file naming patterns always to be tracked when 63 | track_files = TRUE 64 | \itemize{ 65 | \item Default: \code{NULL} 66 | \item Option: \code{whirl.track_files_keep} 67 | \item Environment: \code{R_WHIRL_TRACK_FILES_KEEP} 68 | } 69 | } 70 | 71 | \subsection{approved_packages}{ 72 | 73 | List of approved R packages and their version in the format: \{name\}@\{version\} 74 | \itemize{ 75 | \item Default: \code{NULL} 76 | \item Option: \code{whirl.approved_packages} 77 | \item Environment: \code{R_WHIRL_APPROVED_PACKAGES} 78 | } 79 | } 80 | 81 | \subsection{approved_python_packages}{ 82 | 83 | List of approved Python packages and their version in the format: \{name\}@\{version\} 84 | \itemize{ 85 | \item Default: \code{NULL} 86 | \item Option: \code{whirl.approved_python_packages} 87 | \item Environment: \code{R_WHIRL_APPROVED_PYTHON_PACKAGES} 88 | } 89 | } 90 | 91 | \subsection{n_workers}{ 92 | 93 | Number of simultaneous workers used in the run function. 94 | A maximum of 128 workers is allowed. 95 | \itemize{ 96 | \item Default: \code{1} 97 | \item Option: \code{whirl.n_workers} 98 | \item Environment: \code{R_WHIRL_N_WORKERS} 99 | } 100 | } 101 | 102 | \subsection{log_dir}{ 103 | 104 | The output directory of the log files. Default is the folder of 105 | the executed script. log_dir can be a path as a character or it can be a 106 | function that takes the script path as input and returns the log directory. 107 | For more information see the examples of \code{run()} or \code{vignette('whirl')}. 108 | \itemize{ 109 | \item Default: \code{function (x) dirname(x)} 110 | \item Option: \code{whirl.log_dir} 111 | \item Environment: \code{R_WHIRL_LOG_DIR} 112 | } 113 | } 114 | 115 | \subsection{execute_dir}{ 116 | 117 | The working directory of the process executing each script. 118 | Default us to execute R files from the working directory when calling \code{run()} 119 | and all other functions from the directory of the script. To change provide 120 | a character path (used for all scripts) or a function that takes the script 121 | as input and returns the execution directory. 122 | \itemize{ 123 | \item Default: \code{NULL} 124 | \item Option: \code{whirl.execute_dir} 125 | \item Environment: \code{R_WHIRL_EXECUTE_DIR} 126 | } 127 | } 128 | 129 | \subsection{wait_timeout}{ 130 | 131 | Timeout for waiting for the R process from callr::r_session to 132 | start, in milliseconds. 133 | \itemize{ 134 | \item Default: \code{9000} 135 | \item Option: \code{whirl.wait_timeout} 136 | \item Environment: \code{R_WHIRL_WAIT_TIMEOUT} 137 | } 138 | } 139 | 140 | \subsection{environment_secrets}{ 141 | 142 | Secret environment variable patterns. 143 | Any variables matching will not be included in the logs. 144 | \itemize{ 145 | \item Default: \code{c("BASH_FUNC", "_SSL_CERT", "_KEY", "_PAT", "_TOKEN")} 146 | \item Option: \code{whirl.environment_secrets} 147 | \item Environment: \code{R_WHIRL_ENVIRONMENT_SECRETS} 148 | } 149 | } 150 | } 151 | -------------------------------------------------------------------------------- /tests/testthat/test-run.R: -------------------------------------------------------------------------------- 1 | expect_single_script <- function(res) { 2 | res[["status"]] |> 3 | testthat::expect_equal("success") 4 | 5 | res[["result"]][[1]] |> 6 | names() |> 7 | testthat::expect_equal(c("script", "status", "files", "session", "logs")) 8 | 9 | return(invisible(res)) 10 | } 11 | 12 | test_that("Run single R script", { 13 | skip_if_no_quarto() 14 | res <- test_script("success.R") |> 15 | run(summary_file = NULL) |> 16 | expect_no_warning() |> 17 | expect_no_error() 18 | 19 | expect_single_script(res) 20 | 21 | withr::with_options( 22 | new = list(whirl.verbosity_level = "verbose"), 23 | code = { 24 | test_script("success.R") |> 25 | run(summary_file = NULL) |> 26 | expect_message() |> 27 | suppressMessages() 28 | } 29 | ) 30 | }) 31 | 32 | test_that("Run single python script", { 33 | skip_on_cran() 34 | skip_if_no_quarto() 35 | skip_if_no_python() 36 | res <- test_script("py_success.py") |> 37 | run(summary_file = NULL) |> 38 | expect_no_warning() |> 39 | expect_no_error() 40 | 41 | expect_single_script(res) 42 | }) 43 | 44 | expect_multiple_scripts <- function(res) { 45 | res[["status"]] |> 46 | testthat::expect_equal(c("success", "warning", "error")) 47 | 48 | res[["result"]][[1]][["status"]][c("errors", "warnings")] |> 49 | lapply(\(x) length(x) > 0) |> 50 | unlist() |> 51 | testthat::expect_equal(c(FALSE, FALSE), ignore_attr = TRUE) 52 | 53 | res[["result"]][[2]][["status"]][c("errors", "warnings")] |> 54 | lapply(\(x) length(x) > 0) |> 55 | unlist() |> 56 | testthat::expect_equal(c(FALSE, TRUE), ignore_attr = TRUE) 57 | 58 | res[["result"]][[3]][["status"]][c("errors", "warnings")] |> 59 | lapply(\(x) length(x) > 0) |> 60 | unlist() |> 61 | testthat::expect_equal(c(TRUE, FALSE), ignore_attr = TRUE) 62 | 63 | return(invisible(res)) 64 | } 65 | 66 | test_that("Run multiple R scripts", { 67 | skip_if_no_quarto() 68 | res <- test_script(c("success.R", "warning.R", "error.R")) |> 69 | run(n_workers = 2) |> 70 | expect_no_error() 71 | 72 | expect_multiple_scripts(res) 73 | }) 74 | 75 | test_that("Run multiple python scripts", { 76 | skip_on_cran() 77 | skip_if_no_quarto() 78 | skip_if_no_python() 79 | res <- test_script(c("py_success.py", "py_warning.py", "py_error.py")) |> 80 | run(n_workers = 2) |> 81 | expect_no_error() 82 | 83 | expect_multiple_scripts(res) 84 | }) 85 | 86 | test_that("Run yaml config file", { 87 | skip_on_cran() 88 | skip_if_no_quarto() 89 | res <- test_script("_whirl.yaml") |> 90 | run(summary_file = NULL, n_workers = 2) |> 91 | expect_no_error() 92 | }) 93 | 94 | test_that("Change the log_dir to a path", { 95 | skip_on_cran() 96 | skip_if_no_quarto() 97 | # Custom path 98 | custom_path <- withr::local_tempdir() 99 | 100 | # Execute run() with log_dir = custom path 101 | res <- test_script("success.R") |> 102 | run(summary_file = NULL, log_dir = custom_path) |> 103 | expect_no_error() 104 | 105 | # Check if the log file is created in the custom path 106 | file.path(custom_path, "success_log.html") |> 107 | file.exists() |> 108 | expect_true() 109 | }) 110 | 111 | test_that("Change the log_dir with a function", { 112 | skip_on_cran() 113 | skip_if_no_quarto() 114 | # Custom path and copy script 115 | custom_path <- withr::local_tempdir() 116 | dir.create(file.path(custom_path, "logs")) 117 | file.copy(from = test_script("warning.R"), to = custom_path) |> 118 | expect_true() 119 | 120 | # Execute run() with log_dir as a function 121 | res <- file.path(custom_path, "warning.R") |> 122 | run(summary_file = NULL, log_dir = function(x) { 123 | paste0(dirname(x), "/logs") 124 | }) |> 125 | expect_no_error() 126 | 127 | # Check if the log file is created in the correct folder 128 | file.path(custom_path, "logs", "warning_log.html") |> 129 | file.exists() |> 130 | expect_true() 131 | }) 132 | 133 | test_that("Change the execute_dir to a path", { 134 | skip_on_cran() 135 | skip_if_no_quarto() 136 | custom_path <- withr::local_tempdir() 137 | withr::local_options(whirl.execute_dir = custom_path) 138 | 139 | test_script("success.R") |> 140 | run(summary_file = NULL) |> 141 | expect_no_error() 142 | 143 | withr::local_options(whirl.execute_dir = "this/path/does/not/exist") 144 | 145 | test_script("success.R") |> 146 | run(summary_file = NULL) |> 147 | expect_error() 148 | }) 149 | 150 | test_that("Change the execute_dir to a function", { 151 | skip_on_cran() 152 | skip_if_no_quarto() 153 | withr::local_options(whirl.execute_dir = \(x) dirname(x)) 154 | 155 | test_script("success.R") |> 156 | run(summary_file = NULL) |> 157 | expect_no_error() 158 | }) 159 | -------------------------------------------------------------------------------- /R/run.R: -------------------------------------------------------------------------------- 1 | #' Execute single or multiple R, R Markdown, and Quarto scripts 2 | #' 3 | #' @description 4 | #' Executes and logs the execution of the scripts. 5 | #' Logs for each script are stored in the same folder as the script. 6 | #' 7 | #' The way the execution is logged is configurable through several options for 8 | #' e.g. the verbosity of the logs. 9 | #' See [whirl-options] on how to configure these. 10 | #' 11 | #' @param input A character vector of file path(s) to R, R Markdown, Quarto 12 | #' scripts, or files in a folder using regular expression, or to to a whirl 13 | #' config file. The input can also be structured in a list where each element 14 | #' will be executed sequentially, while scripts within each element can be 15 | #' executed in parallel. 16 | #' @param steps An optional argument that can be used if only certain steps 17 | #' within a config files (or list) is to be executed. Should be equivalent to 18 | #' the names of the steps found in the config file. If kept as NULL (default) 19 | #' then all steps listed in the config file will be executed. 20 | #' @param summary_file A character string specifying the file path where the 21 | #' summary log will be stored. 22 | #' @inheritParams whirl-options-params 23 | #' @return A tibble containing the execution results for all the scripts. 24 | #' 25 | #' @examplesIf FALSE 26 | #' # Copy example scripts: 27 | #' file.copy( 28 | #' from = system.file("examples", c("success.R", "warning.R", "error.R"), 29 | #' package = "whirl" 30 | #' ), 31 | #' to = tempdir() 32 | #' ) 33 | #' 34 | #' # Run a single script and create log: 35 | #' run(file.path(tempdir(), "success.R")) 36 | #' 37 | #' # Run several scripts in parallel on up to 2 workers: 38 | #' run( 39 | #' input = file.path(tempdir(), c("success.R", "warning.R", "error.R")), 40 | #' n_workers = 2 41 | #' ) 42 | #' 43 | #' # Run several scripts in two steps by providing them as list elements: 44 | #' run( 45 | #' list( 46 | #' file.path(tempdir(), c("success.R", "warning.R")), 47 | #' file.path(tempdir(), "error.R") 48 | #' ) 49 | #' ) 50 | #' 51 | #' # Re-directing the logs to a sub-folder by utilizing the log_dir argument in 52 | #' # run(). This will require that the sub-folder exists. 53 | #' 54 | #' # Specifying the path using a manually defined character 55 | #' run(file.path(tempdir(), "success.R"), log_dir = tempdir()) 56 | #' 57 | #' # Specifying the path with a generic function that can handle the scripts 58 | #' # individually. 59 | #' run( 60 | #' input = file.path(tempdir(), "success.R"), 61 | #' log_dir = function(x) {paste0(dirname(x), "/logs")} 62 | #' ) 63 | #' 64 | #' @export 65 | run <- function( 66 | input = "_whirl.yml", 67 | steps = NULL, 68 | summary_file = "summary.html", 69 | n_workers = zephyr::get_option("n_workers", "whirl"), 70 | check_renv = zephyr::get_option("check_renv", "whirl"), 71 | track_files = zephyr::get_option("track_files", "whirl"), 72 | out_formats = zephyr::get_option("out_formats", "whirl"), 73 | log_dir = zephyr::get_option("log_dir", "whirl") 74 | ) { 75 | # Additional Settings 76 | track_files_discards <- zephyr::get_option("track_files_discards") |> 77 | c(.libPaths()) # Don't track the library paths 78 | track_files_keep <- zephyr::get_option("track_files_keep") 79 | 80 | # Check suggest imports if they are needed 81 | if (check_renv) { 82 | rlang::check_installed("renv") 83 | } 84 | 85 | # Message when initiating 86 | d <- NULL 87 | zephyr::msg_verbose( 88 | message = "Executing scripts and generating logs", 89 | theme = list( 90 | rule = list(color = "skyblue3", "line-type" = "double") 91 | ), 92 | msg_fun = \(message, theme, .envir) { 93 | d <<- cli::cli_div(theme = theme, .auto_close = FALSE) 94 | cli::cli_rule(message, .envir = .envir) 95 | } 96 | ) 97 | 98 | # Message when ending 99 | on.exit({ 100 | zephyr::msg_verbose( 101 | message = "End of process", 102 | div = d, 103 | msg_fun = \(message, div, .envir) { 104 | cli::cli_rule(message, .envir = .envir) 105 | cli::cli_end(div) 106 | } 107 | ) 108 | }) 109 | 110 | # Constrain the number of workers 111 | n_workers <- min(128, n_workers) 112 | 113 | zephyr::msg_verbose( 114 | message = "Executing scripts in parallel using {n_workers} worker(s)" 115 | ) 116 | 117 | # Initiating the queue 118 | queue <- whirl_queue$new( 119 | n_workers = n_workers, 120 | check_renv = check_renv, 121 | track_files = track_files, 122 | out_formats = out_formats, 123 | track_files_discards = track_files_discards, 124 | track_files_keep = track_files_keep, 125 | log_dir = log_dir 126 | ) 127 | 128 | result <- internal_run( 129 | input = input, 130 | steps = steps, 131 | queue = queue, 132 | level = 1 133 | ) 134 | 135 | # Create the summary log if required 136 | if (!is.null(summary_file)) { 137 | summary_tibble <- util_queue_summary(result$queue) 138 | render_summary(input = summary_tibble, summary_file = summary_file) 139 | } 140 | 141 | queue <- result$queue 142 | attr(x = queue, which = "whirl_input") <- input 143 | 144 | invisible(queue) 145 | } 146 | -------------------------------------------------------------------------------- /README.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | output: github_document 3 | --- 4 | 5 | 6 | 7 | ```{r setup, include = FALSE} 8 | knitr::opts_chunk$set( 9 | collapse = TRUE, 10 | comment = "#>", 11 | fig.path = "man/figures/README-", 12 | out.width = "100%" 13 | ) 14 | 15 | # Minimal printing for shorter readme 16 | options(whirl.verbosity_level = "minimal") 17 | 18 | # Use a temporary directory as working directory with all examples available 19 | tmp <- withr::local_tempdir() 20 | 21 | system.file("examples", package = "whirl") |> 22 | list.files(full.names = TRUE) |> 23 | file.copy(to = tmp) 24 | 25 | knitr::opts_knit$set(root.dir = tmp) 26 | ``` 27 | 28 | # whirl 29 | 30 | 31 | 32 | [![Checks](https://github.com/NovoNordisk-OpenSource/whirl/actions/workflows/check_and_co.yaml/badge.svg)](https://github.com/NovoNordisk-OpenSource/whirl/actions/workflows/check_and_co.yaml) 33 | 34 | [![Codecov test coverage](https://codecov.io/gh/NovoNordisk-OpenSource/whirl/graph/badge.svg)](https://app.codecov.io/gh/NovoNordisk-OpenSource/whirl) 35 | 36 | [![CRAN status](https://www.r-pkg.org/badges/version/whirl)](https://CRAN.R-project.org/package=whirl) 37 | 38 | [](https://pharmaverse.org) 39 | 40 | 41 | 42 | ## Overview 43 | 44 | The whirl package provide functionalities for executing scripts in batch and simultaneously getting a log from the individual executions. 45 | A log from script execution is in many pharmaceutical companies a GxP requirement, and the whirl package honors this requirement by generating a log that, among other things, contains information about: 46 | 47 | * Status (did the script run with any error or warnings) 48 | * The actual code itself 49 | * Date and time of execution 50 | * The environment the script was executed under (session info) 51 | * Information about packages versions that was utilized 52 | * Environmental variables 53 | 54 | And all this is wrapped into a nicely formatted html document that is easy to navigate. 55 | 56 | ## Installation 57 | 58 | ```{r, eval=FALSE} 59 | # Install the released version from CRAN: 60 | install.packages("whirl") 61 | # Install the development version from GitHub: 62 | pak::pak("NovoNordisk-OpenSource/whirl") 63 | ``` 64 | 65 | ## Usage 66 | 67 | The main function in the whirl package is `run()` which takes an `input` argument 68 | that defines the scripts to be executed. 69 | 70 | The simplest way is to provide the path to a single script: 71 | 72 | ```{r ex-script} 73 | library(whirl) 74 | 75 | run("success.R") 76 | ``` 77 | 78 | It is also possible to run several scripts simultaneously: 79 | 80 | ```{r ex-scripts} 81 | result <- run(c("success.R", "warning.R"), n_workers = 2) 82 | ``` 83 | 84 | Here we are specifying that `run()` can use up to two simultaneous workers to execute the scripts, 85 | meaning that they will be executed in parallel. 86 | 87 | When using `run()` the following files are created: 88 | 89 | 1. Creates a log in the same directory as the script with the names `{script_name}_log.html`. See [example_log.html](https://novonordisk-opensource.github.io/whirl/articles/example_log.html) for an example of a simple log. 90 | 1. Creates a summary log with the overall status of each script. Default path is `summary.html`. See [summary.html](https://novonordisk-opensource.github.io/whirl/articles/summary.html) for an example of a summary of the same log as above. 91 | 92 | Apart from this the function also returns a `tibble` with the status of the script execution similar to the content of the summary above: 93 | 94 | ```{r ex-return} 95 | print(result) 96 | ``` 97 | 98 | ## Config files 99 | 100 | `run()` also supports running scripts in several sequential steps. 101 | This setup is very useful when your projects have several steps that depends on each others output, and thereby need to be executed in a specific order. 102 | The best way to implement this in your project is use a configuration file for whirl. 103 | The configuration file is a `yaml` file that specifies each steps: 104 | 105 | `_whirl.yaml:` 106 | ```yaml 107 | `r paste(readLines("_whirl.yaml"), collapse = "\n")``` 108 | ``` 109 | 110 | Here we are specifying that in the first step we run `succes.R`. And then when this step has been completed we continue to running 111 | the scripts in the second steps. 112 | 113 | ```{r ex-run-config} 114 | result <- run("_whirl.yaml", n_workers = 2) 115 | ``` 116 | 117 | ```{r, ex-return-config} 118 | print(result) 119 | ``` 120 | 121 | ## Useful links 122 | 123 | For more information about how to customize the the execution and the logging for your needs see the following: 124 | 125 | * `run()`: For further information on how to call it. 126 | * `vignette("whirl")`: For a more in depth explanation, and more advanced usage. 127 | * `vignette("articles/example")`: With a simple example, including the created log. 128 | * `help("whirl-options")`: On how to change the default behavior of whirl. 129 | * [NovoNordisk-OpenSource/R-packages](https://novonordisk-opensource.github.io/R-packages/) for an overview of connector and other R packages published by Novo Nordisk. 130 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | # whirl 5 | 6 | 7 | 8 | [![Checks](https://github.com/NovoNordisk-OpenSource/whirl/actions/workflows/check_and_co.yaml/badge.svg)](https://github.com/NovoNordisk-OpenSource/whirl/actions/workflows/check_and_co.yaml) 9 | 10 | [![Codecov test 11 | coverage](https://codecov.io/gh/NovoNordisk-OpenSource/whirl/graph/badge.svg)](https://app.codecov.io/gh/NovoNordisk-OpenSource/whirl) 12 | 13 | [![CRAN 14 | status](https://www.r-pkg.org/badges/version/whirl)](https://CRAN.R-project.org/package=whirl) 15 | 16 | [](https://pharmaverse.org) 17 | 18 | 19 | 20 | ## Overview 21 | 22 | The whirl package provide functionalities for executing scripts in batch 23 | and simultaneously getting a log from the individual executions. A log 24 | from script execution is in many pharmaceutical companies a GxP 25 | requirement, and the whirl package honors this requirement by generating 26 | a log that, among other things, contains information about: 27 | 28 | - Status (did the script run with any error or warnings) 29 | - The actual code itself 30 | - Date and time of execution 31 | - The environment the script was executed under (session info) 32 | - Information about packages versions that was utilized 33 | - Environmental variables 34 | 35 | And all this is wrapped into a nicely formatted html document that is 36 | easy to navigate. 37 | 38 | ## Installation 39 | 40 | ``` r 41 | # Install the released version from CRAN: 42 | install.packages("whirl") 43 | # Install the development version from GitHub: 44 | pak::pak("NovoNordisk-OpenSource/whirl") 45 | ``` 46 | 47 | ## Usage 48 | 49 | The main function in the whirl package is `run()` which takes an `input` 50 | argument that defines the scripts to be executed. 51 | 52 | The simplest way is to provide the path to a single script: 53 | 54 | ``` r 55 | library(whirl) 56 | 57 | run("success.R") 58 | #> ✔ success.R: Completed succesfully. See log html. 59 | ``` 60 | 61 | It is also possible to run several scripts simultaneously: 62 | 63 | ``` r 64 | result <- run(c("success.R", "warning.R"), n_workers = 2) 65 | #> ✔ success.R: Completed succesfully. See log html. 66 | #> ! warning.R: Completed with warnings. See log html. 67 | ``` 68 | 69 | Here we are specifying that `run()` can use up to two simultaneous 70 | workers to execute the scripts, meaning that they will be executed in 71 | parallel. 72 | 73 | When using `run()` the following files are created: 74 | 75 | 1. Creates a log in the same directory as the script with the names 76 | `{script_name}_log.html`. See 77 | [example_log.html](https://novonordisk-opensource.github.io/whirl/articles/example_log.html) 78 | for an example of a simple log. 79 | 2. Creates a summary log with the overall status of each script. 80 | Default path is `summary.html`. See 81 | [summary.html](https://novonordisk-opensource.github.io/whirl/articles/summary.html) 82 | for an example of a summary of the same log as above. 83 | 84 | Apart from this the function also returns a `tibble` with the status of 85 | the script execution similar to the content of the summary above: 86 | 87 | ``` r 88 | print(result) 89 | #> # A tibble: 2 × 6 90 | #> id tag script status result log_dir 91 | #> 92 | #> 1 1 Step 1 /private/var/folders/fx/71by3f551qzb… succe… /priva… 93 | #> 2 2 Step 1 /private/var/folders/fx/71by3f551qzb… warni… /priva… 94 | ``` 95 | 96 | ## Config files 97 | 98 | `run()` also supports running scripts in several sequential steps. This 99 | setup is very useful when your projects have several steps that depends 100 | on each others output, and thereby need to be executed in a specific 101 | order. The best way to implement this in your project is use a 102 | configuration file for whirl. The configuration file is a `yaml` file 103 | that specifies each steps: 104 | 105 | `_whirl.yaml:` 106 | 107 | ``` yaml 108 | steps: 109 | - name: "First step" 110 | paths: 111 | - "success.R" 112 | - name: "Second step" 113 | paths: 114 | - "warning.R" 115 | - "error.R"`` 116 | ``` 117 | 118 | Here we are specifying that in the first step we run `succes.R`. And 119 | then when this step has been completed we continue to running the 120 | scripts in the second steps. 121 | 122 | ``` r 123 | result <- run("_whirl.yaml", n_workers = 2) 124 | #> ✔ success.R: Completed succesfully. See log html. 125 | #> ! warning.R: Completed with warnings. See log html. 126 | #> ✖ error.R: Completed with errors. See log html. 127 | ``` 128 | 129 | ``` r 130 | print(result) 131 | #> # A tibble: 3 × 6 132 | #> id tag script status result log_dir 133 | #> 134 | #> 1 1 First step /private/var/folders/fx/71by3f5… succe… /priva… 135 | #> 2 2 Second step /private/var/folders/fx/71by3f5… warni… /priva… 136 | #> 3 3 Second step /private/var/folders/fx/71by3f5… error /priva… 137 | ``` 138 | 139 | ## Useful links 140 | 141 | For more information about how to customize the the execution and the 142 | logging for your needs see the following: 143 | 144 | - `run()`: For further information on how to call it. 145 | - `vignette("whirl")`: For a more in depth explanation, and more 146 | advanced usage. 147 | - `vignette("articles/example")`: With a simple example, including the 148 | created log. 149 | - `help("whirl-options")`: On how to change the default behavior of 150 | whirl. 151 | - [NovoNordisk-OpenSource/R-packages](https://novonordisk-opensource.github.io/R-packages/) 152 | for an overview of connector and other R packages published by Novo 153 | Nordisk. 154 | -------------------------------------------------------------------------------- /R/log.R: -------------------------------------------------------------------------------- 1 | #' Helper function to read in all temporary information to be used in the log 2 | #' @noRd 3 | read_info <- function( 4 | script, 5 | md, 6 | start, 7 | log, 8 | pkgs_used, 9 | session, 10 | environment, 11 | options, 12 | python_pip_list = NULL, 13 | python_new_status = NULL, 14 | python_old_status = NULL, 15 | track_files = FALSE 16 | ) { 17 | info <- list( 18 | script = readRDS(script), 19 | status = get_status(md = md, start = start), 20 | files = log |> 21 | read_from_log(track_files = track_files) |> 22 | split_log(), 23 | session = read_session_info( 24 | file = session, 25 | pkgs_used = pkgs_used 26 | ) 27 | ) 28 | 29 | info$session$environment <- read_environment(environment) 30 | info$session$options <- read_options(options) 31 | 32 | if (!is.null(python_pip_list) && file.exists(python_pip_list)) { 33 | info$session$platform <- info$session$platform |> 34 | dplyr::bind_rows( 35 | data.frame( 36 | setting = "python", 37 | value = python_version() 38 | ) 39 | ) 40 | 41 | info$session$python <- read_python( 42 | old_status = python_old_status, 43 | new_status = python_new_status, 44 | pip_list = python_pip_list 45 | ) 46 | 47 | info$session <- 48 | info$session[c("platform", "R", "python", "environment", "options")] 49 | } 50 | 51 | return(info) 52 | } 53 | 54 | #' Read and format session info output from `sessioninfo::session_info()` 55 | #' @noRd 56 | read_session_info <- function(file, pkgs_used) { 57 | info <- readRDS(file) 58 | pkgs_used <- readRDS(pkgs_used) 59 | 60 | platform <- info[["platform"]] |> 61 | unlist() |> 62 | tibble::enframe(name = "setting", value = "value") 63 | 64 | r_packages <- info[["packages"]] |> 65 | tibble::as_tibble() |> 66 | dplyr::mutate( 67 | package = .data$package, 68 | version = .data$loadedversion, 69 | attached = .data$attached, 70 | path = .data$loadedpath, 71 | date = vapply( 72 | X = .data$package, 73 | FUN = utils::packageDate, 74 | FUN.VALUE = Sys.Date(), 75 | USE.NAMES = FALSE 76 | ) |> 77 | as.Date(), 78 | source = source, 79 | url = vapply( 80 | X = .data$package, 81 | FUN = \(x) { 82 | utils::packageDescription(x)[["URL"]] |> 83 | dplyr::coalesce(NA_character_) 84 | }, 85 | FUN.VALUE = character(1), 86 | USE.NAMES = FALSE 87 | ), 88 | directly_used = .data$package %in% pkgs_used[["Package"]], 89 | approved = check_approved( 90 | used = paste(.data$package, .data$version, sep = "@"), 91 | approved = zephyr::get_option("approved_packages") 92 | ), 93 | ) |> 94 | dplyr::select( 95 | "package", 96 | "version", 97 | "attached", 98 | "directly_used", 99 | "approved", 100 | "path", 101 | "date", 102 | "source", 103 | "url" 104 | ) 105 | 106 | list( 107 | platform = platform, 108 | R = r_packages 109 | ) 110 | } 111 | 112 | #' Read and format list of environment variables from `Sys.getenv()` 113 | #' @noRd 114 | read_environment <- function(file) { 115 | readRDS(file) |> 116 | as.list() |> 117 | unlist(recursive = FALSE) |> 118 | tibble::enframe(name = "variable", value = "value") |> 119 | dplyr::filter( 120 | stringr::str_detect( 121 | string = .data$variable, 122 | pattern = paste0( 123 | zephyr::get_option("environment_secrets", "whirl"), 124 | collapse = "|" 125 | ), 126 | negate = TRUE 127 | ) 128 | ) 129 | } 130 | 131 | #' Read and format options output from `options()` 132 | #' @noRd 133 | read_options <- function(file) { 134 | readRDS(file) |> 135 | tibble::enframe(name = "option", value = "value") |> 136 | dplyr::filter( 137 | !.data$option %in% "rl_word_breaks" # Removed due to breaking tables 138 | ) 139 | } 140 | 141 | #' Retrieve python version and path 142 | #' @noRd 143 | python_version <- function() { 144 | reticulate::py_config()[["version"]] |> 145 | as.character() |> 146 | paste("@", reticulate::py_config()[["python"]]) 147 | } 148 | 149 | #' Read and format python packages information from a JSON file 150 | #' JSON files created in `inst/documents/python_modules.py`. 151 | #' Pip list is created in `ìnst/documents/dummy.qmd`. 152 | #' @noRd 153 | read_python <- function(old_status, new_status, pip_list) { 154 | old <- old_status |> 155 | jsonlite::read_json() |> 156 | lapply(FUN = unlist, use.names = FALSE) 157 | 158 | new <- new_status |> 159 | jsonlite::read_json() |> 160 | lapply(FUN = unlist, use.names = FALSE) 161 | 162 | pip <- pip_list |> 163 | readRDS() |> 164 | parse_pip_list() 165 | 166 | if (!nrow(pip)) { 167 | return( 168 | tibble::tibble( 169 | package = character(0), 170 | version = character(0), 171 | directly_used = logical(0), 172 | approved = logical(0), 173 | path = character(0) 174 | ) 175 | ) 176 | } 177 | 178 | pip |> 179 | dplyr::filter( 180 | .data$package %in% 181 | c( 182 | setdiff(new$namespaced, old$namespaced), 183 | setdiff(new$loaded, old$loaded) 184 | ) 185 | ) |> 186 | dplyr::mutate( 187 | directly_used = .data$package %in% new$namespaced, 188 | approved = check_approved( 189 | used = paste(.data$package, .data$version, sep = "@"), 190 | approved = zephyr::get_option("approved_python_packages") 191 | ) 192 | ) |> 193 | dplyr::select("package", "version", "directly_used", "approved", "path") 194 | } 195 | 196 | #' @noRd 197 | parse_pip_list <- function(x) { 198 | x <- utils::read.table(text = x) 199 | 200 | nm <- tolower(x[1, ]) 201 | nm[which(nm == "location")] <- "path" 202 | 203 | names(x) <- nm 204 | 205 | x |> 206 | utils::tail(-2) |> 207 | tibble::as_tibble() 208 | } 209 | -------------------------------------------------------------------------------- /vignettes/whirl.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Execute Scripts" 3 | output: rmarkdown::html_vignette 4 | vignette: > 5 | %\VignetteIndexEntry{Execute Scripts} 6 | %\VignetteEngine{knitr::rmarkdown_notangle} 7 | %\VignetteEncoding{UTF-8} 8 | --- 9 | 10 | ```{r, include = FALSE} 11 | knitr::opts_chunk$set( 12 | collapse = TRUE, 13 | comment = "#>", 14 | eval = FALSE 15 | ) 16 | ``` 17 | 18 | # Introduction 19 | 20 | The whirl package provides functionalities for executing scripts in batch while simultaneously getting logs from the individual executions. In the following sections, we will go through how to use the package. 21 | 22 | # Ways to call the `run()` function 23 | 24 | ## Single and multiple files, and wild cards 25 | 26 | The `input` argument in the `run()` function can in the most simple case point to a single file for which an execution and log-generation is required. 27 | 28 | ```{r} 29 | library(whirl) 30 | run(input = "path/to/script.R") 31 | ``` 32 | 33 | The `input` argument can also point to multiple files in a directory using wildcard (or globbing) patterns. 34 | In this case, all scripts in the directory will be executed in parallel and a log will be generated for each script. 35 | The number of workers can be specified through the `n_workers` argument (default is set to one). 36 | A summary log file will also be provided as a fast way to get an overview of the execution. 37 | The location of the summary file can be controlled with the `summary_file` argument. 38 | 39 | ```{r} 40 | # Execution of all R files in a specific directory 41 | run( 42 | input = "path/to/directory/*.R", 43 | n_workers = 4, 44 | summary_file = "path/to/summary" 45 | ) 46 | 47 | # Execution of all R files starting with "2_" in a specific directory 48 | run( 49 | input = "path/to/directory/2_*.R", 50 | n_workers = 8, 51 | summary_file = "path/to/summary" 52 | ) 53 | ``` 54 | 55 | More information on how the wildcards are interpreted see `Sys.glob()`. 56 | 57 | It is also possible to provide a character vector of several paths (either single files or glob expression) that should be executed. Note that whenever the `input` argument in is supplied with a character vector (e.g. `c("path/to/script1.R", "path/to/script2.R")`) it assumes that these can be executed independently and in parallel. If the elements needs to be executed sequentially this can be achieved by using a `list()` instead (see below). 58 | 59 | ## Using `list()` as input 60 | 61 | If the scripts have to be executed in a specific order, the `input` argument can be supplied as a list. The scripts will then be executed in the order they are listed in the list, with scripts listed in the same element being executed in parallel (if `n_workers` > 1). 62 | 63 | ```{r} 64 | # In the below example, script1.R and script2.R will be executed in parallel 65 | run( 66 | input = c( 67 | "path/to/script1.R", 68 | "path/to/script2.R" 69 | ), 70 | n_workers = 2 71 | ) 72 | 73 | # In the below example, script1.R and script2.R will be executed in parallel, 74 | # and all R files in the directory will subsequently be executed in parallel 75 | run( 76 | input = list( 77 | c("path/to/script1.R", "path/to/script2.R"), 78 | "path/to/directory/*.R" 79 | ), 80 | n_workers = 2 81 | ) 82 | 83 | # In the below example, script1.R and script2.R will be executed in parallel, 84 | # and subsequently script3.R and script4.R will be executed in parallel 85 | run( 86 | input = list( 87 | c("path/to/script1.R", "path/to/script2.R"), 88 | c("path/to/script3.R", "path/to/script4.R") 89 | ), 90 | n_workers = 2 91 | ) 92 | ``` 93 | 94 | The list can also be supplied with names list elements. 95 | This can be useful during execution as some of these 'name' will be printed to the console. 96 | 97 | E.g. 98 | ```{r} 99 | run( 100 | input = list( 101 | list( 102 | name = "Step 1", 103 | paths = c("path/to/script1.R", "path/to/script2.R") 104 | ), 105 | list( 106 | name = "Step 2", 107 | paths = c("path/to/script3.R", "path/to/script4.R") 108 | ) 109 | ), 110 | n_workers = 2 111 | ) 112 | ``` 113 | 114 | ## Using a configuration file as input 115 | 116 | The execution order can also be pre-specified in a configuration file (config file for short). 117 | The config file could have the following structure. 118 | 119 | ```yaml 120 | steps: 121 | - name: "Step 1" 122 | paths: 123 | - "path/to/script1.R" 124 | - "path/to/script2.R" 125 | - name: "Step 2" 126 | paths: 127 | - "path/to/script3.R" 128 | - "path/to/script4.R" 129 | ``` 130 | 131 | In this case, the `input` argument in the `run()` function should point to the config file. 132 | Assuming the config file is called `config.yaml`, the execution can be initiated as follows: 133 | 134 | ```{r} 135 | run(input = "path/to/config.yaml", n_workers = 4) 136 | ``` 137 | 138 | Each steps in the config file will be executed independently while scrips within each step will be executed in parallel using the number of workers specified with the `n_workers` argument. 139 | 140 | # Adjusting the log directory 141 | ## How to use the `log_dir` argument to specify where to store the logs 142 | When executing `run()` the default is to store logs in the directory where the individual scripts are located. For example, if we apply `run()` to a a vector of scripts with the following paths `c(path/to/dir1/script1.R, path/to/dir2/script2.R)`, the log of script1.R and script2.R will be stored in **path/to/dir1** and **path/to/dir2**, respectively. 143 | 144 | If the logs should be stored in a different directory, the `log_dir` argument can be used. 145 | This argument can be supplied with a character string or a function. Note that in either case the directory that `log_dir` points to must exist before the execution is initiated. 146 | 147 | If the `log_dir` is supplied with a character pointing to a specific path the call could look like: 148 | 149 | ```{r} 150 | run(input = "path/to/script.R", log_dir = "path/to/logs") 151 | ``` 152 | 153 | In this example the log of script.R will be stored in **path/to/logs**. 154 | Note that if multiple scripts are executed and `log_dir` is a character to a path, then every log will be redirected to the same directory - in this case **path/to/logs**. 155 | 156 | 157 | If a more dynamic approach is needed the `log_dir` argument can also be supplied with a function that will be applied to the individual path of every script. 158 | For example, if multiple script are executed and the logs needs to be stored in a sub-folder within the script directories this could be achieved by: 159 | 160 | ```{r} 161 | run( 162 | input = c("path/to/dir1/script1.R", "path/to/dir2/script2.R"), 163 | log_dir = function(x) { 164 | paste0(dirname(x), "/logs") 165 | } 166 | ) 167 | ``` 168 | 169 | In this example the log of script1.R will be stored in **path/to/dir1/logs** and the log of script2.R will be stored in **path/to/dir2/logs**. 170 | 171 | Note that **x** refer to the path of the script that is being executed. 172 | -------------------------------------------------------------------------------- /R/strace.R: -------------------------------------------------------------------------------- 1 | #' Start strace 2 | #' @param pid [integer] process id to attach strace onto 3 | #' @param file [character] path to the file in which to store the strace log 4 | #' @noRd 5 | 6 | start_strace <- function(pid, file) { 7 | # Check OS first 8 | os_type <- Sys.info()["sysname"] 9 | if (os_type != "Linux") { 10 | cli::cli_abort(paste("strace does not support", os_type)) 11 | } 12 | 13 | # Just construct and run the command directly 14 | tryCatch( 15 | expr = { 16 | cmd <- sprintf( 17 | "strace -f -q -ttt -T -e trace=all -s 256 -o %s -p %s -y", 18 | file, 19 | pid 20 | ) 21 | system(cmd, wait = FALSE) 22 | }, 23 | error = function(e) { 24 | if (grepl("permission denied|not permitted", tolower(e$message))) { 25 | cli::cli_abort( 26 | "strace cannot attach to process. This may be due to permission errors" 27 | ) 28 | } 29 | 30 | if ( 31 | grepl("not found|no such file|command not found", tolower(e$message)) 32 | ) { 33 | cli::cli_abort("strace is not installed or not found in PATH") 34 | } 35 | 36 | cli::cli_abort(paste("Error:", e$message)) 37 | } 38 | ) 39 | } 40 | 41 | #' Get strace info ready for reporting 42 | #' 43 | #' @param path [character] path to the strace log 44 | #' @param p_wd [character] path to the working directory used for the process 45 | #' tracked in strace 46 | #' @param strace_discards [character] keywords to use to discard files from 47 | #' the info 48 | #' @param types [character] which element(s) to report in the info. If not 49 | #' found in strace, a dummy `data.frame` is inserted. 50 | #' @return [list] of `data.frame`(s) of the relevant files for each type of info 51 | #' @noRd 52 | 53 | read_strace_info <- function( 54 | path, 55 | p_wd = dirname(path), 56 | strace_discards = character(), 57 | strace_keep = character(), 58 | types = c("read", "write", "delete") 59 | ) { 60 | strace <- path |> 61 | read_strace(p_wd = p_wd) |> 62 | refine_strace(strace_discards = strace_discards, strace_keep = strace_keep) 63 | 64 | class(strace) <- c("whirl_log_info", class(strace)) 65 | 66 | # Split in a tibble for each type of output 67 | 68 | strace <- split(strace[c("time", "file")], strace$type) 69 | 70 | # Add empty table for types not reported 71 | 72 | out <- vector(mode = "list", length = length(types)) |> 73 | rlang::set_names(types) 74 | 75 | out[names(strace)] <- strace 76 | 77 | i <- lapply(X = out, FUN = is.null) |> 78 | unlist() |> 79 | which() 80 | dummy <- tibble::tibble(file = "No files") 81 | class(dummy) <- c("whirl_log_info", class(dummy)) 82 | out[i] <- list(dummy) 83 | 84 | return(out) 85 | } 86 | 87 | #' Read strace file 88 | #' 89 | #' @param path [character] path to the strace log 90 | #' @param p_wd [character] path to the working directory used for the process 91 | #' tracked in strace 92 | #' @return [data.frame] with strace information where all files are reported 93 | #' with their full path 94 | #' @noRd 95 | 96 | read_strace <- function(path, p_wd) { 97 | # Early return if file does not exist 98 | 99 | if (!file.exists(path)) { 100 | return( 101 | tibble::tibble( 102 | seq = integer(), 103 | time = as.POSIXct(character()), 104 | file = character(), 105 | type = character() 106 | ) 107 | ) 108 | } 109 | 110 | # Read strace log 111 | 112 | strace <- readLines(con = path, warn = FALSE) |> 113 | stringr::str_squish() |> 114 | stringr::str_subset("openat|unlink|chdir") |> 115 | stringr::str_subset( 116 | pattern = "ENOENT \\(No such file or directory\\)|ENXIO \\(No such device or address\\)| ENOTDIR \\(Not a directory\\)", # nolint: line_length_linter 117 | negate = TRUE 118 | ) |> 119 | stringr::str_subset( 120 | "|<\\.{3} [a-zA-Z]+ resumed>", 121 | negate = TRUE 122 | ) 123 | 124 | # Early return if no information 125 | 126 | if (length(strace) == 0) { 127 | return( 128 | tibble::tibble( 129 | seq = integer(), 130 | time = as.POSIXct(character()), 131 | file = character(), 132 | type = character() 133 | ) 134 | ) 135 | } 136 | 137 | # Otherwise extract information 138 | 139 | strace_df <- strace |> 140 | unglue::unglue_data( 141 | patterns = list( 142 | "{pid} {time} {funct}({keyword}<{dir}>, \"{path}\", {action}, {access}) = {result}<{result_dir}> <{duration}>", # nolint: line_length_linter 143 | "{pid} {time} {funct}({keyword}<{dir}>, \"{path}\", {action}) = {result}<{result_dir}> <{duration}>", # nolint: line_length_linter 144 | "{pid} {time} {funct}({keyword}<{dir}>, \"{path}\", {action}) = {result} <{duration}>", # nolint: line_length_linter 145 | "{pid} {time} {funct}(\"{path}\") = {result} <{duration}>" # nolint: line_length_linter 146 | ) 147 | ) |> 148 | tibble::as_tibble() |> 149 | dplyr::mutate( 150 | seq = dplyr::row_number(), 151 | pid = as.numeric(.data$pid), 152 | time = as.POSIXct(as.numeric(.data$time), origin = "1970-01-01"), 153 | result = as.numeric(.data$result), 154 | duration = as.numeric(.data$duration), 155 | type = dplyr::case_when( 156 | .data$funct == "chdir" ~ "chdir", 157 | stringr::str_detect(.data$funct, "unlink") ~ "delete", 158 | .data$funct == "openat" & 159 | stringr::str_detect(.data$action, "O_DIRECTORY") ~ 160 | "lookup", 161 | .data$funct == "openat" & is.na(.data$access) ~ "read", 162 | .data$funct == "openat" & !is.na(.data$access) ~ "write", 163 | ), 164 | wd = dplyr::case_when( 165 | .data$type == "chdir" & .data$path == "." ~ p_wd, 166 | .data$type == "chdir" ~ .data$path 167 | ) |> 168 | replace_na_with_last() |> 169 | dplyr::coalesce(p_wd), 170 | dir = dplyr::coalesce(.data$dir, .data$wd) 171 | ) 172 | 173 | # Full paths etc 174 | 175 | strace_df |> 176 | dplyr::mutate( 177 | file = dplyr::if_else( 178 | stringr::str_detect(string = .data$path, pattern = "^/", negate = TRUE), 179 | file.path(.data$dir, .data$path), 180 | .data$path 181 | ) 182 | ) |> 183 | dplyr::filter(.data$type %in% c("read", "write", "delete")) |> 184 | dplyr::select("seq", "time", "file", "type") 185 | } 186 | 187 | #' refine strace output 188 | #' 189 | #' @param strace_df [data.frame] with strace information as returned from 190 | #' `read_strace` 191 | #' @param strace_discards [character] keywords to use to discard files from the 192 | #' info 193 | #' @return [data.frame] with strace information where discarded and duplicate 194 | #' files are removed 195 | #' @noRd 196 | 197 | refine_strace <- function( 198 | strace_df, 199 | strace_discards = character(), 200 | strace_keep = character() 201 | ) { 202 | # Remove discards if provided 203 | 204 | if (length(strace_discards) && length(strace_keep)) { 205 | strace_df <- strace_df |> 206 | dplyr::filter( 207 | stringr::str_detect( 208 | string = .data$file, 209 | pattern = paste0(strace_discards, collapse = "|"), 210 | negate = TRUE 211 | ) | 212 | stringr::str_detect( 213 | string = .data$file, 214 | pattern = paste0(strace_keep, collapse = "|") 215 | ) 216 | ) 217 | } else if (length(strace_discards)) { 218 | strace_df <- strace_df |> 219 | dplyr::filter( 220 | stringr::str_detect( 221 | string = .data$file, 222 | pattern = paste0(strace_discards, collapse = "|"), 223 | negate = TRUE 224 | ) 225 | ) 226 | } 227 | 228 | # Derive net status (clean duplicate entries) 229 | 230 | strace_df |> 231 | dplyr::filter( 232 | # First read or write 233 | .data$type %in% 234 | c("read", "write") & 235 | !duplicated(strace_df[c("file", "type")]) | 236 | # Last delete 237 | .data$type %in% 238 | c("delete") & 239 | !duplicated(strace_df[c("file", "type")], fromLast = TRUE) 240 | ) |> 241 | dplyr::group_by(.data$file) |> 242 | dplyr::arrange(.data$file, .data$seq) |> 243 | dplyr::filter( 244 | # Remove reads from a file created earlier 245 | .data$type == "read" & 246 | !cumsum(.data$type == "write") | 247 | # Remove write when the file is deleted afterwards 248 | .data$type == "write" & !cumsum(rev(.data$type) == "delete") | 249 | # Remove delete when the file was created earlier, and not read before 250 | # that creation 251 | .data$type == "delete" & 252 | ( 253 | !cumsum(.data$type == "write") | 254 | utils::head(.data$type, 1) == "read" 255 | ) 256 | ) |> 257 | dplyr::ungroup() |> 258 | dplyr::arrange(.data$seq, .data$file) |> 259 | dplyr::select("time", "file", "type") 260 | } 261 | -------------------------------------------------------------------------------- /tests/testthat/test-strace.R: -------------------------------------------------------------------------------- 1 | strace_info <- function(path = "strace.log") { 2 | read_strace_info( 3 | path = path, 4 | p_wd = getwd(), 5 | strace_discards = zephyr::get_option("track_files_discards", "whirl"), 6 | strace_keep = getwd() 7 | ) 8 | } 9 | 10 | wait_for_condition <- function(check_fn, timeout = 2, interval = 0.1, error_msg = NULL) { 11 | start_time <- Sys.time() 12 | 13 | while (as.numeric(Sys.time() - start_time) < timeout) { 14 | tryCatch({ 15 | if (check_fn()) { 16 | return(TRUE) 17 | } 18 | }, error = function(e) { 19 | # Continue waiting if check_fn() fails 20 | }) 21 | 22 | Sys.sleep(interval) 23 | } 24 | 25 | # If we get here, the condition was never met 26 | if (is.null(error_msg)) { 27 | error_msg <- paste("Condition not met within", timeout, "seconds") 28 | } 29 | 30 | stop(error_msg, call. = FALSE) 31 | } 32 | 33 | check_strace_pattern <- function(pattern, operation, path = "strace.log") { 34 | function() { 35 | test <- strace_info(path = path) 36 | any(grepl(x = test[[operation]]$file, pattern = pattern)) 37 | } 38 | } 39 | 40 | 41 | test_that("strace works", { 42 | skip_on_cran() 43 | skip_on_os(c("windows", "mac", "solaris")) 44 | 45 | withr::with_tempdir( 46 | code = { 47 | cat("this is a dummy file to check strace", file = "dummy.txt") 48 | 49 | p <- callr::r_session$new() 50 | start_strace(pid = p$get_pid(), file = file.path(getwd(), "strace.log")) 51 | 52 | # Wait for strace to initialize 53 | wait_for_condition( 54 | check_fn = function() file.exists("strace.log"), 55 | error_msg = "strace log file was not created" 56 | ) 57 | 58 | # Test operations 59 | p$run(\() saveRDS(object = mtcars, file = "mtcars.rds")) 60 | wait_for_condition( 61 | check_fn = check_strace_pattern("mtcars.rds", "write"), 62 | error_msg = "mtcars.rds write operation not detected" 63 | ) |> testthat::expect_true() 64 | 65 | p$run(\() readLines("dummy.txt")) 66 | wait_for_condition( 67 | check_fn = check_strace_pattern("dummy.txt", "read"), 68 | error_msg = "dummy.txt read operation not detected" 69 | ) |> testthat::expect_true() 70 | 71 | p$run(\() file.remove("dummy.txt")) 72 | wait_for_condition( 73 | check_fn = check_strace_pattern("dummy.txt", "delete"), 74 | error_msg = "dummy.txt delete operation not detected" 75 | ) |> testthat::expect_true() 76 | 77 | p$kill() 78 | } 79 | ) 80 | }) 81 | test_that("strace fails gracefully OS error handling", { 82 | skip_on_os("linux") 83 | 84 | # Get current OS 85 | os_type <- Sys.info()["sysname"] 86 | 87 | # Test for unsupported OS (Windows or Darwin) 88 | expect_error( 89 | start_strace(1234, "output.txt"), 90 | paste("strace does not support", os_type) 91 | ) 92 | }) 93 | 94 | test_that("strace fails gracefully with mocking error handling", { 95 | # Define test cases with OS names and expected outcomes 96 | test_cases <- list( 97 | list( 98 | os = "Windows", 99 | expected = "error", 100 | message = "strace does not support Windows", 101 | system_mock = NULL # Not needed for error cases 102 | ), 103 | list( 104 | os = "Darwin", 105 | expected = "error", 106 | message = "strace does not support Darwin", 107 | system_mock = NULL # Not needed for error cases 108 | ), 109 | list( 110 | os = "unknownOS", 111 | expected = "error", 112 | message = "strace does not support unknownOS", 113 | system_mock = NULL # Not needed for error cases 114 | ), 115 | list( 116 | os = "Linux", 117 | expected = "success", 118 | message = NULL, 119 | system_mock = function(...) 0 # Return success (0) for system call 120 | ) 121 | ) 122 | 123 | for (case in test_cases) { 124 | # Set up OS mock for this test case 125 | local_mocked_bindings( 126 | Sys.info = function() { 127 | c( 128 | sysname = case$os, 129 | release = "10.0", 130 | version = "10.0", 131 | nodename = paste0("test-", tolower(case$os)), 132 | machine = "x86_64", 133 | login = "testuser", 134 | user = "testuser", 135 | effective_user = "testuser" 136 | ) 137 | }, 138 | .package = "base" 139 | ) 140 | 141 | # Set up system mock if provided (for Linux case) 142 | if (!is.null(case$system_mock)) { 143 | local_mocked_bindings( 144 | system = case$system_mock, 145 | .package = "base" 146 | ) 147 | } 148 | 149 | # Check expected outcome 150 | if (case$expected == "error") { 151 | expect_error( 152 | start_strace(1234, "output.txt"), 153 | case$message 154 | ) 155 | } else if (case$expected == "success") { 156 | expect_no_error(start_strace(1234, "output.txt")) 157 | } 158 | } 159 | }) 160 | 161 | test_that("strace fails during execution handling", { 162 | # Mock Sys.info to return Linux 163 | local_mocked_bindings( 164 | Sys.info = function() { 165 | c( 166 | sysname = "Linux", 167 | release = "5.4.0", 168 | version = "5.4.0-generic", 169 | nodename = "test-linux", 170 | machine = "x86_64", 171 | login = "testuser", 172 | user = "testuser", 173 | effective_user = "testuser" 174 | ) 175 | }, 176 | .package = "base" 177 | ) 178 | 179 | # Define test cases: error message and expected error 180 | test_cases <- list( 181 | list( 182 | error = "Operation not permitted", 183 | expected = "strace cannot attach to process. This may be due to permission errors" 184 | ), 185 | list( 186 | error = "command not found", 187 | expected = "strace is not installed or not found in PATH" 188 | ), 189 | list( 190 | error = "random_not_defined_error12345", 191 | expected = "random_not_defined_error12345" # Original error should be propagated 192 | ) 193 | ) 194 | 195 | # Loop through each test case 196 | for (case in test_cases) { 197 | # Mock system to throw the specific error 198 | local_mocked_bindings( 199 | system = function(...) { 200 | stop(case$error) 201 | }, 202 | .package = "base" 203 | ) 204 | 205 | # Test the error handling 206 | expect_error( 207 | start_strace(1234, "output.txt"), 208 | case$expected 209 | ) 210 | } 211 | }) 212 | 213 | 214 | test_that("refine_strace filters out discarded files", { 215 | withr::with_tempdir( 216 | code = { 217 | # Test data with required seq column 218 | test_df <- tibble::tibble( 219 | seq = c(1, 2), 220 | time = as.POSIXct(c("2023-01-01 10:00:01", "2023-01-01 10:00:02")), 221 | file = c("/tmp/cache.txt", "/home/important.R"), 222 | type = c("read", "write") 223 | ) 224 | 225 | # Test: only discards provided (triggers the else if branch) 226 | result <- refine_strace(test_df, 227 | strace_discards = c("tmp"), 228 | strace_keep = character()) 229 | 230 | expect_equal(result$file, "/home/important.R") 231 | expect_equal(nrow(result), 1) 232 | } 233 | ) 234 | }) 235 | 236 | test_that("read_strace returns empty tibble when file does not exist", { 237 | withr::with_tempdir( 238 | code = { 239 | # Test with non-existent file path 240 | result <- read_strace("nonexistent_file.log", getwd()) 241 | 242 | # Verify empty tibble structure 243 | expect_s3_class(result, "tbl_df") 244 | expect_named(result, c("seq", "time", "file", "type")) 245 | expect_equal(nrow(result), 0) 246 | expect_equal(ncol(result), 4) 247 | 248 | # Verify column classes 249 | expect_type(result$seq, "integer") 250 | expect_s3_class(result$time, "POSIXct") 251 | expect_type(result$file, "character") 252 | expect_type(result$type, "character") 253 | } 254 | ) 255 | }) 256 | 257 | test_that("read_strace returns empty tibble when file has no relevant content", { 258 | withr::with_tempdir( 259 | code = { 260 | # Create a file with content that gets filtered out (no openat|unlink|chdir) 261 | cat("some irrelevant log content\nanother line without the required patterns\nyet another line", 262 | file = "empty_strace.log") 263 | 264 | strace_result <- read_strace("empty_strace.log", getwd()) 265 | 266 | # Test the returned data frame structure 267 | expect_true(tibble::is_tibble(strace_result)) 268 | expect_identical(colnames(strace_result), c("seq", "time", "file", "type")) 269 | expect_true(nrow(strace_result) == 0) 270 | expect_true(ncol(strace_result) == 4) 271 | 272 | # Test column data types 273 | expect_true(is.integer(strace_result$seq)) 274 | expect_true(inherits(strace_result$time, "POSIXct")) 275 | expect_true(is.character(strace_result$file)) 276 | expect_true(is.character(strace_result$type)) 277 | } 278 | ) 279 | }) 280 | -------------------------------------------------------------------------------- /R/whirl_queue.R: -------------------------------------------------------------------------------- 1 | #' Queue for continuous execution and logging of scripts 2 | #' @description 3 | #' Implementation of a queue for supporting the continuous execution and logging 4 | #' of several scripts. 5 | #' The queue can be used interactively, but is mainly designed to be the 6 | #' internal backbone of the `run()` function. 7 | #' When a queue has several workers, pushed scripts will be run in parallel. 8 | #' @importFrom R6 R6Class 9 | #' @noRd 10 | 11 | whirl_queue <- R6::R6Class( 12 | classname = "whirl_queue", 13 | public = list( 14 | #' @inheritParams options_params 15 | #' @description Initialize the new whirl_queue 16 | #' @return A [whirl_queue] object 17 | initialize = \( 18 | # jscpd:ignore-start 19 | n_workers = zephyr::get_option("n_workers", "whirl"), 20 | check_renv = zephyr::get_option("check_renv", "whirl"), 21 | track_files = zephyr::get_option("track_files", "whirl"), 22 | out_formats = zephyr::get_option("out_formats", "whirl"), 23 | track_files_discards = zephyr::get_option( 24 | "track_files_discards", 25 | "whirl" 26 | ), 27 | track_files_keep = zephyr::get_option("track_files_keep", "whirl"), 28 | log_dir = zephyr::get_option("log_dir", "whirl") 29 | # jscpd:ignore-end 30 | ) { 31 | wq_initialise( 32 | self, 33 | private, 34 | n_workers, 35 | check_renv, 36 | track_files, 37 | out_formats, 38 | track_files_discards, 39 | track_files_keep, 40 | log_dir 41 | ) 42 | }, 43 | 44 | #' @description Push scripts to the queue 45 | #' @param scripts [character] Full paths for the scripts to be executed 46 | #' @param tag (optional) [character] Tag for the scripts to include in 47 | #' the queue 48 | #' @return [invisible] self 49 | push = \(scripts, tag = NA_character_) { 50 | wq_push(self, private, scripts, tag) 51 | }, 52 | 53 | #' @description Push scripts in the queue without executing them. 54 | #' Utility to include skipped scripts in the final queue. 55 | #' @param scripts [character] Full paths for the scripts to be executed 56 | #' @param tag (optional) [character] Tag for the scripts to include in 57 | #' the queue 58 | #' @return [invisible] self 59 | skip = \(scripts, tag = NA_character_) { 60 | wq_skip(self, private, scripts, tag) 61 | }, 62 | 63 | #' @description Poll the queue and start next steps if needed 64 | #' @param timeout [numeric] The timeout in milliseconds. 65 | #' Note it is only implemented approximately if more than one script is 66 | #' running simultaneously. 67 | #' @return [character] Status of all scripts queue 68 | poll = \(timeout) { 69 | wq_poll(self, private, timeout) 70 | }, 71 | 72 | #' @description Wait for the queue to complete 73 | #' @param timeout [numeric] The timeout in milliseconds 74 | #' @return [invisible] self 75 | wait = \(timeout = -1) { 76 | wq_wait(self, private, timeout) 77 | }, 78 | 79 | #' @description Run scripts using the queue. 80 | #' This is a wrapper around calling both push() and wait(). 81 | #' @param scripts [character] with full paths for the scripts to be executed 82 | #' @param tag (optional) [character] Tag for the scripts to include in 83 | #' the queue 84 | #' @return [invisible] self 85 | run = \(scripts, tag = NA_character_) { 86 | wq_run(scripts, tag, self, private) 87 | }, 88 | 89 | #' @description Print method displaying the current status of the queue 90 | #' @return [invisible] self 91 | print = \() { 92 | print(self$queue) 93 | return(invisible(self)) 94 | } 95 | ), 96 | active = list( 97 | #' @field queue [tibble] Current status of the queue 98 | queue = \() { 99 | private$.queue 100 | }, 101 | 102 | #' @field workers [tibble] Current status of the workers 103 | workers = \() { 104 | private$.workers 105 | }, 106 | 107 | #' @field available_workers [integer] Which workers are available 108 | available_workers = \() { 109 | which(!self$workers$active) 110 | }, 111 | 112 | #' @field next_ids [integer] Which scripts are next in the queue 113 | next_ids = \() { 114 | self$queue$id[self$queue$status == "waiting"] |> 115 | utils::head(length(self$available_workers)) 116 | }, 117 | 118 | #' @field next_workers [integer] Which workers are next to be started 119 | next_workers = \() { 120 | self$available_workers |> 121 | utils::head(length(self$next_ids)) 122 | } 123 | ), 124 | private = list( 125 | .queue = NULL, 126 | .workers = NULL, 127 | .n_workers = NULL, 128 | check_renv = NULL, 129 | track_files = NULL, 130 | out_formats = NULL, 131 | track_files_discards = NULL, 132 | track_files_keep = NULL, 133 | log_dir = NULL, 134 | progress_bar = NULL 135 | ) 136 | ) 137 | 138 | wq_initialise <- function( 139 | self, 140 | private, 141 | n_workers, 142 | check_renv, 143 | track_files, 144 | out_formats, 145 | track_files_discards, 146 | track_files_keep, 147 | log_dir 148 | ) { 149 | private$check_renv <- check_renv 150 | private$track_files <- track_files 151 | private$out_formats <- out_formats 152 | private$track_files_discards <- track_files_discards 153 | private$track_files_keep <- track_files_keep 154 | private$log_dir <- log_dir 155 | 156 | private$.queue <- tibble::tibble( 157 | id = numeric(), 158 | tag = character(), 159 | script = character(), 160 | status = character(), 161 | result = list(), 162 | log_dir = character() 163 | ) 164 | 165 | private$.workers <- tibble::tibble( 166 | id = seq_len(n_workers), 167 | session = vector(mode = "list", length = n_workers), 168 | id_script = numeric(n_workers), 169 | step = numeric(n_workers), 170 | active = FALSE 171 | ) 172 | } 173 | 174 | wq_add_queue <- function(self, private, scripts, tag, status) { 175 | # Adding the log directory to the queue 176 | if (is.character(private$log_dir)) { 177 | # Check if the directory exists 178 | if (!file.exists(private$log_dir)) { 179 | cli::cli_abort( 180 | "Logs cannot be saved because {.val {private$log_dir}} does not exist" 181 | ) 182 | } 183 | folder <- file.path(private$log_dir) 184 | } else { 185 | folder <- private$log_dir(scripts) 186 | # Check if the directory exists 187 | unique_folders <- unique(folder) 188 | if (any(!file.exists(unique_folders))) { 189 | missing <- unique_folders[!file.exists(unique_folders)] # nolint: object_usage_linter 190 | cli::cli_abort( 191 | "Logs cannot be saved because {.val {missing}} does not exist" 192 | ) 193 | } 194 | } 195 | 196 | private$.queue <- self$queue |> 197 | tibble::add_row( 198 | id = nrow(self$queue) + seq_along(scripts), 199 | tag = tag, 200 | script = scripts, 201 | status = status, 202 | log_dir = folder 203 | ) 204 | return(invisible(self)) 205 | } 206 | 207 | wq_push <- function(self, private, scripts, tag) { 208 | wq_add_queue(self, private, scripts, tag, status = "waiting") 209 | } 210 | 211 | wq_skip <- function(self, private, scripts, tag) { 212 | wq_add_queue(self, private, scripts, tag, status = "skipped") 213 | } 214 | 215 | wq_poll <- function( 216 | self, 217 | private, 218 | timeout 219 | ) { 220 | # Start new sessions if there are available workers and waiting scripts in 221 | # the queue 222 | 223 | if (length(self$next_ids)) { 224 | nid <- self$next_ids 225 | wid <- self$next_workers 226 | private$.workers[["session"]][wid] <- replicate( 227 | n = length(wid), 228 | expr = whirl_r_session$new( 229 | check_renv = private$check_renv, 230 | track_files = private$track_files, 231 | out_formats = private$out_formats, 232 | track_files_discards = private$track_files_discards, 233 | track_files_keep = private$track_files_keep, 234 | log_dir = private$log_dir 235 | ), 236 | simplify = FALSE 237 | ) 238 | private$.workers[wid, "id_script"] <- nid 239 | private$.workers[wid, "active"] <- TRUE 240 | private$.queue[nid, "status"] <- "running" 241 | } 242 | 243 | # Check for active sessions that are idle and start the next step if needed 244 | # When completed the session is stopped and the status in the queue is updated 245 | 246 | i_active <- which(private$.workers$active) 247 | i_timeout <- round(timeout / length(i_active)) 248 | for (i in i_active) { 249 | p <- private$.workers$session[[i]]$poll(timeout = i_timeout) 250 | if (p == "ready") { 251 | private$.workers$session[[i]]$check_status() 252 | } 253 | if (private$.workers$session[[i]]$get_state() == "idle") { 254 | wq_next_step(self, private, i) 255 | } 256 | } 257 | 258 | return(self$queue$status) 259 | } 260 | 261 | wq_wait <- function(self, private, timeout) { 262 | start <- Sys.time() 263 | timeout <- timeout / 1000 # Convert to secs 264 | go <- TRUE 265 | while (go) { 266 | pb_update(id = private$progress_bar, queue = self$queue) 267 | self$poll(50) 268 | go <- any(self$queue$status %in% c("waiting", "running")) 269 | if (timeout >= 0 && difftime(Sys.time(), start, units = "secs") > timeout) { 270 | break 271 | } 272 | } 273 | return(invisible(self)) 274 | } 275 | 276 | wq_next_step <- function(self, private, wid) { 277 | private$.workers$step[[wid]] <- private$.workers$step[[wid]] + 1 278 | id_script <- private$.workers$id_script[[wid]] 279 | session <- private$.workers$session[[wid]] 280 | 281 | switch( 282 | EXPR = private$.workers$step[[wid]], 283 | 284 | # Step 1: Log script 285 | "1" = { 286 | script <- private$.queue$script[[id_script]] 287 | session$log_script(script) 288 | }, 289 | # Step 2: Create log 290 | "2" = { 291 | session$create_log() 292 | }, 293 | # Step 3: Finish log and create outputs 294 | "3" = { 295 | private$.queue$result[[id_script]] <- 296 | session$log_finish( 297 | out_dir = private$.queue$log_dir[[id_script]], 298 | format = private$out_formats 299 | ) 300 | # fmt: skip 301 | private$.queue$status[[id_script]] <- 302 | private$.queue$result[[id_script]]$status$message 303 | 304 | private$.workers$session[wid] <- list(NULL) 305 | private$.workers$active[[wid]] <- FALSE 306 | private$.workers$id_script[[wid]] <- 0 307 | private$.workers$step[[wid]] <- 0 308 | } 309 | ) 310 | 311 | return(invisible(wid)) 312 | } 313 | 314 | wq_run <- function(scripts, tag, self, private) { 315 | private$progress_bar <- pb_start() 316 | on.exit({ 317 | private$progress_bar <- pb_done(id = private$progress_bar) 318 | gc() # finalizes used whirl_r_sessions - cleanup temp folders 319 | }) 320 | self$push(scripts = scripts, tag = tag)$wait() 321 | } 322 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | Apache License 2 | ============== 3 | 4 | _Version 2.0, January 2004_ 5 | _<>_ 6 | 7 | ### Terms and Conditions for use, reproduction, and distribution 8 | 9 | #### 1. Definitions 10 | 11 | “License” shall mean the terms and conditions for use, reproduction, and 12 | distribution as defined by Sections 1 through 9 of this document. 13 | 14 | “Licensor” shall mean the copyright owner or entity authorized by the copyright 15 | owner that is granting the License. 16 | 17 | “Legal Entity” shall mean the union of the acting entity and all other entities 18 | that control, are controlled by, or are under common control with that entity. 19 | For the purposes of this definition, “control” means **(i)** the power, direct or 20 | indirect, to cause the direction or management of such entity, whether by 21 | contract or otherwise, or **(ii)** ownership of fifty percent (50%) or more of the 22 | outstanding shares, or **(iii)** beneficial ownership of such entity. 23 | 24 | “You” (or “Your”) shall mean an individual or Legal Entity exercising 25 | permissions granted by this License. 26 | 27 | “Source” form shall mean the preferred form for making modifications, including 28 | but not limited to software source code, documentation source, and configuration 29 | files. 30 | 31 | “Object” form shall mean any form resulting from mechanical transformation or 32 | translation of a Source form, including but not limited to compiled object code, 33 | generated documentation, and conversions to other media types. 34 | 35 | “Work” shall mean the work of authorship, whether in Source or Object form, made 36 | available under the License, as indicated by a copyright notice that is included 37 | in or attached to the work (an example is provided in the Appendix below). 38 | 39 | “Derivative Works” shall mean any work, whether in Source or Object form, that 40 | is based on (or derived from) the Work and for which the editorial revisions, 41 | annotations, elaborations, or other modifications represent, as a whole, an 42 | original work of authorship. For the purposes of this License, Derivative Works 43 | shall not include works that remain separable from, or merely link (or bind by 44 | name) to the interfaces of, the Work and Derivative Works thereof. 45 | 46 | “Contribution” shall mean any work of authorship, including the original version 47 | of the Work and any modifications or additions to that Work or Derivative Works 48 | thereof, that is intentionally submitted to Licensor for inclusion in the Work 49 | by the copyright owner or by an individual or Legal Entity authorized to submit 50 | on behalf of the copyright owner. For the purposes of this definition, 51 | “submitted” means any form of electronic, verbal, or written communication sent 52 | to the Licensor or its representatives, including but not limited to 53 | communication on electronic mailing lists, source code control systems, and 54 | issue tracking systems that are managed by, or on behalf of, the Licensor for 55 | the purpose of discussing and improving the Work, but excluding communication 56 | that is conspicuously marked or otherwise designated in writing by the copyright 57 | owner as “Not a Contribution.” 58 | 59 | “Contributor” shall mean Licensor and any individual or Legal Entity on behalf 60 | of whom a Contribution has been received by Licensor and subsequently 61 | incorporated within the Work. 62 | 63 | #### 2. Grant of Copyright License 64 | 65 | Subject to the terms and conditions of this License, each Contributor hereby 66 | grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, 67 | irrevocable copyright license to reproduce, prepare Derivative Works of, 68 | publicly display, publicly perform, sublicense, and distribute the Work and such 69 | Derivative Works in Source or Object form. 70 | 71 | #### 3. Grant of Patent License 72 | 73 | Subject to the terms and conditions of this License, each Contributor hereby 74 | grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, 75 | irrevocable (except as stated in this section) patent license to make, have 76 | made, use, offer to sell, sell, import, and otherwise transfer the Work, where 77 | such license applies only to those patent claims licensable by such Contributor 78 | that are necessarily infringed by their Contribution(s) alone or by combination 79 | of their Contribution(s) with the Work to which such Contribution(s) was 80 | submitted. If You institute patent litigation against any entity (including a 81 | cross-claim or counterclaim in a lawsuit) alleging that the Work or a 82 | Contribution incorporated within the Work constitutes direct or contributory 83 | patent infringement, then any patent licenses granted to You under this License 84 | for that Work shall terminate as of the date such litigation is filed. 85 | 86 | #### 4. Redistribution 87 | 88 | You may reproduce and distribute copies of the Work or Derivative Works thereof 89 | in any medium, with or without modifications, and in Source or Object form, 90 | provided that You meet the following conditions: 91 | 92 | * **(a)** You must give any other recipients of the Work or Derivative Works a copy of 93 | this License; and 94 | * **(b)** You must cause any modified files to carry prominent notices stating that You 95 | changed the files; and 96 | * **(c)** You must retain, in the Source form of any Derivative Works that You distribute, 97 | all copyright, patent, trademark, and attribution notices from the Source form 98 | of the Work, excluding those notices that do not pertain to any part of the 99 | Derivative Works; and 100 | * **(d)** If the Work includes a “NOTICE” text file as part of its distribution, then any 101 | Derivative Works that You distribute must include a readable copy of the 102 | attribution notices contained within such NOTICE file, excluding those notices 103 | that do not pertain to any part of the Derivative Works, in at least one of the 104 | following places: within a NOTICE text file distributed as part of the 105 | Derivative Works; within the Source form or documentation, if provided along 106 | with the Derivative Works; or, within a display generated by the Derivative 107 | Works, if and wherever such third-party notices normally appear. The contents of 108 | the NOTICE file are for informational purposes only and do not modify the 109 | License. You may add Your own attribution notices within Derivative Works that 110 | You distribute, alongside or as an addendum to the NOTICE text from the Work, 111 | provided that such additional attribution notices cannot be construed as 112 | modifying the License. 113 | 114 | You may add Your own copyright statement to Your modifications and may provide 115 | additional or different license terms and conditions for use, reproduction, or 116 | distribution of Your modifications, or for any such Derivative Works as a whole, 117 | provided Your use, reproduction, and distribution of the Work otherwise complies 118 | with the conditions stated in this License. 119 | 120 | #### 5. Submission of Contributions 121 | 122 | Unless You explicitly state otherwise, any Contribution intentionally submitted 123 | for inclusion in the Work by You to the Licensor shall be under the terms and 124 | conditions of this License, without any additional terms or conditions. 125 | Notwithstanding the above, nothing herein shall supersede or modify the terms of 126 | any separate license agreement you may have executed with Licensor regarding 127 | such Contributions. 128 | 129 | #### 6. Trademarks 130 | 131 | This License does not grant permission to use the trade names, trademarks, 132 | service marks, or product names of the Licensor, except as required for 133 | reasonable and customary use in describing the origin of the Work and 134 | reproducing the content of the NOTICE file. 135 | 136 | #### 7. Disclaimer of Warranty 137 | 138 | Unless required by applicable law or agreed to in writing, Licensor provides the 139 | Work (and each Contributor provides its Contributions) on an “AS IS” BASIS, 140 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, 141 | including, without limitation, any warranties or conditions of TITLE, 142 | NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are 143 | solely responsible for determining the appropriateness of using or 144 | redistributing the Work and assume any risks associated with Your exercise of 145 | permissions under this License. 146 | 147 | #### 8. Limitation of Liability 148 | 149 | In no event and under no legal theory, whether in tort (including negligence), 150 | contract, or otherwise, unless required by applicable law (such as deliberate 151 | and grossly negligent acts) or agreed to in writing, shall any Contributor be 152 | liable to You for damages, including any direct, indirect, special, incidental, 153 | or consequential damages of any character arising as a result of this License or 154 | out of the use or inability to use the Work (including but not limited to 155 | damages for loss of goodwill, work stoppage, computer failure or malfunction, or 156 | any and all other commercial damages or losses), even if such Contributor has 157 | been advised of the possibility of such damages. 158 | 159 | #### 9. Accepting Warranty or Additional Liability 160 | 161 | While redistributing the Work or Derivative Works thereof, You may choose to 162 | offer, and charge a fee for, acceptance of support, warranty, indemnity, or 163 | other liability obligations and/or rights consistent with this License. However, 164 | in accepting such obligations, You may act only on Your own behalf and on Your 165 | sole responsibility, not on behalf of any other Contributor, and only if You 166 | agree to indemnify, defend, and hold each Contributor harmless for any liability 167 | incurred by, or claims asserted against, such Contributor by reason of your 168 | accepting any such warranty or additional liability. 169 | 170 | _END OF TERMS AND CONDITIONS_ 171 | 172 | ### APPENDIX: How to apply the Apache License to your work 173 | 174 | To apply the Apache License to your work, attach the following boilerplate 175 | notice, with the fields enclosed by brackets `[]` replaced with your own 176 | identifying information. (Don't include the brackets!) The text should be 177 | enclosed in the appropriate comment syntax for the file format. We also 178 | recommend that a file or class name and description of purpose be included on 179 | the same “printed page” as the copyright notice for easier identification within 180 | third-party archives. 181 | 182 | Copyright 2025 Novo Nordisk A/S, Danish company registration no. 24256790 183 | 184 | Licensed under the Apache License, Version 2.0 (the "License"); 185 | you may not use this file except in compliance with the License. 186 | You may obtain a copy of the License at 187 | 188 | http://www.apache.org/licenses/LICENSE-2.0 189 | 190 | Unless required by applicable law or agreed to in writing, software 191 | distributed under the License is distributed on an "AS IS" BASIS, 192 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 193 | See the License for the specific language governing permissions and 194 | limitations under the License. 195 | -------------------------------------------------------------------------------- /R/biocompute.R: -------------------------------------------------------------------------------- 1 | #' Create biocompute logs 2 | #' 3 | #' @description 4 | #' BioCompute is a standard for logs of programs for for Bioinformatics Computational Analyses. 5 | #' 6 | #' The BioCompute object is a `json` log that can be created based on the output of `run()`. 7 | #' 8 | #' @details 9 | #' The object consists of the following domains: 10 | #' 11 | #' * **Specifications**: 12 | #' * *spec_version*: Version of BioCompute used (`https://w3id.org/biocompute/1.3.0/``) 13 | #' * *object_id*: Unique project id 14 | #' * *type*: Your project type 15 | #' * *etag*: Your `etag` id from the BioCompute Object Portal 16 | #' 17 | #' * [Provenance Domain](https://wiki.biocomputeobject.org/index.php?title=Provenance-domain) 18 | #' * This is used to track the history of the BCO. Review and signatures go here. 19 | #' 20 | #' * [Usability Domain](https://wiki.biocomputeobject.org/index.php?title=Usability-domain) 21 | #' * This is used to improve searchability by allowing a free-text description of the BCO. 22 | #' * Provide external document. 23 | #' 24 | #' * [Extension Domain](https://wiki.biocomputeobject.org/index.php?title=Extension-domain) 25 | #' * This is used to add any additional structured information that is not directly covered by the BCO. 26 | #' 27 | #' * [Description Domain](https://wiki.biocomputeobject.org/index.php?title=Description-domain) 28 | #' * Contains a structured field for the description of external references, the pipeline steps, 29 | #' and the relationship of I/O objects. 30 | #' * Provide external document. 31 | #' * **Note**: Use of `keywords` and `External_Reference` entries are not yet implemented. 32 | #' To use fill out the entries manually after creating the BioCompute object.` 33 | #' 34 | #' * [Execution Domain](https://wiki.biocomputeobject.org/index.php?title=Execution-domain) 35 | #' * Contains fields for the execution of the BCO. 36 | #' * **Note**: Use of `external_data_endpoints` not implemented. Fill out manually afterwards if needed. 37 | #' 38 | #'* [Parametric Domain](https://wiki.biocomputeobject.org/index.php?title=Parametric-domain) 39 | #' * Represents the list of parameters customizing the computational flow which can affect 40 | #' the output of the calculations. 41 | #' 42 | #' * [IO Domain](https://wiki.biocomputeobject.org/index.php?title=Iodomain) 43 | #' * Represents the list of global input and output files created by the computational workflow. 44 | #' 45 | #' * [Error Domain](https://wiki.biocomputeobject.org/index.php?title=Error-domain) 46 | #' * Defines the empirical and algorithmic limits and error sources of the BCO. 47 | #' * **Note**: Use of this domain is not clearly defined. 48 | #' It is therefore always left empty in the current implementation. 49 | #' If you want to add content do so manually after creating the BCO. 50 | #' 51 | #' See the [BioCompute Object Portal](https://www.biocomputeobject.org) and the 52 | #' [BioCompute Objects Wiki](https://wiki.biocomputeobject.org/Main_Page) for more information. 53 | #' 54 | #' @param queue Result from `run()`. 55 | #' @param path A character string specifying the file path to write BioCompute log to. 56 | #' @param ... Additional arguments parsed to `jsonlite::write_json()`. Note always uses `auto_unbox = TRUE`. 57 | #' @return (`invisible`) `list` of the biocompute domains and their content. 58 | #' @export 59 | write_biocompute <- function( 60 | queue = run("_whirl.yml"), 61 | path = "bco.json", 62 | ... 63 | ) { 64 | config <- attr(queue, "whirl_input") 65 | 66 | if (is.null(config)) { 67 | cli::cli_abort("The `queue` must be created with `whirl::run()`") 68 | } else if ( 69 | !rlang::is_string(config) || 70 | !get_file_ext(config) %in% c("yml", "yaml") || 71 | is.null(yaml::read_yaml(config)[["biocompute"]]) 72 | ) { 73 | cli::cli_abort( 74 | "Input to `run()` must be a path to a yaml file with a biocompute entry. See `?use_biocompute()`." 75 | ) 76 | } 77 | 78 | bco <- create_biocompute(queue = queue, config = config) 79 | 80 | jsonlite::write_json(x = bco, path = path, auto_unbox = TRUE, ...) 81 | 82 | invisible(bco) 83 | } 84 | 85 | #' @noRd 86 | create_biocompute <- function(queue, config) { 87 | metadata <- yaml::read_yaml(config) 88 | 89 | list( 90 | object_id = metadata[["biocompute"]][["object_id"]] |> 91 | get_single_unique(), 92 | spec_version = metadata[["biocompute"]][["spec_version"]] |> 93 | get_single_unique(), 94 | etag = metadata[["biocompute"]][["etag"]] |> 95 | get_single_unique(), 96 | provenance_domain = NULL, 97 | usability_domain = metadata[["biocompute"]][["usability"]] |> 98 | get_single_unique(), 99 | extension_domain = metadata[["biocompute"]][["extension"]] |> 100 | get_single_unique(), 101 | description_domain = create_description_domain(queue), 102 | execution_domain = create_execution_domain(queue), 103 | parametric_domain = create_parametrics_domain(metadata, dirname(config)), 104 | io_domain = create_io_domain(queue), 105 | error_domain = list( 106 | algorithmic_error = NULL, 107 | empirical_error = NULL 108 | ) 109 | ) 110 | } 111 | 112 | # DESCRIPTION DOMAIN 113 | #' @noRd 114 | create_description_domain <- function(queue) { 115 | pipeline_steps <- queue |> 116 | dplyr::mutate( 117 | name = .data$script |> 118 | basename() |> 119 | sub(pattern = "\\.\\w+$", replacement = "") |> 120 | gsub(pattern = "[-_]", replacement = " "), 121 | step_number = .data$id, 122 | version = .data$result |> 123 | purrr::map_chr(c("script", "md5sum")), # Devskim: ignore DS126858 124 | description = .data$tag, 125 | prerequisite = .data$result |> 126 | purrr::map(c("session", "R")) |> 127 | purrr::map(.f = \(x) { 128 | x |> 129 | dplyr::mutate( 130 | name = paste( 131 | "R package:", 132 | .data$package, 133 | "- version:", 134 | .data$version 135 | ), 136 | uri = .data$url 137 | ) |> 138 | dplyr::select("name", "uri") |> 139 | purrr::pmap(.f = list) 140 | }), 141 | input_list = .data$result |> 142 | purrr::map(c("files", "read")) |> 143 | purrr::map(.f = bco_file_format), 144 | output_list = .data$result |> 145 | purrr::map(c("files", "write")) |> 146 | purrr::map(.f = bco_file_format) 147 | ) |> 148 | dplyr::select( 149 | "name", 150 | "step_number", 151 | "version", 152 | "description", 153 | "prerequisite", 154 | "input_list", 155 | "output_list" 156 | ) 157 | 158 | description_domain <- list( 159 | keywords = list(), 160 | External_Reference = list(), 161 | pipeline_steps = purrr::pmap(.l = pipeline_steps, .f = list) 162 | ) 163 | 164 | return(description_domain) 165 | } 166 | 167 | #' @noRd 168 | bco_file_format <- function(x) { 169 | if (is.null(x)) { 170 | return( 171 | dplyr::tibble( 172 | filename = character(0), 173 | uri = character(0), 174 | access_time = character(0) 175 | ) 176 | ) 177 | } 178 | x |> 179 | dplyr::mutate( 180 | filename = basename(.data$file), 181 | uri = .data$file, 182 | access_time = format(.data$time, format = "%Y-%m-%d %H:%M:%S %Z") 183 | ) |> 184 | dplyr::select("filename", "uri", "access_time") |> 185 | purrr::pmap(.f = list) 186 | } 187 | 188 | #' @noRd 189 | get_single_unique <- function(x) { 190 | x <- x |> 191 | unlist() |> 192 | unique() 193 | 194 | stopifnot(length(x) == 1) 195 | 196 | return(x) 197 | } 198 | 199 | #' @noRd 200 | get_unique_values <- function(x) { 201 | split(x, names(x)) |> 202 | lapply(\(x) { 203 | x |> 204 | unlist() |> 205 | unique() |> 206 | paste(collapse = ";") 207 | }) 208 | } 209 | 210 | #' @noRd 211 | create_execution_domain <- function(queue) { 212 | envvars <- queue$result |> 213 | purrr::map(c("session", "environment")) |> 214 | purrr::list_rbind() |> 215 | dplyr::distinct() |> 216 | dplyr::arrange("variable") 217 | 218 | platform <- queue$result |> 219 | purrr::map(c("session", "platform")) |> 220 | purrr::list_rbind() |> 221 | dplyr::distinct() |> 222 | dplyr::filter(.data$setting %in% c("version", "pandoc", "quarto")) 223 | 224 | platform <- split(x = platform$value, f = platform$setting) 225 | 226 | software_prerequisites <- list( 227 | list( 228 | name = "R", 229 | version = sub( 230 | pattern = "R version ([0-9]+\\.[0-9]+\\.[0-9]+).*", 231 | replacement = "\\1", 232 | x = get_single_unique(platform$version) 233 | ), 234 | URI = "https://www.r-project.org/" 235 | ), 236 | list( 237 | name = "quarto", 238 | version = get_single_unique(platform$quarto), 239 | URI = "https://quarto.org" 240 | ), 241 | list( 242 | name = "pandoc", 243 | version = get_single_unique(platform$pandoc), 244 | URI = "https://pandoc.org/" 245 | ) 246 | ) 247 | 248 | packages <- queue$result |> 249 | purrr::map(c("session", "R")) |> 250 | purrr::list_rbind() |> 251 | dplyr::distinct() |> 252 | dplyr::arrange(.data$package) |> 253 | dplyr::mutate( 254 | name = .data$package, 255 | version = .data$version, 256 | uri = .data$url 257 | ) |> 258 | dplyr::select("name", "version", "uri") |> 259 | purrr::pmap(list) 260 | 261 | software_prerequisites <- append(software_prerequisites, packages) 262 | 263 | execution_domain <- list( 264 | script = queue$script, 265 | script_driver = get_single_unique(platform$version), 266 | software_prerequisites = software_prerequisites, 267 | external_data_endpoints = list(), 268 | environment_variables = stats::setNames(envvars$value, envvars$variable) |> 269 | as.list() |> 270 | get_unique_values() 271 | ) 272 | 273 | return(execution_domain) 274 | } 275 | 276 | #' @noRd 277 | create_parametrics_domain <- function(config, base_path) { 278 | parametric_domain <- list() 279 | step_number <- 0 280 | for (step in config$steps) { 281 | if (!("parameter_files" %in% names(step))) { 282 | step_number <- step_number + 1 283 | next 284 | } 285 | 286 | for (parameter_file in step$parameter_files) { 287 | parameters <- yaml::read_yaml(normalize_with_base( 288 | parameter_file, 289 | base_path 290 | )) 291 | 292 | parametric_domain <- append( 293 | parametric_domain, 294 | purrr::map2( 295 | parameters, 296 | names(parameters), 297 | \(x, y) list(param = y, value = x, step = step_number) 298 | ) |> 299 | unname() 300 | ) 301 | } 302 | step_number <- step_number + 1 303 | } 304 | return(parametric_domain) 305 | } 306 | 307 | #' @noRd 308 | bco_create_outputs <- function(files) { 309 | entry <- vector(mode = "list", length = length(files)) 310 | for (i in seq_along(entry)) { 311 | entry[[i]]$mediatype <- if (grepl("\\.html$", files[[i]])) { 312 | "text/html" 313 | } else if (grepl("\\.zip$", files[[i]])) { 314 | "application/zip" 315 | } else if (grepl("\\.csv$", files[[i]])) { 316 | "text/csv" 317 | } else if (grepl("\\.txt$", files[[i]])) { 318 | "text/txt" 319 | } else { 320 | " " 321 | } 322 | 323 | entry[[i]]$uri <- list( 324 | filename = basename(files[[i]]), 325 | uri = files[[i]] 326 | ) 327 | } 328 | 329 | return(entry) 330 | } 331 | 332 | #' @noRd 333 | bco_create_inputs <- function(files) { 334 | entry <- vector(mode = "list", length = length(files)) 335 | for (i in seq_along(entry)) { 336 | entry[[i]]$uri <- list( 337 | uri = files[[i]] 338 | ) 339 | } 340 | 341 | return(entry) 342 | } 343 | 344 | #' @noRd 345 | create_io_domain <- function(queue) { 346 | input <- queue$result |> 347 | purrr::map(c("files", "read", "file")) |> 348 | unlist() |> 349 | unique() |> 350 | bco_create_inputs() 351 | 352 | output <- queue$result |> 353 | purrr::map(c("files", "write", "file")) |> 354 | unlist() |> 355 | unique() |> 356 | bco_create_outputs() 357 | 358 | return(list( 359 | input_subdomain = input, 360 | output_subdomain = output 361 | )) 362 | } 363 | --------------------------------------------------------------------------------