├── .github ├── .gitignore ├── workflows │ ├── pkgcheck.yaml │ ├── R-CMD-check-HTML5.yaml │ ├── pkgdown.yaml │ ├── R-CMD-check.yaml │ └── rhub.yaml ├── CONTRIBUTING.md └── CODE_OF_CONDUCT.md ├── vignettes ├── .gitignore └── rdocdump.qmd ├── LICENSE ├── man ├── figures │ ├── card.png │ └── logo.png ├── combine_vignettes.Rd ├── rdd_get_repos.Rd ├── extract_code_installed.Rd ├── rdd_set_repos.Rd ├── combine_rd.Rd ├── rdd_set_cache_path.Rd ├── cleanup_files.Rd ├── extract_code_source.Rd ├── resolve_pkg_path.Rd ├── rdd_extract_code.Rd └── rdd_to_txt.Rd ├── NAMESPACE ├── .Rbuildignore ├── NEWS.md ├── R ├── onLoad.R ├── util_cleanup_files.R ├── set_cache_path.R ├── util_combine_vignettes.R ├── get-or-set-repos.R ├── util_combine_rd.R ├── to_txt.R ├── extract_code.R └── util_resolve_pkg_path.R ├── tests ├── testthat.R └── testthat │ ├── test-set_cache_path.R │ ├── test-onLoad.R │ ├── test-util_combine_rd.R │ ├── test-util_combine_vignettes.R │ ├── test-util_resolve_pkg_path.R │ └── test-to_txt.R ├── inst ├── CITATION └── schemaorg.json ├── rdocdump.Rproj ├── tools └── meta-data-update-and-submission.R ├── .gitignore ├── LICENSE.md ├── DESCRIPTION ├── _pkgdown.yml ├── README.qmd ├── README.md ├── CITATION.cff └── codemeta.json /.github/.gitignore: -------------------------------------------------------------------------------- 1 | *.html 2 | -------------------------------------------------------------------------------- /vignettes/.gitignore: -------------------------------------------------------------------------------- 1 | *.html 2 | *.R 3 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | YEAR: 2025 2 | COPYRIGHT HOLDER: rdocdump authors 3 | -------------------------------------------------------------------------------- /man/figures/card.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/e-kotov/rdocdump/HEAD/man/figures/card.png -------------------------------------------------------------------------------- /man/figures/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/e-kotov/rdocdump/HEAD/man/figures/logo.png -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | export(rdd_extract_code) 4 | export(rdd_get_repos) 5 | export(rdd_set_cache_path) 6 | export(rdd_set_repos) 7 | export(rdd_to_txt) 8 | -------------------------------------------------------------------------------- /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^rdocdump\.Rproj$ 2 | ^\.Rproj\.user$ 3 | ^LICENSE\.md$ 4 | ^README\.Rmd$ 5 | ^cran-comments\.md$ 6 | ^private$ 7 | ^README\.qmd$ 8 | ^_pkgdown\.yml$ 9 | ^docs$ 10 | ^pkgdown$ 11 | ^.github$ 12 | ^CITATION\.cff$ 13 | ^codemeta\.json$ 14 | ^man/figures/logo\.png$ 15 | ^man/figures/card\.png$ 16 | ^CRAN-SUBMISSION$ 17 | ^.gemini$ 18 | -------------------------------------------------------------------------------- /NEWS.md: -------------------------------------------------------------------------------- 1 | # rdocdump (development version) 2 | 3 | # rdocdump 0.1.1 (2025-08-21) 4 | 5 | * Added `version` argument to `rdd_to_txt()` and `rdd_extract_code()` functions to specify the version of the package to download from CRAN for processing. 6 | 7 | # rdocdump 0.1.0 (2025-06-15) 8 | 9 | * Initial CRAN submission. 10 | 11 | * Basic functionality for parsing R documentation and source files. 
12 | -------------------------------------------------------------------------------- /R/onLoad.R: -------------------------------------------------------------------------------- 1 | .onLoad <- function(libname, pkgname) { 2 | default_cache <- file.path(tempdir(), "rdocdump_cache") 3 | if (is.null(getOption("rdocdump.cache_path"))) { 4 | options(rdocdump.cache_path = default_cache) 5 | } 6 | # Set default repository option for rdocdump 7 | if (is.null(getOption("rdocdump.repos"))) { 8 | options(rdocdump.repos = c("CRAN" = "https://cloud.r-project.org")) 9 | } 10 | } 11 | -------------------------------------------------------------------------------- /tests/testthat.R: -------------------------------------------------------------------------------- 1 | # This file is part of the standard setup for testthat. 2 | # It is recommended that you do not modify it. 3 | # 4 | # Where should you do additional test configuration? 5 | # Learn more about the roles of various files in: 6 | # * https://r-pkgs.org/testing-design.html#sec-tests-files-overview 7 | # * https://testthat.r-lib.org/articles/special-files.html 8 | 9 | library(testthat) 10 | library(rdocdump) 11 | 12 | test_check("rdocdump") 13 | -------------------------------------------------------------------------------- /inst/CITATION: -------------------------------------------------------------------------------- 1 | bibentry( 2 | key = "rdocdump", 3 | bibtype = "Manual", 4 | title = "rdocdump: Dump R Package Source, Documentation, and Vignettes into One File", 5 | author = person("Egor", "Kotov", , "kotov.egor@gmail.com", role = c("aut", "cre", "cph"), 6 | comment = c(ORCID = "0000-0001-6690-5345")), 7 | year = "2025", 8 | url = "https://github.com/e-kotov/rdocdump", 9 | doi = "10.32614/CRAN.package.rdocdump" 10 | ) 11 | -------------------------------------------------------------------------------- /man/combine_vignettes.Rd: -------------------------------------------------------------------------------- 1 | % 
Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/util_combine_vignettes.R 3 | \name{combine_vignettes} 4 | \alias{combine_vignettes} 5 | \title{Helper function to combine package vignettes} 6 | \usage{ 7 | combine_vignettes(pkg_path) 8 | } 9 | \arguments{ 10 | \item{pkg_path}{Path to the package source directory.} 11 | } 12 | \value{ 13 | A single string containing the combined vignettes from the package. 14 | } 15 | \description{ 16 | Helper function to combine package vignettes 17 | } 18 | \keyword{internal} 19 | -------------------------------------------------------------------------------- /rdocdump.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | ProjectId: f34ce716-32d2-4e84-9419-ff59edb83024 3 | 4 | RestoreWorkspace: No 5 | SaveWorkspace: No 6 | AlwaysSaveHistory: Default 7 | 8 | EnableCodeIndexing: Yes 9 | UseSpacesForTab: Yes 10 | NumSpacesForTab: 4 11 | Encoding: UTF-8 12 | 13 | RnwWeave: Sweave 14 | LaTeX: pdfLaTeX 15 | 16 | AutoAppendNewline: Yes 17 | StripTrailingWhitespace: Yes 18 | LineEndingConversion: Posix 19 | 20 | BuildType: Package 21 | PackageUseDevtools: Yes 22 | PackageInstallArgs: --no-multiarch --with-keep.source 23 | PackageRoxygenize: rd,collate,namespace 24 | -------------------------------------------------------------------------------- /.github/workflows/pkgcheck.yaml: -------------------------------------------------------------------------------- 1 | name: pkgcheck 2 | 3 | # This will cancel running jobs once a new run is triggered 4 | concurrency: 5 | group: ${{ github.workflow }}-${{ github.head_ref }} 6 | cancel-in-progress: true 7 | 8 | on: 9 | # Manually trigger the Action under Actions/pkgcheck 10 | workflow_dispatch: 11 | # Run on every push to main 12 | push: 13 | branches: 14 | - main 15 | 16 | jobs: 17 | pkgcheck: 18 | runs-on: ubuntu-latest 19 | permissions: 20 | issues: write 21 | steps: 22 | - uses: 
ropensci-review-tools/pkgcheck-action@main 23 | -------------------------------------------------------------------------------- /tools/meta-data-update-and-submission.R: -------------------------------------------------------------------------------- 1 | # before release 2 | # usethis::use_version("patch") 3 | usethis::use_tidy_description() 4 | cffr::cff_write() 5 | codemetar::write_codemeta(write_minimeta = T) 6 | # urlchecker::url_check() 7 | # devtools::check(remote = TRUE, manual = TRUE) 8 | # devtools::check_win_devel() 9 | # devtools::check_win_release() 10 | # devtools::check_win_oldrelease() 11 | # foghorn::winbuilder_queue() 12 | # revdepcheck::revdep_check(num_workers = 4) 13 | 14 | # devtools::submit_cran() 15 | 16 | # usethis::use_github_release() 17 | # usethis::use_dev_version(push = TRUE) 18 | -------------------------------------------------------------------------------- /tests/testthat/test-set_cache_path.R: -------------------------------------------------------------------------------- 1 | test_that("rdd_set_cache_path sets the cache path correctly", { 2 | temp_cache <- tempfile("rdocdump_cache") 3 | on.exit(unlink(temp_cache, recursive = TRUE), add = TRUE) 4 | 5 | # Scope the existing option value so it is automatically restored when the test ends 6 | withr::local_options(list( 7 | rdocdump.cache_path = getOption("rdocdump.cache_path") 8 | )) 9 | 10 | result <- rdd_set_cache_path(temp_cache) 11 | normalized_path <- normalizePath(temp_cache, winslash = "/") 12 | 13 | expect_equal(getOption("rdocdump.cache_path"), normalized_path) 14 | expect_equal(result, normalized_path) 15 | }) 16 | -------------------------------------------------------------------------------- /man/rdd_get_repos.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/get-or-set-repos.R 3 | \name{rdd_get_repos} 4 | \alias{rdd_get_repos} 5 | \title{Get Current \code{rdocdump} 
Repository Options} 6 | \usage{ 7 | rdd_get_repos() 8 | } 9 | \value{ 10 | A character vector of repository URLs. 11 | } 12 | \description{ 13 | This function returns the current repository URLs used by \code{rdocdump}. The default is set to the CRAN repository at "https://cloud.r-project.org". This does not affect the repositories used by \code{install.packages()} in your current R session and/or project. To set repository options, use \code{\link{rdd_set_repos}}. 14 | } 15 | \examples{ 16 | # Get current rdocdump repository options 17 | rdd_get_repos() 18 | 19 | } 20 | -------------------------------------------------------------------------------- /man/extract_code_installed.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/extract_code.R 3 | \name{extract_code_installed} 4 | \alias{extract_code_installed} 5 | \title{Extract code from an installed package using its namespace. 6 | This function retrieves all functions from the package namespace and deparses them to get their source code.} 7 | \usage{ 8 | extract_code_installed(pkg_name) 9 | } 10 | \arguments{ 11 | \item{pkg_name}{The name of the installed package.} 12 | } 13 | \value{ 14 | A single string containing the source code of all functions in the package. 15 | } 16 | \description{ 17 | Extract code from an installed package using its namespace. 18 | This function retrieves all functions from the package namespace and deparses them to get their source code. 
19 | } 20 | \keyword{internal} 21 | -------------------------------------------------------------------------------- /man/rdd_set_repos.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/get-or-set-repos.R 3 | \name{rdd_set_repos} 4 | \alias{rdd_set_repos} 5 | \title{Set \code{rdocdump} Repository Options} 6 | \usage{ 7 | rdd_set_repos(repos) 8 | } 9 | \arguments{ 10 | \item{repos}{A character vector of repository URLs.} 11 | } 12 | \value{ 13 | Invisibly returns the new repository URLs. 14 | } 15 | \description{ 16 | This function sets the package repository URLs used by \code{rdocdump} when fetching package sources. May be useful for setting custom repositories or mirrors. This does not affect the repositories used by \code{install.packages()} in your current R session and/or project. 17 | } 18 | \examples{ 19 | # Set rdocdump repository options 20 | rdd_set_repos(c("CRAN" = "https://cloud.r-project.org")) 21 | 22 | } 23 | -------------------------------------------------------------------------------- /tests/testthat/test-onLoad.R: -------------------------------------------------------------------------------- 1 | # test_that(".onLoad sets rdocdump.cache_path if not already set", { 2 | # # Backup any existing option 3 | # old_opt <- getOption("rdocdump.cache_path") 4 | # options(rdocdump.cache_path = NULL) 5 | 6 | # # Expected default value based on tempdir() 7 | # default_cache <- file.path(tempdir(), "rdocdump_cache") 8 | 9 | # # Call .onLoad 10 | # .onLoad(libname = NULL, pkgname = NULL) 11 | # expect_equal(getOption("rdocdump.cache_path"), default_cache) 12 | 13 | # # Restore previous option 14 | # options(rdocdump.cache_path = old_opt) 15 | # }) 16 | 17 | # test_that(".onLoad does not override existing rdocdump.cache_path", { 18 | # old_value <- file.path(tempdir(), "existing_cache") 19 | # options(rdocdump.cache_path = old_value) 20 | 21 
| # .onLoad(libname = NULL, pkgname = NULL) 22 | # expect_equal(getOption("rdocdump.cache_path"), old_value) 23 | # }) 24 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # History files 2 | .Rhistory 3 | .Rapp.history 4 | 5 | # Session Data files 6 | .RData 7 | .RDataTmp 8 | 9 | # User-specific files 10 | .Ruserdata 11 | 12 | # Example code in package build process 13 | *-Ex.R 14 | 15 | # Output files from R CMD build 16 | /*.tar.gz 17 | 18 | # Output files from R CMD check 19 | /*.Rcheck/ 20 | 21 | # RStudio files 22 | .Rproj.user/ 23 | 24 | # produced vignettes 25 | vignettes/*.html 26 | vignettes/*.pdf 27 | 28 | # OAuth2 token, see https://github.com/hadley/httr/releases/tag/v0.3 29 | .httr-oauth 30 | 31 | # knitr and R markdown default cache directories 32 | *_cache/ 33 | /cache/ 34 | 35 | # Temporary files created by R markdown 36 | *.utf8.md 37 | *.knit.md 38 | 39 | # R Environment Variables 40 | .Renviron 41 | 42 | # pkgdown site 43 | docs/ 44 | 45 | # translation temp files 46 | po/*~ 47 | 48 | # RStudio Connect folder 49 | rsconnect/ 50 | .Rproj.user 51 | 52 | private 53 | docs 54 | inst/doc 55 | -------------------------------------------------------------------------------- /man/combine_rd.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/util_combine_rd.R 3 | \name{combine_rd} 4 | \alias{combine_rd} 5 | \title{Combine Rd files into a single character vector. 
6 | This function reads the Rd files from a package source directory or an installed package and combines them into a single string.} 7 | \usage{ 8 | combine_rd(pkg_path, is_installed = FALSE, pkg_name = NULL) 9 | } 10 | \arguments{ 11 | \item{pkg_path}{Path to the package source directory or the installed package.} 12 | 13 | \item{is_installed}{Logical indicating whether the package is installed (\code{TRUE}) or a source package (\code{FALSE}).} 14 | 15 | \item{pkg_name}{Optional package name if the package is installed.} 16 | } 17 | \value{ 18 | A single string containing the combined Rd documentation. 19 | } 20 | \description{ 21 | Combine Rd files into a single character vector. 22 | This function reads the Rd files from a package source directory or an installed package and combines them into a single string. 23 | } 24 | \keyword{internal} 25 | -------------------------------------------------------------------------------- /man/rdd_set_cache_path.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/set_cache_path.R 3 | \name{rdd_set_cache_path} 4 | \alias{rdd_set_cache_path} 5 | \title{Set \code{rdocdump} Cache Path in the Current R Session} 6 | \usage{ 7 | rdd_set_cache_path(path) 8 | } 9 | \arguments{ 10 | \item{path}{A \code{character} string specifying the directory to be used as the cache path.} 11 | } 12 | \value{ 13 | Invisibly returns the new cache path. 14 | } 15 | \description{ 16 | This function sets the cache path used by \code{rdocdump} to store temporary files (downloaded tar.gz archives and/or extracted directories) for the current R session. The cache path is stored in the option \code{"rdocdump.cache_path"}, which can be checked with \code{getOption("rdocdump.cache_path")}. The path is created if it does not exist. 
17 | } 18 | \examples{ 19 | # set cache directory for `rdocdump` 20 | rdd_set_cache_path(paste0(tempdir(), "/rdocdump_cache")) 21 | # clean up: remove the cache directory created above 22 | unlink(getOption("rdocdump.cache_path"), recursive = TRUE) 23 | } 24 | -------------------------------------------------------------------------------- /man/cleanup_files.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/util_cleanup_files.R 3 | \name{cleanup_files} 4 | \alias{cleanup_files} 5 | \title{Cleanup Temporary Files} 6 | \usage{ 7 | cleanup_files(pkg_info, keep_files) 8 | } 9 | \arguments{ 10 | \item{pkg_info}{A list returned by \code{resolve_pkg_path()}, containing \code{tar_path} and \code{extracted_path}.} 11 | 12 | \item{keep_files}{A \code{character} value controlling whether temporary files should be kept. 13 | Possible values are: 14 | \itemize{ 15 | \item \code{"none"}: Delete both the tar.gz archive and the extracted files (default). 16 | \item \code{"tgz"}: Keep only the tar.gz archive. 17 | \item \code{"extracted"}: Keep only the extracted files. 18 | \item \code{"both"}: Keep both the tar.gz archive and the extracted files. 19 | }} 20 | } 21 | \value{ 22 | Invisibly returns \code{NULL}. If there are any issues with file deletion, warnings are issued. 23 | } 24 | \description{ 25 | Clean up temporary package archive and extracted files according to a keep_files policy. 
26 | } 27 | \keyword{internal} 28 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | Copyright (c) 2025 rdocdump authors 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /R/util_cleanup_files.R: -------------------------------------------------------------------------------- 1 | #' Cleanup Temporary Files 2 | #' 3 | #' @description 4 | #' Clean up temporary package archive and extracted files according to a keep_files policy. 5 | #' 6 | #' @param pkg_info A list returned by `resolve_pkg_path()`, containing `tar_path` and `extracted_path`. 7 | #' 8 | #' @inheritParams rdd_to_txt 9 | #' 10 | #' @return Invisibly returns `NULL`. If there are any issues with file deletion, warnings are issued. 
11 | #' 12 | #' @keywords internal 13 | #' 14 | cleanup_files <- function( 15 | pkg_info, 16 | keep_files 17 | ) { 18 | if (!keep_files %in% c("tgz", "both") && !is.null(pkg_info$tar_path)) { 19 | res <- unlink(pkg_info$tar_path) 20 | if (res != 0L) { 21 | warning( 22 | "cleanup_files: failed to delete archive: ", 23 | pkg_info$tar_path, 24 | call. = FALSE 25 | ) 26 | } 27 | } 28 | 29 | if ( 30 | !keep_files %in% c("extracted", "both") && !is.null(pkg_info$extracted_path) 31 | ) { 32 | dir_to_remove <- dirname(pkg_info$extracted_path) 33 | res <- unlink(dir_to_remove, recursive = TRUE) 34 | if (res != 0L) { 35 | warning( 36 | "cleanup_files: failed to delete extracted directory: ", 37 | dir_to_remove, 38 | call. = FALSE 39 | ) 40 | } 41 | } 42 | 43 | invisible(NULL) 44 | } 45 | -------------------------------------------------------------------------------- /.github/workflows/R-CMD-check-HTML5.yaml: -------------------------------------------------------------------------------- 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples 2 | # Need help debugging build failures? 
Start at https://github.com/r-lib/actions#where-to-find-help 3 | on: 4 | push: 5 | branches: [main, master] 6 | pull_request: 7 | branches: [main, master] 8 | 9 | name: R-CMD-check-HTML5 10 | 11 | jobs: 12 | R-CMD-check: 13 | runs-on: ubuntu-latest 14 | env: 15 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 16 | R_KEEP_PKG_SOURCE: yes 17 | _R_CHECK_RD_VALIDATE_RD2HTML_: TRUE 18 | steps: 19 | - uses: actions/checkout@v2 20 | 21 | - name: Install pdflatex 22 | run: sudo apt-get install texlive-latex-base texlive-fonts-recommended texlive-fonts-extra texlive-latex-extra 23 | 24 | - name: Install tidy 25 | run: sudo apt install tidy 26 | 27 | - uses: r-lib/actions/setup-r@v2 28 | with: 29 | r-version: 'devel' 30 | http-user-agent: 'release' 31 | use-public-rspm: true 32 | 33 | - uses: r-lib/actions/setup-r-dependencies@v2 34 | with: 35 | extra-packages: any::rcmdcheck 36 | needs: check 37 | 38 | - uses: r-lib/actions/check-r-package@v2 39 | with: 40 | args: '"--as-cran"' 41 | build_args: 'character()' 42 | error-on: '"note"' 43 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: rdocdump 2 | Title: Dump 'R' Package Source, Documentation, and Vignettes into One File 3 | Version: 0.1.1.9000 4 | Authors@R: 5 | person("Egor", "Kotov", , "kotov.egor@gmail.com", role = c("aut", "cre", "cph"), 6 | comment = c(ORCID = "0000-0001-6690-5345")) 7 | Description: Dump source code, documentation and vignettes of an 'R' 8 | package into a single file. Supports installed packages, tar.gz 9 | archives, and package source directories. If the package is not 10 | installed, only its source is automatically downloaded from CRAN for 11 | processing. 
The output is a single plain text file or a character 12 | vector, which is useful to ingest complete package documentation and 13 | source into a large language model (LLM) or pass it further to other 14 | tools, such as 'ragnar' to 15 | create a Retrieval-Augmented Generation (RAG) workflow. 16 | License: MIT + file LICENSE 17 | URL: https://github.com/e-kotov/rdocdump, https://www.ekotov.pro/rdocdump/ 18 | BugReports: https://github.com/e-kotov/rdocdump/issues 19 | Suggests: 20 | curl, 21 | quarto, 22 | testthat (>= 3.0.0), 23 | withr 24 | VignetteBuilder: 25 | quarto 26 | Config/testthat/edition: 3 27 | Encoding: UTF-8 28 | Language: en 29 | Roxygen: list(markdown = TRUE) 30 | RoxygenNote: 7.3.2 31 | -------------------------------------------------------------------------------- /man/extract_code_source.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/extract_code.R 3 | \name{extract_code_source} 4 | \alias{extract_code_source} 5 | \title{Helper function to extract code from package source files. 6 | This function reads all \code{.R} files in the \code{R} directory and optionally includes files from the \code{tests} directory. 7 | It can also exclude roxygen2 documentation lines.} 8 | \usage{ 9 | extract_code_source(pkg_path, include_tests = FALSE, include_roxygen = FALSE) 10 | } 11 | \arguments{ 12 | \item{pkg_path}{Path to the package source directory.} 13 | 14 | \item{include_tests}{\code{logical}. If \code{TRUE}, for non-installed packages, the function will also include R source code from the \code{tests} directory. Defaults to \code{FALSE}.} 15 | 16 | \item{include_roxygen}{\code{logical}. If \code{TRUE}, roxygen2 documentation lines (lines starting with "#'") from R files will be included in the output. Defaults to \code{FALSE}.} 17 | } 18 | \value{ 19 | A single string containing the source code from the package's R files. 
20 | } 21 | \description{ 22 | Helper function to extract code from package source files. 23 | This function reads all \code{.R} files in the \code{R} directory and optionally includes files from the \code{tests} directory. 24 | It can also exclude roxygen2 documentation lines. 25 | } 26 | \keyword{internal} 27 | -------------------------------------------------------------------------------- /R/set_cache_path.R: -------------------------------------------------------------------------------- 1 | #' Set `rdocdump` Cache Path in the Current R Session 2 | #' 3 | #' @description 4 | #' This function sets the cache path used by `rdocdump` to store temporary files (downloaded tar.gz archives and/or extracted directories) for the current R session. The cache path is stored in the option `"rdocdump.cache_path"`, which can be checked with `getOption("rdocdump.cache_path")`. The path is created if it does not exist. 5 | #' 6 | #' @param path A `character` string specifying the directory to be used as the cache path. 7 | #' 8 | #' @return Invisibly returns the new cache path. 9 | #' @export 10 | #' @examples 11 | #' # set cache directory for `rdocdump` 12 | #' rdd_set_cache_path(paste0(tempdir(), "/rdocdump_cache")) 13 | #' # clean up: remove the cache directory created above 14 | #' unlink(getOption("rdocdump.cache_path"), recursive = TRUE) 15 | rdd_set_cache_path <- function( 16 | path 17 | ) { 18 | # Validate the input path 19 | if (!is.character(path) || length(path) != 1) { 20 | stop( 21 | "`path` argument must be a single character string specifying the cache directory." 
22 | ) 23 | } 24 | 25 | if (!dir.exists(path)) { 26 | dir.create(path, recursive = TRUE) 27 | } 28 | options(rdocdump.cache_path = normalizePath(path, winslash = "/")) 29 | message("rdocdump.cache_path set to: ", getOption("rdocdump.cache_path")) 30 | invisible(getOption("rdocdump.cache_path")) 31 | } 32 | -------------------------------------------------------------------------------- /R/util_combine_vignettes.R: -------------------------------------------------------------------------------- 1 | #' Helper function to combine package vignettes 2 | #' @param pkg_path Path to the package source directory. 3 | #' @return A single string containing the combined vignettes from the package. 4 | #' @keywords internal 5 | combine_vignettes <- function(pkg_path) { 6 | # Check for a "vignettes" directory; if not present, try "doc" 7 | if (dir.exists(file.path(pkg_path, "vignettes"))) { 8 | vignette_dir <- file.path(pkg_path, "vignettes") 9 | } else if (dir.exists(file.path(pkg_path, "doc"))) { 10 | vignette_dir <- file.path(pkg_path, "doc") 11 | } else { 12 | warning( 13 | "Neither 'vignettes' nor 'doc' directory found in the package source." 
14 | ) 15 | return("") 16 | } 17 | 18 | # List files with common vignette extensions: .Rmd, .Rnw, .md, .qmd 19 | vignette_files <- list.files( 20 | vignette_dir, 21 | pattern = "\\.(Rmd|Rnw|md|qmd)$", 22 | full.names = TRUE, 23 | ignore.case = TRUE 24 | ) 25 | 26 | if (length(vignette_files) == 0) { 27 | warning("No vignette files found in the directory: ", vignette_dir) 28 | return("") 29 | } 30 | 31 | combined_text <- "" 32 | 33 | for (vf in vignette_files) { 34 | header_line <- paste0(strrep("-", 80), "\nVignette: ", basename(vf), "\n") 35 | text <- readLines(vf, warn = FALSE) 36 | combined_text <- paste( 37 | combined_text, 38 | header_line, 39 | paste(text, collapse = "\n"), 40 | "\n\n", 41 | sep = "\n" 42 | ) 43 | } 44 | 45 | return(combined_text) 46 | } 47 | -------------------------------------------------------------------------------- /.github/workflows/pkgdown.yaml: -------------------------------------------------------------------------------- 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples 2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help 3 | on: 4 | push: 5 | branches: [main, master] 6 | pull_request: 7 | branches: [main, master] 8 | release: 9 | types: [published] 10 | workflow_dispatch: 11 | 12 | name: pkgdown 13 | 14 | permissions: read-all 15 | 16 | jobs: 17 | pkgdown: 18 | runs-on: ubuntu-latest 19 | # Only restrict concurrency for non-PR jobs 20 | concurrency: 21 | group: pkgdown-${{ github.event_name != 'pull_request' || github.run_id }} 22 | env: 23 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 24 | permissions: 25 | contents: write 26 | steps: 27 | - uses: actions/checkout@v4 28 | 29 | - uses: r-lib/actions/setup-pandoc@v2 30 | 31 | - uses: r-lib/actions/setup-r@v2 32 | with: 33 | use-public-rspm: true 34 | 35 | - uses: r-lib/actions/setup-r-dependencies@v2 36 | with: 37 | extra-packages: any::pkgdown, any::bookdown, local::. 
38 | needs: website 39 | 40 | - name: Build site 41 | run: pkgdown::build_site_github_pages(new_process = FALSE, install = FALSE) 42 | shell: Rscript {0} 43 | 44 | - name: Deploy to GitHub pages 🚀 45 | if: github.event_name != 'pull_request' 46 | uses: JamesIves/github-pages-deploy-action@v4.5.0 47 | with: 48 | clean: false 49 | branch: gh-pages 50 | folder: docs 51 | -------------------------------------------------------------------------------- /R/get-or-set-repos.R: -------------------------------------------------------------------------------- 1 | #' Set `rdocdump` Repository Options 2 | #' 3 | #' @description 4 | #' This function sets the package repository URLs used by `rdocdump` when fetching package sources. May be useful for setting custom repositories or mirrors. This does not affect the repositories used by `install.packages()` in your current R session and/or project. 5 | #' 6 | #' @param repos A character vector of repository URLs. 7 | #' 8 | #' @return Invisibly returns the new repository URLs. 9 | #' 10 | #' @examples 11 | #' # Set rdocdump repository options 12 | #' rdd_set_repos(c("CRAN" = "https://cloud.r-project.org")) 13 | #' 14 | #' @export 15 | rdd_set_repos <- function(repos) { 16 | if (!is.character(repos)) { 17 | stop("repos must be a character vector of repository URLs.") 18 | } 19 | options(rdocdump.repos = repos) 20 | message("rdocdump.repos set to: ", paste(repos, collapse = ", ")) 21 | invisible(getOption("rdocdump.repos")) 22 | } 23 | 24 | #' Get Current `rdocdump` Repository Options 25 | #' 26 | #' @description 27 | #' This function returns the current repository URLs used by `rdocdump`. The default is set to the CRAN repository at "https://cloud.r-project.org". This does not affect the repositories used by `install.packages()` in your current R session and/or project. To set repository options, use \code{\link{rdd_set_repos}}. 28 | #' 29 | #' @return A character vector of repository URLs. 
30 | #' 31 | #' @examples 32 | #' # Get current rdocdump repository options 33 | #' rdd_get_repos() 34 | #' 35 | #' @export 36 | rdd_get_repos <- function() { 37 | getOption("rdocdump.repos", getOption("repos")) 38 | } 39 | -------------------------------------------------------------------------------- /.github/workflows/R-CMD-check.yaml: -------------------------------------------------------------------------------- 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples 2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help 3 | on: 4 | push: 5 | branches: [main] 6 | pull_request: 7 | branches: [main] 8 | 9 | name: R-CMD-check 10 | 11 | permissions: read-all 12 | 13 | jobs: 14 | R-CMD-check: 15 | runs-on: ${{ matrix.config.os }} 16 | 17 | name: ${{ matrix.config.os }} (${{ matrix.config.r }}) 18 | 19 | strategy: 20 | fail-fast: false 21 | matrix: 22 | config: 23 | - {os: macos-latest, r: 'release'} 24 | - {os: windows-latest, r: 'release'} 25 | - {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'} 26 | - {os: ubuntu-latest, r: 'release'} 27 | - {os: ubuntu-latest, r: 'oldrel-1'} 28 | 29 | env: 30 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 31 | R_KEEP_PKG_SOURCE: yes 32 | 33 | steps: 34 | - uses: actions/checkout@v4 35 | 36 | - uses: r-lib/actions/setup-pandoc@v2 37 | 38 | - uses: r-lib/actions/setup-r@v2 39 | with: 40 | r-version: ${{ matrix.config.r }} 41 | http-user-agent: ${{ matrix.config.http-user-agent }} 42 | use-public-rspm: true 43 | 44 | - uses: r-lib/actions/setup-r-dependencies@v2 45 | with: 46 | extra-packages: any::rcmdcheck 47 | needs: check 48 | 49 | - uses: r-lib/actions/check-r-package@v2 50 | with: 51 | upload-snapshots: true 52 | build_args: 'c("--no-manual","--compact-vignettes=gs+qpdf")' 53 | -------------------------------------------------------------------------------- /tests/testthat/test-util_combine_rd.R: 
-------------------------------------------------------------------------------- 1 | test_that("combine_rd works for installed packages", { 2 | # For installed packages, pass the installed package path and package name. 3 | rd_text <- combine_rd( 4 | find.package("stats"), 5 | is_installed = TRUE, 6 | pkg_name = "stats" 7 | ) 8 | 9 | expect_true(nchar(rd_text) > 0) 10 | expect_match(rd_text, "Function:") 11 | }) 12 | 13 | test_that("combine_rd works for source packages", { 14 | # Create a dummy source package with minimal Rd documentation. 15 | temp_pkg <- tempfile("dummy_pkg") 16 | dir.create(temp_pkg) 17 | writeLines("Package: dummy\nVersion: 1.0", file.path(temp_pkg, "DESCRIPTION")) 18 | 19 | man_dir <- file.path(temp_pkg, "man") 20 | dir.create(man_dir) 21 | dummy_rd <- "\\name{dummy}\n\\alias{dummy}\n\\title{Dummy Function}\n\\description{A dummy function.}\n" 22 | writeLines(dummy_rd, file.path(man_dir, "dummy.Rd")) 23 | 24 | rd_text <- combine_rd(temp_pkg, is_installed = FALSE) 25 | 26 | expect_true(nchar(rd_text) > 0) 27 | expect_match(rd_text, "DESCRIPTION:") 28 | 29 | unlink(temp_pkg, recursive = TRUE) 30 | }) 31 | 32 | test_that("combine_rd errors when DESCRIPTION file is missing", { 33 | temp_pkg <- tempfile("pkg_no_desc") 34 | dir.create(temp_pkg) 35 | 36 | # Do not create a DESCRIPTION file. 37 | man_dir <- file.path(temp_pkg, "man") 38 | dir.create(man_dir) 39 | writeLines( 40 | "\\name{nodef}\n\\alias{nodef}\n\\title{No Def Function}", 41 | file.path(man_dir, "nodef.Rd") 42 | ) 43 | 44 | expect_error( 45 | combine_rd(temp_pkg, is_installed = FALSE), 46 | "Files 'DESCRIPTION' and 'DESCRIPTION.in' are missing." 
47 | ) 48 | 49 | unlink(temp_pkg, recursive = TRUE) 50 | }) 51 | -------------------------------------------------------------------------------- /tests/testthat/test-util_combine_vignettes.R: -------------------------------------------------------------------------------- 1 | test_that("combine_vignettes works with a 'vignettes' directory", { 2 | pkg_dir <- tempfile("pkg_") 3 | dir.create(pkg_dir) 4 | vignette_dir <- file.path(pkg_dir, "vignettes") 5 | dir.create(vignette_dir) 6 | 7 | # Create a sample vignette file in vignettes directory 8 | vign_file <- file.path(vignette_dir, "example.Rmd") 9 | writeLines("This is an Rmd vignette", vign_file) 10 | 11 | out <- combine_vignettes(pkg_dir) 12 | expect_match( 13 | out, 14 | "Vignette: example\\.Rmd", 15 | info = "Should include the header with file name" 16 | ) 17 | expect_match( 18 | out, 19 | "This is an Rmd vignette", 20 | info = "Should include the content of the vignette" 21 | ) 22 | }) 23 | 24 | test_that("combine_vignettes falls back to 'doc' directory if 'vignettes' is absent", { 25 | pkg_dir <- tempfile("pkg_") 26 | dir.create(pkg_dir) 27 | doc_dir <- file.path(pkg_dir, "doc") 28 | dir.create(doc_dir) 29 | 30 | # Create a sample vignette file in doc directory 31 | vign_file <- file.path(doc_dir, "example.qmd") 32 | writeLines("This is a qmd vignette", vign_file) 33 | 34 | out <- combine_vignettes(pkg_dir) 35 | expect_match( 36 | out, 37 | "Vignette: example\\.qmd", 38 | info = "Should include the header with file name from the doc folder" 39 | ) 40 | expect_match( 41 | out, 42 | "This is a qmd vignette", 43 | info = "Should include the content of the vignette" 44 | ) 45 | }) 46 | 47 | test_that("combine_vignettes returns empty string if neither 'vignettes' nor 'doc' exists", { 48 | pkg_dir <- tempfile("pkg_") 49 | dir.create(pkg_dir) 50 | 51 | out <- suppressWarnings(combine_vignettes(pkg_dir)) 52 | expect_equal( 53 | out, 54 | "", 55 | info = "Should return an empty string when no relevant directory is 
found" 56 | ) 57 | }) 58 | -------------------------------------------------------------------------------- /_pkgdown.yml: -------------------------------------------------------------------------------- 1 | url: https://www.ekotov.pro/rdocdump/ 2 | template: 3 | bootstrap: 5 4 | bootswatch: flatly 5 | includes: 6 | in_header: | 7 | 8 | 9 | 10 | 11 | 21 | opengraph: 22 | image: 23 | src: man/figures/card.png 24 | alt: "rdocdump: Dump R Package Documentation and Vignettes into One File" 25 | twitter: 26 | creator: "@EgorKotov" 27 | card: summary_large_image 28 | 29 | authors: 30 | Egor Kotov: 31 | href: "https://www.ekotov.pro" 32 | 33 | navbar: 34 | structure: 35 | left: [intro, reference, articles, tutorials, news] 36 | right: [search, github, lightswitch] 37 | 38 | home: 39 | title: 'rdocdump: Dump R Package Documentation and Vignettes into One File' 40 | description: 'Dump R Package Documentation and Vignettes into One File' 41 | 42 | reference: 43 | - title: "Main functions" 44 | contents: 45 | - rdd_to_txt 46 | - rdd_extract_code 47 | - title: "Helper functions" 48 | contents: 49 | - rdd_set_cache_path 50 | - rdd_set_repos 51 | - rdd_get_repos 52 | -------------------------------------------------------------------------------- /R/util_combine_rd.R: -------------------------------------------------------------------------------- 1 | #' Combine Rd files into a single character vector. 2 | #' This function reads the Rd files from a package source directory or an installed package and combines them into a single string. 3 | #' @param pkg_path Path to the package source directory or the installed package. 4 | #' @param is_installed Logical indicating whether the package is installed (`TRUE`) or a source package (`FALSE`). 5 | #' @param pkg_name Optional package name if the package is installed. 6 | #' @return A single string containing the combined Rd documentation. 
7 | #' @keywords internal 8 | combine_rd <- function( 9 | pkg_path, 10 | is_installed = FALSE, 11 | pkg_name = NULL 12 | ) { 13 | if (is_installed) { 14 | # For installed packages, use the package name. 15 | rd_db <- tools::Rd_db(package = pkg_name) 16 | } else { 17 | # For source packages, use the directory with the raw Rd files. 18 | rd_db <- tools::Rd_db(dir = pkg_path) 19 | } 20 | 21 | rd_texts <- mapply( 22 | function(rd, rd_name) { 23 | txt <- utils::capture.output(tools::Rd2txt(Rd = rd)) 24 | txt_clean <- gsub("_\b", "", txt, fixed = TRUE) 25 | # Extract function name by removing ".Rd" suffix from the file name. 26 | fname <- sub("\\.Rd$", "", rd_name) 27 | header_line <- paste0(strrep("-", 80), "\nFunction: ", fname, "()\n") 28 | paste0(header_line, paste(txt_clean, collapse = "\n")) 29 | }, 30 | rd_db, 31 | names(rd_db), 32 | SIMPLIFY = FALSE 33 | ) 34 | 35 | # Optionally include the DESCRIPTION file as a header. 36 | desc_file <- file.path(pkg_path, "DESCRIPTION") 37 | if (file.exists(desc_file)) { 38 | desc_text <- paste(readLines(desc_file), collapse = "\n") 39 | header <- paste0("DESCRIPTION:\n", desc_text, "\n\n") 40 | } else { 41 | header <- "" 42 | } 43 | 44 | combined_rd <- paste0(header, paste(unlist(rd_texts), collapse = "\n\n")) 45 | return(combined_rd) 46 | } 47 | -------------------------------------------------------------------------------- /inst/schemaorg.json: -------------------------------------------------------------------------------- 1 | { 2 | "@context": "https://schema.org", 3 | "@graph": [ 4 | { 5 | "type": "SoftwareSourceCode", 6 | "author": { 7 | "id": "https://orcid.org/0000-0001-6690-5345" 8 | }, 9 | "codeRepository": "https://github.com/e-kotov/rdocdump", 10 | "copyrightHolder": { 11 | "id": "https://orcid.org/0000-0001-6690-5345", 12 | "type": "Person", 13 | "email": "kotov.egor@gmail.com", 14 | "familyName": "Kotov", 15 | "givenName": "Egor" 16 | }, 17 | "description": "Dump source code, documentation and vignettes of an 
'R' package into a single file. Supports installed packages, tar.gz archives, and package source directories. If the package is not installed, only its source is automatically downloaded from CRAN for processing. The output is a single plain text file or a character vector, which is useful to ingest complete package documentation and source into a large language model (LLM) or pass it further to other tools, such as 'ragnar' to create a Retrieval-Augmented Generation (RAG) workflow.", 18 | "license": "https://spdx.org/licenses/MIT", 19 | "name": "rdocdump: Dump 'R' Package Source, Documentation, and Vignettes into One File", 20 | "programmingLanguage": { 21 | "type": "ComputerLanguage", 22 | "name": "R", 23 | "url": "https://r-project.org" 24 | }, 25 | "provider": { 26 | "id": "https://cran.r-project.org", 27 | "type": "Organization", 28 | "name": "Comprehensive R Archive Network (CRAN)", 29 | "url": "https://cran.r-project.org" 30 | }, 31 | "runtimePlatform": "R version 4.5.1 (2025-06-13)", 32 | "version": "0.1.1" 33 | }, 34 | { 35 | "id": "https://doi.org/10.32614/CRAN.package.rdocdump", 36 | "type": "SoftwareSourceCode", 37 | "author": { 38 | "id": "https://orcid.org/0000-0001-6690-5345", 39 | "type": "Person", 40 | "email": "kotov.egor@gmail.com", 41 | "familyName": "Kotov", 42 | "givenName": "Egor" 43 | }, 44 | "name": "rdocdump: Dump R Package Source, Documentation, and Vignettes into One File" 45 | } 46 | ] 47 | } 48 | -------------------------------------------------------------------------------- /man/resolve_pkg_path.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/util_resolve_pkg_path.R 3 | \name{resolve_pkg_path} 4 | \alias{resolve_pkg_path} 5 | \title{Resolve the path to a package directory or tarball} 6 | \usage{ 7 | resolve_pkg_path( 8 | pkg, 9 | cache_path = NULL, 10 | force_fetch = FALSE, 11 | version = NULL, 12 | repos = 
getOption("rdocdump.repos", getOption("repos")) 13 | ) 14 | } 15 | \arguments{ 16 | \item{pkg}{A \code{character} string specifying the package. This can be: 17 | \itemize{ 18 | \item an installed package name, 19 | \item a full path to a package source directory, 20 | \item a full path to a package archive file (tar.gz), or 21 | \item a package name not installed (which will then be downloaded from CRAN). 22 | }} 23 | 24 | \item{cache_path}{A \code{character} string specifying the directory where kept temporary files will be stored. 25 | By default, it uses the value of \code{getOption("rdocdump.cache_path")} which sets the cache directory to the temporary directory of the current R session.} 26 | 27 | \item{force_fetch}{\code{logical}. If \code{TRUE}, the package source will be fetched from CRAN as a tar.gz archive even if the package is already installed locally. Default is \code{FALSE}, but when \code{version} is specified, it will be set to \code{TRUE}.} 28 | 29 | \item{version}{Optional. A \code{character} string specifying the package version to fetch from CRAN. If not provided, the latest version will be used.} 30 | 31 | \item{repos}{A \code{character} vector of repository URLs. By default, it uses the value of \code{getOption("rdocdump.repos")} which sets the repository URLs to the default R repositories and is itself set to \code{c("CRAN" = "https://cloud.r-project.org")} on package load to prevent accidental downloads of pre-built packages from Posit Package Manager and R Universe.} 32 | } 33 | \value{ 34 | A list containing: 35 | \itemize{ 36 | \item \code{pkg_path}: Path to the package directory or tarball. 37 | \item \code{extracted_path}: Path to the extracted package directory (if applicable). 38 | \item \code{tar_path}: Path to the tarball if it was downloaded. 39 | \item \code{is_installed}: Logical indicating if the package is installed. 
40 | } 41 | } 42 | \description{ 43 | This function resolves the path to a package directory or tarball, handling both installed packages and source packages from CRAN. 44 | } 45 | \keyword{internal} 46 | -------------------------------------------------------------------------------- /.github/CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to rdocdump 2 | 3 | This outlines how to propose a change to rdocdump. 4 | For a detailed discussion on contributing to this and other tidyverse packages, please see the [development contributing guide](https://rstd.io/tidy-contrib) and [code review principles](https://code-review.tidyverse.org/). 5 | 6 | ## Fixing typos 7 | 8 | You can fix typos, spelling mistakes, or grammatical errors in the documentation directly using the GitHub web interface, as long as the changes are made in the _source_ file. 9 | This generally means you'll need to edit [roxygen2 comments](https://roxygen2.r-lib.org/articles/roxygen2.html) in an `.R`, not a `.Rd` file. 10 | You can find the `.R` file that generates the `.Rd` by reading the comment in the first line. 11 | 12 | ## Bigger changes 13 | 14 | If you want to make a bigger change, it's a good idea to first file an issue and make sure someone from the team agrees that it’s needed. 15 | If you’ve found a bug, please file an issue that illustrates the bug with a minimal 16 | [reprex](https://www.tidyverse.org/help/#reprex) (this will also help you write a unit test, if needed). 17 | See the guide on [how to create a great issue](https://code-review.tidyverse.org/issues/) for more advice. 18 | 19 | ### Pull request process 20 | 21 | * Fork the package and clone onto your computer. If you haven't done this before, we recommend using `usethis::create_from_github("e-kotov/rdocdump", fork = TRUE)`. 
22 | 23 | * Install all development dependencies with `devtools::install_dev_deps()`, and then make sure the package passes R CMD check by running `devtools::check()`. 24 | If R CMD check doesn't pass cleanly, it's a good idea to ask for help before continuing. 25 | * Create a Git branch for your pull request (PR). We recommend using `usethis::pr_init("brief-description-of-change")`. 26 | 27 | * Make your changes, commit to git, and then create a PR by running `usethis::pr_push()`, and following the prompts in your browser. 28 | The title of your PR should briefly describe the change. 29 | The body of your PR should contain `Fixes #issue-number`. 30 | 31 | * For user-facing changes, add a bullet to the top of `NEWS.md` (i.e. just below the first header). Follow the style described in the [tidyverse style guide](https://style.tidyverse.org/news.html). 32 | 33 | ### Code style 34 | 35 | * New code should follow the tidyverse [style guide](https://style.tidyverse.org). 36 | You can use the [styler](https://CRAN.R-project.org/package=styler) package to apply these styles, but please don't restyle code that has nothing to do with your PR. 37 | 38 | * We use [roxygen2](https://cran.r-project.org/package=roxygen2), with [Markdown syntax](https://cran.r-project.org/web/packages/roxygen2/vignettes/rd-formatting.html), for documentation. 39 | 40 | * We use [testthat](https://cran.r-project.org/package=testthat) for unit tests. 41 | Contributions with test cases included are easier to accept. 42 | 43 | ## Code of Conduct 44 | 45 | Please note that the rdocdump project is released with a 46 | [Contributor Code of Conduct](.github/CODE_OF_CONDUCT.md). By contributing to this 47 | project you agree to abide by its terms. 48 | -------------------------------------------------------------------------------- /.github/workflows/rhub.yaml: -------------------------------------------------------------------------------- 1 | # R-hub's generic GitHub Actions workflow file.
It's canonical location is at 2 | # https://github.com/r-hub/actions/blob/v1/workflows/rhub.yaml 3 | # You can update this file to a newer version using the rhub2 package: 4 | # 5 | # rhub::rhub_setup() 6 | # 7 | # It is unlikely that you need to modify this file manually. 8 | 9 | name: R-hub 10 | run-name: "${{ github.event.inputs.id }}: ${{ github.event.inputs.name || format('Manually run by {0}', github.triggering_actor) }}" 11 | 12 | on: 13 | workflow_dispatch: 14 | inputs: 15 | config: 16 | description: 'A comma separated list of R-hub platforms to use.' 17 | type: string 18 | default: 'linux,windows,macos' 19 | name: 20 | description: 'Run name. You can leave this empty now.' 21 | type: string 22 | id: 23 | description: 'Unique ID. You can leave this empty now.' 24 | type: string 25 | 26 | jobs: 27 | 28 | setup: 29 | runs-on: ubuntu-latest 30 | outputs: 31 | containers: ${{ steps.rhub-setup.outputs.containers }} 32 | platforms: ${{ steps.rhub-setup.outputs.platforms }} 33 | 34 | steps: 35 | # NO NEED TO CHECKOUT HERE 36 | - uses: r-hub/actions/setup@v1 37 | with: 38 | config: ${{ github.event.inputs.config }} 39 | id: rhub-setup 40 | 41 | linux-containers: 42 | needs: setup 43 | if: ${{ needs.setup.outputs.containers != '[]' }} 44 | runs-on: ubuntu-latest 45 | name: ${{ matrix.config.label }} 46 | strategy: 47 | fail-fast: false 48 | matrix: 49 | config: ${{ fromJson(needs.setup.outputs.containers) }} 50 | container: 51 | image: ${{ matrix.config.container }} 52 | 53 | steps: 54 | - uses: r-hub/actions/checkout@v1 55 | - uses: r-hub/actions/platform-info@v1 56 | with: 57 | token: ${{ secrets.RHUB_TOKEN }} 58 | job-config: ${{ matrix.config.job-config }} 59 | - uses: r-hub/actions/setup-deps@v1 60 | with: 61 | token: ${{ secrets.RHUB_TOKEN }} 62 | job-config: ${{ matrix.config.job-config }} 63 | - uses: r-hub/actions/run-check@v1 64 | with: 65 | token: ${{ secrets.RHUB_TOKEN }} 66 | job-config: ${{ matrix.config.job-config }} 67 | 68 | other-platforms: 69 | 
needs: setup 70 | if: ${{ needs.setup.outputs.platforms != '[]' }} 71 | runs-on: ${{ matrix.config.os }} 72 | name: ${{ matrix.config.label }} 73 | strategy: 74 | fail-fast: false 75 | matrix: 76 | config: ${{ fromJson(needs.setup.outputs.platforms) }} 77 | 78 | steps: 79 | - uses: r-hub/actions/checkout@v1 80 | - uses: r-hub/actions/setup-r@v1 81 | with: 82 | job-config: ${{ matrix.config.job-config }} 83 | token: ${{ secrets.RHUB_TOKEN }} 84 | - uses: r-hub/actions/platform-info@v1 85 | with: 86 | token: ${{ secrets.RHUB_TOKEN }} 87 | job-config: ${{ matrix.config.job-config }} 88 | - uses: r-hub/actions/setup-deps@v1 89 | with: 90 | job-config: ${{ matrix.config.job-config }} 91 | token: ${{ secrets.RHUB_TOKEN }} 92 | - uses: r-hub/actions/run-check@v1 93 | with: 94 | job-config: ${{ matrix.config.job-config }} 95 | token: ${{ secrets.RHUB_TOKEN }} 96 | -------------------------------------------------------------------------------- /tests/testthat/test-util_resolve_pkg_path.R: -------------------------------------------------------------------------------- 1 | test_that("resolve_pkg_path identifies installed packages correctly", { 2 | # Use a base package, such as "stats", which is always installed. 3 | pkg_info <- resolve_pkg_path("stats", force_fetch = FALSE) 4 | 5 | expect_true(pkg_info$is_installed) 6 | expect_equal(pkg_info$pkg_name, "stats") 7 | expect_equal(pkg_info$pkg_path, find.package("stats")) 8 | }) 9 | 10 | test_that("resolve_pkg_path identifies a source package directory", { 11 | # Create a dummy source package directory. 12 | temp_pkg <- tempfile("dummy_pkg") 13 | dir.create(temp_pkg) 14 | writeLines("Package: dummy\nVersion: 1.0", file.path(temp_pkg, "DESCRIPTION")) 15 | 16 | # Create a 'man' directory with a dummy Rd file. 
17 | man_dir <- file.path(temp_pkg, "man") 18 | dir.create(man_dir) 19 | dummy_rd <- "\\name{dummy}\n\\alias{dummy}\n\\title{Dummy Function}\n\\description{A dummy function.}\n" 20 | writeLines(dummy_rd, file.path(man_dir, "dummy.Rd")) 21 | 22 | pkg_info <- resolve_pkg_path(temp_pkg) 23 | expect_false(pkg_info$is_installed) 24 | expect_equal(pkg_info$pkg_path, temp_pkg) 25 | 26 | unlink(temp_pkg, recursive = TRUE) 27 | }) 28 | 29 | 30 | test_that("resolve_pkg_path handles tar.gz archive file correctly", { 31 | # Create a temporary dummy package directory with minimal files. 32 | dummy_pkg <- tempfile("dummy_pkg") 33 | dir.create(dummy_pkg) 34 | writeLines( 35 | "Package: dummy\nVersion: 1.0", 36 | file.path(dummy_pkg, "DESCRIPTION") 37 | ) 38 | man_dir <- file.path(dummy_pkg, "man") 39 | dir.create(man_dir) 40 | writeLines( 41 | "\\name{dummy}\n\\alias{dummy}\n\\title{Dummy Function}", 42 | file.path(man_dir, "dummy.Rd") 43 | ) 44 | 45 | # Create a tar.gz archive of the dummy package. 46 | tar_path <- tempfile("dummy_pkg", fileext = ".tar.gz") 47 | old_wd <- getwd() 48 | setwd(dirname(dummy_pkg)) 49 | # Create the archive without extra arguments. 50 | utils::tar(tarfile = tar_path, files = basename(dummy_pkg), tar = "internal") 51 | setwd(old_wd) 52 | 53 | expect_true(file.exists(tar_path)) # Ensure archive exists 54 | 55 | # Call the actual function with an explicit cache_path to force extraction. 56 | pkg_info <- resolve_pkg_path(tar_path, cache_path = tempdir()) 57 | 58 | expect_false(pkg_info$is_installed) 59 | # With cache_path specified, get_extract_dir() should return a directory path. 60 | expect_true(is.character(pkg_info$pkg_path)) 61 | expect_true(is.character(pkg_info$extracted_path)) 62 | expect_equal(pkg_info$pkg_path, pkg_info$extracted_path) 63 | expect_true(dir.exists(pkg_info$pkg_path)) 64 | 65 | # Clean up. 
66 | unlink(pkg_info$pkg_path, recursive = TRUE) 67 | unlink(tar_path) 68 | }) 69 | 70 | 71 | test_that("resolve_pkg_path fetches package from CRAN", { 72 | skip_on_cran() 73 | skip_if_offline() 74 | 75 | package <- "ini" # choosing a minimal size stable package 76 | old_repos <- getOption("repos") 77 | options(repos = c(CRAN = "https://cloud.r-project.org")) 78 | pkg_info <- resolve_pkg_path( 79 | package, 80 | cache_path = tempdir(), 81 | force_fetch = TRUE 82 | ) 83 | options(repos = old_repos) 84 | 85 | expect_true(file.exists(pkg_info$tar_path)) 86 | expect_true(dir.exists(pkg_info$pkg_path)) 87 | unlink(pkg_info$pkg_path, recursive = TRUE) 88 | unlink(pkg_info$tar_path) 89 | }) 90 | -------------------------------------------------------------------------------- /README.qmd: -------------------------------------------------------------------------------- 1 | --- 2 | format: gfm 3 | eval: false 4 | default-image-extension: "" 5 | --- 6 | 7 | 8 | 9 | 10 | # rdocdump: Dump 'R' Package Source, Documentation, and Vignettes into One File rdocdump website 11 | 12 | 13 | [![Project Status: Active](https://www.repostatus.org/badges/latest/active.svg)](https://www.repostatus.org/#active) 14 | [![Lifecycle: experimental](https://img.shields.io/badge/lifecycle-experimental-orange.svg)](https://lifecycle.r-lib.org/articles/stages.html#experimental) 15 | [![CRAN status](https://www.r-pkg.org/badges/version/rdocdump)](https://CRAN.R-project.org/package=rdocdump) 16 | [![CRAN/METACRAN Total downloads](https://cranlogs.r-pkg.org/badges/grand-total/rdocdump?color=blue)](https://CRAN.R-project.org/package=rdocdump){target="_blank"} 17 | [![CRAN/METACRAN Downloads per month](https://cranlogs.r-pkg.org/badges/rdocdump?color=blue)](https://CRAN.R-project.org/package=rdocdump){target="_blank"} 18 | [![R-CMD-check](https://github.com/e-kotov/rdocdump/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/e-kotov/rdocdump/actions/workflows/R-CMD-check.yaml) 19 | 
[![pkgcheck](https://github.com/e-kotov/rdocdump/workflows/pkgcheck/badge.svg)](https://github.com/e-kotov/rdocdump/actions?query=workflow%3Apkgcheck) 20 | 21 | [![DOI](https://zenodo.org/badge/DOI/10.32614/CRAN.package.rdocdump.svg)](https://doi.org/10.32614/CRAN.package.rdocdump) 22 | [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.15072446.svg)](https://doi.org/10.5281/zenodo.15072446) 23 | 24 | 25 | Dump source code, documentation and vignettes of an `R` package into a single file. Supports installed packages, `tar.gz` archives, and package source directories. If the package is not installed, only its source is automatically downloaded from CRAN for processing. The output is a single plain text file or a `character` vector, which is useful to ingest complete package documentation and source into a large language model (LLM) or pass it further to other tools, such as [`{ragnar}`](https://github.com/tidyverse/ragnar){target='_blank'} to create a Retrieval-Augmented Generation (RAG) workflow. 
26 | 27 | ## Installation 28 | 29 | Install the latest stable release of `rdocdump` from CRAN with: 30 | 31 | ```r 32 | install.packages("rdocdump") 33 | ``` 34 | 35 | You can install the development version of `rdocdump` from R Universe with: 36 | 37 | ```r 38 | install.packages('rdocdump', 39 | repos = c('https://e-kotov.r-universe.dev', 'https://cloud.r-project.org') 40 | ) 41 | ``` 42 | 43 | or from GitHub with: 44 | 45 | ```r 46 | # install.packages("pak") 47 | pak::pak("e-kotov/rdocdump") 48 | ``` 49 | 50 | 51 | 52 | ## Example 53 | 54 | Extract documentation and source code of `{rJavaEnv}` package by downloading source from CRAN and save it to file `rJavaEnv_docs.txt` 55 | 56 | ```r 57 | rdd_to_txt( 58 | pkg = "rJavaEnv", 59 | file = tempfile("rJavaEnv_docs_", fileext = ".txt"), 60 | force_fetch = TRUE, # force download even if package is installed 61 | keep_files = "none" # delete temp files 62 | ) 63 | ``` 64 | 65 | 66 | ## Citation 67 | 68 | ```{r} 69 | #| eval: true 70 | #| echo: false 71 | #| results: 'asis' 72 | print(citation("rdocdump"), bibtex = FALSE) 73 | ``` 74 | 75 | BibTeX: 76 | ``` 77 | ```{r} 78 | #| eval: true 79 | #| echo: false 80 | #| results: 'asis' 81 | toBibtex(citation("rdocdump")) 82 | ``` 83 | ``` 84 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | # rdocdump: Dump ‘R’ Package Source, Documentation, and Vignettes into One File rdocdump website 6 | 7 | 8 | 9 | [![Project Status: 10 | Active](https://www.repostatus.org/badges/latest/active.svg)](https://www.repostatus.org/#active) 11 | [![Lifecycle: 12 | experimental](https://img.shields.io/badge/lifecycle-experimental-orange.svg)](https://lifecycle.r-lib.org/articles/stages.html#experimental) 13 | [![CRAN 14 | status](https://www.r-pkg.org/badges/version/rdocdump)](https://CRAN.R-project.org/package=rdocdump) 15 | CRAN/METACRAN Total
downloads 19 | CRAN/METACRAN Downloads per month 23 | [![R-CMD-check](https://github.com/e-kotov/rdocdump/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/e-kotov/rdocdump/actions/workflows/R-CMD-check.yaml) 24 | [![pkgcheck](https://github.com/e-kotov/rdocdump/workflows/pkgcheck/badge.svg)](https://github.com/e-kotov/rdocdump/actions?query=workflow%3Apkgcheck) 25 | 26 | [![DOI](https://zenodo.org/badge/DOI/10.32614/CRAN.package.rdocdump.svg)](https://doi.org/10.32614/CRAN.package.rdocdump) 27 | [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.15072446.svg)](https://doi.org/10.5281/zenodo.15072446) 28 | 29 | 30 | Dump source code, documentation and vignettes of an `R` package into a 31 | single file. Supports installed packages, `tar.gz` archives, and package 32 | source directories. If the package is not installed, only its source is 33 | automatically downloaded from CRAN for processing. The output is a 34 | single plain text file or a `character` vector, which is useful to 35 | ingest complete package documentation and source into a large language 36 | model (LLM) or pass it further to other tools, such as 37 | {ragnar} to create a 39 | Retrieval-Augmented Generation (RAG) workflow. 
40 | 41 | ## Installation 42 | 43 | Install the latest stable release of `rdocdump` from CRAN with: 44 | 45 | ``` r 46 | install.packages("rdocdump") 47 | ``` 48 | 49 | You can install the development version of `rdocdump` from R Universe 50 | with: 51 | 52 | ``` r 53 | install.packages('rdocdump', 54 | repos = c('https://e-kotov.r-universe.dev', 'https://cloud.r-project.org') 55 | ) 56 | ``` 57 | 58 | or from GitHub with: 59 | 60 | ``` r 61 | # install.packages("pak") 62 | pak::pak("e-kotov/rdocdump") 63 | ``` 64 | 65 | ## Example 66 | 67 | Extract documentation and source code of `{rJavaEnv}` package by 68 | downloading source from CRAN and save it to file `rJavaEnv_docs.txt` 69 | 70 | ``` r 71 | rdd_to_txt( 72 | pkg = "rJavaEnv", 73 | file = tempfile("rJavaEnv_docs_", fileext = ".txt"), 74 | force_fetch = TRUE, # force download even if package is installed 75 | keep_files = "none" # delete temp files 76 | ) 77 | ``` 78 | 79 | ## Citation 80 | 81 | To cite package ‘rdocdump’ in publications use: 82 | 83 | Kotov E (2025). *rdocdump: Dump R Package Source, Documentation, and 84 | Vignettes into One File*. doi:10.32614/CRAN.package.rdocdump 85 | , 86 | .
87 | 88 | BibTeX: 89 | 90 | @Manual{rdocdump, 91 | title = {rdocdump: Dump R Package Source, Documentation, and Vignettes into One File}, 92 | author = {Egor Kotov}, 93 | year = {2025}, 94 | url = {https://github.com/e-kotov/rdocdump}, 95 | doi = {10.32614/CRAN.package.rdocdump}, 96 | } 97 | -------------------------------------------------------------------------------- /CITATION.cff: -------------------------------------------------------------------------------- 1 | # -------------------------------------------- 2 | # CITATION file created with {cffr} R package 3 | # See also: https://docs.ropensci.org/cffr/ 4 | # -------------------------------------------- 5 | 6 | cff-version: 1.2.0 7 | message: 'To cite package "rdocdump" in publications use:' 8 | type: software 9 | license: MIT 10 | title: 'rdocdump: Dump ''R'' Package Source, Documentation, and Vignettes into One 11 | File' 12 | version: 0.1.1 13 | doi: 10.32614/CRAN.package.rdocdump 14 | identifiers: 15 | - type: doi 16 | value: 10.32614/CRAN.package.rdocdump 17 | abstract: Dump source code, documentation and vignettes of an 'R' package into a single 18 | file. Supports installed packages, tar.gz archives, and package source directories. 19 | If the package is not installed, only its source is automatically downloaded from 20 | CRAN for processing. The output is a single plain text file or a character vector, 21 | which is useful to ingest complete package documentation and source into a large 22 | language model (LLM) or pass it further to other tools, such as 'ragnar' 23 | to create a Retrieval-Augmented Generation (RAG) workflow. 
24 | authors: 25 | - family-names: Kotov 26 | given-names: Egor 27 | email: kotov.egor@gmail.com 28 | orcid: https://orcid.org/0000-0001-6690-5345 29 | preferred-citation: 30 | type: manual 31 | title: 'rdocdump: Dump R Package Source, Documentation, and Vignettes into One File' 32 | authors: 33 | - family-names: Kotov 34 | given-names: Egor 35 | email: kotov.egor@gmail.com 36 | orcid: https://orcid.org/0000-0001-6690-5345 37 | year: '2025' 38 | url: https://github.com/e-kotov/rdocdump 39 | doi: 10.32614/CRAN.package.rdocdump 40 | repository: https://CRAN.R-project.org/package=rdocdump 41 | repository-code: https://github.com/e-kotov/rdocdump 42 | url: https://www.ekotov.pro/rdocdump/ 43 | contact: 44 | - family-names: Kotov 45 | given-names: Egor 46 | email: kotov.egor@gmail.com 47 | orcid: https://orcid.org/0000-0001-6690-5345 48 | keywords: 49 | - llm 50 | - r-package 51 | - rag 52 | - text 53 | references: 54 | - type: software 55 | title: curl 56 | abstract: 'curl: A Modern and Flexible Web Client for R' 57 | notes: Suggests 58 | url: https://jeroen.r-universe.dev/curl 59 | repository: https://CRAN.R-project.org/package=curl 60 | authors: 61 | - family-names: Ooms 62 | given-names: Jeroen 63 | email: jeroenooms@gmail.com 64 | orcid: https://orcid.org/0000-0002-4035-0289 65 | year: '2025' 66 | doi: 10.32614/CRAN.package.curl 67 | - type: software 68 | title: quarto 69 | abstract: 'quarto: R Interface to ''Quarto'' Markdown Publishing System' 70 | notes: Suggests 71 | url: https://quarto-dev.github.io/quarto-r/ 72 | repository: https://CRAN.R-project.org/package=quarto 73 | authors: 74 | - family-names: Allaire 75 | given-names: JJ 76 | email: jj@posit.co 77 | orcid: https://orcid.org/0000-0003-0174-9868 78 | - family-names: Dervieux 79 | given-names: Christophe 80 | email: cderv@posit.co 81 | orcid: https://orcid.org/0000-0003-4474-2498 82 | year: '2025' 83 | doi: 10.32614/CRAN.package.quarto 84 | - type: software 85 | title: testthat 86 | abstract: 'testthat: 
Unit Testing for R' 87 | notes: Suggests 88 | url: https://testthat.r-lib.org 89 | repository: https://CRAN.R-project.org/package=testthat 90 | authors: 91 | - family-names: Wickham 92 | given-names: Hadley 93 | email: hadley@posit.co 94 | year: '2025' 95 | doi: 10.32614/CRAN.package.testthat 96 | version: '>= 3.0.0' 97 | - type: software 98 | title: withr 99 | abstract: 'withr: Run Code ''With'' Temporarily Modified Global State' 100 | notes: Suggests 101 | url: https://withr.r-lib.org 102 | repository: https://CRAN.R-project.org/package=withr 103 | authors: 104 | - family-names: Hester 105 | given-names: Jim 106 | - family-names: Henry 107 | given-names: Lionel 108 | email: lionel@posit.co 109 | - family-names: Müller 110 | given-names: Kirill 111 | email: krlmlr+r@mailbox.org 112 | - family-names: Ushey 113 | given-names: Kevin 114 | email: kevinushey@gmail.com 115 | - family-names: Wickham 116 | given-names: Hadley 117 | email: hadley@posit.co 118 | - family-names: Chang 119 | given-names: Winston 120 | year: '2025' 121 | doi: 10.32614/CRAN.package.withr 122 | 123 | -------------------------------------------------------------------------------- /man/rdd_extract_code.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/extract_code.R 3 | \name{rdd_extract_code} 4 | \alias{rdd_extract_code} 5 | \title{Extract R Source Code from a Package} 6 | \usage{ 7 | rdd_extract_code( 8 | pkg, 9 | file = NULL, 10 | include_tests = FALSE, 11 | include_roxygen = FALSE, 12 | force_fetch = FALSE, 13 | version = NULL, 14 | cache_path = getOption("rdocdump.cache_path"), 15 | keep_files = "none", 16 | repos = getOption("rdocdump.repos", getOption("repos")) 17 | ) 18 | } 19 | \arguments{ 20 | \item{pkg}{A \code{character} string specifying the package. 
This can be: 21 | \itemize{ 22 | \item an installed package name, 23 | \item a full path to a package source directory, 24 | \item a full path to a package archive file (tar.gz), or 25 | \item a package name not installed (which will then be downloaded from CRAN). 26 | }} 27 | 28 | \item{file}{Optional. Save path for the output text file. If set, the function will return the path to the file instead of the combined text. Defaults to \code{NULL}.} 29 | 30 | \item{include_tests}{\code{logical}. If \code{TRUE}, for non-installed packages, the function will also include R source code from the \code{tests} directory. Defaults to \code{FALSE}.} 31 | 32 | \item{include_roxygen}{\code{logical}. If \code{TRUE}, roxygen2 documentation lines (lines starting with "#'") from R files will be included in the output. Defaults to \code{FALSE}.} 33 | 34 | \item{force_fetch}{\code{logical}. If \code{TRUE}, the package source will be fetched from CRAN even if the package is installed locally. Default is \code{FALSE}, but when \code{version} is specified, it will be set to \code{TRUE}.} 35 | 36 | \item{version}{Optional. A \code{character} string specifying the package version to fetch from CRAN. If not provided, the latest version will be used.} 37 | 38 | \item{cache_path}{A \code{character} string specifying the directory to use as a cache. Defaults to the value of \code{getOption("rdocdump.cache_path")}.} 39 | 40 | \item{keep_files}{A \code{character} value controlling whether temporary files should be kept. 41 | Possible values are: 42 | \itemize{ 43 | \item \code{"none"}: Delete both the tar.gz archive and the extracted files (default). 44 | \item \code{"tgz"}: Keep only the tar.gz archive. 45 | \item \code{"extracted"}: Keep only the extracted files. 46 | \item \code{"both"}: Keep both the tar.gz archive and the extracted files. 47 | }} 48 | 49 | \item{repos}{A \code{character} vector of repository URLs. 
By default, it uses the value of \code{getOption("rdocdump.repos")} which sets the repository URLs to the default R repositories and is itself set to \code{c("CRAN" = "https://cloud.r-project.org")} on package load to prevent accidental downloads of pre-built packages from Posit Package Manager and R Universe.} 50 | } 51 | \value{ 52 | A single string containing the combined R source code (and, optionally, roxygen2 documentation) from the package. 53 | } 54 | \description{ 55 | This function extracts the R source code from a package. For installed packages, it retrieves the package namespace and deparses all functions found in the package. For package source directories or archives (non-installed packages), it reads all \code{.R} files from the \code{R} directory and, optionally, from the \code{tests} directory. Optionally, it can include roxygen2 documentation from these files. 56 | } 57 | \examples{ 58 | # Extract only R source code (excluding roxygen2 documentation) from an installed package. 59 | code <- rdd_extract_code("splines") 60 | cat(substr(code, 1, 1000)) 61 | 62 | # Extract R source code including roxygen2 documentation from a package source directory. 63 | \donttest{ 64 | # set cache directory for `rdocdump` 65 | rdd_set_cache_path(paste0(tempdir(), "/rdocdump_cache")) 66 | 67 | local({ 68 | code_with_roxygen <- rdd_extract_code( 69 | "ini", 70 | include_roxygen = TRUE, 71 | force_fetch = TRUE, 72 | repos = c("CRAN" = "https://cran.r-project.org") 73 | ) 74 | cat(substr(code_with_roxygen, 1, 1000)) 75 | }) 76 | 77 | # Extract R source code from a package source directory, 78 | # including test files but excluding roxygen2 docs. 
79 | local({ 80 | code_with_tests <- rdd_extract_code( 81 | "ini", 82 | include_roxygen = TRUE, 83 | include_tests = TRUE, 84 | force_fetch = TRUE, 85 | repos = c("CRAN" = "https://cran.r-project.org") 86 | ) 87 | cat(substr(code_with_tests, 1, 1000)) 88 | }) 89 | # clean cache directory 90 | unlink(getOption("rdocdump.cache_path"), recursive = TRUE, force = TRUE) 91 | } 92 | 93 | } 94 | -------------------------------------------------------------------------------- /man/rdd_to_txt.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/to_txt.R 3 | \name{rdd_to_txt} 4 | \alias{rdd_to_txt} 5 | \title{Dump Package Source, Documentation, and Vignettes into Plain Text} 6 | \usage{ 7 | rdd_to_txt( 8 | pkg, 9 | file = NULL, 10 | content = "all", 11 | force_fetch = FALSE, 12 | version = NULL, 13 | keep_files = "none", 14 | cache_path = getOption("rdocdump.cache_path"), 15 | repos = getOption("rdocdump.repos", getOption("repos")) 16 | ) 17 | } 18 | \arguments{ 19 | \item{pkg}{A \code{character} string specifying the package. This can be: 20 | \itemize{ 21 | \item an installed package name, 22 | \item a full path to a package source directory, 23 | \item a full path to a package archive file (tar.gz), or 24 | \item a package name not installed (which will then be downloaded from CRAN). 25 | }} 26 | 27 | \item{file}{Optional. Save path for the output text file. If set, the function will return the path to the file instead of the combined text. Defaults to \code{NULL}.} 28 | 29 | \item{content}{A character vector specifying which components to include in the output. 30 | Possible values are: 31 | \itemize{ 32 | \item \code{"all"}: Include Rd documentation, vignettes, and R source code (default). 33 | \item \code{"docs"}: Include only the Rd documentation. 34 | \item \code{"vignettes"}: Include only the vignettes. 
35 | \item \code{"code"}: Include only the R source code. When extracting code for non-installed packages, the function will not include roxygen2 documentation, as the documentation can be imported from the Rd files. If you want to extract the R source code with the roxygen2 documentation, use \code{\link{rdd_extract_code}} and set \code{include_roxygen} to \code{TRUE}. 36 | } 37 | You can specify multiple options (e.g., \code{c("docs", "code")} to include both documentation and source code).} 38 | 39 | \item{force_fetch}{\code{logical}. If \code{TRUE}, the package source will be fetched from CRAN as a tar.gz archive even if the package is already installed locally. Default is \code{FALSE}, but when \code{version} is specified, it will be set to \code{TRUE}.} 40 | 41 | \item{version}{Optional. A \code{character} string specifying the package version to fetch from CRAN. If not provided, the latest version will be used.} 42 | 43 | \item{keep_files}{A \code{character} value controlling whether temporary files should be kept. 44 | Possible values are: 45 | \itemize{ 46 | \item \code{"none"}: Delete both the tar.gz archive and the extracted files (default). 47 | \item \code{"tgz"}: Keep only the tar.gz archive. 48 | \item \code{"extracted"}: Keep only the extracted files. 49 | \item \code{"both"}: Keep both the tar.gz archive and the extracted files. 50 | }} 51 | 52 | \item{cache_path}{A \code{character} string specifying the directory where kept temporary files will be stored. 53 | By default, it uses the value of \code{getOption("rdocdump.cache_path")} which sets the cache directory to the temporary directory of the current R session.} 54 | 55 | \item{repos}{A \code{character} vector of repository URLs. 
By default, it uses the value of \code{getOption("rdocdump.repos")} which sets the repository URLs to the default R repositories and is itself set to \code{c("CRAN" = "https://cloud.r-project.org")} on package load to prevent accidental downloads of pre-built packages from Posit Package Manager and R Universe.} 56 | } 57 | \value{ 58 | A single string containing the combined package documentation, vignettes, and/or code as specified by the \code{content} argument. 59 | If the \code{file} argument is set, returns the path to the file. 60 | } 61 | \description{ 62 | This function produces a single text output for an R package by processing its documentation (Rd files from the package source or the documentation from already installed packages), vignettes, and/or R source code. 63 | } 64 | \examples{ 65 | # Extract everything (docs, vignettes, and code) for the built-in `splines` package. 66 | docs <- rdd_to_txt("splines") 67 | cat(substr(docs, 1, 500)) 68 | 69 | \donttest{ 70 | # set cache directory for `rdocdump` 71 | rdd_set_cache_path(paste0(tempdir(), "/rdocdump_cache")) 72 | 73 | # Extract only documentation for rJavaEnv by downloading its source from CRAN 74 | docs <- rdd_to_txt( 75 | "rJavaEnv", 76 | force_fetch = TRUE, 77 | content = "docs", 78 | repos = c("CRAN" = "https://cran.r-project.org") 79 | ) 80 | lines <- unlist(strsplit(docs, "\n")) 81 | # Print the first 3 lines 82 | cat(head(lines, 3), sep = "\n") 83 | # Print the last 3 lines 84 | cat(tail(lines, 3), sep = "\n") 85 | 86 | # clean cache directory 87 | unlink(getOption("rdocdump.cache_path"), recursive = TRUE, force = TRUE) 88 | } 89 | 90 | } 91 | -------------------------------------------------------------------------------- /codemeta.json: -------------------------------------------------------------------------------- 1 | { 2 | "@context": "https://doi.org/10.5063/schema/codemeta-2.0", 3 | "@type": "SoftwareSourceCode", 4 | "identifier": "rdocdump", 5 | "description": "Dump source code, 
documentation and vignettes of an 'R' package into a single file. Supports installed packages, tar.gz archives, and package source directories. If the package is not installed, only its source is automatically downloaded from CRAN for processing. The output is a single plain text file or a character vector, which is useful to ingest complete package documentation and source into a large language model (LLM) or pass it further to other tools, such as 'ragnar' to create a Retrieval-Augmented Generation (RAG) workflow.", 6 | "name": "rdocdump: Dump 'R' Package Source, Documentation, and Vignettes into One File", 7 | "relatedLink": ["https://www.ekotov.pro/rdocdump/", "https://CRAN.R-project.org/package=rdocdump"], 8 | "codeRepository": "https://github.com/e-kotov/rdocdump", 9 | "issueTracker": "https://github.com/e-kotov/rdocdump/issues", 10 | "license": "https://spdx.org/licenses/MIT", 11 | "version": "0.1.1", 12 | "programmingLanguage": { 13 | "@type": "ComputerLanguage", 14 | "name": "R", 15 | "url": "https://r-project.org" 16 | }, 17 | "runtimePlatform": "R version 4.5.1 (2025-06-13)", 18 | "provider": { 19 | "@id": "https://cran.r-project.org", 20 | "@type": "Organization", 21 | "name": "Comprehensive R Archive Network (CRAN)", 22 | "url": "https://cran.r-project.org" 23 | }, 24 | "author": [ 25 | { 26 | "@type": "Person", 27 | "givenName": "Egor", 28 | "familyName": "Kotov", 29 | "email": "kotov.egor@gmail.com", 30 | "@id": "https://orcid.org/0000-0001-6690-5345" 31 | } 32 | ], 33 | "copyrightHolder": [ 34 | { 35 | "@type": "Person", 36 | "givenName": "Egor", 37 | "familyName": "Kotov", 38 | "email": "kotov.egor@gmail.com", 39 | "@id": "https://orcid.org/0000-0001-6690-5345" 40 | } 41 | ], 42 | "maintainer": [ 43 | { 44 | "@type": "Person", 45 | "givenName": "Egor", 46 | "familyName": "Kotov", 47 | "email": "kotov.egor@gmail.com", 48 | "@id": "https://orcid.org/0000-0001-6690-5345" 49 | } 50 | ], 51 | "softwareSuggestions": [ 52 | { 53 | "@type": 
"SoftwareApplication", 54 | "identifier": "curl", 55 | "name": "curl", 56 | "provider": { 57 | "@id": "https://cran.r-project.org", 58 | "@type": "Organization", 59 | "name": "Comprehensive R Archive Network (CRAN)", 60 | "url": "https://cran.r-project.org" 61 | }, 62 | "sameAs": "https://CRAN.R-project.org/package=curl" 63 | }, 64 | { 65 | "@type": "SoftwareApplication", 66 | "identifier": "quarto", 67 | "name": "quarto", 68 | "provider": { 69 | "@id": "https://cran.r-project.org", 70 | "@type": "Organization", 71 | "name": "Comprehensive R Archive Network (CRAN)", 72 | "url": "https://cran.r-project.org" 73 | }, 74 | "sameAs": "https://CRAN.R-project.org/package=quarto" 75 | }, 76 | { 77 | "@type": "SoftwareApplication", 78 | "identifier": "testthat", 79 | "name": "testthat", 80 | "version": ">= 3.0.0", 81 | "provider": { 82 | "@id": "https://cran.r-project.org", 83 | "@type": "Organization", 84 | "name": "Comprehensive R Archive Network (CRAN)", 85 | "url": "https://cran.r-project.org" 86 | }, 87 | "sameAs": "https://CRAN.R-project.org/package=testthat" 88 | }, 89 | { 90 | "@type": "SoftwareApplication", 91 | "identifier": "withr", 92 | "name": "withr", 93 | "provider": { 94 | "@id": "https://cran.r-project.org", 95 | "@type": "Organization", 96 | "name": "Comprehensive R Archive Network (CRAN)", 97 | "url": "https://cran.r-project.org" 98 | }, 99 | "sameAs": "https://CRAN.R-project.org/package=withr" 100 | } 101 | ], 102 | "softwareRequirements": { 103 | "SystemRequirements": null 104 | }, 105 | "fileSize": "80.824KB", 106 | "citation": [ 107 | { 108 | "@type": "SoftwareSourceCode", 109 | "datePublished": "2025", 110 | "author": [ 111 | { 112 | "@type": "Person", 113 | "givenName": "Egor", 114 | "familyName": "Kotov", 115 | "email": "kotov.egor@gmail.com", 116 | "@id": "https://orcid.org/0000-0001-6690-5345" 117 | } 118 | ], 119 | "name": "rdocdump: Dump R Package Source, Documentation, and Vignettes into One File", 120 | "identifier": 
"10.32614/CRAN.package.rdocdump", 121 | "url": "https://github.com/e-kotov/rdocdump", 122 | "@id": "https://doi.org/10.32614/CRAN.package.rdocdump", 123 | "sameAs": "https://doi.org/10.32614/CRAN.package.rdocdump" 124 | } 125 | ], 126 | "releaseNotes": "https://github.com/e-kotov/rdocdump/blob/master/NEWS.md", 127 | "readme": "https://github.com/e-kotov/rdocdump/blob/main/README.md", 128 | "contIntegration": ["https://github.com/e-kotov/rdocdump/actions/workflows/R-CMD-check.yaml", "https://github.com/e-kotov/rdocdump/actions?query=workflow%3Apkgcheck"], 129 | "developmentStatus": ["https://www.repostatus.org/#active", "https://lifecycle.r-lib.org/articles/stages.html#experimental"], 130 | "keywords": ["llm", "r-package", "rag", "text"] 131 | } 132 | -------------------------------------------------------------------------------- /.github/CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | We as members, contributors, and leaders pledge to make participation in our 6 | community a harassment-free experience for everyone, regardless of age, body 7 | size, visible or invisible disability, ethnicity, sex characteristics, gender 8 | identity and expression, level of experience, education, socio-economic status, 9 | nationality, personal appearance, race, caste, color, religion, or sexual 10 | identity and orientation. 11 | 12 | We pledge to act and interact in ways that contribute to an open, welcoming, 13 | diverse, inclusive, and healthy community. 
14 | 15 | ## Our Standards 16 | 17 | Examples of behavior that contributes to a positive environment for our 18 | community include: 19 | 20 | * Demonstrating empathy and kindness toward other people 21 | * Being respectful of differing opinions, viewpoints, and experiences 22 | * Giving and gracefully accepting constructive feedback 23 | * Accepting responsibility and apologizing to those affected by our mistakes, 24 | and learning from the experience 25 | * Focusing on what is best not just for us as individuals, but for the overall 26 | community 27 | 28 | Examples of unacceptable behavior include: 29 | 30 | * The use of sexualized language or imagery, and sexual attention or advances of 31 | any kind 32 | * Trolling, insulting or derogatory comments, and personal or political attacks 33 | * Public or private harassment 34 | * Publishing others' private information, such as a physical or email address, 35 | without their explicit permission 36 | * Other conduct which could reasonably be considered inappropriate in a 37 | professional setting 38 | 39 | ## Enforcement Responsibilities 40 | 41 | Community leaders are responsible for clarifying and enforcing our standards of 42 | acceptable behavior and will take appropriate and fair corrective action in 43 | response to any behavior that they deem inappropriate, threatening, offensive, 44 | or harmful. 45 | 46 | Community leaders have the right and responsibility to remove, edit, or reject 47 | comments, commits, code, wiki edits, issues, and other contributions that are 48 | not aligned to this Code of Conduct, and will communicate reasons for moderation 49 | decisions when appropriate. 50 | 51 | ## Scope 52 | 53 | This Code of Conduct applies within all community spaces, and also applies when 54 | an individual is officially representing the community in public spaces. 
55 | Examples of representing our community include using an official e-mail address, 56 | posting via an official social media account, or acting as an appointed 57 | representative at an online or offline event. 58 | 59 | ## Enforcement 60 | 61 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 62 | reported to the community leaders responsible for enforcement at kotov.egor@gmail.com. 63 | All complaints will be reviewed and investigated promptly and fairly. 64 | 65 | All community leaders are obligated to respect the privacy and security of the 66 | reporter of any incident. 67 | 68 | ## Enforcement Guidelines 69 | 70 | Community leaders will follow these Community Impact Guidelines in determining 71 | the consequences for any action they deem in violation of this Code of Conduct: 72 | 73 | ### 1. Correction 74 | 75 | **Community Impact**: Use of inappropriate language or other behavior deemed 76 | unprofessional or unwelcome in the community. 77 | 78 | **Consequence**: A private, written warning from community leaders, providing 79 | clarity around the nature of the violation and an explanation of why the 80 | behavior was inappropriate. A public apology may be requested. 81 | 82 | ### 2. Warning 83 | 84 | **Community Impact**: A violation through a single incident or series of 85 | actions. 86 | 87 | **Consequence**: A warning with consequences for continued behavior. No 88 | interaction with the people involved, including unsolicited interaction with 89 | those enforcing the Code of Conduct, for a specified period of time. This 90 | includes avoiding interactions in community spaces as well as external channels 91 | like social media. Violating these terms may lead to a temporary or permanent 92 | ban. 93 | 94 | ### 3. Temporary Ban 95 | 96 | **Community Impact**: A serious violation of community standards, including 97 | sustained inappropriate behavior. 
98 | 99 | **Consequence**: A temporary ban from any sort of interaction or public 100 | communication with the community for a specified period of time. No public or 101 | private interaction with the people involved, including unsolicited interaction 102 | with those enforcing the Code of Conduct, is allowed during this period. 103 | Violating these terms may lead to a permanent ban. 104 | 105 | ### 4. Permanent Ban 106 | 107 | **Community Impact**: Demonstrating a pattern of violation of community 108 | standards, including sustained inappropriate behavior, harassment of an 109 | individual, or aggression toward or disparagement of classes of individuals. 110 | 111 | **Consequence**: A permanent ban from any sort of public interaction within the 112 | community. 113 | 114 | ## Attribution 115 | 116 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], 117 | version 2.1, available at 118 | https://www.contributor-covenant.org/version/2/1/code_of_conduct.html. 119 | 120 | Community Impact Guidelines were inspired by 121 | [Mozilla's code of conduct enforcement ladder](https://github.com/mozilla/inclusion). 122 | 123 | For answers to common questions about this code of conduct, see the FAQ at 124 | https://www.contributor-covenant.org/faq. Translations are available at https://www.contributor-covenant.org/translations. 125 | 126 | [homepage]: https://www.contributor-covenant.org 127 | -------------------------------------------------------------------------------- /vignettes/rdocdump.qmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Quick Start: dump R docs and vignettes to text files for LLMs" 3 | vignette: > 4 | %\VignetteIndexEntry{Quick Start: dump R docs and vignettes to text files for LLMs} 5 | %\VignetteEngine{quarto::html} 6 | %\VignetteEncoding{UTF-8} 7 | execute: 8 | eval: false 9 | format: 10 | html: 11 | toc: true 12 | toc-depth: 2 13 | code-overflow: wrap 14 | --- 15 | 16 | # Introduction 17 | 18 | `{rdocdump}` is an R package designed to combine an R package’s documentation and vignettes into a single plain text file. 
This is particularly useful when you want to ingest complete package documentation into large language models (LLMs) or for archival purposes. `{rdocdump}` works with installed packages, source directories, tar.gz archives, or package sources available on CRAN. 19 | 20 | # Installation 21 | 22 | 23 | Install the latest stable release of `rdocdump` from CRAN with: 24 | 25 | ```r 26 | install.packages("rdocdump") 27 | ``` 28 | 29 | You can install the development version of `rdocdump` from R Universe with: 30 | 31 | ```r 32 | install.packages('rdocdump', 33 | repos = c('https://e-kotov.r-universe.dev', 'https://cloud.r-project.org') 34 | ) 35 | ``` 36 | 37 | or from GitHub with: 38 | 39 | ```r 40 | # install.packages("pak") 41 | pak::pak("e-kotov/rdocdump") 42 | ``` 43 | 44 | 45 | # Setting the Cache Path 46 | 47 | By default, `{rdocdump}` stores temporary files in a directory within R’s temporary directory. You can override this by setting a custom cache path using the helper function `rdd_set_cache_path()`. For example: 48 | 49 | ```r 50 | # Set a custom cache directory 51 | cache_dir <- file.path(tempdir(), "my_rdocdump_cache") 52 | rdd_set_cache_path(cache_dir) 53 | ``` 54 | 55 | ``` 56 | rdocdump.cache_path set to: /private/var/folders/gb/t5zr5rn15sldqybrmqbyh6y80000gn/T/Rtmpp5wRxV/my_rdocdump_cache 57 | ``` 58 | 59 | This ensures that temporary tar.gz archives and extracted files are stored in your specified directory. 60 | 61 | # Extracting Documentation 62 | 63 | The main function in rdocdump is `rdd_to_txt()`, which accepts several types of inputs for the `pkg` argument: 64 | 65 | - An installed package name (e.g., `"stats"`). 66 | 67 | - A full path to a package source directory. 68 | 69 | - A full path to a package archive (tar.gz). 70 | 71 | - A package name available on CRAN (downloaded automatically if not installed). 
72 | 73 | Here is an example that downloads the source for the package `{rJavaEnv}` from CRAN, extracts its documentation, and saves it to a file: 74 | 75 | ```r 76 | # Extract documentation for 'rJavaEnv' and save to a text file. 77 | rdd_to_txt( 78 | pkg = "rJavaEnv", 79 | file = tempfile("rJavaEnv_docs_", fileext = ".txt"), 80 | force_fetch = TRUE, # Force download even if the package is installed. 81 | keep_files = "none" # Delete temporary files after extraction. 82 | ) 83 | ``` 84 | 85 | ``` 86 | Fetching package source from CRAN... 87 | trying URL 'https://cloud.r-project.org/src/contrib/rJavaEnv_0.3.0.tar.gz' 88 | Content type 'application/x-gzip' length 104016 bytes (101 KB) 89 | ================================================== 90 | downloaded 101 KB 91 | 92 | [1] "/private/var/folders/gb/t5zr5rn15sldqybrmqbyh6y80000gn/T/Rtmpp5wRxV/rJavaEnv_docs_c0736421bbc2.txt" 93 | ``` 94 | 95 | If you prefer to simply get the combined documentation as a character string, call the function without the `file` argument: 96 | 97 | ```r 98 | # Extract and capture the combined documentation in a variable. 99 | docs <- rdd_to_txt(pkg = "splines") 100 | cat(substr(docs, 1, 1000)) # Print the first 1000 characters for a preview. 101 | ``` 102 | 103 | ``` 104 | DESCRIPTION: 105 | Package: splines 106 | Version: 4.5.1 107 | Priority: base 108 | Imports: graphics, stats 109 | Title: Regression Spline Functions and Classes 110 | Author: Douglas M. Bates and 111 | William N. Venables 112 | Maintainer: R Core Team 113 | Contact: R-help mailing list 114 | Description: Regression spline functions and classes. 
115 | License: Part of R 4.5.1 116 | Suggests: Matrix, methods 117 | NeedsCompilation: yes 118 | Built: R 4.5.1; aarch64-apple-darwin20; 2025-06-14 01:29:30 UTC; unix 119 | 120 | -------------------------------------------------------------------------------- 121 | Function: asVector() 122 | Coerce an Object to a Vector 123 | 124 | Description: 125 | 126 | This is a generic function. Methods for this function coerce 127 | objects of given classes to vectors. 128 | 129 | Usage: 130 | 131 | asVector(object) 132 | 133 | Arguments: 134 | 135 | object: An object. 136 | 137 | Details: 138 | 139 | Methods for vector coercion in new classes must be created for the 140 | ‘asVector’ generic instead of ‘as.vector’. 141 | ``` 142 | 143 | # Choosing what to dump to text 144 | 145 | You can also choose whether the package documentation, the vignettes, the R source code, or any combination of them is included, using the `content` argument: 146 | 147 | ```r 148 | docs <- rdd_to_txt( 149 | pkg = "utils", 150 | content = "vignettes" 151 | ) 152 | cat(substr(docs, 1, 1000)) # Print the first 1000 characters for a preview. 
153 | ``` 154 | 155 | As you can see below, only the vignettes were combined: 156 | 157 | ``` 158 | -------------------------------------------------------------------------------- 159 | Vignette: Sweave.Rnw 160 | 161 | % File src/library/utils/vignettes/Sweave.Rnw 162 | % Part of the R package, https://www.R-project.org 163 | % Copyright 2002-2022 Friedrich Leisch and the R Core Team 164 | % Distributed under GPL 2 or later 165 | 166 | \documentclass[a4paper]{article} 167 | 168 | %\VignetteIndexEntry{Sweave User Manual} 169 | %\VignettePackage{utils} 170 | %\VignetteDepends{tools, datasets, stats, graphics} 171 | 172 | \title{Sweave User Manual} 173 | \author{Friedrich Leisch and R Core Team} 174 | 175 | \usepackage[round]{natbib} 176 | \usepackage{graphicx, Rd} 177 | \usepackage{listings} 178 | 179 | \lstset{frame=trbl,basicstyle=\small\tt} 180 | \usepackage{hyperref} 181 | \usepackage{color} 182 | \definecolor{Blue}{rgb}{0,0,0.8} 183 | \hypersetup{% 184 | colorlinks,% 185 | plainpages=true,% 186 | linkcolor=black,% 187 | citecolor=black,% 188 | urlcolor=Blue,% 189 | %pdfstartview=FitH,% or Fit 190 | pdfstartview={XYZ null null 1},% 191 | pdfview={XYZ null null null},% 192 | pdfpagemode=UseNone,% for no outline 193 | pdfauthor={Friedrich Leisch and R Core Team},% 194 | pdftitle={Sweave 195 | ``` 196 | 197 | # Handling Temporary Files 198 | 199 | The argument `keep_files` controls whether temporary files (the downloaded archive and/or extracted directory) are retained: 200 | 201 | - `"none"` (default): Delete both the tar.gz archive and the extracted files. 202 | 203 | - `"tgz"`: Keep only the tar.gz archive. 204 | 205 | - `"extracted"`: Keep only the extracted files. 206 | 207 | - `"both"`: Keep both the tar.gz archive and the extracted files. 208 | 209 | Choose the option that best fits your workflow. 
210 | -------------------------------------------------------------------------------- /R/to_txt.R: -------------------------------------------------------------------------------- 1 | #' Dump Package Source, Documentationm and Vignettes into Plain Text 2 | #' 3 | #' @description 4 | #' This function produces a single text output for an R package by processing its documentation (Rd files from the package source or the documentation from already installed packages), vignettes, and/or R source code. 5 | #' 6 | #' @param pkg A `character` string specifying the package. This can be: 7 | #' \itemize{ 8 | #' \item an installed package name, 9 | #' \item a full path to a package source directory, 10 | #' \item a full path to a package archive file (tar.gz), or 11 | #' \item a package name not installed (which will then be downloaded from CRAN). 12 | #' } 13 | #' @param file Optional. Save path for the output text file. If set, the function will return the path to the file instead of the combined text. Defaults to `NULL`. 14 | #' @param force_fetch `logical`. If `TRUE`, the package source will be fetched from CRAN as a tar.gz archive even if the package is already installed locally. Default is `FALSE`, but when `version` is specified, it will be set to `TRUE`. 15 | #' @param version Optional. A `character` string specifying the package version to fetch from CRAN. If not provided, the latest version will be used. 16 | #' @param content A character vector specifying which components to include in the output. 17 | #' Possible values are: 18 | #' \itemize{ 19 | #' \item `"all"`: Include Rd documentation, vignettes, and R source code (default). 20 | #' \item `"docs"`: Include only the Rd documentation. 21 | #' \item `"vignettes"`: Include only the vignettes. 22 | #' \item `"code"`: Include only the R source code. When extracting code for non-installed packages, the function will not include roxygen2 documentation, as the documentation can be imported from the Rd files. 
If you want to extract the R source code with the roxygen2 documentation, use \code{\link{rdd_extract_code}} and set `include_roxygen` to `TRUE`. 23 | #' } 24 | #' You can specify multiple options (e.g., `c("docs", "code")` to include both documentation and source code). 25 | #' @param keep_files A `character` value controlling whether temporary files should be kept. 26 | #' Possible values are: 27 | #' \itemize{ 28 | #' \item `"none"`: Delete both the tar.gz archive and the extracted files (default). 29 | #' \item `"tgz"`: Keep only the tar.gz archive. 30 | #' \item `"extracted"`: Keep only the extracted files. 31 | #' \item `"both"`: Keep both the tar.gz archive and the extracted files. 32 | #' } 33 | #' @param cache_path A `character` string specifying the directory where kept temporary files will be stored. 34 | #' By default, it uses the value of `getOption("rdocdump.cache_path")` which sets the cache directory to the temporary directory of the current R session. 35 | #' 36 | #' @param repos A `character` vector of repository URLs. By default, it uses the value of `getOption("rdocdump.repos")` which sets the repository URLs to the default R repositories and is itself set to `c("CRAN" = "https://cloud.r-project.org")` on package load to prevent accidental downloads of pre-built packages from Posit Package Manager and R Universe. 37 | #' 38 | #' @return A single string containing the combined package documentation, vignettes, and/or code as specified by the `content` argument. 39 | #' If the `file` argument is set, returns the path to the file. 40 | #' 41 | #' @export 42 | #' 43 | #' @examples 44 | #' # Extract documentation for built-in `stats` package (both docs and vignettes). 
#' docs <- rdd_to_txt("splines")
#' cat(substr(docs, 1, 500))
#'
#' \donttest{
#' # set cache directory for `rdocdump`
#' rdd_set_cache_path(paste0(tempdir(), "/rdocdump_cache"))
#'
#' # Extract only documentation for rJavaEnv by downloading its source from CRAN
#' docs <- rdd_to_txt(
#'   "rJavaEnv",
#'   force_fetch = TRUE,
#'   content = "docs",
#'   repos = c("CRAN" = "https://cran.r-project.org")
#' )
#' lines <- unlist(strsplit(docs, "\n"))
#' # Print the first 3 lines
#' cat(head(lines, 3), sep = "\n")
#' # Print the last 3 lines
#' cat(tail(lines, 3), sep = "\n")
#'
#' # clean cache directory
#' unlink(getOption("rdocdump.cache_path"), recursive = TRUE, force = TRUE)
#' }
#'
rdd_to_txt <- function(
  pkg,
  file = NULL,
  content = "all",
  force_fetch = FALSE,
  version = NULL,
  keep_files = "none",
  cache_path = getOption("rdocdump.cache_path"),
  repos = getOption("rdocdump.repos", getOption("repos"))
) {
  # Validate keep_files. The type/length guards make malformed input (a vector,
  # NULL, a number) fail with this message instead of an unrelated error from
  # `if` receiving a non-scalar condition.
  if (
    !is.character(keep_files) ||
      length(keep_files) != 1L ||
      !keep_files %in% c("none", "tgz", "extracted", "both")
  ) {
    stop(
      'Invalid value for keep_files. Choose one of "none", "tgz", "extracted", "both".'
    )
  }

  # Resolve the requested content; "all" expands to every allowed component.
  allowed_content <- c("docs", "vignettes", "code")
  effective_content <- if ("all" %in% content) allowed_content else content
  # An empty selection used to pass validation and silently return "".
  if (
    length(effective_content) == 0L ||
      !all(effective_content %in% allowed_content)
  ) {
    stop(paste0(
      "Invalid value for content. Choose any of ",
      paste(c("all", allowed_content), collapse = ", "),
      "."
    ))
  }

  # Requesting a specific version always implies fetching the source.
  fetch_source <- force_fetch || !is.null(version)

  # Resolve package source path using the existing helper.
  pkg_info <- resolve_pkg_path(
    pkg,
    cache_path,
    force_fetch = fetch_source,
    version = version,
    repos = repos
  )
  # Clean up temporary files according to keep_files. Registered as an exit
  # handler so downloaded/extracted files are also handled when one of the
  # combine steps below fails.
  on.exit(cleanup_files(pkg_info, keep_files), add = TRUE)

  pkg_path <- pkg_info$pkg_path

  # Initialize component texts; components that are not requested stay empty.
  docs_text <- ""
  vignettes_text <- ""
  code_text <- ""

  if ("docs" %in% effective_content) {
    docs_text <- combine_rd(
      pkg_path,
      is_installed = pkg_info$is_installed,
      pkg_name = pkg_info$pkg_name
    )
  }
  if ("vignettes" %in% effective_content) {
    vignettes_text <- combine_vignettes(pkg_path)
  }
  if ("code" %in% effective_content) {
    # `pkg` here is either an installed package name or the already resolved
    # source directory, so this call does not download anything again.
    code_text <- rdd_extract_code(
      pkg = if (pkg_info$is_installed) pkg else pkg_path,
      file = NULL,
      include_tests = FALSE,
      include_roxygen = FALSE,
      force_fetch = fetch_source,
      version = version,
      cache_path = cache_path,
      # Keep the files for now: this function's own cleanup removes them later.
      keep_files = "both"
    )
  }

  # Combine components in a fixed order: docs, vignettes, then code.
  # Empty components are dropped so no stray separators are produced.
  components <- c(docs_text, vignettes_text, code_text)
  combined_text <- paste(components[nzchar(components)], collapse = "\n\n")

  if (!is.null(file)) {
    writeLines(combined_text, con = file)
    return(file)
  }
  combined_text
}

# ------------------------------------------------------------------------------
# /R/extract_code.R:
# ------------------------------------------------------------------------------
#' Extract R Source Code from a Package
#'
#' @description
#' This function extracts the R source code from a package. For installed packages, it retrieves the package namespace and deparses all functions found in the package. For package source directories or archives (non-installed packages), it reads all `.R` files from the `R` directory and, optionally, from the `tests` directory. Optionally, it can include roxygen2 documentation from these files.
#'
#' @param pkg A `character` string specifying the package. This can be:
#'   \itemize{
#'     \item an installed package name,
#'     \item a full path to a package source directory,
#'     \item a full path to a package archive file (tar.gz), or
#'     \item a package name not installed (which will then be downloaded from CRAN).
#'   }
#' @param file Optional. Save path for the output text file. If set, the function will return the path to the file instead of the combined text. Defaults to `NULL`.
#' @param include_tests `logical`.
#' If `TRUE`, for non-installed packages, the function will also include R source code from the `tests` directory. Defaults to `FALSE`.
#' @param include_roxygen `logical`. If `TRUE`, roxygen2 documentation lines (lines starting with "#'") from R files will be included in the output. Defaults to `FALSE`.
#' @param force_fetch `logical`. If `TRUE`, the package source will be fetched from CRAN even if the package is installed locally. Default is `FALSE`, but when `version` is specified, it will be set to `TRUE`.
#' @param version Optional. A `character` string specifying the package version to fetch from CRAN. If not provided, the latest version will be used.
#' @param cache_path A `character` string specifying the directory to use as a cache. Defaults to the value of `getOption("rdocdump.cache_path")`.
#'
#' @inheritParams rdd_to_txt
#'
#' @return A single string containing the combined R source code (and, optionally, roxygen2 documentation) from the package.
#' If the `file` argument is set, returns the path to the file.
#'
#' @export
#'
#' @examples
#' # Extract only R source code (excluding roxygen2 documentation) from an installed package.
#' code <- rdd_extract_code("splines")
#' cat(substr(code, 1, 1000))
#'
#' # Extract R source code including roxygen2 documentation from a package source directory.
#' \donttest{
#' # set cache directory for `rdocdump`
#' rdd_set_cache_path(paste0(tempdir(), "/rdocdump_cache"))
#'
#' local({
#'   code_with_roxygen <- rdd_extract_code(
#'     "ini",
#'     include_roxygen = TRUE,
#'     force_fetch = TRUE,
#'     repos = c("CRAN" = "https://cran.r-project.org")
#'   )
#'   cat(substr(code_with_roxygen, 1, 1000))
#' })
#'
#' # Extract R source code from a package source directory,
#' # including test files but excluding roxygen2 docs.
#' local({
#'   code_with_tests <- rdd_extract_code(
#'     "ini",
#'     include_roxygen = FALSE,
#'     include_tests = TRUE,
#'     force_fetch = TRUE,
#'     repos = c("CRAN" = "https://cran.r-project.org")
#'   )
#'   cat(substr(code_with_tests, 1, 1000))
#' })
#' # clean cache directory
#' unlink(getOption("rdocdump.cache_path"), recursive = TRUE, force = TRUE)
#' }
#'
rdd_extract_code <- function(
  pkg,
  file = NULL,
  include_tests = FALSE,
  include_roxygen = FALSE,
  force_fetch = FALSE,
  version = NULL,
  cache_path = getOption("rdocdump.cache_path"),
  keep_files = "none",
  repos = getOption("rdocdump.repos", getOption("repos"))
) {
  # Validate keep_files before anything is downloaded, using the same rule and
  # message as rdd_to_txt().
  if (
    !is.character(keep_files) ||
      length(keep_files) != 1L ||
      !keep_files %in% c("none", "tgz", "extracted", "both")
  ) {
    stop(
      'Invalid value for keep_files. Choose one of "none", "tgz", "extracted", "both".'
    )
  }

  # Pass version to resolve_pkg_path and force fetching if a version is specified.
  pkg_info <- resolve_pkg_path(
    pkg,
    cache_path,
    force_fetch = force_fetch || !is.null(version),
    version = version,
    repos = repos
  )
  # Clean up temporary files according to keep_files. Registered as an exit
  # handler so the files are also handled when extraction fails.
  on.exit(cleanup_files(pkg_info, keep_files), add = TRUE)

  combined_code <- if (isTRUE(pkg_info$is_installed)) {
    # Installed packages are read from their namespace, which needs the name.
    if (is.null(pkg_info$pkg_name)) {
      stop("Installed package does not provide pkg_name information.")
    }
    extract_code_installed(pkg_info$pkg_name)
  } else {
    # Source packages are read from the files on disk.
    extract_code_source(
      pkg_info$pkg_path,
      include_tests,
      include_roxygen
    )
  }

  if (!is.null(file)) {
    writeLines(combined_code, con = file)
    return(file)
  }

  combined_code
}

#' Extract code from an installed package using its namespace.
#' This function retrieves all functions from the package namespace and deparses them to get their source code.
#' @param pkg_name The name of the installed package.
#' @return A single string containing the source code of all functions in the package.
#' @keywords internal
extract_code_installed <- function(pkg_name) {
  # Load the namespace of the installed package.
  ns <- asNamespace(pkg_name)

  # all.names = TRUE also lists objects whose names start with a dot.
  obj_names <- ls(ns, all.names = TRUE)
  rule <- strrep("-", 80)

  code_strings <- lapply(obj_names, function(nm) {
    obj <- get(nm, envir = ns, inherits = FALSE)
    # Only functions are rendered; other objects contribute nothing.
    if (!is.function(obj)) {
      return(NULL)
    }
    # Deparse the function to retrieve its source code.
    paste0(
      rule,
      "\nFunction: ",
      nm,
      "()\n",
      paste(deparse(obj), collapse = "\n")
    )
  })

  # Combine all function source codes; unlist() drops the NULL entries.
  paste(unlist(code_strings), collapse = "\n\n")
}

#' Helper function to extract code from package source files.
#' This function reads all `.R` files in the `R` directory and optionally includes files from the `tests` directory (searched recursively, so `tests/testthat/` is covered).
#' It can also exclude roxygen2 documentation lines.
#' @param pkg_path Path to the package source directory.
#' @inheritParams rdd_extract_code
#' @return A single string containing the source code from the package's R files.
#' @keywords internal
extract_code_source <- function(
  pkg_path,
  include_tests = FALSE,
  include_roxygen = FALSE
) {
  # Render one file as a header (rule + "<label>: <name>") followed by its
  # contents. The leading "" and trailing "\n\n" reproduce the spacing of the
  # previous incremental paste(), so output for the `R` directory is unchanged.
  render_file <- function(path, label, display_name) {
    header <- paste0(strrep("-", 80), "\n", label, ": ", display_name, "\n")
    file_content <- readLines(path, warn = FALSE)

    # Exclude roxygen2 documentation if not requested. Leading whitespace is
    # allowed so indented roxygen lines (e.g. inside R6 classes) are dropped too.
    if (!include_roxygen) {
      file_content <- file_content[!grepl("^\\s*#'", file_content)]
    }

    paste(
      "",
      header,
      paste(file_content, collapse = "\n"),
      "\n\n",
      sep = "\n"
    )
  }

  chunks <- character(0)

  # Read all .R files in the R directory.
  r_dir <- file.path(pkg_path, "R")
  if (dir.exists(r_dir)) {
    r_files <- list.files(r_dir, pattern = "\\.[rR]$", full.names = TRUE)
    chunks <- c(
      chunks,
      vapply(
        r_files,
        function(rf) render_file(rf, "File", basename(rf)),
        character(1),
        USE.NAMES = FALSE
      )
    )
  } else {
    warning("R directory not found in the package source.")
  }

  # Optionally, include R files from the tests directory. The search is
  # recursive because test files usually live in a subdirectory such as
  # tests/testthat/; headers show the path relative to `tests`, which equals
  # the bare file name for top-level files.
  if (include_tests) {
    tests_dir <- file.path(pkg_path, "tests")
    if (dir.exists(tests_dir)) {
      test_files <- list.files(
        tests_dir,
        pattern = "\\.[rR]$",
        full.names = FALSE,
        recursive = TRUE
      )
      chunks <- c(
        chunks,
        vapply(
          test_files,
          function(tf) render_file(file.path(tests_dir, tf), "Test File", tf),
          character(1),
          USE.NAMES = FALSE
        )
      )
    }
  }

  # paste() of zero chunks yields "", matching the previous empty result.
  paste(chunks, collapse = "")
}

# ------------------------------------------------------------------------------
# /R/util_resolve_pkg_path.R:
# ------------------------------------------------------------------------------
#' Resolve the path to a package directory or tarball
#' @description
#' This function resolves the path to a package directory or tarball, handling both installed packages and source packages from CRAN.
#' @inheritParams rdd_to_txt
#' @return A list containing:
#' - `pkg_path`: Path to the package directory or tarball.
#' - `extracted_path`: Path to the extracted package directory (if applicable).
#' - `tar_path`: Path to the tarball if it was downloaded.
#' - `is_installed`: Logical indicating if the package is installed.
#' - `pkg_name`: Package name; only present when `is_installed` is `TRUE`.
#'
#' @keywords internal
#'
resolve_pkg_path <- function(
  pkg,
  cache_path = NULL,
  force_fetch = FALSE,
  version = NULL,
  repos = getOption("rdocdump.repos", getOption("repos"))
) {
  if (!is.character(pkg) || length(pkg) != 1L) {
    stop("Argument 'pkg' must be a single character string.")
  }

  # Helper function to parse tarball filename into package name and version.
  parse_tarball_name <- function(tar_path) {
    base_name <- basename(tar_path) # e.g., "rJavaEnv_0.2.2.tar.gz"
    folder_name <- sub("\\.tar\\.gz$", "", base_name)
    parts <- strsplit(folder_name, "_")[[1]]
    if (length(parts) < 2) {
      stop(
        "Tarball filename does not conform to the expected pattern 'pkgname_version.tar.gz'."
      )
    }
    # The version is the last "_"-separated piece; everything before it is the
    # package name.
    list(
      pkgname = paste(parts[-length(parts)], collapse = "_"),
      version = parts[length(parts)]
    )
  }

  # Helper function to determine extraction directory:
  # <cache_path>/<pkgname>/<version>, or a fresh temporary path without a cache.
  get_extract_dir <- function(tar_path) {
    info <- parse_tarball_name(tar_path)
    if (!is.null(cache_path)) {
      file.path(cache_path, info$pkgname, info$version)
    } else {
      tempfile(paste0(info$pkgname, "_", info$version))
    }
  }

  # Helper function to unpack an archive and return the extraction directory.
  extract_archive <- function(archive) {
    extract_dir <- get_extract_dir(archive)
    if (!dir.exists(extract_dir)) {
      dir.create(extract_dir, recursive = TRUE)
    }
    utils::untar(archive, exdir = extract_dir)
    # Flatten extra top-level folder if necessary, so the package files sit
    # directly in extract_dir.
    subdirs <- list.dirs(extract_dir, recursive = FALSE, full.names = TRUE)
    if (length(subdirs) == 1L) {
      files <- list.files(
        subdirs[1],
        full.names = TRUE,
        all.files = TRUE,
        no.. = TRUE
      )
      file.copy(files, extract_dir, recursive = TRUE)
      unlink(subdirs[1], recursive = TRUE)
    }
    extract_dir
  }

  # Helper function to derive the package name for an installed-package
  # directory: the `Package` field of its DESCRIPTION, else the folder name.
  # Callers use pkg_name to load the namespace, so the path itself is not usable.
  installed_pkg_name <- function(pkg_dir) {
    desc_path <- file.path(pkg_dir, "DESCRIPTION")
    if (file.exists(desc_path)) {
      pkg_field <- tryCatch(
        unname(read.dcf(desc_path, fields = "Package")[1L, 1L]),
        error = function(e) NA_character_
      )
      if (!is.na(pkg_field) && nzchar(pkg_field)) {
        return(pkg_field)
      }
    }
    basename(normalizePath(pkg_dir, mustWork = FALSE))
  }

  if (file.exists(pkg)) {
    if (dir.exists(pkg)) {
      # Check if directory is a source package by looking for Rd files in "man/"
      man_dir <- file.path(pkg, "man")
      rd_files <- if (dir.exists(man_dir)) {
        list.files(man_dir, pattern = "\\.Rd$", full.names = TRUE)
      } else {
        character(0)
      }
      if (length(rd_files) > 0) {
        # It is a source package
        return(list(
          pkg_path = pkg,
          extracted_path = NULL,
          tar_path = NULL,
          is_installed = FALSE
        ))
      }
      # No .Rd files found in "man/" -> assume it's an installed package.
      return(list(
        pkg_path = pkg,
        extracted_path = NULL,
        tar_path = NULL,
        is_installed = TRUE,
        pkg_name = installed_pkg_name(pkg)
      ))
    }

    # pkg is a file; assume it is a tar.gz archive.
    if (!grepl("\\.tar\\.gz$", pkg)) {
      stop(
        "The specified file is not a recognized package archive (expected extension .tar.gz)."
      )
    }
    extract_dir <- extract_archive(pkg)
    # tar_path stays NULL: the archive was supplied by the caller, not
    # downloaded here.
    return(list(
      pkg_path = extract_dir,
      extracted_path = extract_dir,
      tar_path = NULL,
      is_installed = FALSE
    ))
  }

  # pkg is not an existing file/directory: treat it as a package name.
  # If force_fetch is TRUE, ignore any locally installed package.
  pkg_found <- if (!force_fetch) {
    tryCatch(find.package(pkg), error = function(e) NULL)
  } else {
    NULL
  }
  if (!is.null(pkg_found) && is.null(version)) {
    # Installed package found.
    return(list(
      pkg_path = pkg_found,
      extracted_path = NULL,
      tar_path = NULL,
      is_installed = TRUE,
      pkg_name = pkg
    ))
  }

  message("Fetching package source from CRAN...")
  dest_dir <- if (!is.null(cache_path)) cache_path else tempdir()
  if (!dir.exists(dest_dir)) {
    dir.create(dest_dir, recursive = TRUE)
  }
  # Warn if repos contains known problematic URLs.
  if (
    any(grepl("posit\\.co|r-universe\\.dev", repos, ignore.case = TRUE))
  ) {
    warning(
      "Using a repository URL from posit.co or r-universe.dev may result in pre-built binaries being downloaded instead of the package source."
    )
  }

  if (!is.null(version)) {
    if (length(repos) < 1L) {
      stop("No repository URL available to download the package from.")
    }
    # Construct URLs for a specific version, using the first repo. A trailing
    # slash is stripped so the URL does not contain "//".
    repo_url <- sub("/+$", "", repos[1])
    tar_filename <- paste0(pkg, "_", version, ".tar.gz")
    dest_file <- file.path(dest_dir, tar_filename)
    # Try the archive first, then the main contrib area.
    candidate_urls <- c(
      paste(repo_url, "src/contrib/Archive", pkg, tar_filename, sep = "/"),
      paste(repo_url, "src/contrib", tar_filename, sep = "/")
    )
    downloaded <- FALSE
    for (url in candidate_urls) {
      res <- try(
        suppressWarnings(
          utils::download.file(
            url,
            destfile = dest_file,
            mode = "wb",
            quiet = TRUE
          )
        ),
        silent = TRUE
      )
      if (!inherits(res, "try-error") && res == 0) {
        downloaded <- TRUE
        break
      }
    }
    if (!downloaded) {
      # Remove whatever a failed attempt may have left behind.
      unlink(dest_file)
      stop(paste(
        "Could not download package",
        pkg,
        "version",
        version,
        "from",
        url
      ))
    }
    # Same two-column layout that the download.packages() branch is read with.
    dp <- matrix(c(tar_filename, dest_file), nrow = 1)
  } else {
    dp <- utils::download.packages(
      pkg,
      destdir = dest_dir,
      type = "source",
      repos = repos
    )
  }

  if (nrow(dp) < 1L) {
    stop("Package not found on CRAN.")
  }
  archive <- dp[, 2]
  base_name <- basename(archive)
  # If cache_path is provided, move the archive there.
  if (!is.null(cache_path)) {
    dest_archive <- file.path(cache_path, base_name)
    file.rename(archive, dest_archive)
    archive <- dest_archive
  }
  extract_dir <- extract_archive(archive)
  list(
    pkg_path = extract_dir,
    extracted_path = extract_dir,
    tar_path = archive,
    is_installed = FALSE
  )
}

# ------------------------------------------------------------------------------
# /tests/testthat/test-to_txt.R:
# ------------------------------------------------------------------------------
# Tests for the rdd_to_txt function in the rdocdump package
fake_resolve_pkg_path <- function(
  pkg,
  cache_path,
  force_fetch,
  version,
  repos
) {
  list(
    pkg_path = pkg,
    is_installed = FALSE,
    pkg_name = "testpkg",
    tar_path = NULL,
    version = NULL,
    extracted_path = NULL
  )
}

test_that("rdd_to_txt works for installed packages", {
  # Use an installed package such as "stats".
txt <- rdd_to_txt("stats", keep_files = "none")

expect_true(nchar(txt) > 0)
expect_match(txt, "Function:")
})

test_that("rdd_to_txt with invalid keep_files value errors", {
  expect_error(
    rdd_to_txt("stats", keep_files = "invalid"),
    "Invalid value for keep_files"
  )
})

test_that("rdd_to_txt writes output to file when file parameter is provided", {
  temp_file <- tempfile("rdoc_output", fileext = ".txt")
  on.exit(unlink(temp_file))

  txt <- rdd_to_txt("stats", file = temp_file, keep_files = "none")
  expect_true(file.exists(temp_file))
  content <- readLines(temp_file)
  expect_true(length(content) > 0)
})

test_that("rdd_to_txt combines DESCRIPTION, Rd documentation and vignettes", {
  pkg_dir <- tempfile("pkg_")
  dir.create(pkg_dir)

  # Create a minimal DESCRIPTION file so that combine_rd() works.
  desc_file <- file.path(pkg_dir, "DESCRIPTION")
  writeLines(c("Package: testpkg", "Version: 0.1"), desc_file)

  # Create a dummy 'man' directory with an Rd file (for documentation)
  man_dir <- file.path(pkg_dir, "man")
  dir.create(man_dir)
  rd_file <- file.path(man_dir, "test.Rd")
  writeLines("\\name{test}\n\\alias{test}\n\\title{Test Function}", rd_file)

  # Create a 'vignettes' directory with a sample vignette file
  vignette_dir <- file.path(pkg_dir, "vignettes")
  dir.create(vignette_dir)
  vign_file <- file.path(vignette_dir, "example.md")
  writeLines("This is a vignette", vign_file)

  local_mocked_bindings(
    resolve_pkg_path = fake_resolve_pkg_path,
    .package = "rdocdump"
  )

  out <- rdd_to_txt(pkg_dir, keep_files = "none")

  # Adjusted expectations:
  # Check that the output includes the DESCRIPTION content.
  expect_match(
    out,
    "Package: testpkg",
    info = "Output should include DESCRIPTION header with package name"
  )
  # Check that the output includes the Rd documentation header for the Rd file.
  expect_match(
    out,
    "Function: test\\(\\)",
    info = "Output should include Rd documentation header"
  )
  # Check that the output includes the vignette header.
  expect_match(
    out,
    "Vignette: example\\.md",
    info = "Output should include vignette header"
  )
  # Check that the output includes the vignette content.
  expect_match(
    out,
    "This is a vignette",
    info = "Output should include vignette content"
  )
})


test_that("rdd_to_txt outputs only documentation when content is 'docs'", {
  pkg_dir <- tempfile("pkg_")
  dir.create(pkg_dir)

  # Create a minimal DESCRIPTION file so that combine_rd() works.
  desc_file <- file.path(pkg_dir, "DESCRIPTION")
  writeLines(c("Package: testpkg", "Version: 0.1"), desc_file)

  # Create a dummy 'man' directory with an Rd file (for documentation)
  man_dir <- file.path(pkg_dir, "man")
  dir.create(man_dir)
  rd_file <- file.path(man_dir, "test.Rd")
  writeLines("\\name{test}\n\\alias{test}\n\\title{Test Function}", rd_file)

  # Create a 'vignettes' directory with a sample vignette file
  vignette_dir <- file.path(pkg_dir, "vignettes")
  dir.create(vignette_dir)
  vign_file <- file.path(vignette_dir, "example.md")
  writeLines("This is a vignette", vign_file)

  local_mocked_bindings(
    resolve_pkg_path = fake_resolve_pkg_path,
    .package = "rdocdump"
  )

  out_docs <- rdd_to_txt(pkg_dir, keep_files = "none", content = "docs")

  # Check that the output includes the DESCRIPTION and Rd documentation headers...
  expect_match(
    out_docs,
    "Package: testpkg",
    info = "Output should include DESCRIPTION header"
  )
  expect_match(
    out_docs,
    "Function: test\\(\\)",
    info = "Output should include Rd documentation header"
  )

  # ...but it should NOT include the vignette header or content.
  expect_false(
    grepl("Vignette: example\\.md", out_docs),
    info = "Output should not include vignette header"
  )
  expect_false(
    grepl("This is a vignette", out_docs),
    info = "Output should not include vignette content"
  )
})

test_that("rdd_to_txt outputs only vignettes when content is 'vignettes'", {
  pkg_dir <- tempfile("pkg_")
  dir.create(pkg_dir)

  # Create a minimal DESCRIPTION file.
  desc_file <- file.path(pkg_dir, "DESCRIPTION")
  writeLines(c("Package: testpkg", "Version: 0.1"), desc_file)

  # Create a dummy 'man' directory with an Rd file.
  man_dir <- file.path(pkg_dir, "man")
  dir.create(man_dir)
  rd_file <- file.path(man_dir, "test.Rd")
  writeLines("\\name{test}\n\\alias{test}\n\\title{Test Function}", rd_file)

  # Create a 'vignettes' directory with a sample vignette file.
  vignette_dir <- file.path(pkg_dir, "vignettes")
  dir.create(vignette_dir)
  vign_file <- file.path(vignette_dir, "example.md")
  writeLines("This is a vignette", vign_file)

  local_mocked_bindings(
    resolve_pkg_path = fake_resolve_pkg_path,
    .package = "rdocdump"
  )

  out_vignettes <- rdd_to_txt(
    pkg_dir,
    keep_files = "none",
    content = "vignettes"
  )

  # Check that the output includes the vignette header and content.
  expect_match(
    out_vignettes,
    "Vignette: example\\.md",
    info = "Output should include vignette header"
  )
  expect_match(
    out_vignettes,
    "This is a vignette",
    info = "Output should include vignette content"
  )

  # But it should NOT include the documentation header.
  expect_false(
    grepl("Function: test\\(\\)", out_vignettes),
    info = "Output should not include Rd documentation header"
  )
})

test_that("rdd_to_txt keeps tar.gz archive when keep_files is 'tgz'", {
  skip_on_cran()
  skip_if_offline()

  # Create a unique cache directory for this test; it is removed on exit even
  # when an expectation or the download fails.
  cache_dir <- tempfile("cache_tgz")
  dir.create(cache_dir)
  on.exit(unlink(cache_dir, recursive = TRUE), add = TRUE)

  # Call rdd_to_txt on the "ini" package.
  # force_fetch = TRUE ensures it downloads from CRAN,
  # and keep_files = "tgz" should leave the tar.gz archive in the cache.
  # options() returns the previous values, which are restored on exit.
  old_repos <- options(repos = c(CRAN = "https://cloud.r-project.org"))
  on.exit(options(old_repos), add = TRUE)
  out <- suppressWarnings(rdd_to_txt(
    "ini",
    force_fetch = TRUE,
    keep_files = "tgz",
    cache_path = cache_dir
  ))

  # Look for any tar.gz file in the cache directory.
  tar_files <- list.files(
    cache_dir,
    pattern = "\\.tar\\.gz$",
    full.names = TRUE
  )
  expect_true(length(tar_files) > 0)
})

test_that("rdd_to_txt keeps both tar.gz archive and extracted files when keep_files is 'both'", {
  skip_on_cran()
  skip_if_offline()

  # Create a unique cache directory; it is removed on exit.
  cache_dir <- tempfile("cache_both")
  dir.create(cache_dir)
  on.exit(unlink(cache_dir, recursive = TRUE), add = TRUE)

  # Call rdd_to_txt on the "ini" package with keep_files = "both".
  old_repos <- options(repos = c(CRAN = "https://cloud.r-project.org"))
  on.exit(options(old_repos), add = TRUE)
  out <- suppressWarnings(rdd_to_txt(
    "ini",
    force_fetch = TRUE,
    keep_files = "both",
    cache_path = cache_dir
  ))

  # Check that a tar.gz file exists in the cache.
  tar_files <- list.files(
    cache_dir,
    pattern = "\\.tar\\.gz$",
    full.names = TRUE
  )
  expect_true(length(tar_files) > 0)

  # For the extracted package: resolve_pkg_path creates an extraction directory
  # as file.path(cache_dir, <pkgname>, <version>). For package "ini", we expect
  # a subdirectory named "ini" to exist under cache_dir.
  extracted_dir <- file.path(cache_dir, "ini")
  expect_true(dir.exists(extracted_dir))

  # There should be at least one directory inside the "ini" folder.
  subdirs <- list.dirs(extracted_dir, recursive = FALSE, full.names = TRUE)
  expect_true(length(subdirs) > 0)
})

test_that("rdd_to_txt fetches a specific package version when 'version' is provided", {
  skip_on_cran()
  skip_if_offline()

  cache_dir <- tempfile("cache_version_test")
  dir.create(cache_dir)
  on.exit(unlink(cache_dir, recursive = TRUE), add = TRUE)

  # Use the small "ini" package and request an explicit version of it.
  pkg_name <- "ini"
  pkg_version <- "0.1"

  old_repos <- options(repos = c(CRAN = "https://cloud.r-project.org"))
  on.exit(options(old_repos), add = TRUE)

  # Fetch the package with the specified version.
  out <- suppressWarnings(rdd_to_txt(
    pkg_name,
    version = pkg_version,
    force_fetch = TRUE,
    keep_files = "extracted",
    cache_path = cache_dir
  ))

  # Check that the extracted directory for the correct version exists.
  expected_dir <- file.path(cache_dir, pkg_name, pkg_version)
  expect_true(dir.exists(expected_dir))

  # Verify the version from the DESCRIPTION file.
  desc_file <- file.path(expected_dir, "DESCRIPTION")
  expect_true(file.exists(desc_file))

  desc_content <- readLines(desc_file)
  # Anchor on the field name and match the version literally, so the "." in
  # the version string is not treated as a regex wildcard.
  version_line <- grep("^Version:", desc_content, value = TRUE)
  expect_true(grepl(pkg_version, version_line, fixed = TRUE))
})

test_that("rdd_to_txt sets force_fetch=TRUE internally whenever 'version' is provided", {
  calls <- new.env(parent = emptyenv())
  calls$resolve_force_fetch <- NULL
  calls$rdd_force_fetch <- NULL

  fake_resolve_pkg_path_spy <- function(
    pkg,
    cache_path,
    force_fetch,
    version,
    repos
  ) {
    calls$resolve_force_fetch <- force_fetch
    list(
      pkg_path = tempfile("pkg_"),
      is_installed = FALSE,
      pkg_name = "ini",
      tar_path = NULL,
      version = version,
      extracted_path = NULL
    )
  }

  fake_rdd_extract_code_spy <- function(
    pkg,
    file,
    include_tests,
    include_roxygen,
    force_fetch,
    version,
    cache_path,
    keep_files
  ) {
    calls$rdd_force_fetch <- force_fetch
    "CODE"
  }

  # No-op helpers to avoid touching filesystem or requiring real pkg content
  fake_cleanup_files <- function(pkg_info, keep_files) invisible(NULL)
  fake_combine_rd <- function(...) "" # not used when content = "code"
  fake_combine_vignettes <- function(...) "" # not used when content = "code"

  local_mocked_bindings(
    resolve_pkg_path = fake_resolve_pkg_path_spy,
    rdd_extract_code = fake_rdd_extract_code_spy,
    cleanup_files = fake_cleanup_files,
    combine_rd = fake_combine_rd,
    combine_vignettes = fake_combine_vignettes,
    .package = "rdocdump"
  )

  out <- rdd_to_txt(
    "ini",
    version = "0.1", # << the trigger
    force_fetch = FALSE, # user-specified FALSE should be overridden
    content = "code",
    keep_files = "none"
  )

  expect_identical(out, "CODE")
  expect_true(isTRUE(calls$resolve_force_fetch))
  expect_true(isTRUE(calls$rdd_force_fetch))
})

test_that("rdd_to_txt passes through force_fetch unchanged when 'version' is NULL", {
  calls <- new.env(parent = emptyenv())
  calls$resolve_force_fetch <- NULL
  calls$rdd_force_fetch <- NULL

  fake_resolve_pkg_path_spy <- function(
    pkg,
    cache_path,
    force_fetch,
    version,
    repos
  ) {
    calls$resolve_force_fetch <- force_fetch
    list(
      pkg_path = tempfile("pkg_"),
      is_installed = FALSE,
      pkg_name = "ini",
      tar_path = NULL,
      version = version,
      extracted_path = NULL
    )
  }

  fake_rdd_extract_code_spy <- function(
    pkg,
    file,
    include_tests,
    include_roxygen,
    force_fetch,
    cache_path,
    version,
    keep_files
  ) {
    calls$rdd_force_fetch <- force_fetch
    "CODE"
  }

  fake_cleanup_files <- function(pkg_info, keep_files) invisible(NULL)
  fake_combine_rd <- function(...) ""
  fake_combine_vignettes <- function(...) ""

  local_mocked_bindings(
    resolve_pkg_path = fake_resolve_pkg_path_spy,
    rdd_extract_code = fake_rdd_extract_code_spy,
    cleanup_files = fake_cleanup_files,
    combine_rd = fake_combine_rd,
    combine_vignettes = fake_combine_vignettes,
    .package = "rdocdump"
  )

  # Case A: user passes force_fetch = FALSE, version = NULL -> internal should be FALSE
  out_A <- rdd_to_txt(
    "ini",
    version = NULL,
    force_fetch = FALSE,
    content = "code",
    keep_files = "none"
  )
  expect_identical(out_A, "CODE")
  expect_false(isTRUE(calls$resolve_force_fetch))
  expect_false(isTRUE(calls$rdd_force_fetch))

  # Reset captured values
  calls$resolve_force_fetch <- NULL
  calls$rdd_force_fetch <- NULL

  # Case B: user passes force_fetch = TRUE, version = NULL -> internal should be TRUE
  out_B <- rdd_to_txt(
    "ini",
    version = NULL,
    force_fetch = TRUE,
    content = "code",
    keep_files = "none"
  )
  expect_identical(out_B, "CODE")
  expect_true(isTRUE(calls$resolve_force_fetch))
  expect_true(isTRUE(calls$rdd_force_fetch))
})

# ------------------------------------------------------------------------------