├── .gitignore ├── .Rbuildignore ├── finddatasetpkgs.Rproj ├── NAMESPACE ├── DESCRIPTION ├── man └── get_dataset_pkgs.Rd ├── README.Rmd ├── README.md └── R └── get_dataset_pkgs.R /.gitignore: -------------------------------------------------------------------------------- 1 | .Rproj.user 2 | .Rhistory 3 | .RData 4 | .Ruserdata 5 | -------------------------------------------------------------------------------- /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^.*\.Rproj$ 2 | ^\.Rproj\.user$ 3 | ^README\.Rmd$ 4 | ^README-.*\.png$ 5 | -------------------------------------------------------------------------------- /finddatasetpkgs.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: Default 4 | SaveWorkspace: Default 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: knitr 13 | LaTeX: pdfLaTeX 14 | 15 | StripTrailingWhitespace: Yes 16 | 17 | BuildType: Package 18 | PackageUseDevtools: Yes 19 | PackageInstallArgs: --no-multiarch --with-keep.source 20 | PackageRoxygenize: rd,collate,namespace 21 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | export(get_dataset_pkgs) 4 | importFrom(knitr,kable) 5 | importFrom(magrittr,"%>%") 6 | importFrom(magrittr,extract) 7 | importFrom(rvest,html_node) 8 | importFrom(rvest,html_table) 9 | importFrom(stats,setNames) 10 | importFrom(stringr,regex) 11 | importFrom(stringr,str_detect) 12 | importFrom(stringr,str_replace_all) 13 | importFrom(utils,browseURL) 14 | importFrom(withr,with_output_sink) 15 | importFrom(xml2,read_html) 16 | -------------------------------------------------------------------------------- /DESCRIPTION: 
-------------------------------------------------------------------------------- 1 | Package: finddatasetpkgs 2 | Type: Package 3 | Title: Get Packages With Datasets 4 | Version: 0.0.1 5 | Author: Richard Cotton 6 | Maintainer: Richard Cotton 7 | Description: List all CRAN R packages with "datasets" or "data sets" in the 8 | Title field of their DESCRIPTION file. 9 | Imports: 10 | knitr, 11 | magrittr, 12 | rvest, 13 | stats, 14 | stringr, 15 | utils, 16 | withr, 17 | xml2 18 | License: Unlimited 19 | Encoding: UTF-8 20 | LazyData: true 21 | RoxygenNote: 6.0.1 22 | -------------------------------------------------------------------------------- /man/get_dataset_pkgs.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/get_dataset_pkgs.R 3 | \name{get_dataset_pkgs} 4 | \alias{get_dataset_pkgs} 5 | \title{Get CRAN R packages with datasets} 6 | \usage{ 7 | get_dataset_pkgs(browse = TRUE, repos = getOption("repos")["CRAN"]) 8 | } 9 | \arguments{ 10 | \item{browse}{Logical. If \code{TRUE}, open the resulting data frame in a 11 | browser.} 12 | 13 | \item{repos}{A string pointing to a CRAN instance. If \code{NULL} or 14 | \code{NA} then the master CRAN at \url{https://cran.r-project.org} is used.} 15 | } 16 | \value{ 17 | A data frame with two columns. 18 | \describe{ 19 | \item{pkgname}{Character. Name of the package.} 20 | \item{description}{Character. A quick description of the package.} 21 | } 22 | } 23 | \description{ 24 | Gets a data frame of CRAN R Packages that mention "dataset" or "data set" 25 | in the Title field of their DESCRIPTION file. 
26 | } 27 | -------------------------------------------------------------------------------- /README.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | output: 3 | md_document: 4 | variant: markdown_github 5 | --- 6 | 7 | 8 | 9 | ```{r, echo = FALSE} 10 | knitr::opts_chunk$set( 11 | collapse = TRUE, 12 | comment = "#>", 13 | fig.path = "README-" 14 | ) 15 | ``` 16 | 17 | # finddatasetpkgs 18 | 19 | *finddatasetpkgs* helps you find other R packages that contain datasets. At the moment, it is pretty stupid, and just looks in the Title field of the DESCRIPTION file of each package on CRAN, and checks if it contains "dataset" or "data set". 20 | 21 | ## Example 22 | 23 | There is one user-facing function, `get_dataset_pkgs()`. Call it without any arguments to get a data frame of packages that probably have datasets in them. 24 | 25 | By default, the data will also be shown in your web browser. 26 | 27 | ```{r, get_dataset_pkgs_browse, eval = FALSE} 28 | library(finddatasetpkgs) 29 | get_dataset_pkgs() # show in browser 30 | ``` 31 | 32 | You can turn this off, if you just want to work with the data frame programmatically. 33 | 34 | ```{r, get_dataset_pkgs_program, result = "asis"} 35 | library(finddatasetpkgs) 36 | pkgs <- get_dataset_pkgs(browse = FALSE) # don't show 37 | knitr::kable(head(pkgs)) 38 | ``` 39 | 40 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | --- 2 | output: 3 | md_document: 4 | variant: markdown_github 5 | --- 6 | 7 | 8 | 9 | 10 | 11 | # finddatasetpkgs 12 | 13 | *finddatasetpkgs* helps you find other R packages that contain datasets. At the moment, it is pretty stupid, and just looks in the Title field of the DESCRIPTION file of each package on CRAN, and checks if it contains "dataset" or "data set". 
14 | 15 | ## Example 16 | 17 | There is one user-facing function, `get_dataset_pkgs()`. Call it without any arguments to get a data frame of packages that probably have datasets in them. 18 | 19 | By default, the data will also be shown in your web browser. 20 | 21 | 22 | ```r 23 | library(finddatasetpkgs) 24 | get_dataset_pkgs() # show in browser 25 | ``` 26 | 27 | You can turn this off, if you just want to work with the data frame programmatically. 28 | 29 | 30 | ```r 31 | library(finddatasetpkgs) 32 | pkgs <- get_dataset_pkgs(browse = FALSE) # don't show 33 | knitr::kable(head(pkgs)) 34 | ``` 35 | 36 | 37 | 38 | | |pkgname |description | 39 | |:---|:-------------------------|:--------------------------------------------------------------------------------------------| 40 | |142 |agridat |Agricultural Datasets | 41 | |233 |AnalyzeFMRI |Functions for analysis of fMRI datasets stored in the ANALYZE or NIFTI format | 42 | |293 |aplore3 |Datasets from Hosmer, Lemeshow and Sturdivant, "Applied Logistic Regression" (3rd Ed., 2013) | 43 | |301 |AppliedPredictiveModeling |Functions and Data Sets for 'Applied Predictive Modeling' | 44 | |305 |aprean3 |Datasets from Draper and Smith "Applied Regression Analysis" (3rd Ed., 1998) | 45 | |328 |archdata |Example Datasets from Archaeological Research | 46 | 47 | -------------------------------------------------------------------------------- /R/get_dataset_pkgs.R: -------------------------------------------------------------------------------- 1 | utils::globalVariables(".") 2 | 3 | #' Get CRAN R packages with datasets 4 | #' 5 | #' Gets a data frame of CRAN R Packages that mention "dataset" or "data set" 6 | #' in the Title field of their DESCRIPTION file. 7 | #' @param browse Logical. If \code{TRUE}, open the resulting data frame in a 8 | #' browser. 9 | #' @param repos A string pointing to a CRAN instance. If \code{NULL} or 10 | #' \code{NA} then the master CRAN at \url{https://cran.r-project.org} is used. 
#' @return A data frame with two columns.
#' \describe{
#' \item{pkgname}{Character. Name of the package.}
#' \item{description}{Character. A quick description of the package.}
#' }
#' @importFrom knitr kable
#' @importFrom magrittr %>%
#' @importFrom magrittr extract
#' @importFrom rvest html_node
#' @importFrom rvest html_table
#' @importFrom stats setNames
#' @importFrom stringr str_detect
#' @importFrom stringr str_replace_all
#' @importFrom stringr regex
#' @importFrom utils browseURL
#' @importFrom withr with_output_sink
#' @importFrom xml2 read_html
#' @export
get_dataset_pkgs <- function(browse = TRUE,
                             repos = getOption("repos")["CRAN"]) {
  page <- read_html(cran_index_url(repos))

  # Take the first table on the page, name its two columns, and drop rows
  # whose package name is empty.
  all_pkgs <- page %>%
    html_node(xpath = "//table") %>%
    html_table(fill = TRUE) %>%
    setNames(c("pkgname", "description")) %>%
    extract(
      nzchar(.$pkgname),
    )

  # str_detect() yields NA for NA titles, and logical subsetting with NA
  # would insert all-NA rows; which() keeps only the definite matches.
  has_dataset <- str_detect(
    all_pkgs$description,
    regex("data ?set", ignore_case = TRUE)
  )
  pkgs_with_datasets <- all_pkgs[which(has_dataset), ]

  # Replace every whitespace character (e.g. a line break inside a title)
  # with a plain space.
  pkgs_with_datasets$description <- str_replace_all(
    pkgs_with_datasets$description, regex("\\s"), " "
  )

  if (browse) {
    # Put the links in a display-only copy so the returned data frame keeps
    # plain package names, as documented.
    linked_pkgs <- pkgs_with_datasets
    linked_pkgs$pkgname <- make_link(linked_pkgs$pkgname)
    tfile <- tempfile("pkgs_with_datasets_", fileext = ".html")
    with_output_sink(
      tfile,
      # escape = FALSE so the anchors are emitted as HTML, not as text.
      print(kable(linked_pkgs, "html", escape = FALSE))
    )
    browseURL(tfile)
    invisible(pkgs_with_datasets)
  } else {
    pkgs_with_datasets
  }
}

# Build the URL of the "available packages by name" page for a CRAN instance.
#
# Falls back to the master CRAN when `repos` is NULL, zero-length, NA, empty,
# or the "@CRAN@" placeholder. Only the first element of `repos` is used, and
# trailing slashes are stripped so the joined URL has no "//".
cran_index_url <- function(repos) {
  repos <- as.character(repos) # NULL -> character(0); also drops names
  if (length(repos) == 0L || is.na(repos[[1L]]) ||
      repos[[1L]] %in% c("", "@CRAN@")) {
    base_url <- "https://cran.r-project.org"
  } else {
    base_url <- sub("/+$", "", repos[[1L]])
  }
  file.path(base_url, "web/packages/available_packages_by_name.html")
}

# Wrap package names in HTML anchors pointing at their CRAN pages.
#
# `pkg` is a character vector of package names; the result has the same
# length, each element being an <a> tag whose text is the package name.
make_link <- function(pkg) {
  sprintf('<a href="https://CRAN.R-project.org/package=%s">%s</a>', pkg, pkg)
}