├── .github ├── .gitignore └── workflows │ ├── pkgdown.yaml │ └── R-CMD-check.yaml ├── vignettes ├── .gitignore ├── articles │ ├── figure │ │ ├── tol-assessment-bars-1.png │ │ └── tol-assessment-tree-1.png │ ├── precompile.R │ ├── retrieve-all-query-results.Rmd │ ├── building-checklist.Rmd │ └── mapping-assessed-species.Rmd ├── KNMS.Rmd ├── POWO.Rmd ├── IPNI.Rmd ├── KRS.Rmd ├── WCVP.Rmd └── ToL.Rmd ├── _pkgdown.yml ├── LICENSE ├── .gitignore ├── R ├── reexports.R ├── utils.R ├── data.R ├── krs.R ├── request-next.R ├── knms.R ├── format.R ├── tidy.R ├── query-formatting.R ├── ipni.R ├── requests.R ├── powo.R ├── kew_citation.R ├── print.R ├── wcvp.R └── tol.R ├── tests ├── testthat.R └── testthat │ ├── test-requests.R │ ├── test-krs.R │ ├── test-knms.R │ ├── test-query-formatting.R │ ├── test-request-next.R │ ├── test-tol.R │ ├── test-ipni.R │ ├── test-wcvp.R │ └── test-powo.R ├── data ├── danish_plants.rda └── angiosperm_assessments.rda ├── .Rbuildignore ├── man ├── reexports.Rd ├── get_filters.Rd ├── get_keywords.Rd ├── danish_plants.Rd ├── angiosperm_assessments.Rd ├── request_next.Rd ├── kew_citation.Rd ├── match_krs.Rd ├── download_wcvp.Rd ├── lookup_ipni.Rd ├── match_knms.Rd ├── lookup_powo.Rd ├── lookup_wcvp.Rd ├── load_tol.Rd ├── download_tol.Rd ├── lookup_tol.Rd ├── search_ipni.Rd ├── search_powo.Rd ├── search_tol.Rd └── search_wcvp.Rd ├── kewr.Rproj ├── data-raw ├── angiosperm_assessments.R └── danish_plants.R ├── DESCRIPTION ├── LICENSE.md ├── NAMESPACE ├── README.Rmd └── README.md /.github/.gitignore: -------------------------------------------------------------------------------- 1 | *.html 2 | -------------------------------------------------------------------------------- /vignettes/.gitignore: -------------------------------------------------------------------------------- 1 | *.html 2 | *.R 3 | -------------------------------------------------------------------------------- /_pkgdown.yml: 
-------------------------------------------------------------------------------- 1 | url: https://github.com/barnabywalker/kewr/ -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | YEAR: 2020 2 | COPYRIGHT HOLDER: Barnaby Walker 3 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .Rproj.user 2 | .Rhistory 3 | .RData 4 | inst/doc 5 | *.zip 6 | docs 7 | -------------------------------------------------------------------------------- /R/reexports.R: -------------------------------------------------------------------------------- 1 | #' @importFrom generics tidy 2 | #' @export 3 | generics::tidy 4 | -------------------------------------------------------------------------------- /tests/testthat.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(kewr) 3 | 4 | test_check("kewr") 5 | -------------------------------------------------------------------------------- /data/danish_plants.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/barnabywalker/kewr/HEAD/data/danish_plants.rda -------------------------------------------------------------------------------- /data/angiosperm_assessments.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/barnabywalker/kewr/HEAD/data/angiosperm_assessments.rda -------------------------------------------------------------------------------- /vignettes/articles/figure/tol-assessment-bars-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/barnabywalker/kewr/HEAD/vignettes/articles/figure/tol-assessment-bars-1.png 
-------------------------------------------------------------------------------- /vignettes/articles/figure/tol-assessment-tree-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/barnabywalker/kewr/HEAD/vignettes/articles/figure/tol-assessment-tree-1.png -------------------------------------------------------------------------------- /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^kewr\.Rproj$ 2 | ^\.Rproj\.user$ 3 | ^LICENSE\.md$ 4 | ^README\.Rmd$ 5 | ^\.github$ 6 | ^data-raw$ 7 | ^_pkgdown\.yml$ 8 | ^docs$ 9 | ^pkgdown$ 10 | ^vignettes/articles$ 11 | -------------------------------------------------------------------------------- /tests/testthat/test-requests.R: -------------------------------------------------------------------------------- 1 | test_that("raises 404 error for bad URL", { 2 | expect_error(make_request_("NOT A VALID URL")) 3 | }) 4 | 5 | test_that("raises error for non-json response", { 6 | url <- "https://www.wcvp.science.kew.org" 7 | expect_error(make_request_(url)) 8 | }) 9 | -------------------------------------------------------------------------------- /man/reexports.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/reexports.R 3 | \docType{import} 4 | \name{reexports} 5 | \alias{reexports} 6 | \alias{tidy} 7 | \title{Objects exported from other packages} 8 | \keyword{internal} 9 | \description{ 10 | These objects are imported from other packages. Follow the links 11 | below to see their documentation. 
12 | 13 | \describe{ 14 | \item{generics}{\code{\link[generics]{tidy}}} 15 | }} 16 | 17 | -------------------------------------------------------------------------------- /kewr.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: No 4 | SaveWorkspace: No 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | 15 | AutoAppendNewline: Yes 16 | StripTrailingWhitespace: Yes 17 | LineEndingConversion: Posix 18 | 19 | BuildType: Package 20 | PackageUseDevtools: Yes 21 | PackageInstallArgs: --no-multiarch --with-keep.source 22 | PackageRoxygenize: rd,collate,namespace 23 | -------------------------------------------------------------------------------- /man/get_filters.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils.R 3 | \name{get_filters} 4 | \alias{get_filters} 5 | \title{Get valid filters for a resource.} 6 | \usage{ 7 | get_filters(resource = c("wcvp", "powo", "ipni", "tol")) 8 | } 9 | \arguments{ 10 | \item{resource}{The resource being queried.} 11 | } 12 | \value{ 13 | A character vector of valid filters. 14 | } 15 | \description{ 16 | Get valid filters for a resource. 
17 | } 18 | \examples{ 19 | get_filters("wcvp") 20 | get_filters("powo") 21 | get_filters("ipni") 22 | 23 | } 24 | -------------------------------------------------------------------------------- /man/get_keywords.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils.R 3 | \name{get_keywords} 4 | \alias{get_keywords} 5 | \title{Get valid keywords for a resource.} 6 | \usage{ 7 | get_keywords(resource = c("wcvp", "powo", "ipni", "tol")) 8 | } 9 | \arguments{ 10 | \item{resource}{The resource being queried.} 11 | } 12 | \value{ 13 | A character vector of valid keywords. 14 | } 15 | \description{ 16 | Get valid keywords for a resource. 17 | } 18 | \examples{ 19 | get_keywords("wcvp") 20 | get_keywords("powo") 21 | get_keywords("ipni") 22 | 23 | } 24 | -------------------------------------------------------------------------------- /vignettes/articles/precompile.R: -------------------------------------------------------------------------------- 1 | #' precompile articles that take a long time to run 2 | library(knitr) 3 | library(here) 4 | 5 | # remove the figures folder for regeneration 6 | if (dir.exists(here("vignettes/articles/figure"))) { 7 | unlink(here("vignettes/articles/figure"), recursive=TRUE) 8 | } 9 | 10 | # Conservation status on the Tree of Life 11 | knit("vignettes/articles/conservation-status-treeoflife.Rmd.orig", 12 | "vignettes/articles/conservation-status-treeoflife.Rmd") 13 | 14 | # move any figures that have been created to the articles folder 15 | file.rename(here("figure"), here("vignettes/articles/figure")) 16 | -------------------------------------------------------------------------------- /tests/testthat/test-krs.R: -------------------------------------------------------------------------------- 1 | test_that("match URL returns status 200", { 2 | url <- krs_url_() 3 | response <- httr::RETRY("GET", url, times=3) 4 | 5 | 
expect_equal(httr::status_code(response), 200) 6 | }) 7 | 8 | test_that("match URL response is json", { 9 | url <- krs_url_() 10 | 11 | response <- httr::RETRY("GET", url, times=3) 12 | 13 | expect_equal(httr::http_type(response), "application/json") 14 | }) 15 | 16 | test_that("raises error for unimplemented keyword", { 17 | query <- list(published="1920") 18 | expect_error(match_krs(query), "Query keyword.+ not recognised") 19 | }) 20 | 21 | test_that("tidy match results returns tibble", { 22 | results <- match_krs("Poa annua") 23 | tidied <- tidy(results) 24 | 25 | expect_s3_class(tidied, "tbl_df") 26 | }) 27 | 28 | -------------------------------------------------------------------------------- /data-raw/angiosperm_assessments.R: -------------------------------------------------------------------------------- 1 | # libraries ---- 2 | library(rredlist) # make requests to IUCN Red List 3 | library(dplyr) # manipulate data 4 | library(purrr) # map functions over lists 5 | 6 | # set key ---- 7 | key <- Sys.getenv("IUCN_REDLIST_KEY") 8 | 9 | # download all assessments ---- 10 | all_assessments <- rl_sp(all=TRUE, key=key) 11 | all_assessments <- map_dfr(all_assessments, ~.x$result) 12 | 13 | # filter for just angiosperms ---- 14 | angiosperm_assessments <- 15 | all_assessments %>% 16 | filter(class_name %in% c("MAGNOLIOPSIDA", "LILIOPSIDA")) %>% 17 | select(taxonid, scientific_name, taxonomic_authority, category) 18 | 19 | # convert to tibble for ease 20 | angiosperm_assessments <- as_tibble(angiosperm_assessments) 21 | 22 | usethis::use_data(angiosperm_assessments, overwrite = TRUE) 23 | -------------------------------------------------------------------------------- /R/utils.R: -------------------------------------------------------------------------------- 1 | #' Get valid filters for a resource. 2 | #' 3 | #' @param resource The resource being queried. 4 | #' 5 | #' @return A character vector of valid filters.
6 | #' 7 | #' @examples 8 | #' get_filters("wcvp") 9 | #' get_filters("powo") 10 | #' get_filters("ipni") 11 | #' 12 | #' @export 13 | get_filters <- function(resource=c("wcvp", "powo", "ipni", "tol")) { 14 | resource <- match.arg(resource) 15 | 16 | filters <- get_filters_(resource) 17 | names(filters) 18 | } 19 | 20 | #' Get valid keywords for a resource. 21 | #' 22 | #' @param resource The resource being queried. 23 | #' 24 | #' @return A character vector of valid keywords. 25 | #' 26 | #' @examples 27 | #' get_keywords("wcvp") 28 | #' get_keywords("powo") 29 | #' get_keywords("ipni") 30 | #' 31 | #' @export 32 | get_keywords <- function(resource=c("wcvp", "powo", "ipni", "tol")) { 33 | resource <- match.arg(resource) 34 | 35 | keywords <- get_keywords_(resource) 36 | names(keywords) 37 | } 38 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: kewr 2 | Title: R Package to Access Kew Data APIs 3 | Version: 0.6.1 4 | Authors@R: 5 | person(given = "Barnaby", 6 | family = "Walker", 7 | role = c("aut", "cre"), 8 | email = "barnabywalker08@gmail.com", 9 | comment = c(ORCID = "0000-0002-3884-671X")) 10 | Description: An R package to access data from RBG Kew’s APIs.
11 | License: MIT + file LICENSE 12 | Encoding: UTF-8 13 | LazyData: true 14 | Roxygen: list(markdown = TRUE) 15 | RoxygenNote: 7.2.0 16 | Suggests: 17 | testthat, 18 | knitr, 19 | rmarkdown, 20 | ggplot2, 21 | progress 22 | URL: https://barnabywalker.github.io/kewr/, https://github.com/barnabywalker/kewr/ 23 | BugReports: https://github.com/barnabywalker/kewr/issues 24 | Imports: 25 | httr, 26 | jsonlite, 27 | glue, 28 | tibble, 29 | purrr, 30 | dplyr, 31 | stringr, 32 | rvest, 33 | here, 34 | tidyr, 35 | rlang, 36 | generics 37 | VignetteBuilder: knitr 38 | Depends: 39 | R (>= 2.10) 40 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | Copyright (c) 2020 Barnaby Walker 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /man/danish_plants.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data.R 3 | \docType{data} 4 | \name{danish_plants} 5 | \alias{danish_plants} 6 | \title{Extinction risk assessments for Danish plants.} 7 | \format{ 8 | A data frame with 361 rows and 4 variables: 9 | \describe{ 10 | \item{taxonid}{IUCN Red List unique ID for the taxon} 11 | \item{scientific_name}{The scientific name of the taxon} 12 | \item{authority}{The taxonomic authority of the taxon name} 13 | \item{category}{The IUCN Red List assessment category} 14 | } 15 | } 16 | \source{ 17 | \url{https://www.iucnredlist.org/} 18 | } 19 | \usage{ 20 | danish_plants 21 | } 22 | \description{ 23 | A dataset containing global extinction risk assessments for 24 | plants found in Denmark. Sourced from the IUCN Red List of 25 | Threatened Plants using the \code{rredlist} package. 26 | } 27 | \references{ 28 | Scott Chamberlain (2020). rredlist: 'IUCN' Red List Client. 29 | R package version 0.7.0. https://CRAN.R-project.org/package=rredlist 30 | 31 | IUCN 2021. The IUCN Red List of Threatened Species. Version 2020-3.
32 | \url{https://www.iucnredlist.org} 33 | } 34 | \keyword{datasets} 35 | -------------------------------------------------------------------------------- /data-raw/danish_plants.R: -------------------------------------------------------------------------------- 1 | # libraries ---- 2 | library(rredlist) # make requests to IUCN Red List 3 | library(dplyr) # manipulate data 4 | library(purrr) # map functions over lists 5 | 6 | # set key ---- 7 | key <- Sys.getenv("IUCN_REDLIST_KEY") 8 | 9 | # download danish assessments ---- 10 | iso_code <- "DK" 11 | assessments <- rl_sp_country(iso_code, key=key) 12 | 13 | # download full assessment info for each taxon ---- 14 | taxa_idx <- unique(assessments$result$taxonid) 15 | 16 | # wrap the search function to add a wait, so IUCN don't get upset 17 | f <- function(taxonid) { 18 | Sys.sleep(0.1) 19 | rl_search(id=taxonid, key=key) 20 | } 21 | 22 | # might still need a few retries to get everything without an error 23 | full_assessments <- map(taxa_idx, f) 24 | full_assessments <- map_dfr(full_assessments, ~.x$result) 25 | 26 | # narrow down to vascular plants ---- 27 | danish_plants <- 28 | full_assessments %>% 29 | filter(phylum == "TRACHEOPHYTA") %>% 30 | select(taxonid, scientific_name, authority, category) 31 | 32 | # convert to a tibble for ease 33 | danish_plants <- as_tibble(danish_plants) 34 | 35 | # save to data folder ---- 36 | usethis::use_data(danish_plants, overwrite = TRUE) 37 | -------------------------------------------------------------------------------- /man/angiosperm_assessments.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data.R 3 | \docType{data} 4 | \name{angiosperm_assessments} 5 | \alias{angiosperm_assessments} 6 | \title{Extinction risk assessments for angiosperms.} 7 | \format{ 8 | A data frame with 53,542 rows and 4 variables: 9 | \describe{ 10 | \item{taxonid}{IUCN Red List 
unique ID for the taxon} 11 | \item{scientific_name}{The scientific name of the taxon} 12 | \item{taxonomic_authority}{The taxonomic authority of the taxon name} 13 | \item{category}{The IUCN Red List assessment category} 14 | } 15 | } 16 | \source{ 17 | \url{https://www.iucnredlist.org/} 18 | } 19 | \usage{ 20 | angiosperm_assessments 21 | } 22 | \description{ 23 | A dataset containing global extinction risk assessments for 24 | all assessed angiosperm species. Sourced from the IUCN Red List of 25 | Threatened Plants using the \code{rredlist} package. Last updated with 26 | version 2021-1 of the IUCN Red List. 27 | } 28 | \references{ 29 | Scott Chamberlain (2020). rredlist: 'IUCN' Red List Client. 30 | R package version 0.7.0. https://CRAN.R-project.org/package=rredlist 31 | 32 | IUCN 2021. The IUCN Red List of Threatened Species. Version 2020-3. 33 | \url{https://www.iucnredlist.org} 34 | } 35 | \keyword{datasets} 36 | -------------------------------------------------------------------------------- /tests/testthat/test-knms.R: -------------------------------------------------------------------------------- 1 | test_that("GET request to KNMS returns 405 error", { 2 | url <- knms_url_() 3 | response <- httr::GET(url) 4 | 5 | expect_equal(httr::status_code(response), 405) 6 | }) 7 | 8 | test_that("POST request to KNMS returns 200", { 9 | url <- knms_url_() 10 | response <- httr::POST(url, body=list(""), encode="json") 11 | 12 | expect_equal(httr::status_code(response), 200) 13 | }) 14 | 15 | test_that("POST request to KNMS returns a json", { 16 | url <- knms_url_() 17 | response <- httr::POST(url, body=list(""), encode="json") 18 | 19 | expect_equal(httr::http_type(response), "application/json") 20 | }) 21 | 22 | test_that("Raises error if missing value in names to match", { 23 | names <- c("Poa annua", NA_character_, NA_character_, "Myrcia almasensis") 24 | 25 | expect_error(match_knms(names), regexp="NA is present") 26 | }) 27 | 28 | test_that("Line parsing 
returns a tibble", { 29 | names <- c("Bad plant") 30 | matches <- match_knms(names) 31 | parsed <- parse_knms_line(matches$results[[1]]) 32 | 33 | expect_s3_class(parsed, "tbl_df") 34 | }) 35 | 36 | test_that("Match tidying returns a tibble", { 37 | names <- c("Bad plant", "Poa annua", "Myrcia guianensis") 38 | matches <- match_knms(names) 39 | tidied <- tidy(matches) 40 | 41 | expect_s3_class(tidied, "tbl_df") 42 | }) 43 | -------------------------------------------------------------------------------- /man/request_next.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/request-next.R 3 | \name{request_next} 4 | \alias{request_next} 5 | \alias{request_next.wcvp_search} 6 | \alias{request_next.powo_search} 7 | \alias{request_next.ipni_search} 8 | \alias{request_next.tol_search} 9 | \title{Request the next page of search results for a kewr results object.} 10 | \usage{ 11 | request_next(object) 12 | 13 | \method{request_next}{wcvp_search}(object) 14 | 15 | \method{request_next}{powo_search}(object) 16 | 17 | \method{request_next}{ipni_search}(object) 18 | 19 | \method{request_next}{tol_search}(object) 20 | } 21 | \arguments{ 22 | \item{object}{A kewr search results object.} 23 | } 24 | \value{ 25 | The next page of results as a kewr search object of 26 | the same type. 27 | } 28 | \description{ 29 | Request the next page of search results for a kewr results object. 30 | } 31 | \section{Methods (by class)}{ 32 | \itemize{ 33 | \item \code{wcvp_search}: Request the next page of WCVP search results. 34 | 35 | \item \code{powo_search}: Request the next page of POWO search results. 36 | 37 | \item \code{ipni_search}: Request the next page of IPNI search results. 38 | 39 | \item \code{tol_search}: Request the next page of ToL search results. 
40 | }} 41 | 42 | \examples{ 43 | r <- search_wcvp("Poa") 44 | request_next(r) 45 | 46 | 47 | \donttest{ 48 | r <- search_powo("Poa") 49 | request_next(r) 50 | } 51 | 52 | r <- search_ipni("Poa") 53 | request_next(r) 54 | 55 | r <- search_tol("Poa") 56 | request_next(r) 57 | 58 | } 59 | -------------------------------------------------------------------------------- /tests/testthat/test-query-formatting.R: -------------------------------------------------------------------------------- 1 | test_that("error for unimplemented WCVP filters", { 2 | filters <- c("accepted", "has_image", "monkey") 3 | 4 | expect_error(format_filters_(filters, "wcvp"), 5 | ".+\\[has_image,monkey\\] are not recognised.") 6 | }) 7 | 8 | test_that("error for unimplemented POWO filters", { 9 | filters <- c("accepted", "author", "monkey") 10 | 11 | expect_error(format_filters_(filters, "powo"), 12 | ".+\\[author,monkey\\] are not recognised.") 13 | }) 14 | 15 | test_that("error for unimplemented IPNI filters", { 16 | filters <- c("infrafamilies", "author", "monkey") 17 | 18 | expect_error(format_filters_(filters, "ipni"), 19 | ".+\\[author,monkey\\] are not recognised.") 20 | }) 21 | 22 | test_that("error for unimplemented WCVP keywords", { 23 | query <- list("distribution"="Mexico") 24 | 25 | expect_error(format_query_(query, "wcvp"), 26 | ".+\\[distribution\\] are not recognised") 27 | }) 28 | 29 | test_that("error for unimplemented IPNI keywords", { 30 | query <- list("common_name"="daisy") 31 | 32 | expect_error(format_query_(query, "ipni"), 33 | ".+\\[common_name\\] are not recognised") 34 | }) 35 | 36 | test_that("error for unimplemented POWO keywords", { 37 | query <- list("lifeform"="epiphyte") 38 | 39 | expect_error(format_query_(query, "powo"), 40 | ".+\\[lifeform\\] are not recognised") 41 | }) 42 | 43 | test_that("error for unrecognised resource", { 44 | filters <- c("accepted") 45 | 46 | expect_error(format_filters_(filters, "google")) 47 | }) 48 |
-------------------------------------------------------------------------------- /.github/workflows/pkgdown.yaml: -------------------------------------------------------------------------------- 1 | on: 2 | push: 3 | branches: 4 | - main 5 | - master 6 | 7 | name: pkgdown 8 | 9 | jobs: 10 | pkgdown: 11 | runs-on: macOS-latest 12 | env: 13 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 14 | steps: 15 | - uses: actions/checkout@v2 16 | 17 | - uses: r-lib/actions/setup-r@v1 18 | 19 | - uses: r-lib/actions/setup-pandoc@v1 20 | 21 | - name: Query dependencies 22 | run: | 23 | install.packages('remotes') 24 | saveRDS(remotes::dev_package_deps(dependencies = TRUE), ".github/depends.Rds", version = 2) 25 | writeLines(sprintf("R-%i.%i", getRversion()$major, getRversion()$minor), ".github/R-version") 26 | shell: Rscript {0} 27 | 28 | - name: Cache R packages 29 | uses: actions/cache@v2 30 | with: 31 | path: ${{ env.R_LIBS_USER }} 32 | key: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1-${{ hashFiles('.github/depends.Rds') }} 33 | restore-keys: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1- 34 | 35 | - name: Install dependencies 36 | run: | 37 | remotes::install_deps(dependencies = TRUE) 38 | install.packages("pkgdown", type = "binary") 39 | shell: Rscript {0} 40 | 41 | - name: Install package 42 | run: R CMD INSTALL . 
43 | 44 | - name: Deploy package 45 | run: | 46 | git config --local user.email "actions@github.com" 47 | git config --local user.name "GitHub Actions" 48 | Rscript -e 'pkgdown::deploy_to_branch(new_process = FALSE)' 49 | -------------------------------------------------------------------------------- /man/kew_citation.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/kew_citation.R 3 | \name{kew_citation} 4 | \alias{kew_citation} 5 | \title{Get citation for Kew data resource.} 6 | \usage{ 7 | kew_citation(x) 8 | } 9 | \arguments{ 10 | \item{x}{Result of a call to \code{\link[=search_powo]{search_powo()}}, \code{\link[=lookup_powo]{lookup_powo()}}, \code{\link[=search_wcvp]{search_wcvp()}}, 11 | \code{\link[=lookup_wcvp]{lookup_wcvp()}}, \code{\link[=search_ipni]{search_ipni()}}, \code{\link[=lookup_ipni]{lookup_ipni()}}, \code{\link[=search_tol]{search_tol()}}, \code{\link[=load_tol]{load_tol()}}, 12 | \code{\link[=match_knms]{match_knms()}}, or \code{\link[=match_krs]{match_krs()}}} 13 | } 14 | \value{ 15 | A citation object with a print method for nice display. 16 | } 17 | \description{ 18 | Given the result of a query to a Kew data resource, get the appropriate 19 | citation. 
20 | } 21 | \examples{ 22 | r <- search_powo(list(characteristic="poison")) 23 | kew_citation(r) 24 | 25 | r <- lookup_powo("320035-2") 26 | kew_citation(r) 27 | 28 | r <- search_wcvp(list(genus="Poa")) 29 | kew_citation(r) 30 | 31 | r <- lookup_wcvp("320035-2") 32 | kew_citation(r) 33 | 34 | r <- search_ipni(list(publishing_author="L.")) 35 | kew_citation(r) 36 | 37 | r <- lookup_ipni("12653-1") 38 | kew_citation(r) 39 | 40 | r <- search_tol("Poa") 41 | kew_citation(r) 42 | 43 | r <- lookup_tol(2717) 44 | kew_citation(r) 45 | 46 | tree <- load_tol() 47 | kew_citation(tree) 48 | 49 | match <- match_knms("Poa annua") 50 | kew_citation(match) 51 | 52 | match <- match_krs("Poa annua") 53 | kew_citation(match) 54 | 55 | 56 | } 57 | -------------------------------------------------------------------------------- /man/match_krs.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/krs.R 3 | \name{match_krs} 4 | \alias{match_krs} 5 | \title{Match a name using KRS.} 6 | \usage{ 7 | match_krs(query, .wait = 0.2, .retries = 3) 8 | } 9 | \arguments{ 10 | \item{query}{The name to match using the reconciliation service. Use a named list to 11 | match parts of a name.} 12 | 13 | \item{.wait}{Time to wait before making a request, to help 14 | rate limiting.} 15 | 16 | \item{.retries}{The max number of times to retry the request to KRS. KRS seems 17 | to fail every other request, so adding a small number of retries helps prevent 18 | unnecessary failure.} 19 | } 20 | \value{ 21 | Returns an object of class \code{krs_match} that is a simple 22 | structure with slots for: 23 | \itemize{ 24 | \item \code{results}: the query results parsed into a list. 25 | \item \code{response}: the \link[httr:response]{httr response object}. 26 | } 27 | } 28 | \description{ 29 | Use the Kew Reconciliation Service to match a name against IPNI. 
30 | } 31 | \details{ 32 | The \href{http://data1.kew.org/reconciliation/about/IpniName}{Kew Reconciliation Service (KRS)} 33 | allows name matching against IPNI using an Open Refine reconciliation API. 34 | } 35 | \examples{ 36 | # Match a name. 37 | match_krs("Solanum sanchez-vegae") 38 | 39 | # Match a name using name parts 40 | match_krs(list(genus="Solanum", species="sanchez-vegae", author="Knapp")) 41 | 42 | # Format a returned match as a dataframe 43 | match <- match_krs(list(genus="Solanum", species="sanchez-vegae", author="Knapp")) 44 | tidy(match) 45 | 46 | } 47 | \seealso{ 48 | \itemize{ 49 | \item \code{\link[=match_knms]{match_knms()}} to use simple matching for a vector of names. 50 | } 51 | } 52 | -------------------------------------------------------------------------------- /man/download_wcvp.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/wcvp.R 3 | \name{download_wcvp} 4 | \alias{download_wcvp} 5 | \title{Download the whole of the WCVP.} 6 | \usage{ 7 | download_wcvp(save_dir = NULL, version = NULL) 8 | } 9 | \arguments{ 10 | \item{save_dir}{A string specifying the folder to save the download in. If 11 | no value is provided, \link[here]{here} will be used.} 12 | 13 | \item{version}{An integer version number to download. The latest 14 | version will be downloaded by default.} 15 | } 16 | \description{ 17 | Download the latest or a specific version of the World 18 | Checklist of Vascular Plants (WCVP). 19 | } 20 | \details{ 21 | The \href{https://wcvp.science.kew.org/}{World Checklist of Vascular Plants (WCVP)} 22 | is a global consensus view of all known vascular plant species. 23 | It has been compiled by staff at RBG Kew in consultation with plant 24 | group experts. 25 | 26 | Versioned downloads of the whole WCVP are provided on the website.
27 | This function allows the user to download the latest or a specific 28 | version of the WCVP. 29 | } 30 | \examples{ 31 | \dontrun{ 32 | # download the latest version 33 | download_wcvp() 34 | 35 | # download version 1 36 | download_wcvp(version=1) 37 | } 38 | 39 | } 40 | \references{ 41 | WCVP (2020). World Checklist of Vascular Plants, version 2.0. Facilitated by the Royal Botanic Gardens, Kew. Published on the Internet; http://wcvp.science.kew.org/ 42 | } 43 | \seealso{ 44 | \itemize{ 45 | \item \code{\link[=lookup_wcvp]{lookup_wcvp()}} to look up information about a taxon name 46 | using a valid IPNI ID. 47 | \item \code{\link[=search_wcvp]{search_wcvp()}} to search WCVP using a taxon name. 48 | } 49 | 50 | Other WCVP functions: 51 | \code{\link{lookup_wcvp}()}, 52 | \code{\link{search_wcvp}()} 53 | } 54 | \concept{WCVP functions} 55 | -------------------------------------------------------------------------------- /man/lookup_ipni.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/ipni.R 3 | \name{lookup_ipni} 4 | \alias{lookup_ipni} 5 | \title{Look up a name in IPNI.} 6 | \usage{ 7 | lookup_ipni(id, type = c("taxon", "author", "publication"), .wait = 0.1) 8 | } 9 | \arguments{ 10 | \item{id}{A string containing a valid IPNI ID.} 11 | 12 | \item{type}{The type of record to look up. Either \code{taxon}, \code{author}, or \code{publication}.} 13 | 14 | \item{.wait}{Time to wait before making a request, to help 15 | rate limiting.} 16 | } 17 | \value{ 18 | An \code{ipni_taxon} object, which is a simple structure with fields 19 | for each of the fields returned by the lookup API, as well as the \link[httr:response]{httr response object}. 20 | } 21 | \description{ 22 | Request the record for a taxon, author, or publication name in IPNI, 23 | using the IPNI ID.
24 | } 25 | \details{ 26 | The \href{https://www.ipni.org/}{International Plant Names Index (IPNI)} 27 | is a service that provides nomenclatural information for vascular plant names. 28 | 29 | The name lookup API allows users to retrieve information for 30 | a specific taxon, author, or publication name using the unique IPNI ID. If this is not known, 31 | it can be found out using the \link[=search_ipni]{IPNI search API}. 32 | } 33 | \examples{ 34 | 35 | # retrieve nomenclatural information for a taxon name 36 | name <- lookup_ipni("271445-2", "taxon") 37 | print(name) 38 | 39 | # tidy the results in a table 40 | tidy(name) 41 | 42 | # retrieve nomenclatural information for an author 43 | name <- lookup_ipni("20028192-1", type="author") 44 | tidy(name) 45 | 46 | # retrieve nomenclatural information for a publication 47 | name <- lookup_ipni("987-2", type="publication") 48 | tidy(name) 49 | 50 | } 51 | \seealso{ 52 | \itemize{ 53 | \item \code{\link[=search_ipni]{search_ipni()}} to search IPNI using a taxon name. 54 | } 55 | 56 | Other IPNI functions: 57 | \code{\link{search_ipni}()} 58 | } 59 | \concept{IPNI functions} 60 | -------------------------------------------------------------------------------- /man/match_knms.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/knms.R 3 | \name{match_knms} 4 | \alias{match_knms} 5 | \title{Match names with KNMS.} 6 | \usage{ 7 | match_knms(names) 8 | } 9 | \arguments{ 10 | \item{names}{A list or character vector of taxon names for matching. 11 | Must not contain missing values.} 12 | } 13 | \value{ 14 | A \code{knms_match} object - a simple structure containing the match 15 | results and some statistics about the number of matches. 16 | } 17 | \description{ 18 | Use the Kew Names Matching Service to match taxon names to 19 | records in Plants of the World Online. 
20 | } 21 | \details{ 22 | The \href{http://namematch.science.kew.org/}{Kew Names Matching Service (KNMS)} allows 23 | a user to submit taxon names for matching against records in 24 | \href{http://powo.science.kew.org/}{Plants of the World Online (POWO)}. 25 | As far as I can tell, it uses exact matching as well as some rules-based matching 26 | to account for common orthographic variants and Latin mistakes. 27 | 28 | Names can be submitted to KNMS with or without an author string. 29 | If a name can match to multiple different records, for instance 30 | with synonyms, KNMS will return multiple matches. As such, we recommend 31 | submitting names first with the taxonomic authority and then without 32 | if no match can be found. 33 | 34 | KNMS allows multiple names to be submitted at once. However, it can 35 | be slow in returning results if too many names are submitted. For lots of names, 36 | \href{http://namematch.science.kew.org/csv}{the website provides an interface for submitting a CSV file}. 37 | } 38 | \examples{ 39 | 40 | # match a name 41 | match_knms("Poa annua L.") 42 | 43 | # match a vector of names 44 | names <- c("Myrcia guianensis", "Calyptranthes ranulphii", "Poa annua") 45 | match_knms(names) 46 | 47 | # tidy match results into a table 48 | names <- c("Myrcia guianensis", "Bad plant", "Poa annua") 49 | matches <- match_knms(names) 50 | tidy(matches) 51 | 52 | } 53 | -------------------------------------------------------------------------------- /R/data.R: -------------------------------------------------------------------------------- 1 | #' Extinction risk assessments for Danish plants. 2 | #' 3 | #' A dataset containing global extinction risk assessments for 4 | #' plants found in Denmark. Sourced from the IUCN Red List of 5 | #' Threatened Plants using the `rredlist` package.
6 | #' 7 | #' @format A data frame with 361 rows and 4 variables: 8 | #' \describe{ 9 | #' \item{taxonid}{IUCN Red List unique ID for the taxon} 10 | #' \item{scientific_name}{The scientific name of the taxon} 11 | #' \item{authority}{The taxonomic authority of the taxon name} 12 | #' \item{category}{The IUCN Red List assessment category} 13 | #' } 14 | #' 15 | #' @source \url{https://www.iucnredlist.org/} 16 | #' 17 | #' @references 18 | #' Scott Chamberlain (2020). rredlist: 'IUCN' Red List Client. 19 | #' R package version 0.7.0. https://CRAN.R-project.org/package=rredlist 20 | #' 21 | #' IUCN 2021. The IUCN Red List of Threatened Species. Version 2020-3. 22 | #' 23 | "danish_plants" 24 | 25 | #' Extinction risk assessments for angiosperms. 26 | #' 27 | #' A dataset containing global extinction risk assessments for 28 | #' all assessed angiosperm species. Sourced from the IUCN Red List of 29 | #' Threatened Plants using the `rredlist` package. Last updated with 30 | #' version 2021-1 of the IUCN Red List. 31 | #' 32 | #' @format A data frame with 53,542 rows and 4 variables: 33 | #' \describe{ 34 | #' \item{taxonid}{IUCN Red List unique ID for the taxon} 35 | #' \item{scientific_name}{The scientific name of the taxon} 36 | #' \item{taxonomic_authority}{The taxonomic authority of the taxon name} 37 | #' \item{category}{The IUCN Red List assessment category} 38 | #' } 39 | #' 40 | #' @source \url{https://www.iucnredlist.org/} 41 | #' 42 | #' @references 43 | #' Scott Chamberlain (2020). rredlist: 'IUCN' Red List Client. 44 | #' R package version 0.7.0. https://CRAN.R-project.org/package=rredlist 45 | #' 46 | #' IUCN 2021. The IUCN Red List of Threatened Species. Version 2020-3. 
47 | #' 48 | "angiosperm_assessments" 49 | 50 | -------------------------------------------------------------------------------- /man/lookup_powo.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/powo.R 3 | \name{lookup_powo} 4 | \alias{lookup_powo} 5 | \title{Look up a taxon in POWO.} 6 | \usage{ 7 | lookup_powo(taxonid, distribution = FALSE, .wait = 0.2) 8 | } 9 | \arguments{ 10 | \item{taxonid}{A string containing a valid IPNI ID.} 11 | 12 | \item{distribution}{Include distribution in results (default \code{FALSE}).} 13 | 14 | \item{.wait}{Time to wait before making a request, to help 15 | rate limiting.} 16 | } 17 | \value{ 18 | A \code{powo_taxon} object, which is a simple structure with fields 19 | for each of the fields returned by the lookup API, as well as the \link[httr:response]{httr response object}. 20 | } 21 | \description{ 22 | Request the record for a taxon in Plants of the World Online (POWO) 23 | using the IPNI ID. 24 | } 25 | \details{ 26 | \href{http://www.plantsoftheworldonline.org/}{Plants of the World Online (POWO)} 27 | is a database of information on the world's flora. It curates information from 28 | published floras and other sources of floristic information. 29 | 30 | The taxon lookup API allows users to retrieve information about 31 | a specific taxon name using the unique IPNI ID. If this is not known, 32 | it can be found out using the \link[=search_powo]{POWO search API}.
33 | } 34 | \examples{ 35 | # retrieve information for a taxon name 36 | lookup_powo("271445-2") 37 | 38 | # print a summary of the returned information 39 | r <- lookup_powo("271445-2") 40 | print(r) 41 | 42 | # tidy returned record into a tibble 43 | r <- lookup_powo("271445-2") 44 | tidy(r) 45 | 46 | # tidy the returned list of synonyms into a tibble 47 | r <- lookup_wcvp("60447743-2") 48 | tidied <- tidy(r) 49 | tidyr::unnest(tidied, cols=synonyms, names_sep="_") 50 | 51 | # tidy the returned list of children into a tibble 52 | r <- lookup_wcvp("30000055-2") 53 | tidied <- tidy(r) 54 | tidyr::unnest(tidied, cols=children, names_sep="_") 55 | 56 | } 57 | \seealso{ 58 | \itemize{ 59 | \item \code{\link[=search_powo]{search_powo()}} to search POWO using a taxon name. 60 | } 61 | 62 | Other POWO functions: 63 | \code{\link{search_powo}()} 64 | } 65 | \concept{POWO functions} 66 | -------------------------------------------------------------------------------- /R/krs.R: -------------------------------------------------------------------------------- 1 | #' Match a name using KRS. 2 | #' 3 | #' Use the Kew Reconciliation Service to match a name against IPNI. 4 | #' 5 | #' The [Kew Reconciliation Service (KRS)](http://data1.kew.org/reconciliation/about/IpniName) 6 | #' allows name matching against IPNI using an Open Refine reconciliation API. 7 | #' 8 | #' @param query The name to match using the reconciliation service. Use a named list to 9 | #' match parts of a name. 10 | #' @param .wait Time to wait before making a request, to help 11 | #' rate limiting. 12 | #' @param .retries The max number of times to retry the request to KRS. KRS seems 13 | #' to fail every other request, so adding a small number of retries helps prevent 14 | #' unnecessary failure. 15 | #' 16 | #' @return 17 | #' Returns an object of class `krs_match` that is a simple 18 | #' structure with slots for: 19 | #' 20 | #' * `results`: the query results parsed into a list.
#' * `response`: the [httr response object][httr::response].
#' * `matches`: the number of results returned for the query.
#' * `query`: the query exactly as it was supplied by the caller.
#'
#' @examples
#' # Match a name.
#' match_krs("Solanum sanchez-vegae")
#'
#' # Match a name using name parts
#' match_krs(list(genus="Solanum", species="sanchez-vegae", author="Knapp"))
#'
#' # Format a returned match as a dataframe
#' match <- match_krs(list(genus="Solanum", species="sanchez-vegae", author="Knapp"))
#' tidy(match)
#'
#' @seealso
#' * [match_knms()] to use simple matching for a vector of names.
#'
#' @importFrom jsonlite toJSON
#'
#' @export
match_krs <- function(query, .wait=0.2, .retries=3) {
  endpoint <- krs_url_()

  # the caller's query is left untouched so it can be echoed back in the
  # result; only this formatted copy is sent to the service
  refine_query <- format_refine_query_(query, "krs")

  reply <- make_request_(endpoint, refine_query, .wait=.wait, .retries=.retries)
  hits <- reply$content$result

  match <- list(
    matches=length(hits),
    results=hits,
    query=query,
    response=reply$response
  )
  class(match) <- "krs_match"

  match
}

#' Make the KRS URL.
62 | #' 63 | #' @noRd 64 | krs_url_ <- function() { 65 | get_url_("krs") 66 | } 67 | -------------------------------------------------------------------------------- /tests/testthat/test-request-next.R: -------------------------------------------------------------------------------- 1 | test_that("method exists for WCVP search results", { 2 | method_list <- methods(class="wcvp_search") 3 | method_list <- as.list(method_list) 4 | 5 | expect_true("request_next.wcvp_search" %in% method_list) 6 | }) 7 | 8 | test_that("method exists for POWO search results", { 9 | method_list <- methods(class="powo_search") 10 | method_list <- as.list(method_list) 11 | 12 | expect_true("request_next.powo_search" %in% method_list) 13 | }) 14 | 15 | test_that("method exists for IPNI search results", { 16 | method_list <- methods(class="ipni_search") 17 | method_list <- as.list(method_list) 18 | 19 | expect_true("request_next.ipni_search" %in% method_list) 20 | }) 21 | 22 | test_that("cursor changes for WCVP", { 23 | page1 <- search_wcvp(list(genus="Poa"), filters="accepted") 24 | page2 <- request_next(page1) 25 | 26 | expect_false(page1$cursor == page2$cursor) 27 | }) 28 | 29 | test_that("cursor changes for POWO", { 30 | page1 <- search_powo(list(genus="Poa"), filters="accepted") 31 | page2 <- request_next(page1) 32 | 33 | expect_false(page1$cursor == page2$cursor) 34 | }) 35 | 36 | test_that("cursor changes for IPNI", { 37 | page1 <- search_ipni(list(genus="Poa"), filters="species") 38 | page2 <- request_next(page1) 39 | 40 | expect_false(page1$cursor == page2$cursor) 41 | }) 42 | 43 | test_that("results change for WCVP", { 44 | page1 <- search_wcvp(list(genus="Poa"), filters="accepted") 45 | page2 <- request_next(page1) 46 | 47 | expect_false(page1$results[[1]]$id == page2$results[[1]]$id) 48 | }) 49 | 50 | test_that("results change for POWO", { 51 | page1 <- search_powo(list(genus="Poa"), filters="accepted") 52 | page2 <- request_next(page1) 53 | 54 | 
expect_false(page1$results[[1]]$fqId == page2$results[[1]]$fqId) 55 | }) 56 | 57 | test_that("results change for IPNI", { 58 | page1 <- search_ipni(list(genus="Poa"), filters="species") 59 | page2 <- request_next(page1) 60 | 61 | expect_false(page1$results[[1]]$id == page2$results[[1]]$id) 62 | }) 63 | 64 | test_that("results change for ToL", { 65 | page1 <- search_tol() 66 | page2 <- request_next(page1) 67 | 68 | expect_false(page1$results[[1]]$id == page2$results[[1]]$id) 69 | }) 70 | -------------------------------------------------------------------------------- /tests/testthat/test-tol.R: -------------------------------------------------------------------------------- 1 | test_that("search URL returns status 200", { 2 | url <- tol_search_url_() 3 | response <- httr::GET(url) 4 | 5 | expect_equal(httr::status_code(response), 200) 6 | }) 7 | 8 | test_that("search URL response is json", { 9 | url <- tol_search_url_() 10 | response <- httr::GET(url) 11 | 12 | expect_equal(httr::http_type(response), "application/json") 13 | }) 14 | 15 | test_that("search URL is for genes when asked for", { 16 | url <- tol_search_url_(type="genes") 17 | 18 | expect_true(stringr::str_detect(url, "/genes")) 19 | }) 20 | 21 | test_that("specimen URL response is json", { 22 | url <- tol_lookup_url_("2699") 23 | response <- httr::GET(url) 24 | 25 | expect_equal(httr::http_type(response), "application/json") 26 | }) 27 | 28 | test_that("gene lookup returns gene URL", { 29 | url <- tol_lookup_url_("51", type="gene") 30 | 31 | expect_true(stringr::str_detect(url, "/genes/")) 32 | }) 33 | 34 | test_that("specimen URL returns 404 for bad ID", { 35 | url <- tol_lookup_url_("plant") 36 | response <- httr::GET(url) 37 | expect_equal(status_code(response), 404) 38 | }) 39 | 40 | test_that("raises error for keyword search", { 41 | query <- list(name="Myrcia guianensis") 42 | 43 | expect_error(search_tol(query), 44 | "Keyword-based search not implemented") 45 | }) 46 | 47 | test_that("raises error 
for bad query input type", { 48 | query <- c("this", "is", "a", "bad", "query") 49 | 50 | expect_error(search_tol(query)) 51 | }) 52 | 53 | test_that("tidy search results returns tibble", { 54 | results <- search_tol("Poa annua") 55 | tidied <- tidy(results) 56 | 57 | expect_s3_class(tidied, "tbl_df") 58 | }) 59 | 60 | test_that("tidy lookup results returns tibble", { 61 | results <- lookup_tol("2699") 62 | tidied <- tidy(results) 63 | 64 | expect_s3_class(tidied, "tbl_df") 65 | }) 66 | 67 | test_that("loading fasta gives a fasta object", { 68 | url <- "http://sftp.kew.org/pub/paftol/current_release/fasta/by_gene/5328.dna.fasta" 69 | r <- load_tol(url) 70 | 71 | expect_equal(class(r), c("tol_fasta", "tol")) 72 | }) 73 | 74 | test_that("loading tree gives a tree object", { 75 | r <- load_tol() 76 | 77 | expect_equal(class(r), c("tol_tree", "tol")) 78 | }) 79 | -------------------------------------------------------------------------------- /man/lookup_wcvp.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/wcvp.R 3 | \name{lookup_wcvp} 4 | \alias{lookup_wcvp} 5 | \title{Look up a taxon in WCVP.} 6 | \usage{ 7 | lookup_wcvp(taxonid, .wait = 0.1) 8 | } 9 | \arguments{ 10 | \item{taxonid}{A string containing a valid IPNI ID.} 11 | 12 | \item{.wait}{Time to wait before making a request, to help 13 | rate limiting.} 14 | } 15 | \value{ 16 | A \code{wcvp_taxon} object, which is a simple structure with fields 17 | for each of the fields returned by the lookup API, as well as the \link[httr:response]{httr response object}. 18 | } 19 | \description{ 20 | Request the record for a taxon in the World Checklist of 21 | Vascular Plants (WCVP) using the IPNI ID. 22 | } 23 | \details{ 24 | The \href{https://wcvp.science.kew.org/}{World Checklist of Vascular Plants (WCVP)} 25 | is a global consensus view of all known vascular plant species.
26 | It has been compiled by staff at RBG Kew in consultation with plant 27 | group experts. 28 | 29 | The taxon lookup API allows users to retrieve taxonomic information for 30 | a specific taxon name using the unique IPNI ID. If this is not known, 31 | it can be found out using the \link[=search_wcvp]{WCVP search API}. 32 | } 33 | \examples{ 34 | 35 | # retrieve taxonomic information for a taxon name 36 | lookup_wcvp("271445-2") 37 | 38 | # print a summary of the returned information 39 | r <- lookup_wcvp("271445-2") 40 | print(r) 41 | 42 | # tidy into a tibble 43 | r <- lookup_wcvp("271445-2") 44 | tidy(r) 45 | 46 | # tidy the returned list of synonyms into a tibble 47 | r <- lookup_wcvp("60447743-2") 48 | tidied <- tidy(r) 49 | tidyr::unnest(tidied, cols=synonyms, names_sep="_") 50 | 51 | # expand the child entries returned for each entry 52 | r <- lookup_wcvp("30000055-2") 53 | tidied <- tidy(r) 54 | tidyr::unnest(tidied, cols=children, names_sep="_") 55 | 56 | } 57 | \references{ 58 | WCVP (2020). World Checklist of Vascular Plants, version 2.0. Facilitated by the Royal Botanic Gardens, Kew. Published on the Internet; http://wcvp.science.kew.org/ 59 | } 60 | \seealso{ 61 | \itemize{ 62 | \item \code{\link[=search_wcvp]{search_wcvp()}} to search WCVP using a taxon name. 63 | \item \code{\link[=download_wcvp]{download_wcvp()}} to download the entire WCVP. 64 | } 65 | 66 | Other WCVP functions: 67 | \code{\link{download_wcvp}()}, 68 | \code{\link{search_wcvp}()} 69 | } 70 | \concept{WCVP functions} 71 | -------------------------------------------------------------------------------- /R/request-next.R: -------------------------------------------------------------------------------- 1 | 2 | #' Request the next page of search results for a kewr results object. 3 | #' 4 | #' @param object A kewr search results object. 5 | #' 6 | #' @return The next page of results as a kewr search object of 7 | #' the same type. 
#'
#' @export
request_next <- function(object) {
  # S3 generic: dispatches on the class of the search result object
  UseMethod("request_next")
}

#' @describeIn request_next Request the next page of WCVP search results.
#'
#' @examples
#' r <- search_wcvp("Poa")
#' request_next(r)
#'
#' @export
request_next.wcvp_search <- function(object) {
  # wait time is derived from how long the previous response took
  .wait <- calculate_wait_(object)

  # the search is repeated with the cursor stored on the previous result;
  # no page number is involved for this resource
  search_wcvp(
    query=object$query,
    filters=object$filters,
    limit=object$limit,
    cursor=object$cursor,
    .wait=.wait
  )
}

#' @describeIn request_next Request the next page of POWO search results.
#'
#' @examples
#'
#' \donttest{
#' r <- search_powo("Poa")
#' request_next(r)
#' }
#'
#' @export
request_next.powo_search <- function(object) {
  # wait time is derived from how long the previous response took
  .wait <- calculate_wait_(object)

  # the search is repeated with the cursor stored on the previous result
  search_powo(
    query=object$query,
    filters=object$filters,
    limit=object$limit,
    cursor=object$cursor,
    .wait=.wait
  )
}

#' @describeIn request_next Request the next page of IPNI search results.
#'
#' @examples
#' r <- search_ipni("Poa")
#' request_next(r)
#'
#' @export
request_next.ipni_search <- function(object) {
  # wait time is derived from how long the previous response took
  .wait <- calculate_wait_(object)

  # the search is repeated with the cursor stored on the previous result
  search_ipni(
    query=object$query,
    filters=object$filters,
    limit=object$limit,
    cursor=object$cursor,
    .wait=.wait
  )
}

#' @describeIn request_next Request the next page of ToL search results.
78 | #' 79 | #' @examples 80 | #' r <- search_tol("Poa") 81 | #' request_next(r) 82 | #' 83 | #' @export 84 | request_next.tol_search <- function(object) { 85 | .wait <- calculate_wait_(object) 86 | current_page <- object$page 87 | 88 | search_tol( 89 | query=object$query, 90 | limit=object$limit, 91 | page=current_page + 1, 92 | .wait=.wait 93 | ) 94 | } 95 | 96 | calculate_wait_ <- function(object) { 97 | response_time <- object$response$times["total"] 98 | 99 | response_time / 2 100 | } 101 | -------------------------------------------------------------------------------- /man/load_tol.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/tol.R 3 | \name{load_tol} 4 | \alias{load_tol} 5 | \title{Load the Tree of Life or another file from ToL.} 6 | \usage{ 7 | load_tol(url = NULL, .wait = 0.1) 8 | } 9 | \arguments{ 10 | \item{url}{URL pointing to a file on the ToL SFTP server.} 11 | 12 | \item{.wait}{Time to wait before making a request, to help 13 | rate limiting.} 14 | } 15 | \description{ 16 | Request a tree file for the whole ToL or an alignment, 17 | sequence, or gene tree for a particular specimen or gene. 18 | } 19 | \details{ 20 | The \href{https://treeoflife.kew.org/}{Tree of Life} is a database 21 | of specimens sequenced as part of Kew's efforts to build 22 | a comprehensive evolutionary tree of life for flowering plants. 23 | 24 | Newick tree, alignment, and sequence files are held on an SFTP server 25 | for download. The URLs to access these are stored in entries for specimens 26 | and genes in the ToL database. These can be accessed by either using \code{\link[=search_tol]{search_tol()}} 27 | to get all specimens for a particular order, family, genus, or species or by 28 | looking up a specific specimen or gene using \code{\link[=lookup_tol]{lookup_tol()}}. If no URL is specified, 29 | this will load the ToL tree.
30 | } 31 | \examples{ 32 | # load the ToL 33 | load_tol() 34 | 35 | # load a specimen fasta file 36 | specimen_info <- lookup_tol("1296") 37 | load_tol(specimen_info$fasta_file_url) 38 | 39 | # load a gene alignment file 40 | gene_info <- lookup_tol("51", type="gene") 41 | load_tol(gene_info$alignment_file_url) 42 | 43 | # load the gene tree 44 | load_tol(gene_info$tree_file_url) 45 | 46 | } 47 | \references{ 48 | Baker W.J., Bailey P., Barber V., Barker A., Bellot S., Bishop D., Botigue L.R., Brewer G., Carruthers T., Clarkson J.J., Cook J., Cowan R.S., Dodsworth S., Epitawalage N., Francoso E., Gallego B., Johnson M., Kim J.T., Leempoel K., Maurin O., McGinnie C., Pokorny L., Roy S., Stone M., Toledo E., Wickett N.J., Zuntini A.R., Eiserhardt W.L., Kersey P.J., Leitch I.J. & Forest F. 2021. A Comprehensive Phylogenomic Platform for Exploring the Angiosperm Tree of Life. Systematic Biology, 2021; syab035, https://doi.org/10.1093/sysbio/syab035 49 | } 50 | \seealso{ 51 | \itemize{ 52 | \item \code{\link[=lookup_tol]{lookup_tol()}} to lookup information about a sequenced specimen 53 | using a valid ToL ID. 54 | \item \code{\link[=search_tol]{search_tol()}} to search ToL using taxonomic info. 55 | \item \code{\link[=download_tol]{download_tol()}} to save a file on the ToL SFTP server to file. 
56 | } 57 | 58 | Other ToL functions: 59 | \code{\link{download_tol}()}, 60 | \code{\link{lookup_tol}()} 61 | } 62 | \concept{ToL functions} 63 | -------------------------------------------------------------------------------- /man/download_tol.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/tol.R 3 | \name{download_tol} 4 | \alias{download_tol} 5 | \title{Download a file from the ToL SFTP server.} 6 | \usage{ 7 | download_tol(download_link = NULL, save_dir = NULL) 8 | } 9 | \arguments{ 10 | \item{download_link}{A string specifying the URL to download the file from. 11 | You can get a download URL for a particular specimen or gene using \code{\link[=lookup_tol]{lookup_tol()}}.} 12 | 13 | \item{save_dir}{A string specifying the folder to save the download in. If 14 | no value is provided, \link[here]{here} will be used.} 15 | } 16 | \description{ 17 | Download an alignment, sequence, or tree file from the ToL 18 | SFTP server. 19 | } 20 | \details{ 21 | The \href{https://treeoflife.kew.org/}{Tree of Life} is a database 22 | of specimens sequenced as part of Kew's efforts to build 23 | a comprehensive evolutionary tree of life for flowering plants. 24 | 25 | Sequence, alignment, and Newick tree files are held on an SFTP server 26 | for download. The URLs to access these are stored in entries for specimens 27 | and genes in the ToL database.
These can be accessed by either using \code{\link[=search_tol]{search_tol()}} 28 | to get all specimens for a particular order, family, genus, or species or by 29 | looking up a specific specimen or gene using \code{\link[=lookup_tol]{lookup_tol()}} 30 | } 31 | \examples{ 32 | \dontrun{ 33 | # download a specimen fasta file 34 | specimen_info <- lookup_tol("1296") 35 | download_tol(specimen_info$fasta_file_url) 36 | 37 | # download a gene alignment file 38 | gene_info <- lookup_tol("51", type="gene") 39 | download_tol(gene_info$alignment_file_url) 40 | 41 | # download the gene tree 42 | download_tol(gene_info$tree_file_url) 43 | } 44 | 45 | } 46 | \references{ 47 | Baker W.J., Bailey P., Barber V., Barker A., Bellot S., Bishop D., Botigue L.R., Brewer G., Carruthers T., Clarkson J.J., Cook J., Cowan R.S., Dodsworth S., Epitawalage N., Francoso E., Gallego B., Johnson M., Kim J.T., Leempoel K., Maurin O., McGinnie C., Pokorny L., Roy S., Stone M., Toledo E., Wickett N.J., Zuntini A.R., Eiserhardt W.L., Kersey P.J., Leitch I.J. & Forest F. 2021. A Comprehensive Phylogenomic Platform for Exploring the Angiosperm Tree of Life. Systematic Biology, 2021; syab035, https://doi.org/10.1093/sysbio/syab035 48 | } 49 | \seealso{ 50 | \itemize{ 51 | \item \code{\link[=lookup_tol]{lookup_tol()}} to lookup information about a sequenced specimen 52 | using a valid ToL ID. 53 | \item \code{\link[=search_tol]{search_tol()}} to search ToL using taxonomic info. 54 | \item \code{\link[=load_tol]{load_tol()}} load a file from the ToL SFTP server. 
55 | } 56 | 57 | Other ToL functions: 58 | \code{\link{load_tol}()}, 59 | \code{\link{lookup_tol}()} 60 | } 61 | \concept{ToL functions} 62 | -------------------------------------------------------------------------------- /vignettes/KNMS.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "KNMS" 3 | output: rmarkdown::html_vignette 4 | vignette: > 5 | %\VignetteIndexEntry{KNMS} 6 | %\VignetteEngine{knitr::rmarkdown} 7 | %\VignetteEncoding{UTF-8} 8 | --- 9 | 10 | ```{r, include = FALSE} 11 | knitr::opts_chunk$set( 12 | collapse = TRUE, 13 | comment = "#>" 14 | ) 15 | ``` 16 | 17 | The [Kew Names Matching Service (KNMS)](http://namematch.science.kew.org/) allows a user to submit taxon names for matching against records in [Plants of the World Online (POWO)](http://powo.science.kew.org/). As far as I can tell, it uses exact matching as well as some rules-based matching to account for common orthographic variants and Latin mistakes. 18 | 19 | The website provides interfaces for submitting a list of names copied into a text field, as well as through a CSV upload. I've made this package interface with the text field submission. 20 | 21 | I'm not sure if KNMS is being maintained or updated at the moment, and may point towards an old version of POWO/WCVP. Therefore, it's a good first pass for matching the bulk of names but it's worth checking through any unmatched names, as they may be in POWO or WCVP. 22 | 23 | ```{r setup} 24 | library(kewr) 25 | ``` 26 | 27 | ## Matching names 28 | 29 | To use KNMS, you just need to submit names for matching as a character vector or list. 30 | 31 | ```{r} 32 | matches <- match_knms("Poa annua") 33 | matches 34 | ``` 35 | Where a name could be matched with multiple records - for example, if the same name has been published as different concepts - KNMS will return multiple matches, as it has here. 
36 | 37 | The raw results from KNMS can be a bit difficult to interpret, so it is best to `tidy` them for inspection. 38 | 39 | ```{r} 40 | tidy(matches) 41 | ``` 42 | We can see here that `Poa annua` has matched to both *Poa annua L.* and *Poa annua Schltdl. & Cham.*. 43 | 44 | To avoid having to resolve these multiple matches, it is usually best to submit the author string as part of the name, if it is available. 45 | 46 | ```{r} 47 | matches <- match_knms("Poa annua L.") 48 | tidy(matches) 49 | ``` 50 | 51 | Multiple names can be submitted at once, as a list or character vector. 52 | 53 | ```{r} 54 | names_to_match <- c("Poa annua L.", "Myrcia guianensis", "Bulbophyllum sp.") 55 | 56 | matches <- match_knms(names_to_match) 57 | tidy(matches) 58 | ``` 59 | 60 | KNMS may not always match to the rank that you want. For instance, in the previous example *Bulbophyllum sp.* was matched to the genus name *Bulbophyllum*, rather than returning no match at all. 61 | 62 | KNMS will accept a large number of names for matching, but may hang if too many are submitted. The `match_knms` function will return a warning if you submit more than 1000 names. In these cases, it might be easier to split your list of names into chunks, or use the CSV upload on the KNMS website. 63 | -------------------------------------------------------------------------------- /R/knms.R: -------------------------------------------------------------------------------- 1 | #' Match names with KNMS. 2 | #' 3 | #' Use the Kew Names Matching Service to match taxon names to 4 | #' records in Plants of the World Online. 5 | #' 6 | #' The [Kew Names Matching Service (KNMS)](http://namematch.science.kew.org/) allows 7 | #' a user to submit taxon names for matching against records in 8 | #' [Plants of the World Online (POWO)](http://powo.science.kew.org/). 
9 | #' As far as I can tell, it uses exact matching as well as some rules-based matching 10 | #' to account for common orthographic variants and Latin mistakes. 11 | #' 12 | #' Names can be submitted to KNMS with or without an author string. 13 | #' If a name can match to multiple different records, for instance 14 | #' with synonyms, KNMS will return multiple matches. As such, we recommend 15 | #' submitting names first with the taxonomic authority and then without 16 | #' if no match can be found. 17 | #' 18 | #' KNMS allows multiple names to be submitted at once. However, it can 19 | #' be slow in returning results if too many names are submitted. For lots of names, 20 | #' [the website provides an interface for submitting a CSV file](http://namematch.science.kew.org/csv). 21 | #' 22 | #' @param names A list or character vector of taxon names for matching. 23 | #' Must not contain missing values. 24 | #' 25 | #' @return A `knms_match` object - a simple structure containing the match 26 | #' results and some statistics about the number of matches. 
#'
#' @examples
#'
#' # match a name
#' match_knms("Poa annua L.")
#'
#' # match a vector of names
#' names <- c("Myrcia guianensis", "Calyptranthes ranulphii", "Poa annua")
#' match_knms(names)
#'
#' # tidy match results into a table
#' names <- c("Myrcia guianensis", "Bad plant", "Poa annua")
#' matches <- match_knms(names)
#' tidy(matches)
#'
#' @importFrom glue glue
#'
#' @export
match_knms <- function(names) {
  # refuse to submit anything if a missing value is present, and tell the
  # user where the offending entries are
  na_idx <- which(is.na(names))
  if (length(na_idx) > 0) {
    stop(
      glue("KNMS cannot match if NA is present.",
           "You have NAs at positions {paste0(na_idx, collapse=', ')}",
           "",
           .sep="\n", .trim=FALSE)
    )
  }

  endpoint <- knms_url_()
  payload <- format_body_(names)

  reply <- make_request_(endpoint, body=payload, method="POST")
  stats <- reply$content$stats

  # bundle the raw response, what was submitted, the summary counts and the
  # per-name records into one object
  match <- list(
    response=reply$response,
    submitted=payload,
    matched=stats$matched,
    unmatched=stats$unmatched,
    multiple_matches=stats$multipleMatches,
    results=reply$content$records
  )
  class(match) <- "knms_match"

  match
}

#' Make the KNMS URL.
75 | #' 76 | #' @noRd 77 | knms_url_ <- function() { 78 | get_url_("knms") 79 | } 80 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | S3method(format,ipni_author) 4 | S3method(format,ipni_citation) 5 | S3method(format,ipni_publication) 6 | S3method(format,ipni_search) 7 | S3method(format,knms_match) 8 | S3method(format,powo_search) 9 | S3method(format,powo_taxon) 10 | S3method(format,wcvp_search) 11 | S3method(format,wcvp_taxon) 12 | S3method(kew_citation,ipni) 13 | S3method(kew_citation,knms_match) 14 | S3method(kew_citation,krs_match) 15 | S3method(kew_citation,powo) 16 | S3method(kew_citation,tol) 17 | S3method(kew_citation,wcvp) 18 | S3method(print,ipni_author) 19 | S3method(print,ipni_citation) 20 | S3method(print,ipni_publication) 21 | S3method(print,ipni_search) 22 | S3method(print,knms_match) 23 | S3method(print,krs_match) 24 | S3method(print,powo_search) 25 | S3method(print,powo_taxon) 26 | S3method(print,tol_fasta) 27 | S3method(print,tol_gene) 28 | S3method(print,tol_search) 29 | S3method(print,tol_specimen) 30 | S3method(print,tol_tree) 31 | S3method(print,wcvp_search) 32 | S3method(print,wcvp_taxon) 33 | S3method(request_next,ipni_search) 34 | S3method(request_next,powo_search) 35 | S3method(request_next,tol_search) 36 | S3method(request_next,wcvp_search) 37 | S3method(tidy,ipni_author) 38 | S3method(tidy,ipni_citation) 39 | S3method(tidy,ipni_publication) 40 | S3method(tidy,ipni_search) 41 | S3method(tidy,knms_match) 42 | S3method(tidy,krs_match) 43 | S3method(tidy,powo_search) 44 | S3method(tidy,powo_taxon) 45 | S3method(tidy,tol_gene) 46 | S3method(tidy,tol_search) 47 | S3method(tidy,tol_specimen) 48 | S3method(tidy,wcvp_search) 49 | S3method(tidy,wcvp_taxon) 50 | export(download_tol) 51 | export(download_wcvp) 52 | export(get_filters) 53 | export(get_keywords) 54 | 
export(kew_citation) 55 | export(load_tol) 56 | export(lookup_ipni) 57 | export(lookup_powo) 58 | export(lookup_tol) 59 | export(lookup_wcvp) 60 | export(match_knms) 61 | export(match_krs) 62 | export(request_next) 63 | export(search_ipni) 64 | export(search_powo) 65 | export(search_tol) 66 | export(search_wcvp) 67 | export(tidy) 68 | import(httr) 69 | importFrom(dplyr,na_if) 70 | importFrom(generics,tidy) 71 | importFrom(glue,glue) 72 | importFrom(glue,glue_collapse) 73 | importFrom(here,here) 74 | importFrom(httr,GET) 75 | importFrom(httr,user_agent) 76 | importFrom(jsonlite,fromJSON) 77 | importFrom(jsonlite,toJSON) 78 | importFrom(purrr,every) 79 | importFrom(purrr,flatten_chr) 80 | importFrom(purrr,map) 81 | importFrom(purrr,map2) 82 | importFrom(purrr,map_chr) 83 | importFrom(purrr,map_dfr) 84 | importFrom(purrr,map_lgl) 85 | importFrom(purrr,pluck) 86 | importFrom(rlang,.data) 87 | importFrom(rvest,html_attr) 88 | importFrom(rvest,html_nodes) 89 | importFrom(stringr,str_detect) 90 | importFrom(stringr,str_extract) 91 | importFrom(tibble,as_tibble_row) 92 | importFrom(tibble,tibble) 93 | importFrom(tidyr,fill) 94 | importFrom(utils,bibentry) 95 | importFrom(utils,download.file) 96 | importFrom(utils,head) 97 | importFrom(utils,str) 98 | -------------------------------------------------------------------------------- /.github/workflows/R-CMD-check.yaml: -------------------------------------------------------------------------------- 1 | # For help debugging build failures open an issue on the RStudio community with the 'github-actions' tag. 
2 | # https://community.rstudio.com/new-topic?category=Package%20development&tags=github-actions 3 | on: 4 | push: 5 | branches: 6 | - main 7 | - master 8 | pull_request: 9 | branches: 10 | - main 11 | - master 12 | 13 | name: R-CMD-check 14 | 15 | jobs: 16 | R-CMD-check: 17 | runs-on: ${{ matrix.config.os }} 18 | 19 | name: ${{ matrix.config.os }} (${{ matrix.config.r }}) 20 | 21 | strategy: 22 | fail-fast: false 23 | matrix: 24 | config: 25 | - {os: windows-latest, r: 'release'} 26 | - {os: macOS-latest, r: 'release'} 27 | - {os: ubuntu-20.04, r: 'release', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"} 28 | - {os: ubuntu-20.04, r: 'devel', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"} 29 | 30 | env: 31 | R_REMOTES_NO_ERRORS_FROM_WARNINGS: true 32 | RSPM: ${{ matrix.config.rspm }} 33 | 34 | steps: 35 | - uses: actions/checkout@v2 36 | 37 | - uses: r-lib/actions/setup-r@v1 38 | with: 39 | r-version: ${{ matrix.config.r }} 40 | 41 | - uses: r-lib/actions/setup-pandoc@v1 42 | 43 | - name: Query dependencies 44 | run: | 45 | install.packages('remotes') 46 | saveRDS(remotes::dev_package_deps(dependencies = TRUE), ".github/depends.Rds", version = 2) 47 | writeLines(sprintf("R-%i.%i", getRversion()$major, getRversion()$minor), ".github/R-version") 48 | shell: Rscript {0} 49 | 50 | - name: Cache R packages 51 | if: runner.os != 'Windows' 52 | uses: actions/cache@v2 53 | with: 54 | path: ${{ env.R_LIBS_USER }} 55 | key: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1-${{ hashFiles('.github/depends.Rds') }} 56 | restore-keys: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1- 57 | 58 | - name: Install system dependencies 59 | if: runner.os == 'Linux' 60 | run: | 61 | while read -r cmd 62 | do 63 | eval sudo $cmd 64 | done < <(Rscript -e 'writeLines(remotes::system_requirements("ubuntu", "20.04"))') 65 | 66 | - name: Install dependencies 67 | run: | 68 | remotes::install_deps(dependencies = TRUE) 69 | 
remotes::install_cran("rcmdcheck") 70 | shell: Rscript {0} 71 | 72 | - name: Check 73 | env: 74 | _R_CHECK_CRAN_INCOMING_REMOTE_: false 75 | run: rcmdcheck::rcmdcheck(args = c("--no-manual", "--as-cran"), error_on = "warning", check_dir = "check") 76 | shell: Rscript {0} 77 | 78 | - name: Upload check results 79 | if: failure() 80 | uses: actions/upload-artifact@main 81 | with: 82 | name: ${{ runner.os }}-r${{ matrix.config.r }}-results 83 | path: check 84 | -------------------------------------------------------------------------------- /man/lookup_tol.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/tol.R 3 | \name{lookup_tol} 4 | \alias{lookup_tol} 5 | \title{Look up a sequenced specimen or gene in ToL.} 6 | \usage{ 7 | lookup_tol(id, type = c("specimen", "gene"), .wait = 0.1) 8 | } 9 | \arguments{ 10 | \item{id}{A string containing a valid ToL ID.} 11 | 12 | \item{type}{The type of record to look up, either \code{specimen} or \code{gene}.} 13 | 14 | \item{.wait}{Time to wait before making a request, to help 15 | rate limiting.} 16 | } 17 | \value{ 18 | A \verb{tol_\{type\}} object, which is a simple structure with fields 19 | for each of the fields returned by the lookup API, 20 | as well as the \link[httr:response]{httr response object}. 21 | } 22 | \description{ 23 | Request the record for a sequenced specimen or gene in ToL using 24 | its ToL ID. 25 | } 26 | \details{ 27 | The \href{https://treeoflife.kew.org/}{Tree of Life} is a database 28 | of specimens sequenced as part of Kew's efforts to build 29 | a comprehensive evolutionary tree of life for flowering plants. 30 | 31 | The lookup API allows users to retrieve taxonomic and sequencing 32 | information for a specific sequenced specimen or gene using the unique ToL ID. 33 | If this is not known, it can be found out using the \link[=search_tol]{ToL search API}.
34 | } 35 | \examples{ 36 | 37 | # retrieve information for a particular specimen 38 | lookup_tol("1296") 39 | 40 | # print a summary of the returned information 41 | r <- lookup_tol("1296") 42 | print(r) 43 | 44 | # tidy into a tibble 45 | r <- lookup_tol("1296") 46 | tidy(r) 47 | 48 | # extract the returned gene stats for the specimen 49 | r <- lookup_tol("1296") 50 | tidied <- tidy(r) 51 | tidied$gene_stats 52 | 53 | # expand the taxonomy info 54 | r <- lookup_tol("1296") 55 | tidied <- tidy(r) 56 | tidyr::unnest(tidied, cols=taxonomy, names_sep="_") 57 | 58 | # retrieve information for a particular gene 59 | lookup_tol("51", type="gene") 60 | 61 | # print a summary of the returned information 62 | r <- lookup_tol("51", type="gene") 63 | print(r) 64 | 65 | # tidy into a tibble 66 | r <- lookup_tol("51", type="gene") 67 | tidy(r) 68 | 69 | } 70 | \references{ 71 | Baker W.J., Bailey P., Barber V., Barker A., Bellot S., Bishop D., Botigue L.R., Brewer G., Carruthers T., Clarkson J.J., Cook J., Cowan R.S., Dodsworth S., Epitawalage N., Francoso E., Gallego B., Johnson M., Kim J.T., Leempoel K., Maurin O., McGinnie C., Pokorny L., Roy S., Stone M., Toledo E., Wickett N.J., Zuntini A.R., Eiserhardt W.L., Kersey P.J., Leitch I.J. & Forest F. 2021. A Comprehensive Phylogenomic Platform for Exploring the Angiosperm Tree of Life. Systematic Biology, 2021; syab035, https://doi.org/10.1093/sysbio/syab035 72 | } 73 | \seealso{ 74 | \itemize{ 75 | \item \code{\link[=search_tol]{search_tol()}} to search ToL using taxonomic information. 76 | \item \code{\link[=download_tol]{download_tol()}} to download a file from the ToL SFTP server. 77 | \item \code{\link[=load_tol]{load_tol()}} load a file from the ToL SFTP server. 
78 | } 79 | 80 | Other ToL functions: 81 | \code{\link{download_tol}()}, 82 | \code{\link{load_tol}()} 83 | } 84 | \concept{ToL functions} 85 | -------------------------------------------------------------------------------- /tests/testthat/test-ipni.R: -------------------------------------------------------------------------------- 1 | test_that("search URL returns status 200", { 2 | url <- ipni_search_url_() 3 | response <- httr::GET(url) 4 | 5 | expect_equal(httr::status_code(response), 200) 6 | }) 7 | 8 | test_that("search URL response is json", { 9 | url <- ipni_search_url_() 10 | response <- httr::GET(url) 11 | 12 | expect_equal(httr::http_type(response), "application/json") 13 | }) 14 | 15 | test_that("raises error for unimplemented keyword", { 16 | query <- list(name="Myrcia guianensis") 17 | 18 | expect_error(search_ipni(query), 19 | "Query keyword.+ not recognised") 20 | }) 21 | 22 | test_that("raises error for bad query input type", { 23 | query <- c("this", "is", "a", "bad", "query") 24 | 25 | expect_error(search_ipni(query)) 26 | }) 27 | 28 | test_that("tidy search results returns tibble", { 29 | results <- search_ipni("Poa annua") 30 | tidied <- tidy(results) 31 | 32 | expect_s3_class(tidied, "tbl_df") 33 | }) 34 | 35 | test_that("tidy lookup results returns tibble", { 36 | results <- lookup_ipni("30001404-2") 37 | tidied <- tidy(results) 38 | 39 | expect_s3_class(tidied, "tbl_df") 40 | }) 41 | 42 | test_that("specific filter only returns species", { 43 | query <- "Myrcia" 44 | filters <- c("species") 45 | 46 | results <- search_ipni(query, filters) 47 | all_species <- purrr::every(results$results, 48 | ~.x$rank == "spec.") 49 | 50 | expect_true(all_species) 51 | }) 52 | 53 | test_that("infraspecific filter only returns infraspecifics", { 54 | infra_ranks <- c("Variety", "Subspecies", "Form") 55 | 56 | query <- "Poa annua" 57 | filters <- c("infraspecies") 58 | 59 | results <- search_wcvp(query, filters) 60 | all_infra <- purrr::every(results$results, 
61 | ~.x$rank %in% infra_ranks) 62 | 63 | expect_true(all_infra) 64 | }) 65 | 66 | test_that("generic filter only returns genera", { 67 | query <- "Myrcia" 68 | filters <- c("genera") 69 | 70 | results <- search_ipni(query, filters) 71 | all_genera <- purrr::every(results$results, 72 | ~.x$rank == "gen.") 73 | 74 | expect_true(all_genera) 75 | }) 76 | 77 | test_that("infrageneric filter only returns infragenera", { 78 | query <- "Behenantha" 79 | filters <- c("infragenera") 80 | 81 | results <- search_ipni(query, filters) 82 | all_genera <- purrr::every(results$results, 83 | ~.x$rank == "sect.") 84 | 85 | expect_true(all_genera) 86 | }) 87 | 88 | test_that("family filter only returns families", { 89 | 90 | query <- "poaceae" 91 | filters <- c("families") 92 | # query IPNI, not WCVP: test-wcvp.R expects "Family" from WCVP for this filter 93 | results <- search_ipni(query, filters) 94 | all_families <- purrr::every(results$results, 95 | ~.x$rank == "fam.") 96 | 97 | expect_true(all_families) 98 | }) 99 | 100 | test_that("infrafamily filter only returns infrafamilies", { 101 | 102 | query <- "Rosoideae" 103 | filters <- c("infrafamilies") 104 | 105 | results <- search_ipni(query, filters) 106 | all_families <- purrr::every(results$results, 107 | ~.x$rank == "subfam.") 108 | 109 | expect_true(all_families) 110 | }) 111 | 112 | test_that("cursor returns next page of results", { 113 | query <- list(genus="Ulex") 114 | 115 | page1 <- search_ipni(query) 116 | page2 <- search_ipni(query, cursor=page1$cursor) 117 | 118 | expect_false(page1$results[[1]]$fqId == page2$results[[1]]$fqId) 119 | }) 120 | -------------------------------------------------------------------------------- /man/search_ipni.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/ipni.R 3 | \name{search_ipni} 4 | \alias{search_ipni} 5 | \title{Search IPNI.} 6 | \usage{ 7 | search_ipni(query, filters = NULL, cursor = "*", limit = 50, .wait = 0.1) 8 | } 9 | \arguments{
10 | \item{query}{The string to query IPNI with. If using keywords, the query 11 | must be formatted as a list.} 12 | 13 | \item{filters}{Filter to apply to search results.} 14 | 15 | \item{cursor}{A cursor returned by a previous search. 16 | If used, the query and filter must be exactly the same.} 17 | 18 | \item{limit}{The maximum number of records to return. This has a maximum of 1000.} 19 | 20 | \item{.wait}{Time to wait before making a request, to help 21 | rate limiting.} 22 | } 23 | \value{ 24 | Returns an object of class \code{ipni_search} that is a simple 25 | structure with slots for: 26 | \itemize{ 27 | \item \code{total}: the total number of results held in IPNI for the query. 28 | \item \code{pages}: the total number of results pages for the query. 29 | \item \code{limit}: the maximum number of results requested from the API, per page. 30 | \item \code{cursor}: a cursor to retrieve the next page of results from the API. 31 | \item \code{results}: the query results parsed into a list. 32 | \item \code{query}: the query string submitted to the API. 33 | \item \code{response}: the \link[httr:response]{httr response object}. 34 | } 35 | } 36 | \description{ 37 | Query the International Plant Names Index for nomenclatural information. 38 | } 39 | \details{ 40 | The \href{https://www.ipni.org/}{International Plant Names Index (IPNI)} 41 | is a service that provides nomenclatural information for vascular plant names. 42 | 43 | The search API allows users to query the database for plant names, 44 | as well as authors and publications. There may be limited support for 45 | some sort of fuzzy matching. 46 | 47 | There is some support for querying using keyword arguments. Documentation for 48 | the API is currently available in the \href{https://github.com/RBGKew/pykew/blob/master/pykew/ipni_terms.py}{\code{pykew} package}, 49 | so keywords have been copied across from there. There are sets of keywords 50 | relating to plants, authors, and publications.
A full list of keywords can be viewed 51 | using the \code{get_keywords} function. 52 | 53 | The API will return nomenclatural information (publication date, nomenclatural status, author, etc.) 54 | of all names matching the query. These results can be limited, for example to only family names, 55 | using filters. Use the \code{get_filters} function to get a list of all implemented filters. 56 | } 57 | \examples{ 58 | # search for all names containing Poa annua 59 | results <- search_ipni("Poa annua") 60 | 61 | # tidy search results in a table 62 | tidy(results) 63 | 64 | # extract author team information for the search results 65 | results_tbl <- tidy(results) 66 | tidyr::unnest(results_tbl, cols=c(authorTeam), names_sep="_") 67 | 68 | # filter results to only species names 69 | species_results <- search_ipni("Poa annua", filters="species") 70 | tidy(species_results) 71 | 72 | # search for species from Mexico published in 1989 73 | q <- list(published="1989", distribution="Mexico") 74 | f <- "species" 75 | results <- search_ipni(q, filters=f) 76 | tidy(results) 77 | 78 | # search for an author by surname 79 | author_results <- search_ipni(list(author_surname="Gardiner")) 80 | tidy(author_results) 81 | 82 | } 83 | \seealso{ 84 | \itemize{ 85 | \item \code{\link[=lookup_ipni]{lookup_ipni()}} to look up a name using an IPNI ID. 86 | } 87 | 88 | Other IPNI functions: 89 | \code{\link{lookup_ipni}()} 90 | } 91 | \concept{IPNI functions} 92 | -------------------------------------------------------------------------------- /R/format.R: -------------------------------------------------------------------------------- 1 | # wcvp ---- 2 | #' @importFrom purrr map_dfr 3 | #' 4 | #' @export 5 | format.wcvp_search <- function(x, ...) { 6 | .Deprecated("tidy.wcvp_search") 7 | map_dfr(x$results, parse_nested_list) 8 | } 9 | 10 | #' @export 11 | format.wcvp_taxon <- function(x, ...) { 12 | .Deprecated("tidy.wcvp_taxon") 13 | x <- x[! 
names(x) %in% c("response", "queryId")] 14 | 15 | parse_nested_list(x) 16 | } 17 | 18 | # powo ---- 19 | 20 | #' @importFrom purrr map_dfr 21 | #' 22 | #' @export 23 | format.powo_search <- function(x, ...) { 24 | .Deprecated("tidy.powo_search") 25 | map_dfr(x$results, parse_nested_list) 26 | } 27 | 28 | #' @export 29 | format.powo_taxon <- function(x, field=c("none", "accepted", "synonyms", "classification", "basionym", "distribution", "distributionEnvelope"), ...) { 30 | .Deprecated("tidy.powo_taxon") 31 | x <- x[! names(x) %in% c("response", "queryId")] 32 | 33 | parse_nested_list(x) 34 | } 35 | 36 | # ipni ---- 37 | 38 | #' @importFrom purrr map_dfr 39 | #' 40 | #' @export 41 | format.ipni_search <- function(x, ...) { 42 | .Deprecated("tidy.ipni_search") 43 | map_dfr(x$results, parse_nested_list) 44 | } 45 | 46 | #' @export 47 | format.ipni_citation <- function(x, ...) { 48 | .Deprecated("tidy.ipni_citation") 49 | x <- x[! names(x) %in% c("response", "queryId")] 50 | 51 | parse_nested_list(x) 52 | } 53 | 54 | #' @export 55 | format.ipni_author <- function(x, ...) { 56 | .Deprecated("tidy.ipni_author") 57 | x <- x[! names(x) %in% c("response", "queryId")] 58 | 59 | parse_nested_list(x) 60 | } 61 | 62 | #' @export 63 | format.ipni_publication <- function(x, ...) { 64 | .Deprecated("tidy.ipni_publication") 65 | x <- x[! names(x) %in% c("response", "queryId")] 66 | 67 | parse_nested_list(x) 68 | } 69 | 70 | # knms ---- 71 | 72 | #' @importFrom purrr map_lgl map_dfr pluck 73 | #' @importFrom tidyr fill 74 | #' @importFrom rlang .data 75 | #' 76 | #' @export 77 | format.knms_match <- function(x, ...) { 78 | .Deprecated("tidy.knms_match") 79 | parsed <- map_dfr(x$results, parse_knms_line) 80 | 81 | formatted <- fill(parsed, .data$submitted, .data$matched) 82 | formatted$matched <- formatted$matched %in% c("true", "multiple_matches") 83 | 84 | formatted 85 | } 86 | 87 | # utils ---- 88 | 89 | #' Simple utility to wrap nested lists into a tibble. 
90 | #' 91 | #' Nested lists are also converted to tibbles and inserted in list 92 | #' columns. 93 | #' 94 | #' @importFrom purrr map_chr map 95 | #' @importFrom tibble as_tibble_row 96 | #' 97 | #' @noRd 98 | parse_nested_list <- function(l) { 99 | if (is.null(names(l))) { 100 | return(map_dfr(l, parse_nested_list)) 101 | } 102 | 103 | null_cols <- map_lgl(l, is.null) 104 | l[null_cols] <- NA_character_ 105 | 106 | list_cols <- map_lgl(l, is.list) 107 | l[list_cols] <- map(l[list_cols], ~list(parse_nested_list(.x))) 108 | 109 | as_tibble_row(l) 110 | } 111 | 112 | #' Parse and format a single match result from KNMS. 113 | #' 114 | #' @importFrom stringr str_extract 115 | #' @importFrom dplyr na_if 116 | #' @importFrom tibble tibble 117 | #' 118 | #' @noRd 119 | parse_knms_line <- function(line) { 120 | submitted <- na_if(line[[1]], "") 121 | matched <- na_if(line[[2]], "") 122 | 123 | if (length(line) > 2) { 124 | ipni_id <- str_extract(line[[3]], "(?<=names:)[0-9\\-]+$") 125 | } else { 126 | ipni_id <- NA_character_ 127 | } 128 | 129 | if (length(line) > 3) { 130 | matched_record <- line[[4]] 131 | } else { 132 | matched_record <- NA_character_ 133 | } 134 | 135 | tibble(submitted=submitted, 136 | matched=matched, 137 | ipni_id=ipni_id, 138 | matched_record=matched_record) 139 | } 140 | -------------------------------------------------------------------------------- /man/search_powo.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/powo.R 3 | \name{search_powo} 4 | \alias{search_powo} 5 | \title{Search POWO.} 6 | \usage{ 7 | search_powo(query, filters = NULL, cursor = NULL, limit = 50, .wait = 0.2) 8 | } 9 | \arguments{ 10 | \item{query}{The string to query POWO with. If using keywords, 11 | the query must be formatted as a list.} 12 | 13 | \item{filters}{Filter to apply to search results. 
14 | Multiple filters must be supplied as a character vector.} 15 | 16 | \item{cursor}{A cursor returned by a previous search. 17 | If used, the query and filter must be exactly the same.} 18 | 19 | \item{limit}{The maximum number of records to return.} 20 | 21 | \item{.wait}{Time to wait before making a request, to help 22 | rate limiting.} 23 | } 24 | \value{ 25 | Returns an object of class \code{powo_search} that is a simple 26 | structure with slots for: 27 | \itemize{ 28 | \item \code{total}: the total number of results held in POWO for the query 29 | \item \code{pages}: the total number of results pages for the query. 30 | \item \code{limit}: the maximum number of results requested from the API, per page. 31 | \item \code{cursor}: a cursor to retrieve the next page of results from the API. 32 | \item \code{results}: the query results parsed into a list. 33 | \item \code{query}: the query string submitted to the API. 34 | \item \code{response}: the \link[httr:response]{httr response object}. 35 | } 36 | } 37 | \description{ 38 | Query Plants of the World Online for taxon information. 39 | } 40 | \details{ 41 | \href{http://www.plantsoftheworldonline.org/}{Plants of the World Online (POWO)} 42 | is a database of information on the world's flora. It curates information from 43 | published floras and other sources of floristic information. 44 | 45 | The search API allows users to query the database using plant names, 46 | geographic terms, and floristic characters. These can be queried using 47 | keyword arguments. Use the \code{get_keywords} function for a list of all implemented keywords. 48 | 49 | The API returns taxonomic information as well as species descriptions and 50 | image locations if available. These results can be limited, for example to accepted species, 51 | using filters. Use the \code{get_filters} function to get a list of all implemented filters. 
52 | 53 | Distributions in POWO are categorised using the \href{https://www.tdwg.org/standards/wgsrpd/}{World Geographical Scheme for Recording Plant Distributions (WGSRPD)}. 54 | Users can query POWO using distributions listed under WGSRPD levels 1 (continents), 55 | 2 (regions), and 3 (botanical countries). 56 | } 57 | \examples{ 58 | # search for all entries containing a genus name 59 | search_powo("Myrcia") 60 | 61 | # search for all accepted species within a genus 62 | search_powo("Myrcia", filters=c("species", "accepted")) 63 | 64 | # search for up to 100 species in a genus 65 | search_powo("Poa", filters=c("species"), limit=100) 66 | 67 | # search for all names in a family 68 | search_powo(list(family="Myrtaceae")) 69 | 70 | # search for all accepted species with blue flowers 71 | search_powo(list(flower="blue"), filters=c("accepted", "species")) 72 | 73 | # search for all accepted genera in Mexico 74 | search_powo(list(distribution="Mexico"), filters=c("accepted", "genera")) 75 | 76 | # search for a species name and print the results 77 | r <- search_powo("Myrcia guianensis", filters=c("species")) 78 | print(r) 79 | 80 | # simplify search results to a `tibble` 81 | r <- search_powo("Poa", filters=c("species")) 82 | tidy(r) 83 | 84 | } 85 | \seealso{ 86 | \itemize{ 87 | \item \code{\link[=lookup_powo]{lookup_powo()}} to look up a taxon in POWO using the IPNI ID.
88 | } 89 | 90 | Other POWO functions: 91 | \code{\link{lookup_powo}()} 92 | } 93 | \concept{POWO functions} 94 | -------------------------------------------------------------------------------- /vignettes/POWO.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "POWO" 3 | output: rmarkdown::html_vignette 4 | vignette: > 5 | %\VignetteIndexEntry{POWO} 6 | %\VignetteEngine{knitr::rmarkdown} 7 | %\VignetteEncoding{UTF-8} 8 | --- 9 | 10 | ```{r, include = FALSE} 11 | knitr::opts_chunk$set( 12 | collapse = TRUE, 13 | comment = "#>" 14 | ) 15 | ``` 16 | 17 | [Plants of the World Online (POWO)](http://www.plantsoftheworldonline.org/) is a database of information on the world's flora. It curates information from published floras and other sources of floristic information. POWO covers much of the same ground as WCVP - you can get taxonomic information for names from it. But it also has more general information about taxa, like descriptions, lifeforms, images, and distributions. 18 | 19 | ```{r setup} 20 | library(kewr) 21 | library(dplyr) 22 | library(tidyr) 23 | ``` 24 | 25 | 26 | ## Searching POWO for taxa 27 | 28 | Searching POWO works by exact matching, so any misspellings or unrecognised orthographic variants will not be matched. 29 | 30 | For example, searching `Ulex europeaus` will not return any results: 31 | ```{r} 32 | results <- search_powo("Ulex europeaus") 33 | results 34 | ``` 35 | 36 | But searching `Ulex europaeus` will: 37 | 38 | ```{r} 39 | results <- search_powo("Ulex europaeus") 40 | results 41 | ``` 42 | 43 | ## Pagination 44 | 45 | By default, `search_powo` limits the number of results returned to 50. If you are expecting more results, you can increase this limit. 46 | 47 | ```{r} 48 | results <- search_powo("Ulex", limit=100) 49 | results 50 | ``` 51 | 52 | However, for searches with a large number of results, this may not be practical.
In this case, or if you just don't know how many results to expect, you can request the next page of results. 53 | 54 | ```{r} 55 | ulex_page1 <- search_powo("Ulex") 56 | ulex_page2 <- request_next(ulex_page1) 57 | 58 | bind_rows( 59 | tidy(ulex_page1), 60 | tidy(ulex_page2) 61 | ) 62 | ``` 63 | 64 | ## Keyword searches and filtering 65 | 66 | You can perform more complicated searches using keywords and filters. 67 | For example, you can search for all accepted species in a particular family. 68 | 69 | ```{r} 70 | results <- search_powo(list(family="Ephedraceae"), 71 | filters=c("accepted", "species")) 72 | results 73 | ``` 74 | 75 | As well as taxonomic keywords, it is possible to search by other characteristics. 76 | 77 | For example, finding all accepted legumes with blue flowers. 78 | 79 | ```{r} 80 | blue_flowers <- search_powo(list(flower="blue", family="Fabaceae"), 81 | filters=c("accepted")) 82 | blue_flowers 83 | ``` 84 | 85 | Another use is to get a checklist of accepted species in a country. 86 | 87 | ```{r} 88 | checklist <- search_powo(list(distribution="Mexico"), 89 | filters=c("accepted")) 90 | 91 | checklist 92 | ``` 93 | 94 | A full list of keywords and filters can be found in the help page for `search_powo`. 95 | 96 | ## Looking up information about a taxon 97 | 98 | As well as searching by keyword, it is possible to use the IPNI ID of a taxon to look up its record in POWO. To get the IPNI ID, you can search WCVP, POWO, or IPNI, or use KNMS to find matches for the name. 99 | 100 | ```{r} 101 | ipni_id <- "119003-2" 102 | 103 | record <- lookup_powo(ipni_id) 104 | record 105 | ``` 106 | 107 | Optionally, you can choose to request the distribution information for the taxon as well. 
108 | 109 | ```{r} 110 | record <- lookup_powo(ipni_id, distribution=TRUE) 111 | 112 | tidied <- tidy(record) 113 | 114 | tidied %>% 115 | select(fqId, distribution) %>% 116 | unnest(cols=distribution) %>% 117 | select(-introduced) %>% 118 | unnest(cols=natives) 119 | ``` 120 | -------------------------------------------------------------------------------- /vignettes/IPNI.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "IPNI" 3 | output: rmarkdown::html_vignette 4 | vignette: > 5 | %\VignetteIndexEntry{IPNI} 6 | %\VignetteEngine{knitr::rmarkdown} 7 | %\VignetteEncoding{UTF-8} 8 | --- 9 | 10 | ```{r, include = FALSE} 11 | knitr::opts_chunk$set( 12 | collapse = TRUE, 13 | comment = "#>" 14 | ) 15 | ``` 16 | 17 | ```{r setup} 18 | library(kewr) 19 | library(dplyr) 20 | ``` 21 | 22 | The [International Plant Names Index (IPNI)](https://www.ipni.org/) is a service that provides nomenclatural information for vascular plant names. 23 | 24 | It provides information on published plant names, as well as authors and publications. The type of information that it holds for plant names includes when it was published, the publication, the author, as well as any nomenclatural remarks. 25 | 26 | ## Searching IPNI for a record 27 | 28 | Searching IPNI operates on exact matching. 29 | 30 | For example, `Poa anua` will not return any results: 31 | 32 | ```{r} 33 | results <- search_ipni("Poa anua") 34 | results 35 | ``` 36 | But `Poa annua` will: 37 | 38 | ```{r} 39 | results <- search_ipni("Poa annua") 40 | results 41 | ``` 42 | 43 | The exact matching does, however, work on fragments of a name. For instance, `Poa` will return results for everything that includes that term: 44 | 45 | ```{r} 46 | results <- search_ipni("Poa") 47 | results 48 | ``` 49 | It should be noted that this will return results for authors and publications as well as taxon names.
We can see the first result for this search is the publication "Poa de France, Belgique et Suisse". 50 | 51 | ## Pagination 52 | 53 | Looking at the search above, we can see that there were a total of `r results$total` records found but only the first `r results$limit` were returned. There are two possible ways to get the rest of the search results. 54 | 55 | The first is to increase the record limit to ensure we get everything: 56 | 57 | ```{r} 58 | results <- search_ipni("Poa", limit=1000) 59 | results 60 | ``` 61 | However, IPNI caps the limit at a maximum of 1000 records. Also, we often don't know how many records a search will return, or whether it will return a large number of records. 62 | 63 | In those cases, we can ask IPNI for the next page of results. 64 | 65 | ```{r} 66 | query <- "Poa" 67 | page1 <- search_ipni(query) 68 | page2 <- request_next(page1) 69 | 70 | bind_rows( 71 | tidy(page1), 72 | tidy(page2) 73 | ) 74 | ``` 75 | 76 | ## Keywords and filtering 77 | 78 | You can perform more complicated searches using keywords and filters. 79 | For example, you can search for all genus names published for a particular family. 80 | 81 | ```{r} 82 | results <- search_ipni(list(family="Ephedraceae"), 83 | filters="genera") 84 | results 85 | ``` 86 | 87 | Or for all species names published in a particular year. 88 | 89 | ```{r} 90 | results <- search_ipni(list(published=1989), 91 | filters=c("species")) 92 | results 93 | ``` 94 | 95 | You can also use the keywords to search for author records. 96 | 97 | ```{r} 98 | results <- search_ipni(list(author_surname="Gardiner")) 99 | 100 | results 101 | ``` 102 | And for publication records. 103 | 104 | ```{r} 105 | results <- search_ipni(list(title="Bulletin")) 106 | results 107 | ``` 108 | 109 | 110 | A full list of keywords and filters can be found in the help page for `search_ipni`.
111 | 112 | ## Looking up a specific record 113 | 114 | Specific records for taxon names, authors, and publications can be looked up using the IPNI ID. These IDs are specific to each individual type of record, so the type needs to be provided. 115 | 116 | ```{r} 117 | name <- lookup_ipni("385169-1", type="taxon") 118 | name 119 | ``` 120 | 121 | ```{r} 122 | author <- lookup_ipni("20028192-1", type="author") 123 | author 124 | ``` 125 | 126 | ```{r} 127 | pub <- lookup_ipni("12471-2", type="publication") 128 | pub 129 | ``` 130 | -------------------------------------------------------------------------------- /tests/testthat/test-wcvp.R: -------------------------------------------------------------------------------- 1 | test_that("search URL returns status 200", { 2 | url <- wcvp_search_url_() 3 | response <- httr::GET(url) 4 | 5 | expect_equal(httr::status_code(response), 200) 6 | }) 7 | 8 | test_that("search URL response is json", { 9 | url <- wcvp_search_url_() 10 | response <- httr::GET(url) 11 | 12 | expect_equal(httr::http_type(response), "application/json") 13 | }) 14 | 15 | test_that("taxon URL response is json", { 16 | url <- wcvp_taxon_url_("30001404-2") 17 | response <- httr::GET(url) 18 | 19 | expect_equal(httr::http_type(response), "application/json") 20 | }) 21 | 22 | test_that("taxon URL returns 404 for bad ID", { 23 | url <- wcvp_taxon_url_("bad id") 24 | response <- httr::GET(url) 25 | expect_equal(status_code(response), 404) 26 | }) 27 | 28 | test_that("raises error for unimplemented keyword", { 29 | query <- list(name="Myrcia guianensis") 30 | 31 | expect_error(search_wcvp(query), 32 | "Query keyword.+ not recognised") 33 | }) 34 | 35 | test_that("raises error for bad query input type", { 36 | query <- c("this", "is", "a", "bad", "query") 37 | 38 | expect_error(search_wcvp(query)) 39 | }) 40 | 41 | test_that("accepted filter only returns accepted names", { 42 | query <- "Myrcia" 43 | filters <- c("accepted") 44 | 45 | results <- search_wcvp(query, 
filters) 46 | all_accepted <- purrr::every(results$results, 47 | ~.x$accepted) 48 | 49 | expect_true(all_accepted) 50 | }) 51 | 52 | test_that("specific filter only returns species", { 53 | query <- "Myrcia" 54 | filters <- c("species") 55 | 56 | results <- search_wcvp(query, filters) 57 | all_species <- purrr::every(results$results, 58 | ~.x$rank == "Species") 59 | 60 | expect_true(all_species) 61 | }) 62 | 63 | test_that("generic filter only returns genera", { 64 | query <- "Myrcia" 65 | filters <- c("genera") 66 | 67 | results <- search_wcvp(query, filters) 68 | all_genera <- purrr::every(results$results, 69 | ~.x$rank == "Genus") 70 | 71 | expect_true(all_genera) 72 | }) 73 | 74 | test_that("infraspecific filter only returns infraspecifics", { 75 | infra_ranks <- c("Variety", "Subspecies", "Form") 76 | 77 | query <- "Poa annua" 78 | filters <- c("infraspecies") 79 | 80 | results <- search_wcvp(query, filters) 81 | all_infra <- purrr::every(results$results, 82 | ~.x$rank %in% infra_ranks) 83 | 84 | expect_true(all_infra) 85 | }) 86 | 87 | test_that("family filter only returns families", { 88 | 89 | query <- "poaceae" 90 | filters <- c("families") 91 | 92 | results <- search_wcvp(query, filters) 93 | all_families <- purrr::every(results$results, 94 | ~.x$rank == "Family") 95 | 96 | expect_true(all_families) 97 | }) 98 | 99 | test_that("tidy search results returns tibble", { 100 | results <- search_wcvp("Poa annua", filters=c("species")) 101 | tidied <- tidy(results) 102 | 103 | expect_s3_class(tidied, "tbl_df") 104 | }) 105 | 106 | test_that("tidy lookup results returns tibble", { 107 | results <- lookup_wcvp("30001404-2") 108 | tidied <- tidy(results) 109 | 110 | expect_s3_class(tidied, "tbl_df") 111 | }) 112 | 113 | test_that("wcvp download link is a zip file", { 114 | download_link <- wcvp_download_url_() 115 | 116 | expect_true(endsWith(download_link, "zip")) 117 | }) 118 | 119 | test_that("wcvp download link returns right version", { 120 | download_link <- 
#' @export
tidy.powo_taxon <- function(x, field=c("none", "accepted", "synonyms", "classification", "basionym", "distribution", "distributionEnvelope"), ...) {
  # Strip the request bookkeeping entries so only the record itself is
  # flattened. `field` is accepted but not consulted by this method.
  is_metadata <- names(x) %in% c("response", "queryId")
  record <- x[!is_metadata]

  parse_nested_list_(record)
}
# knms ----

#' @importFrom purrr map_lgl map_dfr pluck
#' @importFrom tidyr fill
#' @importFrom rlang .data
#'
#' @export
tidy.knms_match <- function(x, ...) {
  # One row per result line; lines are parsed by `parse_knms_line_()`.
  parsed <- map_dfr(x$results, parse_knms_line_)

  # Blank `submitted`/`matched` cells are NA after parsing; carry the previous
  # value down so every row is labelled. Columns are selected by name as
  # strings because `.data$col` is deprecated in tidyselect contexts such as
  # `fill()`.
  tidied <- fill(parsed, "submitted", "matched")

  # Collapse the match status to a logical: both a single match and multiple
  # matches count as matched.
  tidied$matched <- tidied$matched %in% c("true", "multiple_matches")

  tidied
}
parse_nested_list_ <- function(l) {
  # Unnamed list made up only of character elements: flatten it to a single
  # character vector, wrapped in a list so it can sit in a list-column cell.
  # `&&` is the scalar operator for `if` and short-circuits, so `every()` is
  # only evaluated when the list really is unnamed.
  if (is.null(names(l)) && every(l, is.character)) {
    return(list(flatten_chr(l)))
  }

  # Any other unnamed list is treated as a collection of records: parse each
  # element and row-bind the results.
  if (is.null(names(l))) {
    return(map_dfr(l, parse_nested_list_))
  }

  # Named list: becomes one row with one column per name. NULL entries are
  # replaced with NA so they still occupy a column.
  null_cols <- map_lgl(l, is.null)
  l[null_cols] <- NA

  # Nested lists are parsed recursively and wrapped in a list so each one
  # lands in a single list-column cell.
  list_cols <- map_lgl(l, is.list)
  l[list_cols] <- map(l[list_cols], ~list(parse_nested_list_(.x)))

  as_tibble_row(l)
}
specifying the number of results 15 | to return.} 16 | 17 | \item{page}{An integer specify the page of results to request.} 18 | 19 | \item{.wait}{Time to wait before making a requests, to help 20 | rate limiting.} 21 | } 22 | \value{ 23 | Returns an object of class \code{tol_search} that is a simple 24 | structure with slots for: 25 | \itemize{ 26 | \item \code{total}: the total number of results held in ToL for the query. 27 | \item \code{page}: the page of results requested. 28 | \item \code{limit}: the maximum number of results requested from the API. 29 | \item \code{results}: the query results parsed into a list. 30 | \item \code{query}: the query string submitted to the API. 31 | \item \code{response}: the \link[httr:response]{httr response object}. 32 | } 33 | } 34 | \description{ 35 | Query Kew's Tree of Life for specimens that have 36 | been sampled for sequencing. 37 | } 38 | \details{ 39 | The \href{https://treeoflife.kew.org/}{Tree of Life} is a database 40 | of specimens sequenced as part of Kew's efforts to build 41 | a comprehensive evolutionary tree of life for flowering plants. 42 | 43 | The search API allows users to query the database for specimens 44 | based on their taxonomic information. Filtering and keyword-search 45 | are not currently implemented. All searches are based on taxonomic 46 | information, so \code{Myrcia} and \code{Myrtales} will return results, but 47 | \code{Brummitt} will not. 48 | 49 | The search API also allows users to download information about sequenced 50 | genes. There is currently no ability to search within the results for genes, 51 | but a table of all genes can be accessed using keyword argument \code{genes=TRUE}. 
52 | } 53 | \examples{ 54 | # get the first 50 of all sequenced specimens 55 | search_tol(limit=50) 56 | 57 | # search for all sequenced Myrcia specimens 58 | search_tol("Myrcia") 59 | 60 | # get all sequenced specimens 61 | search_tol(limit=5000) 62 | 63 | # search for a species name and print the results 64 | r <- search_tol("Myrcia guianensis") 65 | print(r) 66 | 67 | # simplify search results to a `tibble` 68 | r <- search_tol("Myrcia") 69 | tidy(r) 70 | 71 | # gene stats are nested in the results 72 | r <- search_tol("Myrcia") 73 | tidied <- tidy(r) 74 | tidyr::unnest(tidied, cols=gene_stats) 75 | 76 | # species names are nested in the results 77 | r <- search_tol("Myrcia") 78 | tidied <- tidy(r) 79 | tidyr::unnest(tidied, cols=species, names_sep="_") 80 | 81 | # as is higher taxonomy 82 | r <- search_tol("Myrcia") 83 | tidied <- tidy(r) 84 | tidyr::unnest(tidied, cols=species, names_sep="_") 85 | 86 | # search for all gene entries and print results 87 | r <- search_tol(genes=TRUE, limit=500) 88 | print(r) 89 | 90 | # tidy the returned genes 91 | tidy(r) 92 | 93 | } 94 | \references{ 95 | Baker W.J., Bailey P., Barber V., Barker A., Bellot S., Bishop D., Botigue L.R., Brewer G., Carruthers T., Clarkson J.J., Cook J., Cowan R.S., Dodsworth S., Epitawalage N., Francoso E., Gallego B., Johnson M., Kim J.T., Leempoel K., Maurin O., McGinnie C., Pokorny L., Roy S., Stone M., Toledo E., Wickett N.J., Zuntini A.R., Eiserhardt W.L., Kersey P.J., Leitch I.J. & Forest F. 2021. A Comprehensive Phylogenomic Platform for Exploring the Angiosperm Tree of Life. Systematic Biology, 2021; syab035, https://doi.org/10.1093/sysbio/syab035 96 | } 97 | \concept{ToL functions 98 | \itemize{ 99 | \item \code{\link[=lookup_tol]{lookup_tol()}} to lookup information about a sequenced specimen 100 | using a valid ToL ID. 101 | \item \code{\link[=download_tol]{download_tol()}} to download a file from the ToL SFTP server. 
test_that("taxon URL returns 400 for bad ID", {
  # Description now matches the status code the assertion actually checks.
  url <- powo_taxon_url_("bad id")

  # Short pause before the request, as in the other tests in this file.
  Sys.sleep(0.1)
  response <- httr::GET(url)

  # Namespace-qualified for consistency with the other status checks here.
  expect_equal(httr::status_code(response), 400)
})
| query <- "Myrcia" 67 | filters <- c("genera") 68 | 69 | results <- search_powo(query, filters, .wait=0.15) 70 | all_genera <- purrr::every(results$results, 71 | ~.x$rank == "Genus") 72 | 73 | expect_true(all_genera) 74 | }) 75 | 76 | test_that("infraspecific filter only returns infraspecifics", { 77 | infra_ranks <- c("Variety", "Subspecies", "Form") 78 | 79 | query <- "Poa annua" 80 | filters <- c("infraspecies") 81 | 82 | results <- search_powo(query, filters, .wait=0.15) 83 | all_infra <- purrr::every(results$results, 84 | ~.x$rank %in% infra_ranks) 85 | 86 | expect_true(all_infra) 87 | }) 88 | 89 | test_that("family filter only returns families", { 90 | 91 | query <- "poaceae" 92 | filters <- c("families") 93 | 94 | results <- search_powo(query, filters, .wait=0.15) 95 | all_families <- purrr::every(results$results, 96 | ~.x$rank == "Family") 97 | 98 | expect_true(all_families) 99 | }) 100 | 101 | test_that("image filter only returns things with images", { 102 | 103 | query <- "Myrcia" 104 | filters <- c("has_images") 105 | 106 | results <- search_powo(query, filters, .wait=0.15) 107 | all_images <- purrr::every(results$results, 108 | ~length(.x$images) > 0) 109 | 110 | expect_true(all_images) 111 | }) 112 | 113 | test_that("lookup with distribution returns distribution field", { 114 | taxonid <- "320035-2" 115 | 116 | results <- lookup_powo(taxonid, distribution=TRUE, .wait=0.15) 117 | 118 | expect_true("distribution" %in% names(results)) 119 | }) 120 | 121 | test_that("tidy search results returns tibble", { 122 | results <- search_powo("Poa annua", filters=c("species"), .wait=0.15) 123 | tidied <- tidy(results) 124 | 125 | expect_s3_class(tidied, "tbl_df") 126 | }) 127 | 128 | test_that("tidy lookup results returns tibble", { 129 | results <- lookup_powo("30001404-2", .wait=0.15) 130 | tidied <- tidy(results) 131 | 132 | expect_s3_class(tidied, "tbl_df") 133 | }) 134 | 135 | test_that("cursor returns next page of results", { 136 | query <- 
#' Format queries for search APIs.
#'
#' Checks if query is valid and then formats it correctly.
#'
#' A string query is wrapped in a one-element list, keyed `query` for the
#' "krs" resource and `q` for every other resource. A list query has its
#' names checked against the keywords returned by `get_keywords_(resource)`
#' and is then renamed using that mapping.
#'
#' @param query A string or list specifying the query.
#' @param resource A string specifying the resource being queried.
#'
#' @return A named list ready to be used as request parameters.
#'
#' @noRd
format_query_ <- function(query, resource) {

  # Anything that is not a list must be a single string, so reject longer
  # vectors. `&&` is the scalar operator for `if` conditions.
  if (!is.list(query) && length(query) > 1) {
    # Built with sprintf(), like the other error messages in this file. The
    # length is computed here; the previous template referred to an undefined
    # `query_length` variable.
    msg <- sprintf(
      paste0(
        "%s search query must be a named list or a string.\n",
        "Provided query type: %s\n",
        "Provided query length: %d\n"
      ),
      resource,
      typeof(query),
      length(query)
    )

    stop(msg, call.=FALSE)
  }

  if (is.list(query)) {
    keywords <- names(query)
    keyword_map <- get_keywords_(resource)
    valid_keywords <- names(keyword_map)
    bad_keywords <- setdiff(keywords, valid_keywords)

    if (length(bad_keywords) > 0) {
      stop(
        sprintf(
          "Query keywords for [%s] must be one of [%s]\n[%s] are not recognised.",
          resource,
          paste(valid_keywords, collapse=","),
          paste(bad_keywords, collapse=",")
        )
      )
    }

    # Swap the user-facing keyword names for the mapped parameter names.
    names(query) <- keyword_map[keywords]
    query
  } else if (resource == "krs") {
    list(query=query)
  } else {
    list(q=query)
  }
}
#' Format body for a POST request.
#'
#' The body of a POST request must be a list.
#' So far, only KNMS uses POST requests. Names
#' for matching might be input as a character vector,
#' so we need to ensure the body is a list and coerce it
#' if not.
#'
#' @param body The raw body as input.
#'
#' @return `body` unchanged when it is already a list, otherwise the
#'   character vector converted with `as.list()`.
#'
#' @noRd
format_body_ <- function(body) {

  # Only lists and character vectors are accepted. `&&` is the scalar
  # operator for `if` conditions.
  if (!is.list(body) && !is.character(body)) {
    # Report the type of `body`; the previous template referred to `query`,
    # which does not exist in this function.
    msg <- sprintf(
      paste0(
        "The body of a POST request must be either a list or character vector.\n",
        "Provided body type: %s\n"
      ),
      typeof(body)
    )

    stop(msg, call.=FALSE)
  }

  if (!is.list(body)) {
    body <- as.list(body)
  }

  body
}
21 | 22 | It appears that KRS is the service that sits behind [KNMS](KNMS.html). KNMS allows matching of 23 | batches of names in one request but does not allow matching to different parts of a name. If you have a set of names to match and just want to do simple matching, I'd use KNMS. But if you want 24 | to specify which parts of the names to match on, I'd use KRS. 25 | 26 | 27 | ```{r setup} 28 | library(kewr) 29 | library(dplyr) 30 | library(tidyr) 31 | ``` 32 | 33 | ## Matching names 34 | 35 | To use KRS, you can just submit a single name for matching. 36 | 37 | ```{r} 38 | match <- match_krs("Solanum sanchez-vegae S.Knapp") 39 | match 40 | ``` 41 | 42 | This also works without the author string included: 43 | 44 | ```{r} 45 | match <- match_krs("Solanum sanchez-vegae") 46 | match 47 | ``` 48 | 49 | The match results can be converted to a table for easier inspection. 50 | 51 | ```{r} 52 | tidy(match) 53 | ``` 54 | 55 | ## Matching parts of a name 56 | 57 | The reconciliation service provides a specification for matching to different parts 58 | of a botanical name. This is described in detail [here](http://data1.kew.org/reconciliation/about/IpniName). 59 | 60 | For example, if we want to match to the genus name *Myrcia*, we could submit a 61 | simple request like before. 62 | 63 | ```{r} 64 | match <- match_krs("Myrcia") 65 | match 66 | ``` 67 | 68 | But this has returned more than one result. We can be more specific by matching 69 | to the genus and the author. 70 | 71 | ```{r} 72 | match <- match_krs(list(genus="Myrcia", author="DC")) 73 | match 74 | ``` 75 | 76 | Which has narrowed it down more. 77 | 78 | We can specify a match for every part of a name like this. 79 | 80 | ```{r} 81 | match <- match_krs(list(genus="Myrcia", species="magnolifolia", infra="latifolia", 82 | author="Berg")) 83 | match 84 | ``` 85 | 86 | This match has worked even though there's a minor misspelling of the specific epithet 87 | and the author string. 
Matching to the taxon name works by a set of pre-configured 88 | string transformations that catch some common mistakes in botanical names. The 89 | matching to author strings is also slightly fuzzy. 90 | 91 | This matching also handles different taxonomic ranks using ordered epithets, where 92 | the highest rank is specified as `epithet_1` down to `epithet_3`. 93 | 94 | ```{r} 95 | match <- match_krs(list(epithet_1="Solanaceae")) 96 | match 97 | ``` 98 | 99 | This also works for infrageneric names. 100 | 101 | ```{r} 102 | match <- match_krs(list(epithet_1="Acacia", epithet_2="Aculeiferum", author="Vassal")) 103 | match 104 | ``` 105 | 106 | It should be noted that these last two examples give a score lower than 100, 107 | because they return more than one match. 108 | 109 | ## Matching more than one name 110 | 111 | If you want to do simple matching to more than one name, it might be easier to use 112 | KNMS. 113 | 114 | If you want to match the individual parts of multiple names, you can apply the 115 | matching function to the rows of a data frame, using `dplyr::rowwise`. 
116 | 117 | ```{r} 118 | names <- tibble( 119 | genus=c("Poa", "Myrcia", "Solanum"), 120 | species=c("annua", "almasensis", "sanchez-vegae"), 121 | author=c("L.", "NicLugh.", "S.Knapp") 122 | ) 123 | 124 | 125 | matches <- 126 | names %>% 127 | rowwise() %>% 128 | mutate(match=list(match_krs(list(genus=genus, species=species, author=author)))) %>% 129 | mutate(match=list(tidy(match))) %>% 130 | unnest(cols=c(match)) 131 | 132 | matches 133 | ``` 134 | 135 | -------------------------------------------------------------------------------- /man/search_wcvp.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/wcvp.R 3 | \name{search_wcvp} 4 | \alias{search_wcvp} 5 | \title{Search WCVP for a taxon.} 6 | \usage{ 7 | search_wcvp(query, filters = NULL, cursor = "*", limit = 50, .wait = 0.1) 8 | } 9 | \arguments{ 10 | \item{query}{The taxon string to search WCVP for. If using keywords, 11 | the query must be formatted as a list.} 12 | 13 | \item{filters}{Filter to apply to search results. 14 | Multiple filters must be supplied as a character vector.} 15 | 16 | \item{cursor}{A cursor returned by a previous search. 17 | If used, the query and filter must be exactly the same.} 18 | 19 | \item{limit}{An integer specifying the maximum number of results 20 | to return.} 21 | 22 | \item{.wait}{Time to wait before making a request, to help 23 | rate limiting.} 24 | } 25 | \value{ 26 | Returns an object of class \code{wcvp_search} that is a simple 27 | structure with slots for: 28 | \itemize{ 29 | \item \code{total}: the total number of results held in WCVP for the query 30 | \item \code{cursor}: a cursor to retrieve the next page of results from the API. 31 | \item \code{limit}: the maximum number of results requested from the API. 32 | \item \code{results}: the query results parsed into a list. 33 | \item \code{query}: the query string submitted to the API. 
34 | \item \code{filter}: the filter strings submitted to the API. 35 | \item \code{response}: the \link[httr:response]{httr response object}. 36 | } 37 | } 38 | \description{ 39 | Query the World Checklist of Vascular Plants search API 40 | for a taxon string. 41 | } 42 | \details{ 43 | The \href{https://wcvp.science.kew.org/}{World Checklist of Vascular Plants (WCVP)} 44 | is a global consensus view of all known vascular plant species. 45 | It has been compiled by staff at RBG Kew in consultation with plant 46 | group experts. 47 | 48 | The search API allows users to query the checklist for plant names. 49 | Currently, it does not support partial or fuzzy matching. 50 | In order to get a result, the user must supply a valid name string. 51 | For example, 'Myrcia' and 'Myrcia guianensis' will return results, 52 | but 'M' or 'Myr' will not. 53 | 54 | There is some support for querying using keyword arguments. The API is 55 | not currently documented, so only keywords that are definitely there have 56 | been implemented. Use the \code{get_keywords} function to view a list of all implemented keywords. 57 | 58 | The API will return taxonomic information (the family, authority, status, and rank) 59 | of all names matching the query. These results can be limited, for example to accepted species, 60 | using filters. Use the \code{get_filters} function to view a list of all implemented filters. 
61 | } 62 | \examples{ 63 | # search for all entries containing a genus name 64 | search_wcvp("Myrcia") 65 | 66 | # search for all accepted species within a genus 67 | search_wcvp("Myrcia", filters=c("species", "accepted")) 68 | 69 | # search for up to 10,000 species in a genus 70 | search_wcvp("Poa", filters=c("species"), limit=10000) 71 | 72 | # search for all names in a family 73 | search_wcvp(list(family="Myrtaceae")) 74 | 75 | # search for genera within a family 76 | search_wcvp(list(family="Myrtaceae"), filters=c("genera")) 77 | 78 | # search for all names with a specific epithet 79 | search_wcvp(list(species="guianensis")) 80 | 81 | # search for a species name and print the results 82 | r <- search_wcvp("Myrcia guianensis", filters=c("species")) 83 | print(r) 84 | 85 | # simplify search results to a `tibble` 86 | r <- search_wcvp("Poa", filters=c("species")) 87 | tidy(r) 88 | 89 | # accepted name info is nested inside the records for synonyms 90 | # simplify accepted name info to the name ID 91 | r <- search_wcvp("Poa", filters=c("species")) 92 | tidied <- tidy(r) 93 | tidyr::unnest(tidied, cols=synonymOf, names_sep="_") 94 | 95 | } 96 | \references{ 97 | WCVP (2020). World Checklist of Vascular Plants, version 2.0. Facilitated by the Royal Botanic Gardens, Kew. Published on the Internet; http://wcvp.science.kew.org/ 98 | } 99 | \seealso{ 100 | \itemize{ 101 | \item \code{\link[=lookup_wcvp]{lookup_wcvp()}} to lookup information about a taxon name 102 | using a valid IPNI ID. 103 | \item \code{\link[=download_wcvp]{download_wcvp()}} to download the entire WCVP. 
104 | } 105 | 106 | Other WCVP functions: 107 | \code{\link{download_wcvp}()}, 108 | \code{\link{lookup_wcvp}()} 109 | } 110 | \concept{WCVP functions} 111 | -------------------------------------------------------------------------------- /vignettes/articles/retrieve-all-query-results.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Retrieving all results for a query" 3 | --- 4 | 5 | ```{r, include = FALSE} 6 | knitr::opts_chunk$set( 7 | collapse = TRUE, 8 | comment = "#>" 9 | ) 10 | ``` 11 | 12 | To reduce the load on the servers, some of the Kew resources limit the number of results returned for a query. This tutorial will demonstrate how to download all the results for a query in a way that (hopefully) shouldn't upset the servers. 13 | 14 | ```{r setup, message=FALSE, warning=FALSE} 15 | library(kewr) 16 | library(dplyr) 17 | ``` 18 | 19 | ## Increasing the maximum number of results returned 20 | 21 | Possibly the simplest option is to just tell the resource that you want more results. 22 | 23 | By default, the search functions in *kewr* set the maximum number of results to 50. You can increase this to whatever you want, to make sure you get all the results you want. 24 | 25 | For instance, I know for sure that there are fewer than 2000 accepted species in the genus *Myrcia*. If I want to get a list of all these species from WCVP, I can, therefore, increase the maximum number of results to 2000. 26 | 27 | ```{r max-limit-search} 28 | results <- search_wcvp(query=list(genus="Myrcia"), 29 | filters=c("accepted", "species"), 30 | limit=2000) 31 | results 32 | ``` 33 | 34 | We can see from the results object that we have a single page of results that contains the entries for all 748 accepted species in the genus. 35 | 36 | However, this only really works when two things are true: 37 | 38 | 1. You know for sure there aren't more results than a certain number. 39 | 2. That number isn't too big. 
40 | 41 | This strategy worked in this case because I knew there definitely weren't more than 2000 accepted species, and 2000 is a relatively small number as things go. If there are more results than I expected, I run the risk of missing some entries. If my expected number of results was too big, say 20,000 or even 200,000, the request might time-out before I get anything back. 42 | 43 | ### Advantages: 44 | 45 | * You only have to make one request. 46 | 47 | ### Disadvantages: 48 | 49 | * You could miss some entries if there are more than you expect. 50 | * You might not get any results back if you ask for too many. 51 | 52 | ## Making multiple requests to get multiple pages of results 53 | 54 | The other way to get all of your results is to iterate over all the pages of your request. 55 | 56 | Making multiple smaller requests avoids the request hanging because you asked for too much data. However, some resources could have rate-limiting enabled, which means they will block you if you make too many requests in a certain time period. Therefore, you need to balance the size of the request with the number that you're making. 57 | 58 | One way to make multiple requests is with a `for` loop. 59 | 60 | To get started, we'll make our first request outside of the for loop. This way, we can see how many pages we need to loop over. I've chosen a limit of 100 results per page here. 61 | 62 | ```{r paged-initial-search} 63 | query <- list(genus="Myrcia") 64 | filters <- c("accepted", "species") 65 | 66 | r <- search_wcvp(query, filters=filters, limit=100) 67 | r 68 | ``` 69 | 70 | Before we get the rest of the results in a `for` loop, it's worth tidying our first result into a data frame, which we'll use to add all our subsequent results to. 71 | 72 | ```{r} 73 | results <- tidy(r) 74 | ``` 75 | 76 | Now we can loop through and get the rest of our query results. 
77 | 78 | **IMPORTANT**: making too many requests in a short period of time to POWO can cause problems for the server. By default, 79 | the `request_next` function adds in a little waiting period before making a new request. But you might get back an error if 80 | you're asking for lot's of things one after the other. 81 | 82 | ```{r paged-loop-search} 83 | for (i in 2:r$pages) { 84 | r <- request_next(r) 85 | 86 | new_results <- tidy(r) 87 | results <- bind_rows(results, new_results) 88 | } 89 | 90 | head(results) 91 | ``` 92 | 93 | We can check we have all the results by looking at the length of 94 | our results data frame: 95 | 96 | ```{r check-results-length} 97 | nrow(results) 98 | ``` 99 | 100 | ### Advantages: 101 | 102 | * Smaller requests are less likely to time-out. 103 | * You don't have to know how many results you expect before you start. 104 | 105 | ### Disadvantages 106 | 107 | * Making too many requests could overload the server and get you blocked. 108 | -------------------------------------------------------------------------------- /vignettes/WCVP.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "WCVP" 3 | output: rmarkdown::html_vignette 4 | vignette: > 5 | %\VignetteIndexEntry{WCVP} 6 | %\VignetteEngine{knitr::rmarkdown} 7 | %\VignetteEncoding{UTF-8} 8 | --- 9 | 10 | ```{r, include = FALSE} 11 | knitr::opts_chunk$set( 12 | collapse = TRUE, 13 | comment = "#>" 14 | ) 15 | ``` 16 | 17 | ```{r setup, warning=FALSE, message=FALSE} 18 | library(kewr) 19 | library(here) 20 | library(dplyr) 21 | ``` 22 | 23 | The [World Checklist of Vascular Plants (WCVP)](https://wcvp.science.kew.org/) is a global consensus view of all known vascular plant species. It has been compiled by staff at RBG Kew in consultation with plant group experts. 24 | 25 | The WCVP is a taxonomic database, and as such contains information like the taxonomic status and synonymy of plant names. 
It can be used for a number of tasks, including searching for all records with a particular name, getting a list of all accepted species in a genus, or looking up the accepted name for a species. 26 | 27 | ## Searching WCVP for a taxon name 28 | 29 | Searching in WCVP works by exact matching. This means that a misspelled name will not return any results. 30 | 31 | For instance, searching for `Poa anua` gets nothing: 32 | 33 | ```{r} 34 | results <- search_wcvp("Poa anua") 35 | results 36 | ``` 37 | 38 | But searching for the correctly spelled name will give a result: 39 | 40 | ```{r} 41 | results <- search_wcvp("Poa annua") 42 | results 43 | ``` 44 | 45 | Similarly, searching with partial matching does not work: 46 | 47 | ```{r} 48 | results <- search_wcvp("Ulex e") 49 | results 50 | ``` 51 | 52 | But searching for a genus name will return all taxa within that genus as well: 53 | 54 | ```{r} 55 | results <- search_wcvp("Ulex") 56 | results 57 | ``` 58 | 59 | With this search result, we can see that there are 92 records for `Ulex` but the API has only returned the first 50. 60 | 61 | To get all results for this search, we can either increase the number of returned results: 62 | 63 | ```{r} 64 | ulex_all <- search_wcvp("Ulex", limit=100) 65 | ulex_all 66 | ``` 67 | 68 | Or advance the search by one page: 69 | 70 | ```{r} 71 | ulex_page1 <- search_wcvp("Ulex") 72 | ulex_page2 <- request_next(ulex_page1) 73 | ulex_page2 74 | ``` 75 | 76 | In both cases, we can tidy the results into a tibble to make subsequent analysis easier: 77 | 78 | ```{r} 79 | tidy(ulex_all) 80 | ``` 81 | 82 | ```{r} 83 | bind_rows( 84 | tidy(ulex_page1), 85 | tidy(ulex_page2) 86 | ) 87 | ``` 88 | 89 | ## Getting a list of accepted names in a genus 90 | 91 | The search function also accepts a number of keywords and filters that can be used to narrow down the results returned. A full list of keywords and filters can be found in the function documentation.
92 | 93 | An example use of these filters is retrieving a checklist of accepted species in a genus. In the search below, the `genus` keyword is used to retrieve all records in the genus *Myrcia*, while the filters `accepted` and `species` narrow these records down to just accepted species: 94 | 95 | ```{r} 96 | checklist <- search_wcvp(list(genus="Myrcia"), 97 | filters=c("accepted", "species"), 98 | limit=1000) 99 | 100 | checklist 101 | ``` 102 | 103 | ## Looking up accepted names 104 | 105 | As well as searching by name, the WCVP API can be used to look up taxonomic information for a known IPNI ID. The returned information can then be used to find all synonyms for a name, find the accepted name for a taxon, or find the parent/child taxa. 106 | 107 | The IPNI ID can be found in a few different ways, for example using the `KNMS` API to match a name to an IPNI ID. In this case we will find it using WCVP. 108 | 109 | ```{r} 110 | search_result <- search_wcvp("Calyptranthes acevedoi", 111 | filters=c("species")) 112 | 113 | ipni_id <- search_result$results[[1]]$id 114 | ipni_id 115 | ``` 116 | 117 | With this, we can look the record up. 118 | 119 | ```{r} 120 | taxon_info <- lookup_wcvp(ipni_id) 121 | taxon_info 122 | ``` 123 | From this record, we can see that the name we had is a Homotypic Synonym. This record also contains the taxonomic information for the current accepted name: 124 | 125 | ```{r} 126 | tidied <- tidy(taxon_info) 127 | tidied$accepted[[1]] 128 | ``` 129 | 130 | ## Downloading the whole WCVP 131 | 132 | There are cases where it might be easier to download all of the WCVP, rather than make lots of requests to it. The `download_wcvp` function lets you download the whole WCVP to whatever directory you want. 133 | 134 | ```{r, eval=FALSE} 135 | save_path <- here() 136 | download_wcvp(save_path) 137 | ``` 138 | Previous versions of the WCVP are also available, for posterity and reproducibility. These can be downloaded by specifying which version you want.
139 | 140 | ```{r, eval=FALSE} 141 | download_wcvp(save_path, version=1) 142 | ``` 143 | 144 | -------------------------------------------------------------------------------- /vignettes/articles/building-checklist.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Building a species checklist" 3 | --- 4 | 5 | ```{r, include = FALSE} 6 | knitr::opts_chunk$set( 7 | collapse = TRUE, 8 | comment = "#>" 9 | ) 10 | ``` 11 | 12 | A common task that Plants of the World Online (POWO) can be used for is to build a checklist of native species for a country. 13 | 14 | In this demonstration, we will: 15 | 16 | 1. Request a list of all accepted species that occur in a country. 17 | 2. Get the native distribution of all those species. 18 | 3. Narrow our checklist down to native species. 19 | 4. Build another checklist for endemic species. 20 | 21 | ## Setup 22 | 23 | In addition to *kewr*, we'll load: 24 | 25 | * *dplyr* to manipulate the data 26 | * *tidyr* to reshape data frames 27 | * *purrr* to map functions across items in a list 28 | * *progress* to add a progress bar 29 | * *stringr* to extract some data from strings 30 | 31 | ```{r setup, message=FALSE, warning=FALSE} 32 | library(kewr) 33 | library(dplyr) 34 | library(tidyr) 35 | library(purrr) 36 | library(progress) 37 | library(stringr) 38 | ``` 39 | 40 | ## 1. Requesting a list of accepted species 41 | 42 | We'll get our list of accepted species for Iceland, using the POWO search API. 43 | 44 | ```{r iceland-species-query} 45 | query <- list(distribution="Iceland") 46 | filters <- c("accepted", "species") 47 | 48 | iceland_species <- search_powo(query, filters=filters, limit=1000) 49 | ``` 50 | 51 | In total, we have this many accepted species in Iceland: 52 | 53 | ```{r display-total} 54 | iceland_species$total 55 | ``` 56 | 57 | ## 2. 
Get the native distribution of all the species 58 | 59 | To get the native distribution for all our species, we need to use POWO's lookup API for every single one. 60 | 61 | First we'll extract a list of IDs from our results, using the `map` function from *purrr*. 62 | 63 | ```{r extract-species-id} 64 | ids <- map(iceland_species$results, ~str_extract(.x$fqId, "[\\d\\-]+$")) 65 | ``` 66 | 67 | Then we need to make all of our requests. To make things easier, we'll define a simple function that just accepts a species ID, and makes use of a progress bar to track our requests! 68 | 69 | ```{r download-distributions} 70 | pb <- progress_bar$new( 71 | format=" requesting [:bar] :current/:total (:percent)", 72 | total=length(ids) 73 | ) 74 | 75 | fcn <- function(id) { 76 | pb$tick() 77 | 78 | lookup_powo(id, distribution=TRUE) 79 | } 80 | 81 | iceland_records <- map(ids, fcn) 82 | ``` 83 | 84 | Now we have all the records for our species, we can tidy them as a data frame to make subsequent analysis a bit easier. 85 | 86 | ```{r tidy-results} 87 | iceland_checklist <- map_dfr(iceland_records, tidy) 88 | iceland_checklist 89 | ``` 90 | 91 | ## 3. Narrow the checklist to native species 92 | 93 | To narrow our species down, we'll add an extra column to indicate if a species is native to Iceland or not. This will let us filter our data using that column. 94 | 95 | I've done this below in a single, chained command by using the pipe (`%>%`) operator from *dplyr*. I've also taken advantage of the `rowwise` feature in the newer versions of *dplyr*. 
96 | 97 | ```{r filter-native} 98 | check_native <- function(dist, country="Iceland") { 99 | native_dist <- dist$natives[[1]] 100 | 101 | country %in% native_dist$name 102 | } 103 | 104 | iceland_checklist <- 105 | iceland_checklist %>% 106 | rowwise() %>% 107 | mutate(is_native=check_native(distribution)) %>% 108 | ungroup() %>% 109 | filter(is_native) 110 | ``` 111 | 112 | Now all we have to do is tidy up our data frame by removing any columns we don't want anymore. 113 | 114 | ```{r tidy-native-checklist} 115 | iceland_checklist <- 116 | iceland_checklist %>% 117 | select(fqId, name, authors, taxonomicStatus, plantae, fungi, 118 | kingdom, phylum, family, genus, species) 119 | 120 | iceland_checklist 121 | ``` 122 | 123 | ## 4. Build a checklist of endemic species 124 | 125 | We can use our results from before to narrow the list down further to just species that are endemic to Iceland. 126 | 127 | ```{r filter-endemic} 128 | check_endemic <- function(dist, country="Iceland") { 129 | native_dist <- dist$natives[[1]] 130 | 131 | native <- country %in% native_dist$name 132 | endemic <- length(native_dist$name) == 1 133 | 134 | native & endemic 135 | } 136 | 137 | iceland_endemics <- map_dfr(iceland_records, tidy) 138 | 139 | iceland_endemics <- 140 | iceland_endemics %>% 141 | rowwise() %>% 142 | mutate(is_endemic=check_endemic(distribution)) %>% 143 | ungroup() %>% 144 | filter(is_endemic) %>% 145 | select(fqId, name, authors, taxonomicStatus, plantae, fungi, 146 | kingdom, phylum, family, genus, species) 147 | ``` 148 | 149 | How do the number of species in each list compare? 
150 | 151 | ```{r compare-checklist-lengths} 152 | paste("native species: ", nrow(iceland_checklist)) 153 | paste("endemic species: ", nrow(iceland_endemics)) 154 | ``` 155 | -------------------------------------------------------------------------------- /vignettes/ToL.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "ToL" 3 | output: rmarkdown::html_vignette 4 | vignette: > 5 | %\VignetteIndexEntry{ToL} 6 | %\VignetteEngine{knitr::rmarkdown} 7 | %\VignetteEncoding{UTF-8} 8 | --- 9 | 10 | ```{r, include = FALSE} 11 | knitr::opts_chunk$set( 12 | collapse = TRUE, 13 | comment = "#>" 14 | ) 15 | ``` 16 | 17 | ```{r setup, warning=FALSE, message=FALSE} 18 | library(kewr) 19 | library(dplyr) 20 | library(tidyr) 21 | ``` 22 | 23 | 24 | The [Tree of Life](https://treeoflife.kew.org/) is a database of specimens sequenced as part of Kew's efforts to build a comprehensive evolutionary tree of life for flowering plants. 25 | 26 | This package accesses data from the Tree of Life Explorer, an output of the Plant and Fungal Trees of Life Project (PAFTOL). The data in the Tree of Life is generated by target sequence capture using the universal Angiosperm353 probe set. 27 | 28 | The Tree of Life contains information about specimens that have been sequenced and genes recovered in the process. It lets you download sequence data for the specimens, as well as alignments and trees for the genes. 29 | 30 | ## Viewing the Tree of Life 31 | 32 | The [Tree of Life Explorer](https://treeoflife.kew.org/tree-of-life) lets users view the tree of life constructed from the current dataset of samples. 33 | 34 | You can view it using `kewr` by loading it in: 35 | 36 | ```{r} 37 | tree <- load_tol() 38 | tree 39 | ``` 40 | 41 | This reads it as a single string, so you need to use other packages to parse it and view it (e.g, [ape](http://ape-package.ird.fr/)). 
42 | 43 | ## Searching ToL for specimens 44 | 45 | The Tree of Life contains information about the specimens that have been sequenced to construct the tree. The long-term aim is to sample at least one species from every flowering plant genus. This means that, typically, there will be one specimen per species. 46 | 47 | You can search this information using the `search_tol` function. There is no filtering or keyword-search functionality, so queries are just the name of an order/family/genus/species. For example, to get all specimens for the genus *Myrcia*: 48 | 49 | ```{r} 50 | specimens <- search_tol("Myrcia") 51 | specimens 52 | ``` 53 | 54 | This searching works by exact matching, and the taxonomy follows [WCVP](https://wcvp.science.kew.org/) so only accepted names will work. For example, if we misspell *Myrcia* we get nothing: 55 | 56 | ```{r} 57 | search_tol("Mercya") 58 | ``` 59 | 60 | And if we search for an outdated synonym we get nothing: 61 | 62 | ```{r} 63 | search_tol("Gomidesia") 64 | ``` 65 | 66 | But searching using higher taxonomy will work: 67 | 68 | ```{r} 69 | specimens <- search_tol("Myrtaceae") 70 | specimens 71 | ``` 72 | 73 | To get all these results, we can either increase the limit in the search function: 74 | 75 | ```{r} 76 | myrts_all <- search_tol("Myrtaceae", limit=500) 77 | myrts_all 78 | ``` 79 | Or do paged searching: 80 | 81 | ```{r} 82 | myrts1 <- search_tol("Myrtaceae") 83 | myrts2 <- request_next(myrts1) 84 | myrts2 85 | ``` 86 | 87 | And we can tidy our results into a dataframe: 88 | 89 | ```{r} 90 | tidied <- tidy(myrts_all) 91 | tidied 92 | ``` 93 | 94 | Some information is nested inside the tidied dataframe, but we can get to it by unnesting: 95 | 96 | ```{r} 97 | tidied %>% 98 | select(id, raw_reads, taxonomy) %>% 99 | unnest(col=c(taxonomy, raw_reads), names_sep="_") 100 | ``` 101 | 102 | ## Getting gene information 103 | 104 | The Tree of Life also contains information about the genes captured during sequencing.
These can be accessed using the `search_tol` function: 105 | 106 | ```{r} 107 | genes_all <- search_tol(genes=TRUE, limit=500) 108 | tidy(genes_all) 109 | ``` 110 | 111 | But they cannot currently be queried, so the best bet is just to grab all of them. 112 | 113 | ## Looking up a record 114 | 115 | Information about a single specimen or gene can be looked up using their ID: 116 | 117 | ```{r} 118 | specimen <- lookup_tol("2660") 119 | specimen 120 | ``` 121 | ```{r} 122 | gene <- lookup_tol("51", type="gene") 123 | gene 124 | ``` 125 | ## Loading data 126 | 127 | Records returned by `search_tol` and `lookup_tol` contain links to data files on an SFTP server. You can load these into R using the `load_tol` function. As you saw at the top of this vignette, if you don't provide any URL to `load_tol`, it will load the whole Tree of Life tree file. 128 | 129 | To load a sequence file for a particular specimen: 130 | 131 | ```{r} 132 | load_tol(specimen$fasta_file_url) 133 | ``` 134 | 135 | To load a sequence file for a gene: 136 | ```{r} 137 | load_tol(gene$fasta_file_url) 138 | ``` 139 | 140 | Or the alignment file: 141 | ```{r} 142 | load_tol(gene$alignment_file_url) 143 | ``` 144 | 145 | Or the gene tree: 146 | ```{r} 147 | load_tol(gene$tree_file_url) 148 | ``` 149 | 150 | All files are returned as strings, so you will need to parse them to use them downstream. 151 | 152 | If you want to download these files directly, you can use the `download_tol` function. 
153 | -------------------------------------------------------------------------------- /README.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | output: github_document 3 | --- 4 | 5 | 6 | 7 | ```{r, include = FALSE} 8 | knitr::opts_chunk$set( 9 | collapse = TRUE, 10 | comment = "#>", 11 | fig.path = "man/figures/README-", 12 | out.width = "100%" 13 | ) 14 | ``` 15 | 16 | # kewr 17 | 18 | 19 | [![R build status](https://github.com/barnabywalker/kewr/workflows/R-CMD-check/badge.svg)](https://github.com/barnabywalker/kewr/actions) 20 | 21 | 22 | An R package to access data from RBG Kew's APIs. 23 | 24 | ## Overview 25 | 26 | kewr is meant to make accessing data from RBG Kew easier and to provide a consistent interface to their public APIs. 27 | 28 | This package should cover: 29 | 30 | - [x] [World Checklist of Vascular Plants](https://wcvp.science.kew.org/) 31 | - [x] [Plants of the World Online](http://powo.science.kew.org/) 32 | - [x] [International Plant Names Index](https://www.ipni.org/) 33 | - [x] [Kew Names Matching Service](http://namematch.science.kew.org/) 34 | - [x] [Kew's Tree of Life](https://treeoflife.kew.org) 35 | - [x] [Kew Reconciliation Service](http://data1.kew.org/reconciliation/about/IpniName) 36 | 37 | New sources will be added as they come up. 38 | 39 | ## Installation 40 | 41 | kewr is not on CRAN yet but you can install the latest development version from GitHub: 42 | 43 | ``` r 44 | # install.packages("devtools") 45 | devtools::install_github("barnabywalker/kewr") 46 | ``` 47 | 48 | ## Usage 49 | 50 | Functions in this package all start with a prefix specifying what action you want to perform and a suffix referring to the resource. 51 | 52 | Four of the resources (POWO, WCVP, IPNI, and ToL) are databases storing flora, taxonomic, nomenclatural, or genetic information. These four resources all have a `search_*` and a `lookup_*` function.
53 | 54 | ### Retrieving records 55 | 56 | The `lookup_` functions can be used to retrieve a particular record by its unique IPNI ID: 57 | 58 | ``` r 59 | lookup_powo("320035-2") 60 | lookup_wcvp("320035-2") 61 | lookup_ipni("320035-2") 62 | ``` 63 | 64 | IPNI contains records for authors and publications, which can also be retrieved using the `lookup_ipni` function: 65 | 66 | ``` r 67 | lookup_ipni("20885-1", type="author") 68 | lookup_ipni("987-2", type="publication") 69 | ``` 70 | 71 | The ToL uses its own ID system. These IDs can be found by first searching the database. 72 | 73 | ``` r 74 | lookup_tol("2717") 75 | ``` 76 | 77 | ### Searching databases 78 | 79 | All four of these databases can be searched as well: 80 | 81 | ``` r 82 | search_powo("Poa annua") 83 | search_wcvp("Poa annua") 84 | search_ipni("Poa annua") 85 | search_tol("Poa annua") 86 | ``` 87 | And all, except the ToL, use filters and keywords for more advanced searches: 88 | 89 | ``` r 90 | search_powo(list(genus="Poa", distribution="Madagascar"), 91 | filters=c("accepted", "species")) 92 | search_wcvp(list(genus="Poa"), filters=c("accepted", "species")) 93 | search_ipni(list(genus="Poa", published=1920), 94 | filters=c("species")) 95 | ``` 96 | 97 | The number of search results returned are determined by the `limit` keyword: 98 | 99 | ```r 100 | search_powo(list(genus="Poa"), limit=20) 101 | search_wcvp(list(genus="Poa"), limit=20) 102 | search_ipni(list(genus="Poa"), limit=20) 103 | search_tol("Poa", limit=20) 104 | ``` 105 | 106 | The next page for a set of search results can be requested using the `request_next` function: 107 | 108 | ```r 109 | results <- search_powo(list(genus="Poa")) 110 | request_next(results) 111 | ``` 112 | 113 | ### Loading data from ToL 114 | 115 | Tree and gene data can be loaded directly from ToL into R. 116 | 117 | For instance, you can load the whole Tree of Life. 118 | ``` r 119 | load_tol() 120 | ``` 121 | 122 | Or a gene tree for a particular gene. 
123 | ``` r 124 | gene_info <- lookup_tol("51", type="gene") 125 | load_tol(gene_info$tree_file_url) 126 | ``` 127 | 128 | Or a FASTA file for a specimen. 129 | ``` r 130 | specimen_info <- lookup_tol("1296") 131 | load_tol(specimen_info$fasta_file_url) 132 | ``` 133 | 134 | ### Downloading from the ToL 135 | 136 | The corresponding files can also be downloaded for use later or in other programmes. 137 | 138 | ``` r 139 | specimen_info <- lookup_tol("1296") 140 | download_tol(specimen_info$fasta_file_url) 141 | ``` 142 | 143 | ### Downloading the WCVP 144 | 145 | The whole of WCVP can be downloaded to a directory using: 146 | 147 | ``` r 148 | download_wcvp() 149 | ``` 150 | 151 | ### Matching names 152 | 153 | The KNMS resource is only used for matching names to records in POWO/WCVP: 154 | 155 | ```r 156 | match_knms(c("Poa annua", "Magnolia grandifolia", "Bulbophyllum sp.")) 157 | ``` 158 | 159 | Single names can also be matched to IPNI using the KRS resource. 160 | 161 | ``` r 162 | match_krs("Poa annua") 163 | ``` 164 | 165 | KRS is slower for matching many names, as a request needs to be made for each one. 166 | But it has the advantage of allowing more complex matching: 167 | 168 | ```r 169 | match_krs(list(genus="Solanum", species="sanchez-vegae", author="S.Knapp")) 170 | ``` 171 | 172 | ### Tidying results 173 | 174 | Each function in this package returns an object that stores the original response as well as the content of the response parsed into a list. This is to give the user as much flexibility as possible and to make debugging things a bit easier.
175 | 176 | But this can be hard to use, so all the results objects can be tidied as a `tibble`: 177 | 178 | ``` r 179 | results <- search_powo("Poa annua") 180 | tidy(results) 181 | ``` 182 | 183 | ## Citing 184 | 185 | You can get information about how to cite `kewr` by using: 186 | 187 | ```r 188 | citation("kewr") 189 | ``` 190 | 191 | You can also get the citation to use for each data service using the different results objects: 192 | 193 | ``` r 194 | r <- search_wcvp("Poa") 195 | kew_citation(r) 196 | ``` 197 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | # kewr 5 | 6 | 7 | 8 | [![R build 9 | status](https://github.com/barnabywalker/kewr/workflows/R-CMD-check/badge.svg)](https://github.com/barnabywalker/kewr/actions) 10 | 11 | 12 | An R package to access data from RBG Kew’s APIs. 13 | 14 | ## Overview 15 | 16 | kewr is meant to make accessing data from RBG Kew easier and to 17 | provide a consistent interface to their public APIs. 18 | 19 | This package should cover: 20 | 21 | - [x] [World Checklist of Vascular 22 | Plants](https://wcvp.science.kew.org/) 23 | - [x] [Plants of the World Online](http://powo.science.kew.org/) 24 | - [x] [International Plant Names Index](https://www.ipni.org/) 25 | - [x] [Kew Names Matching Service](http://namematch.science.kew.org/) 26 | - [x] [Kew’s Tree of Life](https://treeoflife.kew.org) 27 | - [x] [Kew Reconciliation 28 | Service](http://data1.kew.org/reconciliation/about/IpniName) 29 | 30 | New sources will be added as they come up.
31 | 32 | ## Installation 33 | 34 | kewr is not on CRAN yet but you can install the latest development 35 | version from GitHub: 36 | 37 | ``` r 38 | # install.packages("devtools") 39 | devtools::install_github("barnabywalker/kewr") 40 | ``` 41 | 42 | ## Usage 43 | 44 | Functions in this package all start with a prefix specifying what action 45 | you want to perform and a suffix referring to the resource. 46 | 47 | Four of the resources (POWO, WCVP, IPNI, and ToL) are databases storing 48 | flora, taxonomic, nomenclatural, or genetic information. These four 49 | resources all have a `search_*` and a `lookup_*` function. 50 | 51 | ### Retrieving records 52 | 53 | The `lookup_` functions can be used to retrieve a particular record by 54 | its unique IPNI ID: 55 | 56 | ``` r 57 | lookup_powo("320035-2") 58 | lookup_wcvp("320035-2") 59 | lookup_ipni("320035-2") 60 | ``` 61 | 62 | IPNI contains records for authors and publications, which can also be 63 | retrieved using the `lookup_ipni` function: 64 | 65 | ``` r 66 | lookup_ipni("20885-1", type="author") 67 | lookup_ipni("987-2", type="publication") 68 | ``` 69 | 70 | The ToL uses its own ID system. These IDs can be found by first 71 | searching the database.
72 | 73 | ``` r 74 | lookup_tol("2717") 75 | ``` 76 | 77 | ### Searching databases 78 | 79 | All four of these databases can be searched as well: 80 | 81 | ``` r 82 | search_powo("Poa annua") 83 | search_wcvp("Poa annua") 84 | search_ipni("Poa annua") 85 | search_tol("Poa annua") 86 | ``` 87 | 88 | And all, except the ToL, use filters and keywords for more advanced 89 | searches: 90 | 91 | ``` r 92 | search_powo(list(genus="Poa", distribution="Madagascar"), 93 | filters=c("accepted", "species")) 94 | search_wcvp(list(genus="Poa"), filters=c("accepted", "species")) 95 | search_ipni(list(genus="Poa", published=1920), 96 | filters=c("species")) 97 | ``` 98 | 99 | The number of search results returned are determined by the `limit` 100 | keyword: 101 | 102 | ``` r 103 | search_powo(list(genus="Poa"), limit=20) 104 | search_wcvp(list(genus="Poa"), limit=20) 105 | search_ipni(list(genus="Poa"), limit=20) 106 | search_tol("Poa", limit=20) 107 | ``` 108 | 109 | The next page for a set of search results can be requested using the 110 | `request_next` function: 111 | 112 | ``` r 113 | results <- search_powo(list(genus="Poa")) 114 | request_next(results) 115 | ``` 116 | 117 | ### Loading data from ToL 118 | 119 | Tree and gene data can be loaded directly from ToL into R. 120 | 121 | For instance, you can load the whole Tree of Life. 122 | 123 | ``` r 124 | load_tol() 125 | ``` 126 | 127 | Or a gene tree for a particular gene. 128 | 129 | ``` r 130 | gene_info <- lookup_tol("51", type="gene") 131 | load_tol(gene_info$tree_file_url) 132 | ``` 133 | 134 | Or a FASTA file for a specimen. 135 | 136 | ``` r 137 | specimen_info <- lookup_tol("1296") 138 | load_tol(specimen_info$fasta_file_url) 139 | ``` 140 | 141 | ### Downloading from the ToL 142 | 143 | The corresponding files can also be downloaded for use later or in other 144 | programmes. 
145 | 146 | ``` r 147 | specimen_info <- lookup_tol("1296") 148 | download_tol(specimen_info$fasta_file_url) 149 | ``` 150 | 151 | ### Downloading the WCVP 152 | 153 | The whole of WCVP can be downloaded to a directory using: 154 | 155 | ``` r 156 | download_wcvp() 157 | ``` 158 | 159 | ### Matching names 160 | 161 | The KNMS resource is only used for matching names to records in 162 | POWO/WCVP: 163 | 164 | ``` r 165 | match_knms(c("Poa annua", "Magnolia grandifolia", "Bulbophyllum sp.")) 166 | ``` 167 | 168 | Single names can also be matched to IPNI using the KRS resource. 169 | 170 | ``` r 171 | match_krs("Poa annua") 172 | ``` 173 | 174 | KRS is slower for matching many names, as a request needs to be made for 175 | each one. But it has the advantage of allowing more complex matching: 176 | 177 | ``` r 178 | match_krs(list(genus="Solanum", species="sanchez-vegae", author="S.Knapp")) 179 | ``` 180 | 181 | ### Tidying results 182 | 183 | Each function in this package returns an object that stores the original 184 | response as well as the content of the response parsed into a list. This 185 | is to give the user as much flexibility as possible and to make 186 | debugging things a bit easier. 187 | 188 | But this can be hard to use, so all the results objects can be tidied as 189 | a `tibble`: 190 | 191 | ``` r 192 | results <- search_powo("Poa annua") 193 | tidy(results) 194 | ``` 195 | 196 | ## Citing 197 | 198 | You can get information about how to cite `kewr` by using: 199 | 200 | ``` r 201 | citation("kewr") 202 | ``` 203 | 204 | You can also get the citation to use for each data service using the 205 | different results objects: 206 | 207 | r <- search_wcvp("Poa") 208 | kew_citation(r) 209 | -------------------------------------------------------------------------------- /R/ipni.R: -------------------------------------------------------------------------------- 1 | #' Search IPNI.
2 | #' 3 | #' Query the International Plant Names Index for nomenclatural information. 4 | #' 5 | #' The [International Plant Names Index (IPNI)](https://www.ipni.org/) 6 | #' is a service that provides nomenclatural information for vascular plant names. 7 | #' 8 | #' The search API allows users to query the database for plant names, 9 | #' as well as authors and publications. There may be limited support for 10 | #' some sort of fuzzy matching. 11 | #' 12 | #' There is some support for querying using keyword arguments. Documentation for 13 | #' the API is currently available in the [`pykew` package](https://github.com/RBGKew/pykew/blob/master/pykew/ipni_terms.py), 14 | #' so keywords have been copied across from there. There are sets of keywords 15 | #' relating to plants, authors, and publications. A full list of keywords can be viewed 16 | #' using the `get_keywords` function. 17 | #' 18 | #' The API will return nomenclatural information (publication date, nomenclatural status, author, etc.) 19 | #' of all names matching the query. These results can be limited, for example to only family names, 20 | #' using filters. Use the `get_filters` function to get a list of all implemented filters. 21 | #' 22 | #' @param query The string to query IPNI with. If using keywords, the query 23 | #' must be formatted as a list. 24 | #' @param filters Filter to apply to search results. 25 | #' @param cursor A cursor returned by a previous search. 26 | #' If used, the query and filter must be exactly the same. 27 | #' @param limit The maximum number of records to return. This has a maximum of 1000. 28 | #' @param .wait Time to wait before making a request, to help 29 | #' rate limiting. 30 | #' 31 | #' @return 32 | #' Returns an object of class `ipni_search` that is a simple 33 | #' structure with slots for: 34 | #' 35 | #' * `total`: the total number of results held in IPNI for the query. 36 | #' * `pages`: the total number of results pages for the query.
37 | #' * `limit`: the maximum number of results requested from the API, per page. 38 | #' * `cursor`: a cursor to retrieve the next page of results from the API. 39 | #' * `results`: the query results parsed into a list. 40 | #' * `query`: the query submitted to the API, exactly as supplied by the caller; the filters used are stored alongside it in a `filters` slot. 41 | #' * `response`: the [httr response object][httr::response]. 42 | #' 43 | #' @examples 44 | #' # search for all names containing Poa annua 45 | #' results <- search_ipni("Poa annua") 46 | #' 47 | #' # tidy search results in a table 48 | #' tidy(results) 49 | #' 50 | #' # extract author team information for the search results 51 | #' results_tbl <- tidy(results) 52 | #' tidyr::unnest(results_tbl, cols=c(authorTeam), names_sep="_") 53 | #' 54 | #' # filter results to only species names 55 | #' species_results <- search_ipni("Poa annua", filters="species") 56 | #' tidy(species_results) 57 | #' 58 | #' # search for species from Mexico published in 1989 59 | #' q <- list(published="1989", distribution="Mexico") 60 | #' f <- "species" 61 | #' results <- search_ipni(q, filters=f) 62 | #' tidy(results) 63 | #' 64 | #' # search for an author by surname 65 | #' author_results <- search_ipni(list(author_surname="Gardiner")) 66 | #' tidy(author_results) 67 | #' 68 | #' @family IPNI functions 69 | #' @seealso 70 | #' * [lookup_ipni()] to look up a name using an IPNI ID.
71 | #' 72 | #' @export 73 | search_ipni <- function(query, filters=NULL, cursor="*", limit=50, .wait=0.1) { 74 | url <- ipni_search_url_() 75 | 76 | # keep the caller's query so it can be returned in the result object; `query` is overwritten below 77 | original_query <- query 78 | 79 | query <- format_query_(query, "ipni") 80 | 81 | query$perPage <- limit 82 | query$cursor <- cursor 83 | query$f <- format_filters_(filters, "ipni") 84 | 85 | results <- make_request_(url, query, .wait=.wait) 86 | 87 | structure( 88 | list( 89 | total=results$content$totalResults, 90 | pages=results$content$totalPages, 91 | cursor=results$content$cursor, 92 | limit=results$content$perPage, 93 | results=results$content$results, 94 | query=original_query, 95 | filters=filters, 96 | response=results$response 97 | ), 98 | class=c("ipni_search", "ipni") 99 | ) 100 | } 101 | 102 | #' Look up a name in IPNI. 103 | #' 104 | #' Request the record for a taxon, author, or publication name in IPNI, 105 | #' using the IPNI ID. 106 | #' 107 | #' The [International Plant Names Index (IPNI)](https://www.ipni.org/) 108 | #' is a service that provides nomenclatural information for vascular plant names. 109 | #' 110 | #' The name lookup API allows users to retrieve information for 111 | #' a specific taxon, author, or publication name using the unique IPNI ID. If this is not known, 112 | #' it can be found out using the [IPNI search API][kewr::search_ipni]. 113 | #' 114 | #' @param id A string containing a valid IPNI ID. 115 | #' @param type The type of record to look up. Either `taxon`, `author`, or `publication`. 116 | #' @param .wait Time to wait before making a request, to help 117 | #' rate limiting. 118 | #' 119 | #' @return An object of class `ipni_<recordType>` (for example `ipni_taxon`), which is a simple structure with fields 120 | #' for each of the fields returned by the lookup API, as well as the [httr response object][httr::response] and the queried ID (`queryId`).
121 | #' 122 | #' @examples 123 | #' 124 | #' # retrieve nomenclatural information for a taxon name 125 | #' name <- lookup_ipni("271445-2", "taxon") 126 | #' print(name) 127 | #' 128 | #' # tidy the results in a table 129 | #' tidy(name) 130 | #' 131 | #' # retrieve nomenclatural information for an author 132 | #' name <- lookup_ipni("20028192-1", type="author") 133 | #' tidy(name) 134 | #' 135 | #' # retrieve nomenclatural information for a publication 136 | #' name <- lookup_ipni("987-2", type="publication") 137 | #' tidy(name) 138 | #' 139 | #' @family IPNI functions 140 | #' @seealso 141 | #' * [search_ipni()] to search IPNI using a taxon name. 142 | #' 143 | #' @export 144 | lookup_ipni <- function(id, type=c("taxon", "author", "publication"), .wait=0.1) { 145 | type <- match.arg(type) 146 | 147 | url <- ipni_lookup_url_(id, type) 148 | 149 | result <- make_request_(url, query=NULL, .wait=.wait) 150 | 151 | # this might be better if things were explicitly listed 152 | record <- result$content 153 | record$response <- result$response 154 | record$queryId <- id 155 | 156 | structure( 157 | record, 158 | class=c(paste0("ipni_", record$recordType), "ipni") 159 | ) 160 | } 161 | 162 | #' Make the IPNI search URL. 163 | #' 164 | #' @noRd 165 | ipni_search_url_ <- function() { 166 | base <- get_url_("ipni") 167 | 168 | paste0(base, "/search") 169 | } 170 | 171 | #' Make the IPNI name lookup URL. 172 | #' 173 | #' @param id A valid IPNI ID. 174 | #' @param type The type of name to lookup. 
#'
#' @noRd
#'
#' @importFrom glue glue
ipni_lookup_url_ <- function(id, type) {
  # Single-letter path segment the API uses for each record type.
  code <- switch(type, taxon="n", author="a", publication="p")
  root <- get_url_("ipni")

  glue("{root}/{code}/{id}")
}
--------------------------------------------------------------------------------
/R/requests.R:
--------------------------------------------------------------------------------
#' Get the names of valid keywords for querying a resource.
#'
#' @param resource The resource being queried.
#'
#' @return A named character vector of keywords.
#'
#' @importFrom glue glue
#'
#' @noRd
get_keywords_ <- function(resource=c("wcvp", "powo", "ipni", "tol", "krs")) {
  resource <- match.arg(resource)

  if (identical(resource, "tol")) {
    stop(glue("Keyword-based search not implemented for resource: {resource}"))
  }

  # Names are the keywords accepted by this package; values are the
  # corresponding terms sent to each API.
  keywords <- list(
    wcvp=c(
      family="family",
      genus="genus",
      species="species"
    ),
    powo=c(
      # name
      name="name",
      common_name="common name",
      family="family",
      genus="genus",
      species="species",
      author="author",
      # characteristics
      characteristic="characteristic",
      summary="summary",
      appearance="appearance",
      flower="flower",
      fruit="fruit",
      leaf="leaf",
      inflorescence="inflorescence",
      seed="seed",
      cloning="cloning",
      use="use",
      # geography
      distribution="location"
    ),
    ipni=c(
      # name
      added="added",
      author="name author",
      basionym="basionym",
      basionym_author="basionym author",
      bibliographic_reference="bibliographic reference",
      citation_type="citation type",
      collection_number="collection number",
      collectors="collector team",
      distribution="distribution",
      family="family",
      full_name="full name",
      genus="genus",
      in_powo="in powo",
      infrafamily="infrafamily",
      infragenus="infragenus",
      infraspecies="infraspecies",
      modified="modified",
      name_status="name status",
      published="published",
      published_in="published in",
      publishing_author="publishing author",
      rank="rank",
      scientific_name="scientific name",
      species="species",
      species_author="species author",
      version="version",
      # author
      author_forename="author forename",
      author_full_name="author name",
      author_std_form="author std",
      author_surname="author surname",
      # publication
      pub_std_form="publication std",
      bph_number="bph number",
      pub_date="date",
      isbn="isbn",
      issn="issn",
      lc_number="lc number",
      preceded_by="preceded by",
      superceded_by="superceded by",
      title="publication title",
      tl2_author="tl2 author",
      tl2_number="tl2 number"
    ),
    krs=c(
      query="query",
      genus="epithet_1",
      species="epithet_2",
      infra="epithet_3",
      epithet_1="epithet_1",
      epithet_2="epithet_2",
      epithet_3="epithet_3",
      author="publishing_author",
      full_name="full_name",
      basionym_author="basionym_author"
    )
  )

  keywords[[resource]]
}

#' Get the names of valid filters for a resource.
#'
#' @param resource The resource being queried.
#'
#' @return A character vector of filter names.
#'
#' @importFrom glue glue
#'
#' @noRd
get_filters_ <- function(resource=c("wcvp", "powo", "ipni", "tol")) {
  resource <- match.arg(resource)

  if (identical(resource, "tol")) {
    stop(glue("Filters not implemented for resource: {resource}"))
  }

  # Names are the filters accepted by this package; values are the
  # corresponding terms sent to each API.
  filters <- list(
    wcvp=c(
      accepted="accepted",
      families="family",
      genera="generic",
      species="specific",
      infraspecies="infraspecific"
    ),
    powo=c(
      accepted="accepted_names",
      has_images="has_images",
      families="family_f",
      genera="genus_f",
      species="species_f",
      infraspecies="infraspecific_f"
    ),
    ipni=c(
      families="f_familial",
      genera="f_generic",
      species="f_specific",
      infraspecies="f_infraspecific",
      infragenera="f_infrageneric",
      infrafamilies="f_infrafamilial"
    )
  )

  filters[[resource]]
}

#' Get the base URL for a particular resource.
#'
#' @param resource Name of a Kew resource.
#' @return The base URL for the requested resource.
#'
#' @noRd
get_url_ <- function(resource=c("wcvp", "powo", "knms", "ipni", "tol", "krs")) {
  resource <- match.arg(resource)

  endpoints <- c(
    wcvp="https://wcvp.science.kew.org/api/v1",
    powo="http://www.plantsoftheworldonline.org/api/2",
    knms="http://namematch.science.kew.org/api/v2/powo/match",
    ipni="https://www.ipni.org/api/1",
    tol="https://treeoflife.kew.org/api",
    krs="http://data1.kew.org/reconciliation/reconcile/IpniName"
  )

  # `[[` drops the name, so a bare string is returned.
  endpoints[[resource]]
}

#' Get the package user agent.
#'
#' @noRd
#'
#' @importFrom httr user_agent
get_user_agent_ <- function() {
  user_agent("https://github.com/barnabywalker/kewr")
}

#' Make a request to a Kew resource.
#'
#' @param url The URL for the resource API.
#' @param query A list specifying a query.
#' @param body A list specifying an optional body.
#' @param json Whether to expect a json response or not, default TRUE.
#' @param method The request method to make, e.g. 'GET' or 'POST'.
#' @param .wait The time to wait before making the request,
#' to help with rate limiting.
#' @param .retries The max number of times to try a request before throwing
#' an error.
#'
#' @return A list containing the returned response object and
#' the response content parsed into a list.
#'
#' @noRd
#'
#' @import httr
#' @importFrom jsonlite fromJSON
#' @importFrom glue glue
make_request_ <- function(url, query=NULL, body=FALSE, json=TRUE, method="GET", .wait=0.1, .retries=1) {
  # Local names deliberately avoid shadowing `httr::user_agent()` and
  # `base::message()`.
  agent <- get_user_agent_()

  # Pause before every request to help with rate limiting.
  Sys.sleep(.wait)

  response <- RETRY(method, url, agent, query=query, body=body,
                    .times=.retries, encode="json", quiet=TRUE)

  if (http_error(response)) {
    code <- status_code(response)
    reason <- http_status(response)$message

    stop(
      glue("Request to '{url}' failed with code {code}: {reason}"),
      call.=FALSE
    )
  }

  # Scalar `&&` short-circuits, so the content type is only inspected when a
  # json response is expected.
  if (json && http_type(response) != "application/json") {
    stop("API did not return json", call.=FALSE)
  }

  # When json is FALSE the content is returned as the raw response text.
  parsed <- content(response, "text")
  if (json) {
    parsed <- fromJSON(parsed, simplifyVector=FALSE)
  }

  list(response=response, content=parsed)
}
--------------------------------------------------------------------------------
/R/powo.R:
--------------------------------------------------------------------------------
#' Search POWO.
#'
#' Query Plants of the World Online for taxon information.
#'
#' [Plants of the World Online (POWO)](http://www.plantsoftheworldonline.org/)
#' is a database of information on the world's flora.
It curates information from 7 | #' published floras and other sources of floristic information. 8 | #' 9 | #' The search API allows users to query the database using plant names, 10 | #' geographic terms, and floristic characters. These can be queried using 11 | #' keyword arguments. Use the `get_keywords` function for a list of all implemented keywords. 12 | #' 13 | #' The API returns taxonomic information as well as species descriptions and 14 | #' image locations if available. These results can be limited, for example to accepted species, 15 | #' using filters. Use the `get_filters` function to get a list of all implemented filters. 16 | #' 17 | #' Distributions in POWO are categorised using the [World Geographical Scheme for 18 | #' Recording Plant Distributions (WGSRPD)](https://www.tdwg.org/standards/wgsrpd/). 19 | #' Users can query POWO using distributions listed under WGSRPD levels 1 (continents), 20 | #' 2 (regions), and 3 (botanical countries). 21 | #' 22 | #' @param query The string to query POWO with. If using keywords, 23 | #' the query must be formatted as a list. 24 | #' @param filters Filter to apply to search results. 25 | #' Multiple filters must be supplied as a character vector. 26 | #' @param cursor A cursor returned by a previous search. 27 | #' If used, the query and filter must be exactly the same. 28 | #' @param limit The maximum number of records to return. 29 | #' @param .wait Time to wait before making a request, to help 30 | #' with rate limiting. 31 | #' 32 | #' @return 33 | #' Returns an object of class `powo_search` that is a simple 34 | #' structure with slots for: 35 | #' 36 | #' * `total`: the total number of results held in POWO for the query. 37 | #' * `pages`: the total number of results pages for the query. 38 | #' * `limit`: the maximum number of results requested from the API, per page. 39 | #' * `cursor`: a cursor to retrieve the next page of results from the API. 40 | #' * `results`: the query results parsed into a list. 
41 | #' * `query`: the query string submitted to the API. 42 | #' * `response`: the [httr response object][httr::response]. 43 | #' 44 | #' @examples 45 | #' # search for all entries containing a genus name 46 | #' search_powo("Myrcia") 47 | #' 48 | #' # search for all accepted species within a genus 49 | #' search_powo("Myrcia", filters=c("species", "accepted")) 50 | #' 51 | #' # search for up to 100 species in a genus 52 | #' search_powo("Poa", filters=c("species"), limit=100) 53 | #' 54 | #' # search for all names in a family 55 | #' search_powo(list(family="Myrtaceae")) 56 | #' 57 | #' # search for all accepted species with blue flowers 58 | #' search_powo(list(flower="blue"), filters=c("accepted", "species")) 59 | #' 60 | #' # search for all accepted genera in Mexico 61 | #' search_powo(list(distribution="Mexico"), filters=c("accepted", "genera")) 62 | #' 63 | #' # search for a species name and print the results 64 | #' r <- search_powo("Myrcia guianensis", filters=c("species")) 65 | #' print(r) 66 | #' 67 | #' # simplify search results to a `tibble` 68 | #' r <- search_powo("Poa", filters=c("species")) 69 | #' tidy(r) 70 | #' 71 | #' @family POWO functions 72 | #' @seealso 73 | #' * [lookup_powo()] to look up a taxon in POWO using the IPNI ID. 
#'
#' @export
search_powo <- function(query, filters=NULL, cursor=NULL, limit=50, .wait=0.2) {
  # Format a copy of the query for the API; the caller's `query` is left
  # untouched so that it can be echoed back in the result object.
  params <- format_query_(query, "powo")
  params$perPage <- limit
  params$cursor <- cursor
  params$f <- format_filters_(filters, "powo")

  reply <- make_request_(powo_search_url_(), params, .wait=.wait)
  payload <- reply$content

  out <- list(
    total=payload$totalResults,
    pages=payload$totalPages,
    limit=payload$perPage,
    cursor=payload$cursor,
    results=payload$results,
    query=query,
    filters=filters,
    response=reply$response
  )
  class(out) <- c("powo_search", "powo")

  out
}

#' Look up a taxon in POWO.
#'
#' Request the record for a taxon in Plants of the World Online (POWO)
#' using the IPNI ID.
#'
#' [Plants of the World Online (POWO)](http://www.plantsoftheworldonline.org/)
#' is a database of information on the world's flora. It curates information from
#' published floras and other sources of floristic information.
#'
#' The taxon lookup API allows users to retrieve information about
#' a specific taxon name using the unique IPNI ID. If this is not known,
#' it can be found out using the [POWO search API][kewr::search_powo].
#'
#' @param taxonid A string containing a valid IPNI ID.
#' @param distribution Include distribution in results (default `FALSE`).
#' @param .wait Time to wait before making a request, to help
#' rate limiting.
#'
#' @return A `powo_taxon` object, which is a simple structure with fields
#' for each of the fields returned by the lookup API, as well as the [httr response object][httr::response].
#'
#' @examples
#' # retrieve information for a taxon name
#' lookup_powo("271445-2")
#'
#' # print a summary of the returned information
#' r <- lookup_powo("271445-2")
#' print(r)
#'
#' # tidy returned record into a tibble
#' r <- lookup_powo("271445-2")
#' tidy(r)
#'
#' # tidy the returned list of synonyms into a tibble
#' r <- lookup_wcvp("60447743-2")
#' tidied <- tidy(r)
#' tidyr::unnest(tidied, cols=synonyms, names_sep="_")
#'
#' # tidy the returned list of children into a tibble
#' r <- lookup_wcvp("30000055-2")
#' tidied <- tidy(r)
#' tidyr::unnest(tidied, cols=children, names_sep="_")
#'
#' @family POWO functions
#' @seealso
#' * [search_powo()] to search POWO using a taxon name.
#'
#' @export
lookup_powo <- function(taxonid, distribution=FALSE, .wait=0.2) {
  url <- powo_taxon_url_(taxonid)

  # The `fields` parameter is only sent when distribution data is requested.
  query <- NULL
  if (distribution) {
    query <- list(fields="distribution")
  }

  result <- make_request_(url, query=query, .wait=.wait)

  # The parsed record is passed through wholesale (fields are not listed
  # explicitly), with the raw response and the requested ID attached.
  record <- result$content
  record$response <- result$response
  record$queryId <- taxonid

  structure(
    record,
    class=c("powo_taxon", "powo")
  )
}

#' Make the POWO search URL.
#'
#' @noRd
powo_search_url_ <- function() {
  base <- get_url_("powo")

  paste0(base, "/search")
}

#' Make the POWO taxon lookup URL.
#'
#' @param taxonid A valid IPNI ID, which is appended to the IPNI names LSID
#' prefix.
#'
#' @noRd
#'
#' @importFrom glue glue
powo_taxon_url_ <- function(taxonid) {
  base <- get_url_("powo")

  glue("{base}/taxon/urn:lsid:ipni.org:names:{taxonid}")
}
--------------------------------------------------------------------------------
/R/kew_citation.R:
--------------------------------------------------------------------------------
#' Get citation for Kew data resource.
#'
#' Given the result of a query to a Kew data resource, get the appropriate
#' citation.
#'
#' @param x Result of a call to [search_powo()], [lookup_powo()], [search_wcvp()],
#' [lookup_wcvp()], [search_ipni()], [lookup_ipni()], [search_tol()], [lookup_tol()],
#' [load_tol()], [match_knms()], or [match_krs()]
#'
#' @return A citation object with a print method for nice display.
#'
#' @examples
#' r <- search_powo(list(characteristic="poison"))
#' kew_citation(r)
#'
#' r <- lookup_powo("320035-2")
#' kew_citation(r)
#'
#' r <- search_wcvp(list(genus="Poa"))
#' kew_citation(r)
#'
#' r <- lookup_wcvp("320035-2")
#' kew_citation(r)
#'
#' r <- search_ipni(list(publishing_author="L."))
#' kew_citation(r)
#'
#' r <- lookup_ipni("12653-1")
#' kew_citation(r)
#'
#' r <- search_tol("Poa")
#' kew_citation(r)
#'
#' r <- lookup_tol(2717)
#' kew_citation(r)
#'
#' tree <- load_tol()
#' kew_citation(tree)
#'
#' match <- match_knms("Poa annua")
#' kew_citation(match)
#'
#' match <- match_krs("Poa annua")
#' kew_citation(match)
#'
#'
#' @export
kew_citation <- function(x) {
  UseMethod("kew_citation")
}


#' @importFrom glue glue
#' @importFrom utils bibentry
#'
#' @export
kew_citation.powo <- function(x) {
  # Citation for Plants of the World Online; `x` is only used for dispatch.
  header <- "To cite POWO in publication please use:"

  info <- list(
    title="Plants of the World Online.",
    author="POWO",
    year="2021",
    note="Facilitated by the Royal Botanic Gardens, Kew.",
    accessed=Sys.Date(),
    url="http://www.plantsoftheworldonline.org/"
  )

  ref <- bibentry(
    bibtype="Manual",
    textVersion=glue("{info$author} ({info$year}).",
                     "{info$title} {info$note} {info$url}.",
                     "Accessed {info$accessed}",
                     .sep=" "),
    header=header,
    other=info
  )

  class(ref) <- c("citation", "bibentry")

  ref
}


#' @importFrom glue glue
#' @importFrom utils bibentry
#'
#' @export
kew_citation.wcvp <- function(x) {
  # Two entries are returned for WCVP: the resource itself and the
  # accompanying data paper. `x` is only used for dispatch.
  header <- "To cite WCVP in publication please use:"

  info <- list(
    title="World Checklist of Vascular Plants",
    author="WCVP",
    year="2021",
    version="2.0",
    note="Facilitated by the Royal Botanic Gardens, Kew.",
    accessed=Sys.Date(),
    url="http://wcvp.science.kew.org/"
  )

  ref1 <- bibentry(
    bibtype="Manual",
    textVersion=glue("{info$author} ({info$year}).",
                     "{info$title}, version {info$version}.",
                     "{info$note} {info$url}.",
                     "Accessed {info$accessed}",
                     .sep=" "),
    other=info
  )

  info <- list(
    title="The World Checklist of Vascular Plants, a continuously updated resource for exploring global plant diversity",
    author="Govaerts, R., Nic Lughadha, E., Black, N., Turner, R. and Paton, A.",
    year="2021",
    journal="Scientific Data",
    volume=8,
    number=215,
    url="https://doi.org/10.1038/s41597-021-00997-6"
  )

  ref2 <- bibentry(
    bibtype="Article",
    textVersion=glue("{info$author} ({info$year}).",
                     "{info$title}. {info$journal},",
                     "{info$volume}({info$number}), {info$url}",
                     .sep=" "),
    other=info
  )

  # The header is attached to the combined object rather than to either entry.
  ref <- c(ref1, ref2)
  attr(ref, "mheader") <- paste(header, collapse="\n")

  class(ref) <- c("citation", "bibentry")

  ref
}

#' @importFrom glue glue
#' @importFrom utils bibentry
#'
#' @export
kew_citation.ipni <- function(x) {
  # Citation for the International Plant Names Index; `x` is only used for
  # dispatch.
  header <- "To cite IPNI in publication please use:"

  info <- list(
    title="International Plant Names Index",
    author="IPNI",
    year="2021",
    note="The Royal Botanic Gardens, Kew, Harvard University Herbaria & Libraries and Australian National Botanic Gardens",
    accessed=Sys.Date(),
    url="https://ipni.org/"
  )

  ref <- bibentry(
    bibtype="Manual",
    textVersion=glue("{info$author} ({info$year}).",
                     "{info$title}. {info$note}. {info$url}.",
                     "Accessed {info$accessed}",
                     .sep=" "),
    header=header,
    other=info
  )

  class(ref) <- c("citation", "bibentry")

  ref
}

#' @importFrom glue glue
#' @importFrom utils bibentry
#'
#' @export
kew_citation.tol <- function(x) {
  # Citation for the Tree of Life data paper; `x` is only used for dispatch.
  header <- "To cite ToL in publication please use:"

  # No `note` field here: the previous value was the IPNI institutional
  # credit, which does not belong on this article entry.
  info <- list(
    title="A Comprehensive Phylogenomic Platform for Exploring the Angiosperm Tree of Life",
    author="Baker W.J., Bailey P., Barber V., Barker A., Bellot S., Bishop D., Botigue L.R., Brewer G., Carruthers T., Clarkson J.J., Cook J., Cowan R.S., Dodsworth S., Epitawalage N., Francoso E., Gallego B., Johnson M., Kim J.T., Leempoel K., Maurin O., McGinnie C., Pokorny L., Roy S., Stone M., Toledo E., Wickett N.J., Zuntini A.R., Eiserhardt W.L., Kersey P.J., Leitch I.J. and Forest F.",
    year="2021",
    journal="Systematic Biology",
    volume="syab035",
    url="https://doi.org/10.1093/sysbio/syab035"
  )

  ref <- bibentry(
    bibtype="Article",
    textVersion=glue("{info$author} ({info$year}).",
                     "{info$title}. {info$journal},",
                     "{info$volume}, {info$url}",
                     .sep=" "),
    other=info,
    header=header
  )

  class(ref) <- c("citation", "bibentry")

  ref
}

#' @importFrom glue glue
#' @importFrom utils bibentry
#'
#' @export
kew_citation.knms_match <- function(x) {
  # Citation for the Kew Names Matching Service; `x` is only used for
  # dispatch.
  header <- "To cite KNMS in publication please use:"

  info <- list(
    title="Kew Names Matching Service",
    author="KNMS",
    year="2021",
    accessed=Sys.Date(),
    url="http://namematch.science.kew.org/"
  )

  ref <- bibentry(
    bibtype="Manual",
    textVersion=glue("{info$author} ({info$year}).",
                     "{info$title}. {info$url}.",
                     "Accessed {info$accessed}",
                     .sep=" "),
    header=header,
    other=info
  )

  class(ref) <- c("citation", "bibentry")

  ref
}


#' @importFrom glue glue
#' @importFrom utils bibentry
#'
#' @export
kew_citation.krs_match <- function(x) {
  # Citation for the Kew Reconciliation Service; `x` is only used for
  # dispatch.
  header <- "To cite KRS in publication please use:"

  info <- list(
    title="Kew Reconciliation Service",
    author="KRS",
    year="2016",
    accessed=Sys.Date(),
    url="http://data1.kew.org/reconciliation/"
  )

  ref <- bibentry(
    bibtype="Manual",
    textVersion=glue("{info$author} ({info$year}).",
                     "{info$title}. {info$url}.",
                     "Accessed {info$accessed}",
                     .sep=" "),
    header=header,
    other=info
  )

  class(ref) <- c("citation", "bibentry")

  ref
}

--------------------------------------------------------------------------------
/vignettes/articles/mapping-assessed-species.Rmd:
--------------------------------------------------------------------------------
---
title: "Mapping assessed species"
---

```{r, include = FALSE}
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)
```

This is a demonstration of one use for some of the kewr resources, in calculating the proportion of species that have been assessed in a country.

To do this, we'll follow these steps:

1. Retrieve a list of assessed species in one country from the IUCN Red List of Threatened Species.
2. Match names to an IPNI ID using the Kew Names Matching Service.
3. Resolve assessments to accepted names using the World Checklist of Vascular Plants.
4. Get a list of all accepted vascular plant species in the country from Plants of the World Online.
5. Calculate the proportion of assessed and threatened species in our country of interest.

## Setup

In addition to *kewr*, we'll load:

* *dplyr* to manipulate the data
* *tidyr* to reshape data frames
* *stringr* to extract some data from strings
* *ggplot2* to make some plots of our final data

```{r setup, message=FALSE, warning=FALSE}
library(kewr)
library(dplyr)
library(tidyr)
library(stringr)
library(ggplot2)
```

## 1. Getting assessment information

The first step in this problem is to get a list of assessed species for a country.

The most authoritative list of global species assessments is the IUCN Red List of Threatened Species.
For this package, I used the `rl_sp_country` function from *rredlist* to request all assessments for taxa found in Denmark from the IUCN Red List API. I then used the `rl_search` function to request full assessment information for each taxon. 44 | 45 | You can do the same by [installing the *rredlist* package](https://github.com/ropensci/rredlist). You'll need an API key to use the IUCN Red List API, which you can [register for here](https://apiv3.iucnredlist.org/api/v3/token). Alternatively, you can download the information you need directly from [the IUCN Red List website](https://www.iucnredlist.org/). 46 | 47 | I've bundled up the Danish plant assessments in this package to make things easier for this analysis. 48 | 49 | ```{r view-danish-plants} 50 | head(danish_plants) 51 | ``` 52 | 53 | In total, there are assessments for `r format(nrow(danish_plants), big.mark=",")` vascular plants from Denmark. 54 | 55 | ## 2. Match names to an IPNI ID 56 | 57 | To match these names to an IPNI ID, we'll use the Kew Names Matching Service. The first thing we'll do is join the taxonomic authority to the scientific name, to hopefully reduce the number of matches for each name. 58 | 59 | ```{r make-full-names} 60 | danish_plants <- 61 | danish_plants %>% 62 | unite("full_name", scientific_name, authority, sep=" ", remove=FALSE) 63 | ``` 64 | 65 | Then we put the full names through KNMS. 66 | 67 | ```{r match-full-names} 68 | full_matches <- match_knms(danish_plants$full_name) 69 | full_matches 70 | ``` 71 | 72 | Most of our names had matches, which makes things simpler. But we also got some names that returned multiple matches. 73 | 74 | First we'll try putting just the scientific names of our missing matches through KNMS again. 
75 | 76 | ```{r match-partial-names} 77 | full_matches <- tidy(full_matches) 78 | 79 | unmatched <- filter(full_matches, !matched) 80 | 81 | to_match <- 82 | danish_plants %>% 83 | filter(full_name %in% unmatched$submitted) 84 | 85 | part_matches <- match_knms(to_match$scientific_name) 86 | part_matches 87 | ``` 88 | Almost everything returned a match. We could attempt to manually match the final name, but as it's just one we'll leave it out. 89 | 90 | Now we can join all of our matches together, link them to the IUCN taxon ID, and resolve any synonyms. 91 | 92 | ```{r resolve-matches} 93 | part_matches <- tidy(part_matches) 94 | 95 | full_matches <- 96 | full_matches %>% 97 | filter(matched) %>% 98 | left_join( 99 | danish_plants %>% select(taxonid, full_name, category), 100 | by=c("submitted"="full_name") 101 | ) 102 | 103 | part_matches <- 104 | part_matches %>% 105 | filter(matched) %>% 106 | left_join( 107 | danish_plants %>% select(taxonid, scientific_name, category), 108 | by=c("submitted"="scientific_name") 109 | ) 110 | 111 | matched_names <- bind_rows(full_matches, part_matches) 112 | 113 | head(matched_names) 114 | ``` 115 | 116 | ## 3. Resolve assessments to accepted names 117 | 118 | Now that we have an IPNI ID attached to each assessment, we can look up the record for the taxa in WCVP. This will let us find out the taxonomic status of each name - the first step in resolving any synonymy issues. 119 | 120 | ```{r resolve-synonyms} 121 | # wrap up the lookup_wcvp function to make sure it comes back as a list 122 | f <- function(taxonid) { 123 | list(lookup_wcvp(taxonid)) 124 | } 125 | 126 | resolved_names <- 127 | matched_names %>% 128 | nest_by(taxonid, ipni_id) %>% 129 | mutate(wcvp_record=f(ipni_id)) 130 | 131 | resolved_names <- 132 | resolved_names %>% 133 | mutate(status=wcvp_record$status) 134 | ``` 135 | 136 | With the taxonomic status, we'll first remove any taxa that are unplaced. 
We'll also remove any non-homotypic synonyms - even if we resolve these to accepted species, we can't be sure that the assessment would be valid for the new concept, so that accepted species would not be assessed. 137 | 138 | ```{r remove-non-homotypic} 139 | resolved_names <- 140 | resolved_names %>% 141 | filter(status %in% c("accepted", "homotypic synonym")) 142 | ``` 143 | 144 | Next we'll check if there are any taxa that still have multiple matches in WCVP. 145 | 146 | ```{r check-multiple-matches} 147 | resolved_names %>% 148 | ungroup() %>% 149 | add_count(taxonid) %>% 150 | summarise(multiple_matches=sum(n > 1)) 151 | ``` 152 | 153 | There are not. 154 | 155 | So the final step is to find the accepted names for all homotypic synonyms and remove anything that is a lower rank than species. 156 | 157 | ```{r find-accepted-names} 158 | resolved_names <- 159 | resolved_names %>% 160 | mutate(accepted_id=ifelse(status != "accepted", 161 | wcvp_record$accepted$id, 162 | wcvp_record$id), 163 | accepted_name=ifelse(status != "accepted", 164 | wcvp_record$accepted$name, 165 | wcvp_record$name), 166 | accepted_author=ifelse(status != "accepted", 167 | wcvp_record$accepted$author, 168 | wcvp_record$authors), 169 | accepted_rank=ifelse(status != "accepted", 170 | wcvp_record$accepted$rank, 171 | wcvp_record$rank)) 172 | 173 | resolved_names <- 174 | resolved_names %>% 175 | select(-wcvp_record) %>% 176 | unnest(cols=c(data)) %>% 177 | rename(match_id=ipni_id) 178 | 179 | accepted_species <- 180 | resolved_names %>% 181 | filter(accepted_rank == "Species") %>% 182 | ungroup() 183 | 184 | nrow(accepted_species) 185 | ``` 186 | 187 | We are now left with `r nrow(accepted_species)` accepted species from Denmark with assessments. 188 | 189 | ## 4. Get a list of all species in the country 190 | 191 | To calculate the number of species that are assessed in Denmark, we need a checklist of all accepted species. 192 | 193 | We can get this from Plants of the World Online. 
194 | 195 | ```{r download-powo-checklist} 196 | checklist <- search_powo(list(distribution="Denmark"), 197 | filters=c("accepted", "species"), 198 | limit=5000) 199 | checklist 200 | ``` 201 | 202 | Now we have this, we just need to join our assessments to our checklist. 203 | 204 | ```{r join-assessments-checklist} 205 | checklist <- 206 | tidy(checklist) %>% 207 | mutate(ipni_id=str_extract(fqId, "[0-9\\-]+")) %>% 208 | select(ipni_id, family, name, author) %>% 209 | distinct(ipni_id, .keep_all=TRUE) %>% 210 | left_join( 211 | accepted_species %>% select(accepted_id, category), 212 | by=c("ipni_id"="accepted_id") 213 | ) 214 | ``` 215 | 216 | ## 5. Calculating the proportion of assessed species 217 | 218 | And now we can calculate the proportion of species assessed in Denmark! 219 | 220 | ```{r calculate-proportions} 221 | checklist %>% 222 | summarise(p_assessed=mean(! is.na(category))) 223 | ``` 224 | 225 | And make a simple bar chart of the number of species in each category. 226 | 227 | ```{r plot-bars} 228 | iucn_colours <- c("NE"="#ffffff", 229 | "DD"="#d1d1d6", 230 | "LC"="#60c659", 231 | "NT"="#cce226", 232 | "VU"="#f9e814", 233 | "EN"="#fc7f3f", 234 | "CR"="d81e05", 235 | "EW"="#542344", 236 | "EX"="#000000") 237 | checklist %>% 238 | replace_na(list(category="NE")) %>% 239 | mutate(category=factor(category, levels=names(iucn_colours), 240 | ordered=TRUE)) %>% 241 | ggplot(mapping=aes(y=category, fill=category, 242 | colour=category == "NE")) + 243 | geom_bar() + 244 | scale_fill_manual(values=iucn_colours, drop=FALSE) + 245 | scale_colour_manual(values=c(`TRUE`="black", `FALSE`=NA)) + 246 | scale_y_discrete(drop=FALSE) + 247 | guides(fill=FALSE, colour=FALSE) + 248 | labs(x="Number of species", y="IUCN Red List category") 249 | ``` 250 | 251 | 252 | -------------------------------------------------------------------------------- /R/print.R: -------------------------------------------------------------------------------- 1 | # wcvp ---- 2 | 3 | #' 
#' @importFrom glue glue glue_collapse
#' @importFrom utils str head
#'
#' @export
print.wcvp_search <- function(x, ...) {
  # Print the paging summary of a WCVP search followed by the structure
  # of the first result. Fields that are NULL print as "missing".
  message <- glue("",
                  "total results: {x$total}",
                  "returned results: {length(x$results)}",
                  "total pages: {x$pages}",
                  "current page: {x$page}",
                  "",
                  .sep="\n", .trim=FALSE, .null="missing")

  cat(message)
  if (! is.null(x$results)) {
    str(head(x$results, 1), max.level=2)
  }

  # print methods return their argument invisibly so they can be piped
  invisible(x)
}

#' @importFrom glue glue
#' @importFrom utils str
#' @export
print.wcvp_taxon <- function(x, ...) {
  # Records without an `accepted` entry fall back to their own ID.
  # A plain `if` (rather than `ifelse()`) lets a NULL ID print as "missing"
  # instead of raising an error.
  accepted_id <- if (is.null(x$accepted)) x$id else x$accepted$id

  message <- glue("",
                  "Name: {x$name}",
                  "Authors: {x$authors}",
                  "Status: {x$status}",
                  "Rank: {x$rank}",
                  "Accepted taxon ID: {accepted_id}",
                  "Synonyms: {length(x$synonyms)}",
                  "",
                  .sep="\n", .trim=FALSE, .null="missing")

  cat(message)
  invisible(x)
}

# powo ----

#' @importFrom glue glue glue_collapse
#' @importFrom utils str head
#'
#' @export
print.powo_search <- function(x, ...) {
  # Print the paging summary of a POWO search followed by the structure
  # of the first result.
  message <- glue("",
                  "total results: {x$total}",
                  "returned results: {length(x$results)}",
                  "total pages: {x$pages}",
                  "First result:",
                  "",
                  .sep="\n", .trim=FALSE, .null="missing")

  cat(message)
  if (! is.null(x$results)) {
    str(head(x$results, 1), max.level=2)
  }
  invisible(x)
}

#' @importFrom glue glue
#' @importFrom stringr str_extract
#' @importFrom utils str
#'
#' @export
print.powo_taxon <- function(x, ...) {
  if ("accepted" %in% names(x)) {
    # the accepted ID is the trailing part of the fully-qualified ID
    accepted_id <- str_extract(x$accepted$fqId,
                               "(?<=names\\:)[0-9\\-]+$")
  } else if (isTRUE(x$taxonomicStatus == "Accepted")) {
    # isTRUE() guards against a missing (NULL) status, which would
    # otherwise make the `if` condition zero-length and error
    accepted_id <- x$queryId
  } else {
    accepted_id <- "UNDEFINED"
  }

  has_distribution <- "distribution" %in% names(x)

  message <- glue("",
                  "Name: {x$name}",
                  "Authors: {x$authors}",
                  "Status: {x$taxonomicStatus}",
                  "Rank: {x$rank}",
                  "Accepted taxon ID: {accepted_id}",
                  "Synonyms: {length(x$synonyms)}",
                  "Includes distribution: {has_distribution}",
                  "",
                  .sep="\n", .trim=FALSE, .null="missing")

  cat(message)
  invisible(x)
}

# ipni ----

#' @importFrom glue glue glue_collapse
#' @importFrom utils str head
#'
#' @export
print.ipni_search <- function(x, ...) {
  # Print the paging summary of an IPNI search followed by the structure
  # of the first result.
  message <- glue("",
                  "total results: {x$total}",
                  "returned results: {length(x$results)}",
                  "total pages: {x$pages}",
                  "current page: {x$page}",
                  "",
                  .sep="\n", .trim=FALSE, .null="missing")

  cat(message)
  if (! is.null(x$results)) {
    str(head(x$results, 1), max.level=2)
  }
  invisible(x)
}

#' @importFrom glue glue
#' @importFrom utils str
#' @export
print.ipni_citation <- function(x, ...) {
  message <- glue("",
                  "Name: {x$name}",
                  "Authors: {x$authors}",
                  "Publication: {x$publication}",
                  "Publication Year: {x$publicationYear}",
                  "Reference: {x$reference}",
                  "Rank: {x$rank}",
                  "In POWO: {x$inPowo}",
                  "",
                  .sep="\n", .trim=FALSE, .null="missing")

  cat(message)
  invisible(x)
}

#' @importFrom glue glue
#' @importFrom utils str
#' @export
print.ipni_author <- function(x, ...) {
  message <- glue("",
                  "Name: {x$forename} {x$surname}",
                  "Standard form: {x$standardForm}",
                  "Dates: {x$dates}",
                  "Focal groups: {x$taxonGroups}",
                  "Example taxon: {x$examples}",
                  "",
                  .sep="\n", .trim=FALSE, .null="missing")

  cat(message)
  invisible(x)
}

#' @importFrom glue glue
#' @importFrom utils str
#' @export
print.ipni_publication <- function(x, ...) {
  message <- glue("",
                  "Title: {x$title}",
                  "Abbreviation: {x$abbreviation}",
                  "LC Number: {ifelse(is.null(x$lcNumber), '', x$lcNumber)}",
                  "BPH Number: {x$bphNumber}",
                  "",
                  .sep="\n", .trim=FALSE, .null="missing")

  cat(message)
  invisible(x)
}

# tol -----

#' @importFrom glue glue
#' @importFrom utils str head
#'
#' @export
print.tol_search <- function(x, ...) {
  # Print the paging summary of a ToL search followed by the structure
  # of the first result.
  message <- glue("",
                  "total results: {x$total}",
                  "returned results: {length(x$results)}",
                  "total pages: {x$pages}",
                  "current page: {x$page}",
                  "",
                  .sep="\n", .trim=FALSE, .null="missing")

  cat(message)
  if (! is.null(x$results)) {
    str(head(x$results, 1), max.level=2)
  }
  invisible(x)
}

#' @importFrom glue glue
#' @importFrom utils str
#' @export
print.tol_specimen <- function(x, ...) {
  # only the first set of raw reads is summarised; guard against an
  # empty list, where `[[1]]` would be out of bounds
  raw_reads <- if (length(x$raw_reads) > 0) x$raw_reads[[1]] else NULL
  taxonomy <- x$taxonomy

  # format() turns NULL into a zero-length string, which would make glue
  # return nothing at all, so only format a count that is actually there
  reads_count <- raw_reads$reads_count
  if (! is.null(reads_count)) {
    reads_count <- format(reads_count, big.mark=',')
  }

  message <- glue("",
                  "Species: {taxonomy$species}",
                  "Family: {taxonomy$family}",
                  "Order: {taxonomy$order}",
                  "Collector: {x$collector}",
                  "Project: {x$project$data_source$name}",
                  "No. of reads: {reads_count}",
                  "Sequencing platform: {raw_reads$sequence_platform}",
                  "Suspicious placement: {x$is_suspicious_placement}",
                  "",
                  .sep="\n", .trim=FALSE, .null="missing")

  cat(message)
  invisible(x)
}

#' @importFrom glue glue
#' @importFrom utils str
#' @export
print.tol_gene <- function(x, ...) {
  message <- glue("",
                  "Exemplar name: {x$exemplar_name}",
                  "Exemplar source species: {x$exemplar_species}",
                  "No. species: {x$species_count}",
                  "No. genera: {x$genera_count}",
                  "Avg. recovered length: {x$average_contig_length}",
                  "Avg. % recovered: {x$average_contig_length_percent}",
                  "",
                  .sep="\n", .trim=FALSE, .null="missing")

  cat(message)
  invisible(x)
}

#' @importFrom glue glue
#' @importFrom utils str
#' @export
print.tol_tree <- function(x, ...) {
  # the preview is interpolated rather than passed as a template, so any
  # braces in the file content are printed instead of evaluated as R code
  message <- glue("",
                  "Preview:",
                  "{substr(x$content, 1, 100)}",
                  "",
                  .sep="\n", .trim=FALSE)

  cat(message)
  invisible(x)
}

#' @importFrom glue glue
#' @importFrom utils str
#' @export
print.tol_fasta <- function(x, ...) {
  # the preview is interpolated rather than passed as a template, so any
  # braces in the file content are printed instead of evaluated as R code
  message <- glue("",
                  "Preview:",
                  "{substr(x$content, 1, 100)}",
                  "",
                  .sep="\n", .trim=FALSE)

  cat(message)
  invisible(x)
}

# knms ----

#' @importFrom glue glue
#' @importFrom utils str head
#' @export
print.knms_match <- function(x, ...) {
  message <- glue("",
                  "Matches returned: {x$matched}",
                  "Multiple matches: {x$multiple_matches}",
                  "Unmatched names: {x$unmatched}",
                  "",
                  .sep="\n", .trim=FALSE, .null="missing")

  cat(message)
  str(head(x$results, 1))
  invisible(x)
}

# krs ----

#' @importFrom glue glue
#' @importFrom utils str head
#' @export
print.krs_match <- function(x, ...) {
  # there is no summary header for KRS matches yet, only a blank line
  # before the structure of the first result
  message <- glue("",
                  "",
                  .sep="\n", .trim=FALSE)

  cat(message)
  str(head(x$results, 1))
  invisible(x)
}

--------------------------------------------------------------------------------
/R/wcvp.R:
--------------------------------------------------------------------------------
#' Search WCVP for a taxon.
#'
#' Query the World Checklist of Vascular Plants search API
#' for a taxon string.
#'
#' The [World Checklist of Vascular Plants (WCVP)](https://wcvp.science.kew.org/)
#' is a global consensus view of all known vascular plant species.
#' It has been compiled by staff at RBG Kew in consultation with plant
#' group experts.
#'
#' The search API allows users to query the checklist for plant names.
#' Currently, it does not support partial or fuzzy matching.
#' In order to get a result, the user must supply a valid name string.
#' For example, 'Myrcia' and 'Myrcia guianensis' will return results,
#' but 'M' or 'Myr' will not.
#'
#' There is some support for querying using keyword arguments. The API is
#' not currently documented, so only keywords that are definitely there have
#' been implemented. Use the `get_keywords` function to view a list of all implemented keywords.
#'
#' The API will return taxonomic information (the family, authority, status, and rank)
#' of all names matching the query. These results can be limited, for example to accepted species,
#' using filters. Use the `get_filters` function to view a list of all implemented filters.
#'
#' @param query The taxon string to search WCVP for. If using keywords,
#' the query must be formatted as a list.
#' @param filters Filter to apply to search results.
#' Multiple filters must be supplied as a character vector.
#' @param cursor A cursor returned by a previous search.
#' If used, the query and filter must be exactly the same.
#' @param limit An integer specifying the maximum number of results
#' to return.
#' @param .wait Time to wait before making a request, to help
#' rate limiting.
#'
#' @return Returns an object of class `wcvp_search` that is a simple
#' structure with slots for:
#'
#' * `total`: the total number of results held in WCVP for the query
#' * `pages`: the total number of pages of results, calculated from
#' `total` and `limit`.
#' * `cursor`: a cursor to retrieve the next page of results from the API.
#' * `limit`: the maximum number of results requested from the API.
#' * `results`: the query results parsed into a list.
#' * `query`: the query string submitted to the API.
#' * `filters`: the filter strings submitted to the API.
#' * `response`: the [httr response object][httr::response].
#'
#' @examples
#' # search for all entries containing a genus name
#' search_wcvp("Myrcia")
#'
#' # search for all accepted species within a genus
#' search_wcvp("Myrcia", filters=c("species", "accepted"))
#'
#' # search for up to 10,000 species in a genus
#' search_wcvp("Poa", filters=c("species"), limit=10000)
#'
#' # search for all names in a family
#' search_wcvp(list(family="Myrtaceae"))
#'
#' # search for genera within a family
#' search_wcvp(list(family="Myrtaceae"), filters=c("genera"))
#'
#' # search for all names with a specific epithet
#' search_wcvp(list(species="guianensis"))
#'
#' # search for a species name and print the results
#' r <- search_wcvp("Myrcia guianensis", filters=c("species"))
#' print(r)
#'
#' # simplify search results to a `tibble`
#' r <- search_wcvp("Poa", filters=c("species"))
#' tidy(r)
#'
#' # accepted name info is nested inside the records for synonyms
#' # simplify accepted name info to the name ID
#' r <- search_wcvp("Poa", filters=c("species"))
#' tidied <- tidy(r)
#' tidyr::unnest(tidied, cols=synonymOf, names_sep="_")
#'
#' @references
#' WCVP (2020). World Checklist of Vascular Plants, version 2.0. Facilitated by the Royal Botanic Gardens, Kew. Published on the Internet; http://wcvp.science.kew.org/
#'
#' @family WCVP functions
#' @seealso
#' * [lookup_wcvp()] to lookup information about a taxon name
#' using a valid IPNI ID.
#' * [download_wcvp()] to download the entire WCVP.
#'
#' @export
search_wcvp <- function(query, filters=NULL, cursor="*", limit=50, .wait=0.1) {
  url <- wcvp_search_url_()

  # keeping a copy of this to return in the result object
  original_query <- query

  query <- format_query_(query, "wcvp")

  # paging and filter parameters are sent alongside the formatted query
  query$limit <- limit
  query$cursor <- cursor
  query$f <- format_filters_(filters, "wcvp")

  results <- make_request_(url, query, .wait=.wait)

  # calculate total number of pages, because it isn't returned
  total_pages <- ceiling(results$content$total / results$content$limit)

  structure(
    list(
      total=results$content$total,
      pages=total_pages,
      cursor=results$content$cursor,
      limit=results$content$limit,
      results=results$content$results,
      query=original_query,
      filters=filters,
      response=results$response
    ),
    class=c("wcvp_search", "wcvp")
  )
}

#' Look up a taxon in WCVP.
#'
#' Request the record for a taxon in the World Checklist of
#' Vascular Plants (WCVP) using the IPNI ID.
#'
#' The [World Checklist of Vascular Plants (WCVP)](https://wcvp.science.kew.org/)
#' is a global consensus view of all known vascular plant species.
#' It has been compiled by staff at RBG Kew in consultation with plant
#' group experts.
#'
#' The taxon lookup API allows users to retrieve taxonomic information for
#' a specific taxon name using the unique IPNI ID. If this is not known,
#' it can be found out using the [WCVP search API][kewr::search_wcvp].
#'
#' @param taxonid A string containing a valid IPNI ID.
#' @param .wait Time to wait before making a request, to help
#' rate limiting.
#'
#' @return A `wcvp_taxon` object, which is a simple structure with fields
#' for each of the fields returned by the lookup API, as well as the [httr response object][httr::response].
#'
#' @examples
#'
#' # retrieve taxonomic information for a taxon name
#' lookup_wcvp("271445-2")
#'
#' # print a summary of the returned information
#' r <- lookup_wcvp("271445-2")
#' print(r)
#'
#' # tidy into a tibble
#' r <- lookup_wcvp("271445-2")
#' tidy(r)
#'
#' # tidy the returned list of synonyms into a tibble
#' r <- lookup_wcvp("60447743-2")
#' tidied <- tidy(r)
#' tidyr::unnest(tidied, cols=synonyms, names_sep="_")
#'
#' # expand the child entries returned for each entry
#' r <- lookup_wcvp("30000055-2")
#' tidied <- tidy(r)
#' tidyr::unnest(tidied, cols=children, names_sep="_")
#'
#' @family WCVP functions
#' @seealso
#' * [search_wcvp()] to search WCVP using a taxon name.
#' * [download_wcvp()] to download the entire WCVP.
#'
#' @references
#' WCVP (2020). World Checklist of Vascular Plants, version 2.0. Facilitated by the Royal Botanic Gardens, Kew. Published on the Internet; http://wcvp.science.kew.org/
#'
#' @export
lookup_wcvp <- function(taxonid, .wait=0.1) {
  url <- wcvp_taxon_url_(taxonid)

  result <- make_request_(url, query=NULL, .wait=.wait)

  # this might be better if things were explicitly listed
  record <- result$content
  record$response <- result$response
  record$queryId <- taxonid

  # fill in status if unplaced
  if (is.null(record$status)) {
    record$status <- "unplaced"
  }

  # make sure author string is not null
  if (is.null(record$authors)) {
    record$authors <- NA_character_
  }

  structure(
    record,
    class=c("wcvp_taxon", "wcvp")
  )
}

#' Download the whole of the WCVP.
#'
#' Download the latest or a specific version of the World
#' Checklist of Vascular Plants (WCVP).
#'
#' The [World Checklist of Vascular Plants (WCVP)](https://wcvp.science.kew.org/)
#' is a global consensus view of all known vascular plant species.
#' It has been compiled by staff at RBG Kew in consultation with plant
#' group experts.
#'
#' Versioned downloads of the whole WCVP are provided on the website.
#' This function allows the user to download the latest or a specific
#' version of the WCVP.
#'
#' @param save_dir A string specifying the folder to save the download in. If
#' no value is provided, \link[here]{here} will be used.
#' @param version An integer version number to download. The latest
#' version will be downloaded by default.
#'
#' @examples
#' \dontrun{
#' # download the latest version
#' download_wcvp()
#'
#' # download version 1
#' download_wcvp(version=1)
#' }
#'
#' @family WCVP functions
#' @seealso
#' * [lookup_wcvp()] to lookup information about a taxon name
#' using a valid IPNI ID.
#' * [search_wcvp()] to search WCVP using a taxon name.
#'
#' @references
#' WCVP (2020). World Checklist of Vascular Plants, version 2.0. Facilitated by the Royal Botanic Gardens, Kew. Published on the Internet; http://wcvp.science.kew.org/
#'
#' @importFrom here here
#' @importFrom glue glue
#' @importFrom stringr str_extract
#' @importFrom utils download.file
#'
#' @export
download_wcvp <- function(save_dir=NULL, version=NULL) {
  if (is.null(save_dir)) {
    save_dir <- here()
  }

  download_link <- wcvp_download_url_(version)
  filename <- str_extract(download_link, "(?<=/)wcvp.+\\.zip$")
  if (is.na(filename)) {
    # the archive is not named as expected, so fall back to the last
    # path component rather than saving to a file called "NA"
    filename <- basename(download_link)
  }
  save_path <- file.path(save_dir, filename)

  # only used for the progress message from here on
  if (is.null(version)) {
    version <- "latest"
  }

  message <- glue("Downloading WCVP version {version}",
                  "to: {save_path}\n",
                  .sep=" ", .trim=FALSE)

  cat(message)

  download.file(download_link, save_path)

  invisible()
}

#' Make the WCVP taxon lookup URL.
#'
#' @param taxonid A valid IPNI ID.
#'
#' @noRd
#'
#' @importFrom glue glue
wcvp_taxon_url_ <- function(taxonid) {
  base <- get_url_("wcvp")

  glue("{base}/taxon/{taxonid}")
}

#' Make the WCVP search URL.
#'
#' @noRd
wcvp_search_url_ <- function() {
  base <- get_url_("wcvp")

  paste0(base, "/search")
}

#' Get a WCVP download URL.
#'
#' Scrapes the WCVP download page for links to zip archives and returns
#' the URL of the archive for the requested version.
#'
#' @param version The version number to find a download link for. If
#' `NULL`, the highest version number listed is used.
#'
#' @return A single URL string. An error is raised if no versioned
#' archive is listed or the requested version is not available.
#'
#' @importFrom httr GET content
#' @importFrom rvest html_nodes html_attr
#' @importFrom stringr str_detect str_extract
#' @importFrom glue glue
#'
#' @noRd
wcvp_download_url_ <- function(version=NULL) {
  base <- "http://sftp.kew.org/pub/data-repositories/WCVP/"
  response <- GET(base)

  page <- content(response)
  link_nodes <- html_nodes(page, "a")
  links <- html_attr(link_nodes, "href")

  # anchors without an href come back as NA
  links <- links[! is.na(links)]
  download_links <- links[str_detect(links, "\\.zip$")]

  # version numbers are compared as integers: as strings "9" would sort
  # above "10", and "_v1" would also match "_v10"
  versions <- as.integer(str_extract(download_links, "(?<=_v)\\d+"))

  # ignore any archive that does not carry a version number
  is_versioned <- ! is.na(versions)
  download_links <- download_links[is_versioned]
  versions <- versions[is_versioned]

  if (length(versions) == 0) {
    stop("No versioned WCVP downloads found at: ", base, call.=FALSE)
  }

  if (is.null(version)) {
    version <- max(versions)
  }

  if (! version %in% versions) {
    message <- glue("Not a recognised version of WCVP: {version}",
                    "Available versions: {paste0(sort(unique(versions)), collapse=',')}",
                    "",
                    .sep="\n", .trim=FALSE)

    stop(message, call.=FALSE)
  }

  # match() returns the first hit, so exactly one link is returned even
  # if several archives share a version number
  download_link <- download_links[match(version, versions)]
  paste0(base, download_link)
}


--------------------------------------------------------------------------------
/R/tol.R:
--------------------------------------------------------------------------------
#' Search Kew's Tree of Life for specimens or genes.
#'
#' Query Kew's Tree of Life for specimens that have
#' been sampled for sequencing.
#'
#' The [Tree of Life](https://treeoflife.kew.org/) is a database
#' of specimens sequenced as part of Kew's efforts to build
#' a comprehensive evolutionary tree of life for flowering plants.
#'
#' The search API allows users to query the database for specimens
#' based on their taxonomic information. Filtering and keyword-search
#' are not currently implemented. All searches are based on taxonomic
#' information, so `Myrcia` and `Myrtales` will return results, but
#' `Brummitt` will not.
#'
#' The search API also allows users to download information about sequenced
#' genes. There is currently no ability to search within the results for genes,
#' but a table of all genes can be accessed using keyword argument `genes=TRUE`.
#'
#' @param query The string to query the database with.
#' @param genes Set to TRUE to download results for genes instead of specimens.
#' @param limit An integer specifying the number of results
#' to return.
#' @param page An integer specifying the page of results to request.
#' @param .wait Time to wait before making a request, to help
#' rate limiting.
#'
#' @return Returns an object of class `tol_search` that is a simple
#' structure with slots for:
#'
#' * `total`: the total number of results held in ToL for the query.
#' * `pages`: the total number of pages of results, calculated from
#' `total` and `limit`.
#' * `page`: the page of results requested.
#' * `limit`: the maximum number of results requested from the API.
#' * `results`: the query results parsed into a list.
#' * `query`: the query string submitted to the API.
#' * `response`: the [httr response object][httr::response].
#'
#' @examples
#' # get the first 50 of all sequenced specimens
#' search_tol(limit=50)
#'
#' # search for all sequenced Myrcia specimens
#' search_tol("Myrcia")
#'
#' # get all sequenced specimens
#' search_tol(limit=5000)
#'
#' # search for a species name and print the results
#' r <- search_tol("Myrcia guianensis")
#' print(r)
#'
#' # simplify search results to a `tibble`
#' r <- search_tol("Myrcia")
#' tidy(r)
#'
#' # gene stats are nested in the results
#' r <- search_tol("Myrcia")
#' tidied <- tidy(r)
#' tidyr::unnest(tidied, cols=gene_stats)
#'
#' # species names are nested in the results
#' r <- search_tol("Myrcia")
#' tidied <- tidy(r)
#' tidyr::unnest(tidied, cols=species, names_sep="_")
#'
#' # as is higher taxonomy
#' r <- search_tol("Myrcia")
#' tidied <- tidy(r)
#' tidyr::unnest(tidied, cols=species, names_sep="_")
#'
#' # search for all gene entries and print results
#' r <- search_tol(genes=TRUE, limit=500)
#' print(r)
#'
#' # tidy the returned genes
#' tidy(r)
#'
#' @references
#' Baker W.J., Bailey P., Barber V., Barker A., Bellot S., Bishop D., Botigue L.R., Brewer G., Carruthers T., Clarkson J.J., Cook J., Cowan R.S., Dodsworth S., Epitawalage N., Francoso E., Gallego B., Johnson M., Kim J.T., Leempoel K., Maurin O., McGinnie C., Pokorny L., Roy S., Stone M., Toledo E., Wickett N.J., Zuntini A.R., Eiserhardt W.L., Kersey P.J., Leitch I.J. & Forest F. 2021. A Comprehensive Phylogenomic Platform for Exploring the Angiosperm Tree of Life. Systematic Biology, 2021; syab035, https://doi.org/10.1093/sysbio/syab035
#'
#' @family ToL functions
#' @seealso
#' * [lookup_tol()] to lookup information about a sequenced specimen
#' using a valid ToL ID.
#' * [download_tol()] to download a file from the ToL SFTP server.
#' * [load_tol()] load a file from the ToL SFTP server.
#'
#' @export
search_tol <- function(query="", genes=FALSE, limit=50, page=1, .wait=0.2) {
  # gene searches use their own endpoint and replace the user's query
  if (genes) {
    url <- tol_search_url_(type="genes")
    query <- "genes"
  } else {
    url <- tol_search_url_()
  }

  # keeping a copy of this to return in the result object
  original_query <- query
  query <- format_query_(query, "tol")

  query$per_page <- limit
  query$page <- page
  results <- make_request_(url, query, .wait=.wait)

  # calculate total number of pages, because it isn't returned
  total_pages <- ceiling(results$content$total / limit)

  structure(
    list(
      total=results$content$total,
      pages=total_pages,
      page=results$content$page,
      limit=limit,
      results=results$content$items,
      query=original_query,
      response=results$response
    ),
    class=c("tol_search", "tol")
  )
}

#' Look up a sequenced specimen or gene in ToL.
#'
#' Request the record for a sequenced specimen or gene in ToL using
#' its ToL ID.
#'
#' The [Tree of Life](https://treeoflife.kew.org/) is a database
#' of specimens sequenced as part of Kew's efforts to build
#' a comprehensive evolutionary tree of life for flowering plants.
#'
#' The lookup API allows users to retrieve taxonomic and sequencing
#' information for a specific sequenced specimen or gene using the unique ToL ID.
#' If this is not known, it can be found out using the [ToL search API][kewr::search_tol].
#'
#' @param id A string containing a valid ToL ID.
#' @param type The type of record to lookup, either `specimen` or `gene`.
#' @param .wait Time to wait before making a request, to help
#' rate limiting.
#'
#' @return A `tol_{type}` object, which is a simple structure with fields
#' for each of the fields returned by the lookup API,
#' as well as the [httr response object][httr::response].
#'
#' @examples
#'
#' # retrieve information for a particular specimen
#' lookup_tol("1296")
#'
#' # print a summary of the returned information
#' r <- lookup_tol("1296")
#' print(r)
#'
#' # tidy into a tibble
#' r <- lookup_tol("1296")
#' tidy(r)
#'
#' # extract the returned gene stats for the specimen
#' r <- lookup_tol("1296")
#' tidied <- tidy(r)
#' tidied$gene_stats
#'
#' # expand the taxonomy info
#' r <- lookup_tol("1296")
#' tidied <- tidy(r)
#' tidyr::unnest(tidied, cols=taxonomy, names_sep="_")
#'
#' # retrieve information for a particular gene
#' lookup_tol("51", type="gene")
#'
#' # print a summary of the returned information
#' r <- lookup_tol("51", type="gene")
#' print(r)
#'
#' # tidy into a tibble
#' r <- lookup_tol("51", type="gene")
#' tidy(r)
#'
#' @family ToL functions
#' @seealso
#' * [search_tol()] to search ToL using taxonomic information.
#' * [download_tol()] to download a file from the ToL SFTP server.
#' * [load_tol()] load a file from the ToL SFTP server.
#'
#' @references
#' Baker W.J., Bailey P., Barber V., Barker A., Bellot S., Bishop D., Botigue L.R., Brewer G., Carruthers T., Clarkson J.J., Cook J., Cowan R.S., Dodsworth S., Epitawalage N., Francoso E., Gallego B., Johnson M., Kim J.T., Leempoel K., Maurin O., McGinnie C., Pokorny L., Roy S., Stone M., Toledo E., Wickett N.J., Zuntini A.R., Eiserhardt W.L., Kersey P.J., Leitch I.J. & Forest F. 2021. A Comprehensive Phylogenomic Platform for Exploring the Angiosperm Tree of Life. Systematic Biology, 2021; syab035, https://doi.org/10.1093/sysbio/syab035
#'
#' @export
lookup_tol <- function(id, type=c("specimen", "gene"), .wait=0.1) {
  type <- match.arg(type)
  endpoint <- tol_lookup_url_(id, type)

  reply <- make_request_(endpoint, query=NULL, .wait=.wait)

  # the parsed record is returned as-is, with the raw response and the
  # requested ID attached for reference
  entry <- reply$content
  entry$response <- reply$response
  entry$queryId <- id

  # the record type decides which print/tidy methods are dispatched
  record_class <- c(paste0("tol_", type), "tol")
  structure(entry, class=record_class)
}

#' Load the Tree of Life or another file from ToL.
#'
#' Request a tree file for the whole ToL or an alignment,
#' sequence, or gene tree for a particular specimen or gene.
#'
#' The [Tree of Life](https://treeoflife.kew.org/) is a database
#' of specimens sequenced as part of Kew's efforts to build
#' a comprehensive evolutionary tree of life for flowering plants.
#'
#' Newick tree, alignment, and sequence files are held on an SFTP server
#' for download. The URLs to access these are stored in entries for specimens
#' and genes in the ToL database. These can be accessed by either using [search_tol()]
#' to get all specimens for a particular order, family, genus, or species or by
#' looking up a specific specimen or gene using [lookup_tol()]. If no URL is specified,
#' this will load the ToL tree.
#'
#' @param url URL pointing to a file on the ToL SFTP server.
#' @param .wait Time to wait before making a request, to help
#' rate limiting.
#'
#' @examples
#' # load the ToL
#' load_tol()
#'
#' # load a specimen fasta file
#' specimen_info <- lookup_tol("1296")
#' load_tol(specimen_info$fasta_file_url)
#'
#' # load a gene alignment file
#' gene_info <- lookup_tol("51", type="gene")
#' load_tol(gene_info$alignment_file_url)
#'
#' # load the gene tree
#' load_tol(gene_info$tree_file_url)
#'
#' @family ToL functions
#'
#' @seealso
#' * [lookup_tol()] to lookup information about a sequenced specimen
#' using a valid ToL ID.
#' * [search_tol()] to search ToL using taxonomic info.
#' * [download_tol()] to save a file on the ToL SFTP server to file.
#'
#' @references
#' Baker W.J., Bailey P., Barber V., Barker A., Bellot S., Bishop D., Botigue L.R., Brewer G., Carruthers T., Clarkson J.J., Cook J., Cowan R.S., Dodsworth S., Epitawalage N., Francoso E., Gallego B., Johnson M., Kim J.T., Leempoel K., Maurin O., McGinnie C., Pokorny L., Roy S., Stone M., Toledo E., Wickett N.J., Zuntini A.R., Eiserhardt W.L., Kersey P.J., Leitch I.J. & Forest F. 2021. A Comprehensive Phylogenomic Platform for Exploring the Angiosperm Tree of Life. Systematic Biology, 2021; syab035, https://doi.org/10.1093/sysbio/syab035
#'
#' @importFrom glue glue
#' @importFrom stringr str_extract
#'
#' @export
load_tol <- function(url=NULL, .wait=0.1) {
  # with no URL given, fall back to the default ToL download URL
  if (is.null(url)) {
    url <- tol_download_url_()
  }

  # the file is requested as plain text rather than parsed as JSON
  reply <- make_request_(url, query=NULL, json=FALSE, .wait=.wait)

  # the trailing lowercase letters of the URL name the class of the result
  suffix <- str_extract(url, "[a-z]+$")
  file_class <- c(paste0("tol_", suffix), "tol")

  structure(
    list(content=reply$content, response=reply$response),
    class=file_class
  )
}

#' Download a file from the ToL SFTP server.
#'
#' Download an alignment, sequence, or tree file from the ToL
#' SFTP server.
#'
#' The [Tree of Life](https://treeoflife.kew.org/) is a database
#' of specimens sequenced as part of Kew's efforts to build
#' a comprehensive evolutionary tree of life for flowering plants.
#'
#' Sequence, alignment, and Newick tree files are held on an SFTP server
#' for download. The URLs to access these are stored in entries for specimens
#' and genes in the ToL database. These can be accessed by either using [search_tol()]
#' to get all specimens for a particular order, family, genus, or species or by
#' looking up a specific specimen or gene using [lookup_tol()].
#'
#' The file is saved under the name given by the last path segment of the
#' URL. It is written in binary mode so that the saved copy is not altered
#' by line-ending translation on Windows.
#'
#' @param download_link A string specifying the URL to download the file from.
#' You can get a download URL for a particular specimen or gene using [lookup_tol()].
#' If `NULL`, the ToL tree download URL is used.
#' @param save_dir A string specifying the folder to save the download in. If
#' no value is provided, \link[here]{here} will be used.
#'
#' @return `NULL`, invisibly. The function is called for its side effect of
#' saving the file to `save_dir`.
#'
#' @examples
#' \dontrun{
#' # download a specimen fasta file
#' specimen_info <- lookup_tol("1296")
#' download_tol(specimen_info$fasta_file_url)
#'
#' # download a gene alignment file
#' gene_info <- lookup_tol("51", type="gene")
#' download_tol(gene_info$alignment_file_url)
#'
#' # download the gene tree
#' download_tol(gene_info$tree_file_url)
#' }
#'
#' @family ToL functions
#' @seealso
#' * [lookup_tol()] to lookup information about a sequenced specimen
#'   using a valid ToL ID.
#' * [search_tol()] to search ToL using taxonomic info.
#' * [load_tol()] load a file from the ToL SFTP server.
#'
#' @references
#' Baker W.J., Bailey P., Barber V., Barker A., Bellot S., Bishop D., Botigue L.R., Brewer G., Carruthers T., Clarkson J.J., Cook J., Cowan R.S., Dodsworth S., Epitawalage N., Francoso E., Gallego B., Johnson M., Kim J.T., Leempoel K., Maurin O., McGinnie C., Pokorny L., Roy S., Stone M., Toledo E., Wickett N.J., Zuntini A.R., Eiserhardt W.L., Kersey P.J., Leitch I.J. & Forest F. 2021. A Comprehensive Phylogenomic Platform for Exploring the Angiosperm Tree of Life. Systematic Biology, 2021; syab035, https://doi.org/10.1093/sysbio/syab035
#'
#' @importFrom here here
#' @importFrom glue glue
#' @importFrom stringr str_extract
#' @importFrom utils download.file
#'
#' @export
download_tol <- function(download_link=NULL, save_dir=NULL) {
  if (is.null(save_dir)) {
    save_dir <- here()
  }

  if (is.null(download_link)) {
    download_link <- tol_download_url_()
  }

  # the saved file keeps the server's name: everything after the final "/"
  filename <- str_extract(download_link, "(?<=/)[^/]+$")

  # without this check a URL with no final path segment would be saved
  # silently as a file literally called "NA"
  if (anyNA(filename)) {
    stop(
      "Could not work out a file name from the download link: ",
      download_link,
      call.=FALSE
    )
  }

  save_path <- file.path(save_dir, filename)

  # named `status` rather than `message` so base::message is not shadowed
  status <- glue("Downloading file {filename}",
                 "to: {save_path}\n",
                 .sep=" ", .trim=FALSE)

  cat(status)

  # binary mode: the default text mode rewrites line endings on Windows,
  # so the saved file would no longer match the one on the server
  download.file(download_link, save_path, mode="wb")

  invisible()
}

#' Make the ToL lookup URL.
#'
#' @param id A valid ToL ID.
#' @param type The kind of record to look up, either `"specimen"` or `"gene"`.
#'
#' @noRd
#'
#' @importFrom glue glue
tol_lookup_url_ <- function(id, type=c("specimen", "gene")) {
  type <- match.arg(type)
  base <- get_url_("tol")

  # the endpoint is the plural of the record type, followed by the ID
  glue("{base}/{type}s/{id}")
}

#' Make Tree of Life search URL.
#'
#' @param type The collection to search, either `"specimens"` or `"genes"`.
#'
#' @importFrom glue glue
#'
#' @noRd
tol_search_url_ <- function(type=c("specimens", "genes")) {
  type <- match.arg(type)
  base <- get_url_("tol")

  glue("{base}/{type}")
}

#' Make a download URL for the Tree of Life.
#'
#' Appends `/tree` to the ToL base URL obtained from `get_url_("tol")`.
#'
#' @importFrom glue glue
#'
#' @noRd
tol_download_url_ <- function() {
  tol_base <- get_url_("tol")

  glue("{tol_base}/tree")
}
--------------------------------------------------------------------------------