├── .github
    ├── .gitignore
    └── workflows
    │   ├── pkgdown.yaml
    │   └── R-CMD-check.yaml
├── vignettes
    ├── .gitignore
    ├── articles
    │   ├── figure
    │   │   ├── tol-assessment-bars-1.png
    │   │   └── tol-assessment-tree-1.png
    │   ├── precompile.R
    │   ├── retrieve-all-query-results.Rmd
    │   ├── building-checklist.Rmd
    │   └── mapping-assessed-species.Rmd
    ├── KNMS.Rmd
    ├── POWO.Rmd
    ├── IPNI.Rmd
    ├── KRS.Rmd
    ├── WCVP.Rmd
    └── ToL.Rmd
├── _pkgdown.yml
├── LICENSE
├── .gitignore
├── R
    ├── reexports.R
    ├── utils.R
    ├── data.R
    ├── krs.R
    ├── request-next.R
    ├── knms.R
    ├── format.R
    ├── tidy.R
    ├── query-formatting.R
    ├── ipni.R
    ├── requests.R
    ├── powo.R
    ├── kew_citation.R
    ├── print.R
    ├── wcvp.R
    └── tol.R
├── tests
    ├── testthat.R
    └── testthat
    │   ├── test-requests.R
    │   ├── test-krs.R
    │   ├── test-knms.R
    │   ├── test-query-formatting.R
    │   ├── test-request-next.R
    │   ├── test-tol.R
    │   ├── test-ipni.R
    │   ├── test-wcvp.R
    │   └── test-powo.R
├── data
    ├── danish_plants.rda
    └── angiosperm_assessments.rda
├── .Rbuildignore
├── man
    ├── reexports.Rd
    ├── get_filters.Rd
    ├── get_keywords.Rd
    ├── danish_plants.Rd
    ├── angiosperm_assessments.Rd
    ├── request_next.Rd
    ├── kew_citation.Rd
    ├── match_krs.Rd
    ├── download_wcvp.Rd
    ├── lookup_ipni.Rd
    ├── match_knms.Rd
    ├── lookup_powo.Rd
    ├── lookup_wcvp.Rd
    ├── load_tol.Rd
    ├── download_tol.Rd
    ├── lookup_tol.Rd
    ├── search_ipni.Rd
    ├── search_powo.Rd
    ├── search_tol.Rd
    └── search_wcvp.Rd
├── kewr.Rproj
├── data-raw
    ├── angiosperm_assessments.R
    └── danish_plants.R
├── DESCRIPTION
├── LICENSE.md
├── NAMESPACE
├── README.Rmd
└── README.md


/.github/.gitignore:
--------------------------------------------------------------------------------
1 | *.html
2 | 


--------------------------------------------------------------------------------
/vignettes/.gitignore:
--------------------------------------------------------------------------------
1 | *.html
2 | *.R
3 | 


--------------------------------------------------------------------------------
/_pkgdown.yml:
--------------------------------------------------------------------------------
1 | url: https://github.com/barnabywalker/kewr/


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | YEAR: 2020
2 | COPYRIGHT HOLDER: Barnaby Walker
3 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .Rproj.user
2 | .Rhistory
3 | .RData
4 | inst/doc
5 | *.zip
6 | docs
7 | 


--------------------------------------------------------------------------------
/R/reexports.R:
--------------------------------------------------------------------------------
1 | #' @importFrom generics tidy
2 | #' @export
3 | generics::tidy
4 | 


--------------------------------------------------------------------------------
/tests/testthat.R:
--------------------------------------------------------------------------------
1 | library(testthat)
2 | library(kewr)
3 | 
4 | test_check("kewr")
5 | 


--------------------------------------------------------------------------------
/data/danish_plants.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/barnabywalker/kewr/HEAD/data/danish_plants.rda


--------------------------------------------------------------------------------
/data/angiosperm_assessments.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/barnabywalker/kewr/HEAD/data/angiosperm_assessments.rda


--------------------------------------------------------------------------------
/vignettes/articles/figure/tol-assessment-bars-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/barnabywalker/kewr/HEAD/vignettes/articles/figure/tol-assessment-bars-1.png


--------------------------------------------------------------------------------
/vignettes/articles/figure/tol-assessment-tree-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/barnabywalker/kewr/HEAD/vignettes/articles/figure/tol-assessment-tree-1.png


--------------------------------------------------------------------------------
/.Rbuildignore:
--------------------------------------------------------------------------------
 1 | ^kewr\.Rproj$
 2 | ^\.Rproj\.user$
 3 | ^LICENSE\.md$
 4 | ^README\.Rmd$
 5 | ^\.github$
 6 | ^data-raw$
 7 | ^_pkgdown\.yml$
 8 | ^docs$
 9 | ^pkgdown$
10 | ^vignettes/articles$
11 | 


--------------------------------------------------------------------------------
/tests/testthat/test-requests.R:
--------------------------------------------------------------------------------
1 | test_that("raises 404 error for bad URL", {
2 |   expect_error(make_request_("NOT A VALID URL"))
3 | })
4 | 
5 | test_that("raises error for non-json response", {
6 |   url <- "https://www.wcvp.science.kew.org"
7 |   expect_error(make_request_(url))
8 | })
9 | 


--------------------------------------------------------------------------------
/man/reexports.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/reexports.R
 3 | \docType{import}
 4 | \name{reexports}
 5 | \alias{reexports}
 6 | \alias{tidy}
 7 | \title{Objects exported from other packages}
 8 | \keyword{internal}
 9 | \description{
10 | These objects are imported from other packages. Follow the links
11 | below to see their documentation.
12 | 
13 | \describe{
14 |   \item{generics}{\code{\link[generics]{tidy}}}
15 | }}
16 | 
17 | 


--------------------------------------------------------------------------------
/kewr.Rproj:
--------------------------------------------------------------------------------
 1 | Version: 1.0
 2 | 
 3 | RestoreWorkspace: No
 4 | SaveWorkspace: No
 5 | AlwaysSaveHistory: Default
 6 | 
 7 | EnableCodeIndexing: Yes
 8 | UseSpacesForTab: Yes
 9 | NumSpacesForTab: 2
10 | Encoding: UTF-8
11 | 
12 | RnwWeave: Sweave
13 | LaTeX: pdfLaTeX
14 | 
15 | AutoAppendNewline: Yes
16 | StripTrailingWhitespace: Yes
17 | LineEndingConversion: Posix
18 | 
19 | BuildType: Package
20 | PackageUseDevtools: Yes
21 | PackageInstallArgs: --no-multiarch --with-keep.source
22 | PackageRoxygenize: rd,collate,namespace
23 | 


--------------------------------------------------------------------------------
/man/get_filters.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/utils.R
 3 | \name{get_filters}
 4 | \alias{get_filters}
 5 | \title{Get valid filters for a resource.}
 6 | \usage{
 7 | get_filters(resource = c("wcvp", "powo", "ipni", "tol"))
 8 | }
 9 | \arguments{
10 | \item{resource}{The resource being queried.}
11 | }
12 | \value{
13 | A character vector of valid filters.
14 | }
15 | \description{
16 | Get valid filters for a resource.
17 | }
18 | \examples{
19 | get_filters("wcvp")
20 | get_filters("powo")
21 | get_filters("ipni")
22 | 
23 | }
24 | 


--------------------------------------------------------------------------------
/man/get_keywords.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/utils.R
 3 | \name{get_keywords}
 4 | \alias{get_keywords}
 5 | \title{Get valid keywords for a resource.}
 6 | \usage{
 7 | get_keywords(resource = c("wcvp", "powo", "ipni", "tol"))
 8 | }
 9 | \arguments{
10 | \item{resource}{The resource being queried.}
11 | }
12 | \value{
13 | A character vector of valid keywords.
14 | }
15 | \description{
16 | Get valid keywords for a resource.
17 | }
18 | \examples{
19 | get_keywords("wcvp")
20 | get_keywords("powo")
21 | get_keywords("ipni")
22 | 
23 | }
24 | 


--------------------------------------------------------------------------------
/vignettes/articles/precompile.R:
--------------------------------------------------------------------------------
 1 | #' precompile articles that take a long time to run
 2 | library(knitr)
 3 | library(here)
 4 | 
 5 | # remove the figures folder for regeneration
 6 | if (dir.exists(here("vignettes/articles/figure"))) {
 7 |   unlink(here("vignettes/articles/figure"), recursive=TRUE)
 8 | }
 9 | 
10 | # Conservation status on the Tree of Life
11 | knit("vignettes/articles/conservation-status-treeoflife.Rmd.orig",
12 |      "vignettes/articles/conservation-status-treeoflife.Rmd")
13 | 
14 | # move any figures that have been created to the articles folder
15 | file.rename(here("figure"), here("vignettes/articles/figure"))
16 | 


--------------------------------------------------------------------------------
/tests/testthat/test-krs.R:
--------------------------------------------------------------------------------
 1 | test_that("match URL returns status 200", {
 2 |   url <- krs_url_()
 3 |   response <- httr::RETRY("GET", url, times=3)
 4 | 
 5 |   expect_equal(httr::status_code(response), 200)
 6 | })
 7 | 
 8 | test_that("match URL response is json", {
 9 |   url <- krs_url_()
10 |   # KRS fails intermittently, so retry like the status-code test does
11 |   response <- httr::RETRY("GET", url, times=3)
12 | 
13 |   expect_equal(httr::http_type(response), "application/json")
14 | })
15 | 
16 | test_that("raises error for unimplemented keyword", {
17 |   query <- list(published="1920")
18 |   expect_error(match_krs(query), "Query keyword.+ not recognised")
19 | })
20 | 
21 | test_that("tidy match results returns tibble", {
22 |   results <- match_krs("Poa annua")
23 |   tidied <- tidy(results)
24 | 
25 |   expect_s3_class(tidied, "tbl_df")
26 | })
27 | 
28 | 


--------------------------------------------------------------------------------
/data-raw/angiosperm_assessments.R:
--------------------------------------------------------------------------------
 1 | # libraries ----
 2 | library(rredlist)  # make requests to IUCN Red List
 3 | library(dplyr)     # manipulate data
 4 | library(purrr)     # map functions over lists
 5 | 
 6 | # set key ----
 7 | key <- Sys.getenv("IUCN_REDLIST_KEY")
 8 | 
 9 | # download all assessments ----
10 | all_assessments <- rl_sp(all=TRUE, key=key)
11 | all_assessments <- map_dfr(all_assessments, ~.x$result)
12 | 
13 | # filter for just angiosperms ----
14 | angiosperm_assessments <-
15 |   all_assessments %>%
16 |   filter(class_name %in% c("MAGNOLIOPSIDA", "LILIOPSIDA")) %>%
17 |   select(taxonid, scientific_name, taxonomic_authority, category)
18 | 
19 | # convert to tibble for ease
20 | angiosperm_assessments <- as_tibble(angiosperm_assessments)
21 | 
22 | usethis::use_data(angiosperm_assessments, overwrite = TRUE)
23 | 


--------------------------------------------------------------------------------
/R/utils.R:
--------------------------------------------------------------------------------
 1 | #' Get valid filters for a resource.
 2 | #'
 3 | #' @param resource The resource being queried.
 4 | #'
 5 | #' @return A character vector of valid filters.
 6 | #'
 7 | #' @examples
 8 | #' get_filters("wcvp")
 9 | #' get_filters("powo")
10 | #' get_filters("ipni")
11 | #'
12 | #' @export
13 | get_filters <- function(resource=c("wcvp", "powo", "ipni", "tol")) {
14 |   # match.arg() picks the first choice by default and rejects unknown values
15 |   resource <- match.arg(resource)
16 | 
17 |   names(get_filters_(resource))
18 | }
19 | 
20 | #' Get valid keywords for a resource.
21 | #'
22 | #' @param resource The resource being queried.
23 | #'
24 | #' @return A character vector of valid keywords.
25 | #'
26 | #' @examples
27 | #' get_keywords("wcvp")
28 | #' get_keywords("powo")
29 | #' get_keywords("ipni")
30 | #'
31 | #' @export
32 | get_keywords <- function(resource=c("wcvp", "powo", "ipni", "tol")) {
33 |   # match.arg() picks the first choice by default and rejects unknown values
34 |   resource <- match.arg(resource)
35 | 
36 |   names(get_keywords_(resource))
37 | }
38 | 


--------------------------------------------------------------------------------
/DESCRIPTION:
--------------------------------------------------------------------------------
 1 | Package: kewr
 2 | Title: R Package to Access Kew Data APIs
 3 | Version: 0.6.1
 4 | Authors@R: 
 5 |     person(given = "Barnaby",
 6 |            family = "Walker",
 7 |            role = c("aut", "cre"),
 8 |            email = "barnabywalker08@gmail.com",
 9 |            comment = c(ORCID = "0000-0002-3884-671X"))
10 | Description: An R package to access data from RBG Kew's APIs.
11 | License: MIT + file LICENSE
12 | Encoding: UTF-8
13 | LazyData: true
14 | Roxygen: list(markdown = TRUE)
15 | RoxygenNote: 7.2.0
16 | Suggests: 
17 |     testthat,
18 |     knitr,
19 |     rmarkdown,
20 |     ggplot2,
21 |     progress
22 | URL: https://barnabywalker.github.io/kewr/, https://github.com/barnabywalker/kewr/
23 | BugReports: https://github.com/barnabywalker/kewr/issues
24 | Imports: 
25 |     httr,
26 |     jsonlite,
27 |     glue,
28 |     tibble,
29 |     purrr,
30 |     dplyr,
31 |     stringr,
32 |     rvest,
33 |     here,
34 |     tidyr,
35 |     rlang,
36 |     generics
37 | VignetteBuilder: knitr
38 | Depends: 
39 |     R (>= 2.10)
40 | 


--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
 1 | # MIT License
 2 | 
 3 | Copyright (c) 2020 Barnaby Walker
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/man/danish_plants.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/data.R
 3 | \docType{data}
 4 | \name{danish_plants}
 5 | \alias{danish_plants}
 6 | \title{Extinction risk assessments for Danish plants.}
 7 | \format{
 8 | A data frame with 361 rows and 4 variables:
 9 | \describe{
10 | \item{taxonid}{IUCN Red List unique ID for the taxon}
11 | \item{scientific_name}{The scientific name of the taxon}
12 | \item{authority}{The taxonomic authority of the taxon name}
13 | \item{category}{The IUCN Red List assessment category}
14 | }
15 | }
16 | \source{
17 | \url{https://www.iucnredlist.org/}
18 | }
19 | \usage{
20 | danish_plants
21 | }
22 | \description{
23 | A dataset containing global extinction risk assessments for
24 | plants found in Denmark. Sourced from the IUCN Red List of
25 | Threatened Plants using the \code{rredlist} package.
26 | }
27 | \references{
28 | Scott Chamberlain (2020). rredlist: 'IUCN' Red List Client.
29 | R package version 0.7.0. https://CRAN.R-project.org/package=rredlist
30 | 
31 | IUCN 2021. The IUCN Red List of Threatened Species. Version 2020-3.
32 | \url{https://www.iucnredlist.org}
33 | }
34 | \keyword{datasets}
35 | 


--------------------------------------------------------------------------------
/data-raw/danish_plants.R:
--------------------------------------------------------------------------------
 1 | # libraries ----
 2 | library(rredlist)  # make requests to IUCN Red List
 3 | library(dplyr)     # manipulate data
 4 | library(purrr)     # map functions over lists
 5 | 
 6 | # set key ----
 7 | key <- Sys.getenv("IUCN_REDLIST_KEY")
 8 | 
 9 | # download danish assessments ----
10 | iso_code <- "DK"
11 | assessments <- rl_sp_country(iso_code, key=key)
12 | 
13 | # download full assessment info for each taxon ----
14 | taxa_idx <- unique(assessments$result$taxonid)
15 | 
16 | # wrap the search function to add a wait, so IUCN don't get upset
17 | f <- function(taxonid) {
18 |   Sys.sleep(0.1)
19 |   rl_search(id=taxonid, key=key)
20 | }
21 | 
22 | # might still need a few retries to get everything without an error
23 | full_assessments <- map(taxa_idx, f)
24 | full_assessments <- map_dfr(full_assessments, ~.x$result)
25 | 
26 | # narrow down to vascular plants ----
27 | danish_plants <-
28 |   full_assessments %>%
29 |   filter(phylum == "TRACHEOPHYTA") %>%
30 |   select(taxonid, scientific_name, authority, category)
31 | 
32 | # convert to a tibble for ease
33 | danish_plants <- as_tibble(danish_plants)
34 | 
35 | # save to data folder ----
36 | usethis::use_data(danish_plants, overwrite = TRUE)
37 | 


--------------------------------------------------------------------------------
/man/angiosperm_assessments.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/data.R
 3 | \docType{data}
 4 | \name{angiosperm_assessments}
 5 | \alias{angiosperm_assessments}
 6 | \title{Extinction risk assessments for angiosperms.}
 7 | \format{
 8 | A data frame with 53,542 rows and 4 variables:
 9 | \describe{
10 | \item{taxonid}{IUCN Red List unique ID for the taxon}
11 | \item{scientific_name}{The scientific name of the taxon}
12 | \item{taxonomic_authority}{The taxonomic authority of the taxon name}
13 | \item{category}{The IUCN Red List assessment category}
14 | }
15 | }
16 | \source{
17 | \url{https://www.iucnredlist.org/}
18 | }
19 | \usage{
20 | angiosperm_assessments
21 | }
22 | \description{
23 | A dataset containing global extinction risk assessments for
24 | all assessed angiosperm species. Sourced from the IUCN Red List of
25 | Threatened Plants using the \code{rredlist} package. Last updated with
26 | version 2021-1 of the IUCN Red List.
27 | }
28 | \references{
29 | Scott Chamberlain (2020). rredlist: 'IUCN' Red List Client.
30 | R package version 0.7.0. https://CRAN.R-project.org/package=rredlist
31 | 
32 | IUCN 2021. The IUCN Red List of Threatened Species. Version 2020-3.
33 | \url{https://www.iucnredlist.org}
34 | }
35 | \keyword{datasets}
36 | 


--------------------------------------------------------------------------------
/tests/testthat/test-knms.R:
--------------------------------------------------------------------------------
 1 | test_that("GET request to KNMS returns 405 error", {
 2 |   url <- knms_url_()
 3 |   response <- httr::GET(url)
 4 | 
 5 |   expect_equal(httr::status_code(response), 405)
 6 | })
 7 | 
 8 | test_that("POST request to KNMS returns 200", {
 9 |   url <- knms_url_()
10 |   response <- httr::POST(url, body=list(""), encode="json")
11 | 
12 |   expect_equal(httr::status_code(response), 200)
13 | })
14 | 
15 | test_that("POST request to KNMS returns a json", {
16 |   url <- knms_url_()
17 |   response <- httr::POST(url, body=list(""), encode="json")
18 | 
19 |   expect_equal(httr::http_type(response), "application/json")
20 | })
21 | 
22 | test_that("Raises error if missing value in names to match", {
23 |   names <- c("Poa annua", NA_character_, NA_character_, "Myrcia almasensis")
24 | 
25 |   expect_error(match_knms(names), regexp="NA is present")
26 | })
27 | 
28 | test_that("Line parsing returns a tibble", {
29 |   names <- c("Bad plant")
30 |   matches <- match_knms(names)
31 |   parsed <- parse_knms_line(matches$results[[1]])
32 | 
33 |   expect_s3_class(parsed, "tbl_df")
34 | })
35 | 
36 | test_that("Match tidying returns a tibble", {
37 |   names <- c("Bad plant", "Poa annua", "Myrcia guianensis")
38 |   matches <- match_knms(names)
39 |   tidied <- tidy(matches)
40 | 
41 |   expect_s3_class(tidied, "tbl_df")
42 | })
43 | 


--------------------------------------------------------------------------------
/man/request_next.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/request-next.R
 3 | \name{request_next}
 4 | \alias{request_next}
 5 | \alias{request_next.wcvp_search}
 6 | \alias{request_next.powo_search}
 7 | \alias{request_next.ipni_search}
 8 | \alias{request_next.tol_search}
 9 | \title{Request the next page of search results for a kewr results object.}
10 | \usage{
11 | request_next(object)
12 | 
13 | \method{request_next}{wcvp_search}(object)
14 | 
15 | \method{request_next}{powo_search}(object)
16 | 
17 | \method{request_next}{ipni_search}(object)
18 | 
19 | \method{request_next}{tol_search}(object)
20 | }
21 | \arguments{
22 | \item{object}{A kewr search results object.}
23 | }
24 | \value{
25 | The next page of results as a kewr search object of
26 | the same type.
27 | }
28 | \description{
29 | Request the next page of search results for a kewr results object.
30 | }
31 | \section{Methods (by class)}{
32 | \itemize{
33 | \item \code{wcvp_search}: Request the next page of WCVP search results.
34 | 
35 | \item \code{powo_search}: Request the next page of POWO search results.
36 | 
37 | \item \code{ipni_search}: Request the next page of IPNI search results.
38 | 
39 | \item \code{tol_search}: Request the next page of ToL search results.
40 | }}
41 | 
42 | \examples{
43 | r <- search_wcvp("Poa")
44 | request_next(r)
45 | 
46 | 
47 | \donttest{
48 | r <- search_powo("Poa")
49 | request_next(r)
50 | }
51 | 
52 | r <- search_ipni("Poa")
53 | request_next(r)
54 | 
55 | r <- search_tol("Poa")
56 | request_next(r)
57 | 
58 | }
59 | 


--------------------------------------------------------------------------------
/tests/testthat/test-query-formatting.R:
--------------------------------------------------------------------------------
 1 | test_that("error for unimplemented WCVP filters", {
 2 |   filters <- c("accepted", "has_image", "monkey")
 3 | 
 4 |   expect_error(format_filters_(filters, "wcvp"),
 5 |                ".+\\[has_image,monkey\\] are not recognised.")
 6 | })
 7 | 
 8 | test_that("error for unimplemented POWO filters", {
 9 |   filters <- c("accepted", "author", "monkey")
10 | 
11 |   expect_error(format_filters_(filters, "powo"),
12 |                ".+\\[author,monkey\\] are not recognised.")
13 | })
14 | 
15 | test_that("error for unimplemented IPNI filters", {
16 |   filters <- c("infrafamilies", "author", "monkey")
17 | 
18 |   expect_error(format_filters_(filters, "ipni"),
19 |                ".+\\[author,monkey\\] are not recognised.")
20 | })
21 | 
22 | test_that("error for unimplemented WCVP keywords", {
23 |   query <- list("distribution"="Mexico")
24 | 
25 |   expect_error(format_query_(query, "wcvp"),
26 |                ".+\\[distribution\\] are not recognised")
27 | })
28 | 
29 | test_that("error for unimplemented IPNI keywords", {
30 |   query <- list("common_name"="daisy")
31 | 
32 |   expect_error(format_query_(query, "ipni"),
33 |                ".+\\[common_name\\] are not recognised")
34 | })
35 | 
36 | test_that("error for unimplemented POWO keywords", {
37 |   query <- list("lifeform"="epiphyte")
38 | 
39 |   expect_error(format_query_(query, "powo"),
40 |                ".+\\[lifeform\\] are not recognised")
41 | })
42 | 
43 | test_that("error for unrecognised resource", {
44 |   filters <- c("accepted")
45 |   # pass the vector defined above; a bare `filter` would be a function object
46 |   expect_error(format_filters_(filters, "google"))
47 | })
48 | 


--------------------------------------------------------------------------------
/.github/workflows/pkgdown.yaml:
--------------------------------------------------------------------------------
 1 | on:
 2 |   push:
 3 |     branches:
 4 |       - main
 5 |       - master
 6 | 
 7 | name: pkgdown
 8 | 
 9 | jobs:
10 |   pkgdown:
11 |     runs-on: macOS-latest
12 |     env:
13 |       GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
14 |     steps:
15 |       - uses: actions/checkout@v2
16 | 
17 |       - uses: r-lib/actions/setup-r@v1
18 | 
19 |       - uses: r-lib/actions/setup-pandoc@v1
20 | 
21 |       - name: Query dependencies
22 |         run: |
23 |           install.packages('remotes')
24 |           saveRDS(remotes::dev_package_deps(dependencies = TRUE), ".github/depends.Rds", version = 2)
25 |           writeLines(sprintf("R-%i.%i", getRversion()$major, getRversion()$minor), ".github/R-version")
26 |         shell: Rscript {0}
27 | 
28 |       - name: Cache R packages
29 |         uses: actions/cache@v2
30 |         with:
31 |           path: ${{ env.R_LIBS_USER }}
32 |           key: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1-${{ hashFiles('.github/depends.Rds') }}
33 |           restore-keys: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1-
34 | 
35 |       - name: Install dependencies
36 |         run: |
37 |           remotes::install_deps(dependencies = TRUE)
38 |           install.packages("pkgdown", type = "binary")
39 |         shell: Rscript {0}
40 | 
41 |       - name: Install package
42 |         run: R CMD INSTALL .
43 | 
44 |       - name: Deploy package
45 |         run: |
46 |           git config --local user.email "actions@github.com"
47 |           git config --local user.name "GitHub Actions"
48 |           Rscript -e 'pkgdown::deploy_to_branch(new_process = FALSE)'
49 | 


--------------------------------------------------------------------------------
/man/kew_citation.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/kew_citation.R
 3 | \name{kew_citation}
 4 | \alias{kew_citation}
 5 | \title{Get citation for Kew data resource.}
 6 | \usage{
 7 | kew_citation(x)
 8 | }
 9 | \arguments{
10 | \item{x}{Result of a call to \code{\link[=search_powo]{search_powo()}}, \code{\link[=lookup_powo]{lookup_powo()}}, \code{\link[=search_wcvp]{search_wcvp()}},
11 | \code{\link[=lookup_wcvp]{lookup_wcvp()}}, \code{\link[=search_ipni]{search_ipni()}}, \code{\link[=lookup_ipni]{lookup_ipni()}}, \code{\link[=search_tol]{search_tol()}}, \code{\link[=load_tol]{load_tol()}},
12 | \code{\link[=match_knms]{match_knms()}}, or \code{\link[=match_krs]{match_krs()}}}
13 | }
14 | \value{
15 | A citation object with a print method for nice display.
16 | }
17 | \description{
18 | Given the result of a query to a Kew data resource, get the appropriate
19 | citation.
20 | }
21 | \examples{
22 | r <- search_powo(list(characteristic="poison"))
23 | kew_citation(r)
24 | 
25 | r <- lookup_powo("320035-2")
26 | kew_citation(r)
27 | 
28 | r <- search_wcvp(list(genus="Poa"))
29 | kew_citation(r)
30 | 
31 | r <- lookup_wcvp("320035-2")
32 | kew_citation(r)
33 | 
34 | r <- search_ipni(list(publishing_author="L."))
35 | kew_citation(r)
36 | 
37 | r <- lookup_ipni("12653-1")
38 | kew_citation(r)
39 | 
40 | r <- search_tol("Poa")
41 | kew_citation(r)
42 | 
43 | r <- lookup_tol(2717)
44 | kew_citation(r)
45 | 
46 | tree <- load_tol()
47 | kew_citation(tree)
48 | 
49 | match <- match_knms("Poa annua")
50 | kew_citation(match)
51 | 
52 | match <- match_krs("Poa annua")
53 | kew_citation(match)
54 | 
55 | 
56 | }
57 | 


--------------------------------------------------------------------------------
/man/match_krs.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/krs.R
 3 | \name{match_krs}
 4 | \alias{match_krs}
 5 | \title{Match a name using KRS.}
 6 | \usage{
 7 | match_krs(query, .wait = 0.2, .retries = 3)
 8 | }
 9 | \arguments{
10 | \item{query}{The name to match using the reconciliation service. Use a named list to
11 | match parts of a name.}
12 | 
13 | \item{.wait}{Time to wait before making a request, to help
14 | rate limiting.}
15 | 
16 | \item{.retries}{The max number of times to retry the request to KRS. KRS seems
17 | to fail every other request, so adding a small number of retries helps prevent
18 | unnecessary failure.}
19 | }
20 | \value{
21 | Returns an object of class \code{krs_match} that is a simple
22 | structure with slots for:
23 | \itemize{
24 | \item \code{results}: the query results parsed into a list.
25 | \item \code{response}: the \link[httr:response]{httr response object}.
26 | }
27 | }
28 | \description{
29 | Use the Kew Reconciliation Service to match a name against IPNI.
30 | }
31 | \details{
32 | The \href{http://data1.kew.org/reconciliation/about/IpniName}{Kew Reconciliation Service (KRS)}
33 | allows name matching against IPNI using an Open Refine reconciliation API.
34 | }
35 | \examples{
36 | # Match a name.
37 | match_krs("Solanum sanchez-vegae")
38 | 
39 | # Match a name using name parts
40 | match_krs(list(genus="Solanum", species="sanchez-vegae", author="Knapp"))
41 | 
42 | # Format a returned match as a dataframe
43 | match <- match_krs(list(genus="Solanum", species="sanchez-vegae", author="Knapp"))
44 | tidy(match)
45 | 
46 | }
47 | \seealso{
48 | \itemize{
49 | \item \code{\link[=match_knms]{match_knms()}} to use simple matching for a vector of names.
50 | }
51 | }
52 | 


--------------------------------------------------------------------------------
/man/download_wcvp.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/wcvp.R
 3 | \name{download_wcvp}
 4 | \alias{download_wcvp}
 5 | \title{Download the whole of the WCVP.}
 6 | \usage{
 7 | download_wcvp(save_dir = NULL, version = NULL)
 8 | }
 9 | \arguments{
10 | \item{save_dir}{A string specifying the folder to save the download in. If
11 | no value is provided, \link[here]{here} will be used.}
12 | 
13 | \item{version}{An integer version number to download. The latest
14 | version will be downloaded by default.}
15 | }
16 | \description{
17 | Download the latest or a specific version of the World
18 | Checklist of Vascular Plants (WCVP).
19 | }
20 | \details{
21 | The \href{https://wcvp.science.kew.org/}{World Checklist of Vascular Plants (WCVP)}
22 | is a global consensus view of all known vascular plant species.
23 | It has been compiled by staff at RBG Kew in consultation with plant
24 | group experts.
25 | 
26 | Versioned downloads of the whole WCVP are provided on the website.
27 | This function allows the user to download the latest or a specific
28 | version of the WCVP.
29 | }
30 | \examples{
31 | \dontrun{
32 |  # download the latest version
33 |  download_wcvp()
34 | 
35 |  # download version 1
36 |  download_wcvp(version=1)
37 | }
38 | 
39 | }
40 | \references{
41 | WCVP (2020). World Checklist of Vascular Plants, version 2.0. Facilitated by the Royal Botanic Gardens, Kew. Published on the Internet; http://wcvp.science.kew.org/
42 | }
43 | \seealso{
44 | \itemize{
45 | \item \code{\link[=lookup_wcvp]{lookup_wcvp()}} to lookup information about a taxon name
46 | using a valid IPNI ID.
47 | \item \code{\link[=search_wcvp]{search_wcvp()}} to search WCVP using a taxon name.
48 | }
49 | 
50 | Other WCVP functions: 
51 | \code{\link{lookup_wcvp}()},
52 | \code{\link{search_wcvp}()}
53 | }
54 | \concept{WCVP functions}
55 | 


--------------------------------------------------------------------------------
/man/lookup_ipni.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/ipni.R
 3 | \name{lookup_ipni}
 4 | \alias{lookup_ipni}
 5 | \title{Look up a name in IPNI.}
 6 | \usage{
 7 | lookup_ipni(id, type = c("taxon", "author", "publication"), .wait = 0.1)
 8 | }
 9 | \arguments{
10 | \item{id}{A string containing a valid IPNI ID.}
11 | 
12 | \item{type}{The type of record to look up. Either \code{taxon}, \code{author}, or \code{publication}.}
13 | 
14 | \item{.wait}{Time to wait before making a request, to help
15 | rate limiting.}
16 | }
17 | \value{
18 | An \code{ipni_taxon} object, which is a simple structure with fields
19 | for each of the fields returned by the lookup API, as well as the \link[httr:response]{httr response object}.
20 | }
21 | \description{
22 | Request the record for a taxon, author, or publication name in IPNI,
23 | using the IPNI ID.
24 | }
25 | \details{
26 | The \href{https://www.ipni.org/}{International Plant Names Index (IPNI)}
27 | is a service that provides nomenclatural information for vascular plant names.
28 | 
29 | The name lookup API allows users to retrieve information for
30 | a specific taxon, author, or publication name using the unique IPNI ID. If this is not known,
31 | it can be found out using the \link[=search_ipni]{IPNI search API}.
32 | }
33 | \examples{
34 | 
35 | # retrieve nomenclatural information for a taxon name
36 | name <- lookup_ipni("271445-2", "taxon")
37 | print(name)
38 | 
39 | # tidy the results in a table
40 | tidy(name)
41 | 
42 | # retrieve nomenclatural information for an author
43 | name <- lookup_ipni("20028192-1", type="author")
44 | tidy(name)
45 | 
46 | # retrieve nomenclatural information for a publication
47 | name <- lookup_ipni("987-2", type="publication")
48 | tidy(name)
49 | 
50 | }
51 | \seealso{
52 | \itemize{
53 | \item \code{\link[=search_ipni]{search_ipni()}} to search IPNI using a taxon name.
54 | }
55 | 
56 | Other IPNI functions: 
57 | \code{\link{search_ipni}()}
58 | }
59 | \concept{IPNI functions}
60 | 


--------------------------------------------------------------------------------
/man/match_knms.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/knms.R
 3 | \name{match_knms}
 4 | \alias{match_knms}
 5 | \title{Match names with KNMS.}
 6 | \usage{
 7 | match_knms(names)
 8 | }
 9 | \arguments{
10 | \item{names}{A list or character vector of taxon names for matching.
11 | Must not contain missing values.}
12 | }
13 | \value{
14 | A \code{knms_match} object - a simple structure containing the match
15 | results and some statistics about the number of matches.
16 | }
17 | \description{
18 | Use the Kew Names Matching Service to match taxon names to
19 | records in Plants of the World Online.
20 | }
21 | \details{
22 | The \href{http://namematch.science.kew.org/}{Kew Names Matching Service (KNMS)} allows
23 | a user to submit taxon names for matching against records in
24 | \href{http://powo.science.kew.org/}{Plants of the World Online (POWO)}.
25 | As far as I can tell, it uses exact matching as well as some rules-based matching
26 | to account for common orthographic variants and Latin mistakes.
27 | 
28 | Names can be submitted to KNMS with or without an author string.
29 | If a name can match to multiple different records, for instance
30 | with synonyms, KNMS will return multiple matches. As such, we recommend
31 | submitting names first with the taxonomic authority and then without
32 | if no match can be found.
33 | 
34 | KNMS allows multiple names to be submitted at once. However, it can
35 | be slow in returning results if too many names are submitted. For lots of names,
36 | \href{http://namematch.science.kew.org/csv}{the website provides an interface for submitting a CSV file}.
37 | }
38 | \examples{
39 | 
40 | # match a name
41 | match_knms("Poa annua L.")
42 | 
43 | # match a vector of names
44 | names <- c("Myrcia guianensis", "Calyptranthes ranulphii", "Poa annua")
45 | match_knms(names)
46 | 
47 | # tidy match results into a table
48 | names <- c("Myrcia guianensis", "Bad plant", "Poa annua")
49 | matches <- match_knms(names)
50 | tidy(matches)
51 | 
52 | }
53 | 


--------------------------------------------------------------------------------
/R/data.R:
--------------------------------------------------------------------------------
 1 | #' Extinction risk assessments for Danish plants.
 2 | #'
 3 | #' A dataset containing global extinction risk assessments for
 4 | #' plants found in Denmark. Sourced from the IUCN Red List of
 5 | #' Threatened Plants using the `rredlist` package.
 6 | #'
 7 | #' @format A data frame with 361 rows and 4 variables:
 8 | #' \describe{
 9 | #'   \item{taxonid}{IUCN Red List unique ID for the taxon}
10 | #'   \item{scientific_name}{The scientific name of the taxon}
11 | #'   \item{authority}{The taxonomic authority of the taxon name}
12 | #'   \item{category}{The IUCN Red List assessment category}
13 | #' }
14 | #'
15 | #' @source \url{https://www.iucnredlist.org/}
16 | #'
17 | #' @references
18 | #' Scott Chamberlain (2020). rredlist: 'IUCN' Red List Client.
19 | #' R package version 0.7.0. https://CRAN.R-project.org/package=rredlist
20 | #'
21 | #' IUCN 2021. The IUCN Red List of Threatened Species. Version 2020-3.
22 | #' <https://www.iucnredlist.org>
23 | "danish_plants"
24 | 
25 | #' Extinction risk assessments for angiosperms.
26 | #'
27 | #' A dataset containing global extinction risk assessments for
28 | #' all assessed angiosperm species. Sourced from the IUCN Red List of
29 | #' Threatened Plants using the `rredlist` package. Last updated with
30 | #' version 2021-1 of the IUCN Red List.
31 | #'
32 | #' @format A data frame with 53,542 rows and 4 variables:
33 | #' \describe{
34 | #'   \item{taxonid}{IUCN Red List unique ID for the taxon}
35 | #'   \item{scientific_name}{The scientific name of the taxon}
36 | #'   \item{taxonomic_authority}{The taxonomic authority of the taxon name}
37 | #'   \item{category}{The IUCN Red List assessment category}
38 | #' }
39 | #'
40 | #' @source \url{https://www.iucnredlist.org/}
41 | #'
42 | #' @references
43 | #' Scott Chamberlain (2020). rredlist: 'IUCN' Red List Client.
44 | #' R package version 0.7.0. https://CRAN.R-project.org/package=rredlist
45 | #'
46 | #' IUCN 2021. The IUCN Red List of Threatened Species. Version 2021-1.
47 | #' <https://www.iucnredlist.org>
48 | "angiosperm_assessments"
49 | 
50 | 


--------------------------------------------------------------------------------
/man/lookup_powo.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/powo.R
 3 | \name{lookup_powo}
 4 | \alias{lookup_powo}
 5 | \title{Look up a taxon in POWO.}
 6 | \usage{
 7 | lookup_powo(taxonid, distribution = FALSE, .wait = 0.2)
 8 | }
 9 | \arguments{
10 | \item{taxonid}{A string containing a valid IPNI ID.}
11 | 
12 | \item{distribution}{Include distribution in results (default \code{FALSE}).}
13 | 
14 | \item{.wait}{Time to wait before making a request, to help
15 | rate limiting.}
16 | }
17 | \value{
18 | A \code{powo_taxon} object, which is a simple structure with fields
19 | for each of the fields returned by the lookup API, as well as the \link[httr:response]{httr response object}.
20 | }
21 | \description{
22 | Request the record for a taxon in Plants of the World Online (POWO)
23 | using the IPNI ID.
24 | }
25 | \details{
26 | \href{http://www.plantsoftheworldonline.org/}{Plants of the World Online (POWO)}
27 | is a database of information on the world's flora. It curates information from
28 | published floras and other sources of floristic information.
29 | 
30 | The taxon lookup API allows users to retrieve information about
31 | a specific taxon name using the unique IPNI ID. If this is not known,
32 | it can be found out using the \link[=search_powo]{POWO search API}.
33 | }
34 | \examples{
35 | # retrieve information for a taxon name
36 | lookup_powo("271445-2")
37 | 
38 | # print a summary of the returned information
39 | r <- lookup_powo("271445-2")
40 | print(r)
41 | 
42 | # tidy returned record into a tibble
43 | r <- lookup_powo("271445-2")
44 | tidy(r)
45 | 
46 | # tidy the returned list of synonyms into a tibble
47 | r <- lookup_wcvp("60447743-2")
48 | tidied <- tidy(r)
49 | tidyr::unnest(tidied, cols=synonyms, names_sep="_")
50 | 
51 | # tidy the returned list of children into a tibble
52 | r <- lookup_wcvp("30000055-2")
53 | tidied <- tidy(r)
54 | tidyr::unnest(tidied, cols=children, names_sep="_")
55 | 
56 | }
57 | \seealso{
58 | \itemize{
59 | \item \code{\link[=search_powo]{search_powo()}} to search POWO using a taxon name.
60 | }
61 | 
62 | Other POWO functions: 
63 | \code{\link{search_powo}()}
64 | }
65 | \concept{POWO functions}
66 | 


--------------------------------------------------------------------------------
/R/krs.R:
--------------------------------------------------------------------------------
 1 | #' Match a name using KRS.
 2 | #'
 3 | #' Use the Kew Reconciliation Service to match a name against IPNI.
 4 | #'
 5 | #' The [Kew Reconciliation Service (KRS)](http://data1.kew.org/reconciliation/about/IpniName)
 6 | #' allows name matching against IPNI using an Open Refine reconciliation API.
 7 | #'
 8 | #' @param query The name to match using the reconciliation service. Use a named list to
 9 | #'  match parts of a name.
10 | #' @param .wait Time to wait before making a request, to help
11 | #'  rate limiting.
12 | #' @param .retries The max number of times to retry the request to KRS. KRS seems
13 | #'  to fail every other request, so adding a small number of retries helps prevent
14 | #'  unnecessary failure.
15 | #'
16 | #' @return
17 | #' Returns an object of class `krs_match` that is a simple
18 | #' structure with slots for:
19 | #'
20 | #'  * `results`: the query results parsed into a list.
21 | #'  * `response`: the [httr response object][httr::response].
22 | #'
23 | #' @examples
24 | #' # Match a name.
25 | #' match_krs("Solanum sanchez-vegae")
26 | #'
27 | #' # Match a name using name parts
28 | #' match_krs(list(genus="Solanum", species="sanchez-vegae", author="Knapp"))
29 | #'
30 | #' # Format a returned match as a dataframe
31 | #' match <- match_krs(list(genus="Solanum", species="sanchez-vegae", author="Knapp"))
32 | #' tidy(match)
33 | #'
34 | #' @seealso
35 | #'  * [match_knms()] to use simple matching for a vector of names.
36 | #'
37 | #' @importFrom jsonlite toJSON
38 | #'
39 | #' @export
40 | match_krs <- function(query, .wait=0.2, .retries=3) {
41 |   endpoint <- krs_url_()
42 | 
43 |   # `query` is left untouched so the caller's input can be echoed back in
44 |   # the result object; only the formatted copy is passed to the request.
45 |   formatted_query <- format_refine_query_(query, "krs")
46 | 
47 |   reply <- make_request_(endpoint, formatted_query,
48 |                          .wait=.wait, .retries=.retries)
49 |   hits <- reply$content$result
50 | 
51 |   match_object <- list(
52 |     matches=length(hits),
53 |     results=hits,
54 |     query=query,
55 |     response=reply$response
56 |   )
57 |   class(match_object) <- "krs_match"
58 |   match_object
59 | }
60 | 
61 | #' Make the KRS URL (delegates to `get_url_("krs")`).
62 | #'
63 | #' @noRd
64 | krs_url_ <- function() {
65 |   get_url_("krs")
66 | }
67 | 


--------------------------------------------------------------------------------
/tests/testthat/test-request-next.R:
--------------------------------------------------------------------------------
 1 | test_that("method exists for WCVP search results", {
 2 |   # methods() lists the S3 methods available for the class
 3 |   available <- as.list(methods(class="wcvp_search"))
 4 | 
 5 |   expect_true("request_next.wcvp_search" %in% available)
 6 | })
 7 | 
 8 | test_that("method exists for POWO search results", {
 9 |   # methods() lists the S3 methods available for the class
10 |   available <- as.list(methods(class="powo_search"))
11 | 
12 |   expect_true("request_next.powo_search" %in% available)
13 | })
14 | 
15 | test_that("method exists for IPNI search results", {
16 |   # methods() lists the S3 methods available for the class
17 |   available <- as.list(methods(class="ipni_search"))
18 | 
19 |   expect_true("request_next.ipni_search" %in% available)
20 | })
21 | 
22 | test_that("cursor changes for WCVP", {
23 |   first_page <- search_wcvp(list(genus="Poa"), filters="accepted")
24 |   second_page <- request_next(first_page)
25 |   same_cursor <- first_page$cursor == second_page$cursor
26 |   expect_false(same_cursor)
27 | })
28 | 
29 | test_that("cursor changes for POWO", {
30 |   first_page <- search_powo(list(genus="Poa"), filters="accepted")
31 |   second_page <- request_next(first_page)
32 |   same_cursor <- first_page$cursor == second_page$cursor
33 |   expect_false(same_cursor)
34 | })
35 | 
36 | test_that("cursor changes for IPNI", {
37 |   first_page <- search_ipni(list(genus="Poa"), filters="species")
38 |   second_page <- request_next(first_page)
39 |   same_cursor <- first_page$cursor == second_page$cursor
40 |   expect_false(same_cursor)
41 | })
42 | 
43 | test_that("results change for WCVP", {
44 |   first_page <- search_wcvp(list(genus="Poa"), filters="accepted")
45 |   second_page <- request_next(first_page)
46 |   same_lead <- first_page$results[[1]]$id == second_page$results[[1]]$id
47 |   expect_false(same_lead)
48 | })
49 | 
50 | test_that("results change for POWO", {
51 |   first_page <- search_powo(list(genus="Poa"), filters="accepted")
52 |   second_page <- request_next(first_page)
53 |   same_lead <- first_page$results[[1]]$fqId == second_page$results[[1]]$fqId
54 |   expect_false(same_lead)
55 | })
56 | 
57 | test_that("results change for IPNI", {
58 |   first_page <- search_ipni(list(genus="Poa"), filters="species")
59 |   second_page <- request_next(first_page)
60 |   same_lead <- first_page$results[[1]]$id == second_page$results[[1]]$id
61 |   expect_false(same_lead)
62 | })
63 | 
64 | test_that("results change for ToL", {
65 |   first_page <- search_tol()
66 |   second_page <- request_next(first_page)
67 |   same_lead <- first_page$results[[1]]$id == second_page$results[[1]]$id
68 |   expect_false(same_lead)
69 | })
70 | 


--------------------------------------------------------------------------------
/tests/testthat/test-tol.R:
--------------------------------------------------------------------------------
 1 | test_that("search URL returns status 200", {
 2 |   reply <- httr::GET(tol_search_url_())
 3 |   status <- httr::status_code(reply)
 4 | 
 5 |   expect_equal(status, 200)
 6 | })
 7 | 
 8 | test_that("search URL response is json", {
 9 |   reply <- httr::GET(tol_search_url_())
10 |   content_type <- httr::http_type(reply)
11 | 
12 |   expect_equal(content_type, "application/json")
13 | })
14 | 
15 | test_that("search URL is for genes when asked for", {
16 |   gene_search_url <- tol_search_url_(type="genes")
17 | 
18 |   expect_true(stringr::str_detect(gene_search_url, "/genes"))
19 | })
20 | 
21 | test_that("specimen URL response is json", {
22 |   reply <- httr::GET(tol_lookup_url_("2699"))
23 |   content_type <- httr::http_type(reply)
24 | 
25 |   expect_equal(content_type, "application/json")
26 | })
27 | 
28 | test_that("gene lookup returns gene URL", {
29 |   gene_lookup_url <- tol_lookup_url_("51", type="gene")
30 | 
31 |   expect_true(stringr::str_detect(gene_lookup_url, "/genes/"))
32 | })
33 | 
34 | test_that("specimen URL returns 404 for bad ID", {
35 |   response <- httr::GET(tol_lookup_url_("plant"))
36 |   # qualified call: works whether or not httr is attached during testing
37 |   expect_equal(httr::status_code(response), 404)
38 | })
39 | 
40 | test_that("raises error for keyword search", {
41 |   keyword_query <- list(name="Myrcia guianensis")
42 |   expected_message <- "Keyword-based search not implemented"
43 | 
44 |   expect_error(search_tol(keyword_query), expected_message)
45 | })
46 | 
47 | test_that("raises error for bad query input type", {
48 |   malformed_query <- c("this", "is", "a", "bad", "query")
49 | 
50 |   expect_error(search_tol(malformed_query))
51 | })
52 | 
53 | test_that("tidy search results returns tibble", {
54 |   search_results <- search_tol("Poa annua")
55 |   search_table <- tidy(search_results)
56 | 
57 |   expect_s3_class(search_table, "tbl_df")
58 | })
59 | 
60 | test_that("tidy lookup results returns tibble", {
61 |   specimen_record <- lookup_tol("2699")
62 |   specimen_table <- tidy(specimen_record)
63 | 
64 |   expect_s3_class(specimen_table, "tbl_df")
65 | })
66 | 
67 | test_that("loading fasta gives a fasta object", {
68 |   fasta_url <- "http://sftp.kew.org/pub/paftol/current_release/fasta/by_gene/5328.dna.fasta"
69 |   loaded <- load_tol(fasta_url)
70 | 
71 |   expect_equal(class(loaded), c("tol_fasta", "tol"))
72 | })
73 | 
74 | test_that("loading tree gives a tree object", {
75 |   loaded <- load_tol()
76 | 
77 |   expect_equal(class(loaded), c("tol_tree", "tol"))
78 | })
79 | 


--------------------------------------------------------------------------------
/man/lookup_wcvp.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/wcvp.R
 3 | \name{lookup_wcvp}
 4 | \alias{lookup_wcvp}
 5 | \title{Look up a taxon in WCVP.}
 6 | \usage{
 7 | lookup_wcvp(taxonid, .wait = 0.1)
 8 | }
 9 | \arguments{
10 | \item{taxonid}{A string containing a valid IPNI ID.}
11 | 
12 | \item{.wait}{Time to wait before making a request, to help
13 | rate limiting.}
14 | }
15 | \value{
16 | A \code{wcvp_taxon} object, which is a simple structure with fields
17 | for each of the fields returned by the lookup API, as well as the \link[httr:response]{httr response object}.
18 | }
19 | \description{
20 | Request the record for a taxon in the World Checklist of
21 | Vascular Plants (WCVP) using the IPNI ID.
22 | }
23 | \details{
24 | The \href{https://wcvp.science.kew.org/}{World Checklist of Vascular Plants (WCVP)}
25 | is a global consensus view of all known vascular plant species.
26 | It has been compiled by staff at RBG Kew in consultation with plant
27 | group experts.
28 | 
29 | The taxon lookup API allows users to retrieve taxonomic information for
30 | a specific taxon name using the unique IPNI ID. If this is not known,
31 | it can be found out using the \link[=search_wcvp]{WCVP search API}.
32 | }
33 | \examples{
34 | 
35 | # retrieve taxonomic information for a taxon name
36 | lookup_wcvp("271445-2")
37 | 
38 | # print a summary of the returned information
39 | r <- lookup_wcvp("271445-2")
40 | print(r)
41 | 
42 | # tidy into a tibble
43 | r <- lookup_wcvp("271445-2")
44 | tidy(r)
45 | 
46 | # tidy the returned list of synonyms into a tibble
47 | r <- lookup_wcvp("60447743-2")
48 | tidied <- tidy(r)
49 | tidyr::unnest(tidied, cols=synonyms, names_sep="_")
50 | 
51 | # expand the child entries returned for each entry
52 | r <- lookup_wcvp("30000055-2")
53 | tidied <- tidy(r)
54 | tidyr::unnest(tidied, cols=children, names_sep="_")
55 | 
56 | }
57 | \references{
58 | WCVP (2020). World Checklist of Vascular Plants, version 2.0. Facilitated by the Royal Botanic Gardens, Kew. Published on the Internet; http://wcvp.science.kew.org/
59 | }
60 | \seealso{
61 | \itemize{
62 | \item \code{\link[=search_wcvp]{search_wcvp()}} to search WCVP using a taxon name.
63 | \item \code{\link[=download_wcvp]{download_wcvp()}} to download the entire WCVP.
64 | }
65 | 
66 | Other WCVP functions: 
67 | \code{\link{download_wcvp}()},
68 | \code{\link{search_wcvp}()}
69 | }
70 | \concept{WCVP functions}
71 | 


--------------------------------------------------------------------------------
/R/request-next.R:
--------------------------------------------------------------------------------
  1 | 
  2 | #' Request the next page of search results for a kewr results object.
  3 | #'
  4 | #' @param object A kewr search results object. Methods are provided for
  5 | #'  the `wcvp_search`, `powo_search`, `ipni_search`, and `tol_search` classes.
  6 | #' @return The next page of results as a kewr search object of
  7 | #' the same type.
  8 | #'
  9 | #' @export
 10 | request_next <- function(object) {
 11 |   UseMethod("request_next")
 12 | }
 13 | 
 14 | #' @describeIn request_next Request the next page of WCVP search results.
 15 | #'
 16 | #' @examples
 17 | #' r <- search_wcvp("Poa")
 18 | #' request_next(r)
 19 | #'
 20 | #' @export
 21 | request_next.wcvp_search <- function(object) {
 22 |   # Paging is cursor-based: pass the stored cursor back to search_wcvp().
 23 |   .wait <- calculate_wait_(object)
 24 | 
 25 |   search_wcvp(
 26 |     query=object$query,
 27 |     filters=object$filters,
 28 |     limit=object$limit,
 29 |     cursor=object$cursor,
 30 |     .wait=.wait
 31 |   )
 32 | }
 33 | 
 34 | #' @describeIn request_next Request the next page of POWO search results.
 35 | #'
 36 | #' @examples
 37 | #'
 38 | #' \donttest{
 39 | #' r <- search_powo("Poa")
 40 | #' request_next(r)
 41 | #' }
 42 | #'
 43 | #' @export
 44 | request_next.powo_search <- function(object) {
 45 |   # Paging is cursor-based: pass the stored cursor back to search_powo().
 46 |   .wait <- calculate_wait_(object)
 47 | 
 48 |   search_powo(
 49 |     query=object$query,
 50 |     filters=object$filters,
 51 |     limit=object$limit,
 52 |     cursor=object$cursor,
 53 |     .wait=.wait
 54 |   )
 55 | }
 56 | 
 57 | #' @describeIn request_next Request the next page of IPNI search results.
 58 | #'
 59 | #' @examples
 60 | #' r <- search_ipni("Poa")
 61 | #' request_next(r)
 62 | #'
 63 | #' @export
 64 | request_next.ipni_search <- function(object) {
 65 |   # Paging is cursor-based: pass the stored cursor back to search_ipni().
 66 |   .wait <- calculate_wait_(object)
 67 | 
 68 |   search_ipni(
 69 |     query=object$query,
 70 |     filters=object$filters,
 71 |     limit=object$limit,
 72 |     cursor=object$cursor,
 73 |     .wait=.wait
 74 |   )
 75 | }
 76 | 
 77 | #' @describeIn request_next Request the next page of ToL search results.
 78 | #'
 79 | #' @examples
 80 | #' r <- search_tol("Poa")
 81 | #' request_next(r)
 82 | #'
 83 | #' @export
 84 | request_next.tol_search <- function(object) {
 85 |   pause <- calculate_wait_(object)
 86 |   page_now <- object$page
 87 |   # Ask search_tol() for the page after the one held in `object`.
 88 |   search_tol(
 89 |     query=object$query,
 90 |     limit=object$limit,
 91 |     page=page_now + 1,
 92 |     .wait=pause
 93 |   )
 94 | }
 95 | 
 96 | calculate_wait_ <- function(object) {
 97 |   # Half of the "total" time recorded on the previous response.
 98 |   elapsed <- object$response$times
 99 |   elapsed["total"] / 2
100 | }
101 | 


--------------------------------------------------------------------------------
/man/load_tol.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/tol.R
 3 | \name{load_tol}
 4 | \alias{load_tol}
 5 | \title{Load the Tree of Life or another file from ToL.}
 6 | \usage{
 7 | load_tol(url = NULL, .wait = 0.1)
 8 | }
 9 | \arguments{
10 | \item{url}{URL pointing to a file on the ToL SFTP server.}
11 | 
12 | \item{.wait}{Time to wait before making a request, to help
13 | rate limiting.}
14 | }
15 | \description{
16 | Request a tree file for the whole ToL or an alignment,
17 | sequence, or gene tree for a particular specimen or gene.
18 | }
19 | \details{
20 | The \href{https://treeoflife.kew.org/}{Tree of Life} is a database
21 | of specimens sequenced as part of Kew's efforts to build
22 | a comprehensive evolutionary tree of life for flowering plants.
23 | 
24 | Newick tree, alignment, and sequence files are held on an SFTP server
25 | for download. The URLs to access these are stored in entries for specimens
26 | and genes in the ToL database. These can be accessed by either using \code{\link[=search_tol]{search_tol()}}
27 | to get all specimens for a particular order, family, genus, or species or by
28 | looking up a specific specimen or gene using \code{\link[=lookup_tol]{lookup_tol()}}. If no URL is specified,
29 | this will load the ToL tree.
30 | }
31 | \examples{
32 |  # load the ToL
33 |  load_tol()
34 | 
35 |  # load a specimen fasta file
36 |  specimen_info <- lookup_tol("1296")
37 |  load_tol(specimen_info$fasta_file_url)
38 | 
39 |  # load a gene alignment file
40 |  gene_info <- lookup_tol("51", type="gene")
41 |  load_tol(gene_info$alignment_file_url)
42 | 
43 |  # load the gene tree
44 |  load_tol(gene_info$tree_file_url)
45 | 
46 | }
47 | \references{
48 | Baker W.J., Bailey P., Barber V., Barker A., Bellot S., Bishop D., Botigue L.R., Brewer G., Carruthers T., Clarkson J.J., Cook J., Cowan R.S., Dodsworth S., Epitawalage N., Francoso E., Gallego B., Johnson M., Kim J.T., Leempoel K., Maurin O., McGinnie C., Pokorny L., Roy S., Stone M., Toledo E., Wickett N.J., Zuntini A.R., Eiserhardt W.L., Kersey P.J., Leitch I.J. & Forest F. 2021. A Comprehensive Phylogenomic Platform for Exploring the Angiosperm Tree of Life. Systematic Biology, 2021; syab035, https://doi.org/10.1093/sysbio/syab035
49 | }
50 | \seealso{
51 | \itemize{
52 | \item \code{\link[=lookup_tol]{lookup_tol()}} to look up information about a sequenced specimen
53 | using a valid ToL ID.
54 | \item \code{\link[=search_tol]{search_tol()}} to search ToL using taxonomic info.
55 | \item \code{\link[=download_tol]{download_tol()}} to save a file on the ToL SFTP server to file.
56 | }
57 | 
58 | Other ToL functions: 
59 | \code{\link{download_tol}()},
60 | \code{\link{lookup_tol}()}
61 | }
62 | \concept{ToL functions}
63 | 


--------------------------------------------------------------------------------
/man/download_tol.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/tol.R
 3 | \name{download_tol}
 4 | \alias{download_tol}
 5 | \title{Download a file from the ToL SFTP server.}
 6 | \usage{
 7 | download_tol(download_link = NULL, save_dir = NULL)
 8 | }
 9 | \arguments{
10 | \item{download_link}{A string specifying the URL to download the file from.
11 | You can get a download URL for a particular specimen or gene using \code{\link[=lookup_tol]{lookup_tol()}}.}
12 | 
13 | \item{save_dir}{A string specifying the folder to save the download in. If
14 | no value is provided, \link[here]{here} will be used.}
15 | }
16 | \description{
17 | Download an alignment, sequence, or tree file from the ToL
18 | SFTP server.
19 | }
20 | \details{
21 | The \href{https://treeoflife.kew.org/}{Tree of Life} is a database
22 | of specimens sequenced as part of Kew's efforts to build
23 | a comprehensive evolutionary tree of life for flowering plants.
24 | 
25 | Sequence, alignment, and Newick tree files are held on an SFTP server
26 | for download. The URLs to access these are stored in entries for specimens
27 | and genes in the ToL database. These can be accessed by either using \code{\link[=search_tol]{search_tol()}}
28 | to get all specimens for a particular order, family, genus, or species or by
29 | looking up a specific specimen or gene using \code{\link[=lookup_tol]{lookup_tol()}}.
30 | }
31 | \examples{
32 | \dontrun{
33 |  # download a specimen fasta file
34 |  specimen_info <- lookup_tol("1296")
35 |  download_tol(specimen_info$fasta_file_url)
36 | 
37 |  # download a gene alignment file
38 |  gene_info <- lookup_tol("51", type="gene")
39 |  download_tol(gene_info$alignment_file_url)
40 | 
41 |  # download the gene tree
42 |  download_tol(gene_info$tree_file_url)
43 | }
44 | 
45 | }
46 | \references{
47 | Baker W.J., Bailey P., Barber V., Barker A., Bellot S., Bishop D., Botigue L.R., Brewer G., Carruthers T., Clarkson J.J., Cook J., Cowan R.S., Dodsworth S., Epitawalage N., Francoso E., Gallego B., Johnson M., Kim J.T., Leempoel K., Maurin O., McGinnie C., Pokorny L., Roy S., Stone M., Toledo E., Wickett N.J., Zuntini A.R., Eiserhardt W.L., Kersey P.J., Leitch I.J. & Forest F. 2021. A Comprehensive Phylogenomic Platform for Exploring the Angiosperm Tree of Life. Systematic Biology, 2021; syab035, https://doi.org/10.1093/sysbio/syab035
48 | }
49 | \seealso{
50 | \itemize{
51 | \item \code{\link[=lookup_tol]{lookup_tol()}} to look up information about a sequenced specimen
52 | using a valid ToL ID.
53 | \item \code{\link[=search_tol]{search_tol()}} to search ToL using taxonomic info.
54 | \item \code{\link[=load_tol]{load_tol()}} load a file from the ToL SFTP server.
55 | }
56 | 
57 | Other ToL functions: 
58 | \code{\link{load_tol}()},
59 | \code{\link{lookup_tol}()}
60 | }
61 | \concept{ToL functions}
62 | 


--------------------------------------------------------------------------------
/vignettes/KNMS.Rmd:
--------------------------------------------------------------------------------
 1 | ---
 2 | title: "KNMS"
 3 | output: rmarkdown::html_vignette
 4 | vignette: >
 5 |   %\VignetteIndexEntry{KNMS}
 6 |   %\VignetteEngine{knitr::rmarkdown}
 7 |   %\VignetteEncoding{UTF-8}
 8 | ---
 9 | 
10 | ```{r, include = FALSE}
11 | knitr::opts_chunk$set(
12 |   collapse = TRUE,
13 |   comment = "#>"
14 | )
15 | ```
16 | 
17 | The [Kew Names Matching Service (KNMS)](http://namematch.science.kew.org/) allows a user to submit taxon names for matching against records in [Plants of the World Online (POWO)](http://powo.science.kew.org/). As far as I can tell, it uses exact matching as well as some rules-based matching to account for common orthographic variants and Latin mistakes.
18 | 
19 | The website provides interfaces for submitting a list of names copied into a text field, as well as through a CSV upload. I've made this package interface with the text field submission.
20 | 
21 | I'm not sure if KNMS is being maintained or updated at the moment, and it may point towards an old version of POWO/WCVP. Therefore, it's a good first pass for matching the bulk of names, but it's worth checking through any unmatched names, as they may be in POWO or WCVP.
22 | 
23 | ```{r setup}
24 | library(kewr)
25 | ```
26 | 
27 | ## Matching names
28 | 
29 | To use KNMS, you just need to submit names for matching as a character vector or list.
30 | 
31 | ```{r}
32 | matches <- match_knms("Poa annua")
33 | matches
34 | ```
35 | Where a name could be matched with multiple records - for example, if the same name has been published as different concepts - KNMS will return multiple matches, as it has here.
36 | 
37 | The raw results from KNMS can be a bit difficult to interpret, so it is best to `tidy` them for inspection.
38 | 
39 | ```{r}
40 | tidy(matches)
41 | ```
42 | We can see here that `Poa annua` has matched to both *Poa annua L.* and *Poa annua Schltdl. & Cham.*.
43 | 
44 | To avoid having to resolve these multiple matches, it is usually best to submit the author string as part of the name, if it is available.
45 | 
46 | ```{r}
47 | matches <- match_knms("Poa annua L.")
48 | tidy(matches)
49 | ```
50 | 
51 | Multiple names can be submitted at once, as a list or character vector.
52 | 
53 | ```{r}
54 | names_to_match <- c("Poa annua L.", "Myrcia guianensis", "Bulbophyllum sp.")
55 | 
56 | matches <- match_knms(names_to_match)
57 | tidy(matches)
58 | ```
59 | 
60 | KNMS may not always match to the rank that you want. For instance, in the previous example *Bulbophyllum sp.* was matched to the genus name *Bulbophyllum*, rather than returning no match at all.
61 | 
62 | KNMS will accept a large number of names for matching, but may hang if too many are submitted. The `match_knms` function will return a warning if you submit more than 1000 names. In these cases, it might be easier to split your list of names into chunks, or use the CSV upload on the KNMS website.
63 | 


--------------------------------------------------------------------------------
/R/knms.R:
--------------------------------------------------------------------------------
 1 | #' Match names with KNMS.
 2 | #'
 3 | #' Use the Kew Names Matching Service to match taxon names to
 4 | #' records in Plants of the World Online.
 5 | #'
 6 | #' The [Kew Names Matching Service (KNMS)](http://namematch.science.kew.org/) allows
 7 | #' a user to submit taxon names for matching against records in
 8 | #' [Plants of the World Online (POWO)](http://powo.science.kew.org/).
 9 | #' As far as I can tell, it uses exact matching as well as some rules-based matching
10 | #' to account for common orthographic variants and Latin mistakes.
11 | #'
12 | #' Names can be submitted to KNMS with or without an author string.
13 | #' If a name can match to multiple different records, for instance
14 | #' with synonyms, KNMS will return multiple matches. As such, we recommend
15 | #' submitting names first with the taxonomic authority and then without
16 | #' if no match can be found.
17 | #'
18 | #' KNMS allows multiple names to be submitted at once. However, it can
19 | #' be slow in returning results if too many names are submitted. For lots of names,
20 | #' [the website provides an interface for submitting a CSV file](http://namematch.science.kew.org/csv).
21 | #'
22 | #' @param names A list or character vector of taxon names for matching.
23 | #'   Must not contain missing values.
24 | #'
25 | #' @return A `knms_match` object - a simple structure containing the match
26 | #'   results and some statistics about the number of matches.
27 | #'
28 | #' @examples
29 | #'
30 | #' # match a name
31 | #' match_knms("Poa annua L.")
32 | #'
33 | #' # match a vector of names
34 | #' names <- c("Myrcia guianensis", "Calyptranthes ranulphii", "Poa annua")
35 | #' match_knms(names)
36 | #'
37 | #' # tidy match results into a table
38 | #' names <- c("Myrcia guianensis", "Bad plant", "Poa annua")
39 | #' matches <- match_knms(names)
40 | #' tidy(matches)
41 | #'
42 | #' @importFrom glue glue
43 | #'
44 | #' @export
45 | match_knms <- function(names) {
46 |   # missing names are rejected up front, reporting where they are
47 |   missing_names <- is.na(names)
48 |   if (any(missing_names)) {
49 |     positions <- paste0(which(missing_names), collapse=', ')
50 |     stop(glue("KNMS cannot match if NA is present.",
51 |               "You have NAs at positions {positions}",
52 |               "",
53 |               .sep="\n", .trim=FALSE))
54 |   }
55 | 
56 |   url <- knms_url_()
57 |   # the formatted names are sent as the request body and kept on the result
58 |   payload <- format_body_(names)
59 |   reply <- make_request_(url, body=payload, method="POST")
60 |   stats <- reply$content$stats
61 | 
62 |   matches <- list(
63 |     response=reply$response,
64 |     submitted=payload,
65 |     matched=stats$matched,
66 |     unmatched=stats$unmatched,
67 |     multiple_matches=stats$multipleMatches,
68 |     results=reply$content$records
69 |   )
70 |   class(matches) <- "knms_match"
71 |   matches
72 | }
73 | 
74 | #' Make the KNMS URL by passing the "knms" key to `get_url_()`.
75 | #'
76 | #' @noRd
77 | knms_url_ <- function() {
78 |   get_url_("knms")
79 | }
80 | 


--------------------------------------------------------------------------------
/NAMESPACE:
--------------------------------------------------------------------------------
 1 | # Generated by roxygen2: do not edit by hand
 2 | 
 3 | S3method(format,ipni_author)
 4 | S3method(format,ipni_citation)
 5 | S3method(format,ipni_publication)
 6 | S3method(format,ipni_search)
 7 | S3method(format,knms_match)
 8 | S3method(format,powo_search)
 9 | S3method(format,powo_taxon)
10 | S3method(format,wcvp_search)
11 | S3method(format,wcvp_taxon)
12 | S3method(kew_citation,ipni)
13 | S3method(kew_citation,knms_match)
14 | S3method(kew_citation,krs_match)
15 | S3method(kew_citation,powo)
16 | S3method(kew_citation,tol)
17 | S3method(kew_citation,wcvp)
18 | S3method(print,ipni_author)
19 | S3method(print,ipni_citation)
20 | S3method(print,ipni_publication)
21 | S3method(print,ipni_search)
22 | S3method(print,knms_match)
23 | S3method(print,krs_match)
24 | S3method(print,powo_search)
25 | S3method(print,powo_taxon)
26 | S3method(print,tol_fasta)
27 | S3method(print,tol_gene)
28 | S3method(print,tol_search)
29 | S3method(print,tol_specimen)
30 | S3method(print,tol_tree)
31 | S3method(print,wcvp_search)
32 | S3method(print,wcvp_taxon)
33 | S3method(request_next,ipni_search)
34 | S3method(request_next,powo_search)
35 | S3method(request_next,tol_search)
36 | S3method(request_next,wcvp_search)
37 | S3method(tidy,ipni_author)
38 | S3method(tidy,ipni_citation)
39 | S3method(tidy,ipni_publication)
40 | S3method(tidy,ipni_search)
41 | S3method(tidy,knms_match)
42 | S3method(tidy,krs_match)
43 | S3method(tidy,powo_search)
44 | S3method(tidy,powo_taxon)
45 | S3method(tidy,tol_gene)
46 | S3method(tidy,tol_search)
47 | S3method(tidy,tol_specimen)
48 | S3method(tidy,wcvp_search)
49 | S3method(tidy,wcvp_taxon)
50 | export(download_tol)
51 | export(download_wcvp)
52 | export(get_filters)
53 | export(get_keywords)
54 | export(kew_citation)
55 | export(load_tol)
56 | export(lookup_ipni)
57 | export(lookup_powo)
58 | export(lookup_tol)
59 | export(lookup_wcvp)
60 | export(match_knms)
61 | export(match_krs)
62 | export(request_next)
63 | export(search_ipni)
64 | export(search_powo)
65 | export(search_tol)
66 | export(search_wcvp)
67 | export(tidy)
68 | import(httr)
69 | importFrom(dplyr,na_if)
70 | importFrom(generics,tidy)
71 | importFrom(glue,glue)
72 | importFrom(glue,glue_collapse)
73 | importFrom(here,here)
74 | importFrom(httr,GET)
75 | importFrom(httr,user_agent)
76 | importFrom(jsonlite,fromJSON)
77 | importFrom(jsonlite,toJSON)
78 | importFrom(purrr,every)
79 | importFrom(purrr,flatten_chr)
80 | importFrom(purrr,map)
81 | importFrom(purrr,map2)
82 | importFrom(purrr,map_chr)
83 | importFrom(purrr,map_dfr)
84 | importFrom(purrr,map_lgl)
85 | importFrom(purrr,pluck)
86 | importFrom(rlang,.data)
87 | importFrom(rvest,html_attr)
88 | importFrom(rvest,html_nodes)
89 | importFrom(stringr,str_detect)
90 | importFrom(stringr,str_extract)
91 | importFrom(tibble,as_tibble_row)
92 | importFrom(tibble,tibble)
93 | importFrom(tidyr,fill)
94 | importFrom(utils,bibentry)
95 | importFrom(utils,download.file)
96 | importFrom(utils,head)
97 | importFrom(utils,str)
98 | 


--------------------------------------------------------------------------------
/.github/workflows/R-CMD-check.yaml:
--------------------------------------------------------------------------------
 1 | # For help debugging build failures open an issue on the RStudio community with the 'github-actions' tag.
 2 | # https://community.rstudio.com/new-topic?category=Package%20development&tags=github-actions
 3 | on:
 4 |   push:
 5 |     branches:
 6 |       - main
 7 |       - master
 8 |   pull_request:
 9 |     branches:
10 |       - main
11 |       - master
12 | 
13 | name: R-CMD-check
14 | 
15 | jobs:
16 |   R-CMD-check:
17 |     runs-on: ${{ matrix.config.os }}
18 | 
19 |     name: ${{ matrix.config.os }} (${{ matrix.config.r }})
20 | 
21 |     strategy:
22 |       fail-fast: false
23 |       matrix:
24 |         config:
25 |           - {os: windows-latest, r: 'release'}
26 |           - {os: macOS-latest, r: 'release'}
27 |           - {os: ubuntu-20.04, r: 'release', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"}
28 |           - {os: ubuntu-20.04, r: 'devel', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"}
29 | 
30 |     env:
31 |       R_REMOTES_NO_ERRORS_FROM_WARNINGS: true
32 |       RSPM: ${{ matrix.config.rspm }}
33 | 
34 |     steps:
35 |       - uses: actions/checkout@v2
36 | 
37 |       - uses: r-lib/actions/setup-r@v1
38 |         with:
39 |           r-version: ${{ matrix.config.r }}
40 | 
41 |       - uses: r-lib/actions/setup-pandoc@v1
42 | 
43 |       - name: Query dependencies
44 |         run: |
45 |           install.packages('remotes')
46 |           saveRDS(remotes::dev_package_deps(dependencies = TRUE), ".github/depends.Rds", version = 2)
47 |           writeLines(sprintf("R-%i.%i", getRversion()$major, getRversion()$minor), ".github/R-version")
48 |         shell: Rscript {0}
49 | 
50 |       - name: Cache R packages
51 |         if: runner.os != 'Windows'
52 |         uses: actions/cache@v2
53 |         with:
54 |           path: ${{ env.R_LIBS_USER }}
55 |           key: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1-${{ hashFiles('.github/depends.Rds') }}
56 |           restore-keys: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1-
57 | 
58 |       - name: Install system dependencies
59 |         if: runner.os == 'Linux'
60 |         run: |
61 |           while read -r cmd
62 |           do
63 |             eval sudo $cmd
64 |           done < <(Rscript -e 'writeLines(remotes::system_requirements("ubuntu", "20.04"))')
65 | 
66 |       - name: Install dependencies
67 |         run: |
68 |           remotes::install_deps(dependencies = TRUE)
69 |           remotes::install_cran("rcmdcheck")
70 |         shell: Rscript {0}
71 | 
72 |       - name: Check
73 |         env:
74 |           _R_CHECK_CRAN_INCOMING_REMOTE_: false
75 |         run: rcmdcheck::rcmdcheck(args = c("--no-manual", "--as-cran"), error_on = "warning", check_dir = "check")
76 |         shell: Rscript {0}
77 | 
78 |       - name: Upload check results
79 |         if: failure()
80 |         uses: actions/upload-artifact@main
81 |         with:
82 |           name: ${{ runner.os }}-r${{ matrix.config.r }}-results
83 |           path: check
84 | 


--------------------------------------------------------------------------------
/man/lookup_tol.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/tol.R
 3 | \name{lookup_tol}
 4 | \alias{lookup_tol}
 5 | \title{Look up a sequenced specimen or gene in ToL.}
 6 | \usage{
 7 | lookup_tol(id, type = c("specimen", "gene"), .wait = 0.1)
 8 | }
 9 | \arguments{
10 | \item{id}{A string containing a valid ToL ID.}
11 | 
12 | \item{type}{The type of record to lookup, either \code{specimen} or \code{gene}.}
13 | 
14 | \item{.wait}{Time to wait before making a request, to help
15 | rate limiting.}
16 | }
17 | \value{
18 | A \verb{tol_\{type\}} object, which is a simple structure with fields
19 | for each of the fields returned by the lookup API,
20 | as well as the \link[httr:response]{httr response object}.
21 | }
22 | \description{
23 | Request the record for a sequenced specimen or gene in ToL using
24 | its ToL ID.
25 | }
26 | \details{
27 | The \href{https://treeoflife.kew.org/}{Tree of Life} is a database
28 | of specimens sequenced as part of Kew's efforts to build
29 | a comprehensive evolutionary tree of life for flowering plants.
30 | 
31 | The lookup API allows users to retrieve taxonomic and sequencing
32 | information for a specific sequenced specimen or gene using the unique ToL ID.
33 | If this is not known, it can be found out using the \link[=search_tol]{ToL search API}.
34 | }
35 | \examples{
36 | 
37 | # retrieve information for a particular specimen
38 | lookup_tol("1296")
39 | 
40 | # print a summary of the returned information
41 | r <- lookup_tol("1296")
42 | print(r)
43 | 
44 | # tidy into a tibble
45 | r <- lookup_tol("1296")
46 | tidy(r)
47 | 
48 | # extract the returned gene stats for the specimen
49 | r <- lookup_tol("1296")
50 | tidied <- tidy(r)
51 | tidied$gene_stats
52 | 
53 | # expand the taxonomy info
54 | r <- lookup_tol("1296")
55 | tidied <- tidy(r)
56 | tidyr::unnest(tidied, cols=taxonomy, names_sep="_")
57 | 
58 | # retrieve information for a particular gene
59 | lookup_tol("51", type="gene")
60 | 
61 | # print a summary of the returned information
62 | r <- lookup_tol("51", type="gene")
63 | print(r)
64 | 
65 | # tidy into a tibble
66 | r <- lookup_tol("51", type="gene")
67 | tidy(r)
68 | 
69 | }
70 | \references{
71 | Baker W.J., Bailey P., Barber V., Barker A., Bellot S., Bishop D., Botigue L.R., Brewer G., Carruthers T., Clarkson J.J., Cook J., Cowan R.S., Dodsworth S., Epitawalage N., Francoso E., Gallego B., Johnson M., Kim J.T., Leempoel K., Maurin O., McGinnie C., Pokorny L., Roy S., Stone M., Toledo E., Wickett N.J., Zuntini A.R., Eiserhardt W.L., Kersey P.J., Leitch I.J. & Forest F. 2021. A Comprehensive Phylogenomic Platform for Exploring the Angiosperm Tree of Life. Systematic Biology, 2021; syab035, https://doi.org/10.1093/sysbio/syab035
72 | }
73 | \seealso{
74 | \itemize{
75 | \item \code{\link[=search_tol]{search_tol()}} to search ToL using taxonomic information.
76 | \item \code{\link[=download_tol]{download_tol()}} to download a file from the ToL SFTP server.
77 | \item \code{\link[=load_tol]{load_tol()}} load a file from the ToL SFTP server.
78 | }
79 | 
80 | Other ToL functions: 
81 | \code{\link{download_tol}()},
82 | \code{\link{load_tol}()}
83 | }
84 | \concept{ToL functions}
85 | 


--------------------------------------------------------------------------------
/tests/testthat/test-ipni.R:
--------------------------------------------------------------------------------
  1 | test_that("search URL returns status 200", {
  2 |   # a bare GET against the search endpoint should succeed
  3 |   reply <- httr::GET(ipni_search_url_())
  4 |   status <- httr::status_code(reply)
  5 |   expect_equal(status, 200)
  6 | })
  7 | 
  8 | test_that("search URL response is json", {
  9 |   # the search endpoint should declare a JSON content type
 10 |   reply <- httr::GET(ipni_search_url_())
 11 |   content_type <- httr::http_type(reply)
 12 |   expect_equal(content_type, "application/json")
 13 | })
 14 | 
 15 | test_that("raises error for unimplemented keyword", {
 16 |   # the test expects `name` to be rejected as an unknown query keyword
 17 |   bad_query <- list(name="Myrcia guianensis")
 18 |   expect_error(
 19 |     search_ipni(bad_query), "Query keyword.+ not recognised")
 20 | })
 21 | 
 22 | test_that("raises error for bad query input type", {
 23 |   # a multi-element character vector is expected to be rejected
 24 |   expect_error(
 25 |     search_ipni(c("this", "is", "a", "bad", "query")))
 26 | })
 27 | 
 28 | test_that("tidy search results returns tibble", {
 29 |   # tidying a search response should give a tibble
 30 |   tidied <- tidy(search_ipni("Poa annua"))
 31 | 
 32 |   expect_s3_class(tidied, "tbl_df")
 33 | })
 34 | 
 35 | test_that("tidy lookup results returns tibble", {
 36 |   # tidying a single looked-up record should also give a tibble
 37 |   tidied <- tidy(lookup_ipni("30001404-2"))
 38 | 
 39 |   expect_s3_class(tidied, "tbl_df")
 40 | })
 41 | 
 42 | test_that("specific filter only returns species", {
 43 |   # every record returned under the "species" filter should have rank "spec."
 44 |   is_species <- function(record) {
 45 |     record$rank == "spec."
 46 |   }
 47 | 
 48 |   results <- search_ipni("Myrcia", c("species"))
 49 | 
 50 |   expect_true(purrr::every(results$results, is_species))
 51 | })
 52 | 
 53 | test_that("infraspecific filter only returns infraspecifics", {
 54 |   infra_ranks <- c("Variety", "Subspecies", "Form")
 55 |   # NOTE(review): this IPNI test calls search_wcvp(), not search_ipni() - confirm intended
 56 |   query <- "Poa annua"
 57 |   filters <- c("infraspecies")
 58 | 
 59 |   results <- search_wcvp(query, filters)
 60 |   all_infra <- purrr::every(results$results,
 61 |                             ~.x$rank %in% infra_ranks)
 62 | 
 63 |   expect_true(all_infra)
 64 | })
 65 | 
 66 | test_that("generic filter only returns genera", {
 67 |   # every record returned under the "genera" filter should have rank "gen."
 68 |   is_genus <- function(record) {
 69 |     record$rank == "gen."
 70 |   }
 71 | 
 72 |   results <- search_ipni("Myrcia", c("genera"))
 73 | 
 74 |   expect_true(purrr::every(results$results, is_genus))
 75 | })
 76 | 
 77 | test_that("infrageneric filter only returns infragenera", {
 78 |   # every record returned under the "infragenera" filter should have rank "sect."
 79 |   is_section <- function(record) {
 80 |     record$rank == "sect."
 81 |   }
 82 | 
 83 |   results <- search_ipni("Behenantha", c("infragenera"))
 84 | 
 85 |   expect_true(purrr::every(results$results, is_section))
 86 | })
 87 | 
 88 | test_that("family filter only returns families", {
 89 |   # query IPNI itself: "fam." follows the abbreviated rank style of the other IPNI tests
 90 |   query <- "poaceae"
 91 |   filters <- c("families")
 92 | 
 93 |   results <- search_ipni(query, filters)
 94 |   all_families <- purrr::every(results$results,
 95 |                                ~.x$rank == "fam.")
 96 | 
 97 |   expect_true(all_families)
 98 | })
 99 | 
100 | test_that("infrafamily filter only returns infrafamilies", {
101 |   # every record returned under the "infrafamilies" filter should have rank "subfam."
102 |   is_subfamily <- function(record) {
103 |     record$rank == "subfam."
104 |   }
105 | 
106 |   results <- search_ipni("Rosoideae", c("infrafamilies"))
107 |   all_subfamilies <- purrr::every(results$results, is_subfamily)
108 | 
109 |   expect_true(all_subfamilies)
110 | })
111 | 
112 | test_that("cursor returns next page of results", {
113 |   ulex <- list(genus="Ulex")
114 |   first_page <- search_ipni(ulex)
115 |   second_page <- search_ipni(ulex, cursor=first_page$cursor)
116 | 
117 |   # the two pages should start with different records
118 |   expect_false(first_page$results[[1]]$fqId == second_page$results[[1]]$fqId)
119 | })
120 | 


--------------------------------------------------------------------------------
/man/search_ipni.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/ipni.R
 3 | \name{search_ipni}
 4 | \alias{search_ipni}
 5 | \title{Search IPNI.}
 6 | \usage{
 7 | search_ipni(query, filters = NULL, cursor = "*", limit = 50, .wait = 0.1)
 8 | }
 9 | \arguments{
10 | \item{query}{The string to query IPNI with. If using keywords, the query
11 | must be formatted as a list.}
12 | 
13 | \item{filters}{Filter to apply to search results.}
14 | 
15 | \item{cursor}{A cursor returned by a previous search.
16 | If used, the query and filter must be exactly the same.}
17 | 
18 | \item{limit}{The maximum number of records to return. This has a maximum of 1000.}
19 | 
20 | \item{.wait}{Time to wait before making a request, to help
21 | rate limiting.}
22 | }
23 | \value{
24 | Returns an object of class \code{ipni_search} that is a simple
25 | structure with slots for:
26 | \itemize{
27 | \item \code{total}: the total number of results held in IPNI for the query
28 | \item \code{pages}: the total number of results pages for the query.
29 | \item \code{limit}: the maximum number of results requested from the API, per page.
30 | \item \code{cursor}: a cursor to retrieve the next page of results from the API.
31 | \item \code{results}: the query results parsed into a list.
32 | \item \code{query}: the query string submitted to the API.
33 | \item \code{response}: the \link[httr:response]{httr response object}.
34 | }
35 | }
36 | \description{
37 | Query the International Plant Names Index for nomenclatural information.
38 | }
39 | \details{
40 | The \href{https://www.ipni.org/}{International Plant Names Index (IPNI)}
41 | is a service that provides nomenclatural information for vascular plant names.
42 | 
43 | The search API allows users to query the database for plant names,
44 | as well as authors and publications. There may be limited support for
45 | some sort of fuzzy matching.
46 | 
47 | There is some support for querying using keyword arguments. Documentation for
48 | the API is currently available in the \href{https://github.com/RBGKew/pykew/blob/master/pykew/ipni_terms.py}{\code{pykew} package},
49 | so keywords have been copied across from there. There are sets of keywords
50 | relating to plants, authors, and publications. A full list of keywords can be viewed
51 | using the \code{get_keywords} function.
52 | 
53 | The API will return nomenclatural information (publication date, nomenclatural status, author, etc.)
54 | of all names matching the query. These results can be limited, for example to only family names,
55 | using filters. Use the \code{get_filters} function to get a list of all implemented filters.
56 | }
57 | \examples{
58 | # search for all names containing Poa annua
59 | results <- search_ipni("Poa annua")
60 | 
61 | # tidy search results in a table
62 | tidy(results)
63 | 
64 | # extract author team information for the search results
65 | results_tbl <- tidy(results)
66 | tidyr::unnest(results_tbl, cols=c(authorTeam), names_sep="_")
67 | 
68 | # filter results to only species names
69 | species_results <- search_ipni("Poa annua", filters="species")
70 | tidy(species_results)
71 | 
72 | # search for species from Mexico published in 1989
73 | q <- list(published="1989", distribution="Mexico")
74 | f <- "species"
75 | results <- search_ipni(q, filters=f)
76 | tidy(results)
77 | 
78 | # search for an author by surname
79 | author_results <- search_ipni(list(author_surname="Gardiner"))
80 | tidy(author_results)
81 | 
82 | }
83 | \seealso{
84 | \itemize{
85 | \item \code{\link[=lookup_ipni]{lookup_ipni()}} to look up a name using an IPNI ID.
86 | }
87 | 
88 | Other IPNI functions: 
89 | \code{\link{lookup_ipni}()}
90 | }
91 | \concept{IPNI functions}
92 | 


--------------------------------------------------------------------------------
/R/format.R:
--------------------------------------------------------------------------------
  1 | # wcvp ----
  2 | #' @importFrom purrr map_dfr
  3 | #'
  4 | #' @export
  5 | format.wcvp_search <- function(x, ...) {
  6 |   .Deprecated("tidy.wcvp_search")  # callers are pointed at the tidy() method
  7 |   map_dfr(x$results, function(record) parse_nested_list(record))
  8 | }
  9 | 
 10 | #' @export
 11 | format.wcvp_taxon <- function(x, ...) {
 12 |   .Deprecated("tidy.wcvp_taxon")
 13 |   # drop the `response` and `queryId` entries before tabulating the rest
 14 |   is_excluded <- names(x) %in% c("response", "queryId")
 15 |   parse_nested_list(x[!is_excluded])
 16 | }
 17 | 
 18 | # powo ----
 19 | 
 20 | #' @importFrom purrr map_dfr
 21 | #'
 22 | #' @export
 23 | format.powo_search <- function(x, ...) {
 24 |   .Deprecated("tidy.powo_search")  # callers are pointed at the tidy() method
 25 |   map_dfr(x$results, function(record) parse_nested_list(record))
 26 | }
 27 | 
 28 | #' @export
 29 | format.powo_taxon <- function(x, field=c("none", "accepted", "synonyms", "classification", "basionym", "distribution", "distributionEnvelope"), ...) {
 30 |   .Deprecated("tidy.powo_taxon")
 31 |   # `field` is accepted but never used; `response` and `queryId` are dropped
 32 |   is_excluded <- names(x) %in% c("response", "queryId")
 33 |   parse_nested_list(x[!is_excluded])
 34 | }
 35 | 
 36 | # ipni ----
 37 | 
 38 | #' @importFrom purrr map_dfr
 39 | #'
 40 | #' @export
 41 | format.ipni_search <- function(x, ...) {
 42 |   .Deprecated("tidy.ipni_search")  # callers are pointed at the tidy() method
 43 |   map_dfr(x$results, function(record) parse_nested_list(record))
 44 | }
 45 | 
 46 | #' @export
 47 | format.ipni_citation <- function(x, ...) {
 48 |   .Deprecated("tidy.ipni_citation")
 49 |   # drop the `response` and `queryId` entries before tabulating the rest
 50 |   is_excluded <- names(x) %in% c("response", "queryId")
 51 |   parse_nested_list(x[!is_excluded])
 52 | }
 53 | 
 54 | #' @export
 55 | format.ipni_author <- function(x, ...) {
 56 |   .Deprecated("tidy.ipni_author")
 57 |   # drop the `response` and `queryId` entries before tabulating the rest
 58 |   is_excluded <- names(x) %in% c("response", "queryId")
 59 |   parse_nested_list(x[!is_excluded])
 60 | }
 61 | 
 62 | #' @export
 63 | format.ipni_publication <- function(x, ...) {
 64 |   .Deprecated("tidy.ipni_publication")
 65 |   # drop the `response` and `queryId` entries before tabulating the rest
 66 |   is_excluded <- names(x) %in% c("response", "queryId")
 67 |   parse_nested_list(x[!is_excluded])
 68 | }
 69 | 
 70 | # knms ----
 71 | 
 72 | #' @importFrom purrr map_lgl map_dfr pluck
 73 | #' @importFrom tidyr fill
 74 | #' @importFrom rlang .data
 75 | #'
 76 | #' @export
 77 | format.knms_match <- function(x, ...) {
 78 |   .Deprecated("tidy.knms_match")
 79 |   parsed <- map_dfr(x$results, parse_knms_line)
 80 |   # columns are selected by name: `.data$` is deprecated in tidyselect
 81 |   formatted <- fill(parsed, "submitted", "matched")
 82 |   formatted$matched <- formatted$matched %in% c("true", "multiple_matches")
 83 |   # `matched` is now logical: TRUE for "true" or "multiple_matches" statuses
 84 |   formatted
 85 | }
 86 | 
 87 | # utils ----
 88 | 
 89 | #' Simple utility to wrap nested lists into a tibble.
 90 | #'
 91 | #' Unnamed lists are parsed element-wise and row-bound; in named lists,
 92 | #' nested lists are converted to tibbles and inserted in list columns.
 93 | #'
 94 | #' @importFrom purrr map map_chr map_dfr map_lgl
 95 | #' @importFrom tibble as_tibble_row
 96 | #'
 97 | #' @noRd
 98 | parse_nested_list <- function(l) {
 99 |   if (is.null(names(l))) {
100 |     return(map_dfr(l, parse_nested_list))
101 |   }
102 |   # NULL entries are replaced with NA (presumably so they survive as columns)
103 |   null_cols <- map_lgl(l, is.null)
104 |   l[null_cols] <- NA_character_
105 |   # nested lists are parsed recursively and wrapped in a list
106 |   list_cols <- map_lgl(l, is.list)
107 |   l[list_cols] <- map(l[list_cols], ~list(parse_nested_list(.x)))
108 |   # the named elements become the columns of a one-row tibble
109 |   as_tibble_row(l)
110 | }
111 | 
112 | #' Parse and format a single match result from KNMS.
113 | #'
114 | #' @importFrom stringr str_extract
115 | #' @importFrom dplyr na_if
116 | #' @importFrom tibble tibble
117 | #'
118 | #' @noRd
119 | parse_knms_line <- function(line) {
120 |   n_fields <- length(line)
121 |   # the id is the run of digits/hyphens after "names:" at the end of field 3
122 |   ipni_id <- if (n_fields > 2) {
123 |     str_extract(line[[3]], "(?<=names:)[0-9\\-]+$")
124 |   } else {
125 |     NA_character_
126 |   }
127 | 
128 |   # lines with fewer than four fields have no matched record
129 |   matched_record <- if (n_fields > 3) {
130 |     line[[4]]
131 |   } else {
132 |     NA_character_
133 |   }
134 | 
135 |   tibble(submitted=na_if(line[[1]], ""),
136 |          matched=na_if(line[[2]], ""),
137 |          ipni_id=ipni_id,
138 |          matched_record=matched_record)
139 | }
140 | 


--------------------------------------------------------------------------------
/man/search_powo.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/powo.R
 3 | \name{search_powo}
 4 | \alias{search_powo}
 5 | \title{Search POWO.}
 6 | \usage{
 7 | search_powo(query, filters = NULL, cursor = NULL, limit = 50, .wait = 0.2)
 8 | }
 9 | \arguments{
10 | \item{query}{The string to query POWO with. If using keywords,
11 | the query must be formatted as a list.}
12 | 
13 | \item{filters}{Filter to apply to search results.
14 | Multiple filters must be supplied as a character vector.}
15 | 
16 | \item{cursor}{A cursor returned by a previous search.
17 | If used, the query and filter must be exactly the same.}
18 | 
19 | \item{limit}{The maximum number of records to return.}
20 | 
21 | \item{.wait}{Time to wait before making a request, to help
22 | rate limiting.}
23 | }
24 | \value{
25 | Returns an object of class \code{powo_search} that is a simple
26 | structure with slots for:
27 | \itemize{
28 | \item \code{total}: the total number of results held in POWO for the query
29 | \item \code{pages}: the total number of results pages for the query.
30 | \item \code{limit}: the maximum number of results requested from the API, per page.
31 | \item \code{cursor}: a cursor to retrieve the next page of results from the API.
32 | \item \code{results}: the query results parsed into a list.
33 | \item \code{query}: the query string submitted to the API.
34 | \item \code{response}: the \link[httr:response]{httr response object}.
35 | }
36 | }
37 | \description{
38 | Query Plants of the World Online for taxon information.
39 | }
40 | \details{
41 | \href{http://www.plantsoftheworldonline.org/}{Plants of the World Online (POWO)}
42 | is a database of information on the world's flora. It curates information from
43 | published floras and other sources of floristic information.
44 | 
45 | The search API allows users to query the database using plant names,
46 | geographic terms, and floristic characters. These can be queried using
47 | keyword arguments. Use the \code{get_keywords} function for a list of all implemented keywords.
48 | 
49 | The API returns taxonomic information as well as species descriptions and
50 | image locations if available. These results can be limited, for example to accepted species,
51 | using filters. Use the \code{get_filters} function to get a list of all implemented filters.
52 | 
53 | Distributions in POWO are categorised using the \href{https://www.tdwg.org/standards/wgsrpd/}{World Geographical Scheme for Recording Plant Distributions (WGSRPD)}.
54 | Users can query POWO using distributions listed under WGSRPD levels 1 (continents),
55 | 2 (regions), and 3 (botanical countries).
56 | }
57 | \examples{
58 | # search for all entries containing a genus name
59 | search_powo("Myrcia")
60 | 
61 | # search for all accepted species within a genus
62 | search_powo("Myrcia", filters=c("species", "accepted"))
63 | 
64 | # search for up to 100 species in a genus
65 | search_powo("Poa", filters=c("species"), limit=100)
66 | 
67 | # search for all names in a family
68 | search_powo(list(family="Myrtaceae"))
69 | 
70 | # search for all accepted species with blue flowers
71 | search_powo(list(flower="blue"), filters=c("accepted", "species"))
72 | 
73 | # search for all accepted genera in Mexico
74 | search_powo(list(distribution="Mexico"), filters=c("accepted", "genera"))
75 | 
76 | # search for a species name and print the results
77 | r <- search_powo("Myrcia guianensis", filters=c("species"))
78 | print(r)
79 | 
80 | # simplify search results to a `tibble`
81 | r <- search_powo("Poa", filters=c("species"))
82 | tidy(r)
83 | 
84 | }
85 | \seealso{
86 | \itemize{
87 | \item \code{\link[=lookup_powo]{lookup_powo()}} to look up a taxon in POWO using the IPNI ID.
88 | }
89 | 
90 | Other POWO functions: 
91 | \code{\link{lookup_powo}()}
92 | }
93 | \concept{POWO functions}
94 | 


--------------------------------------------------------------------------------
/vignettes/POWO.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: "POWO"
  3 | output: rmarkdown::html_vignette
  4 | vignette: >
  5 |   %\VignetteIndexEntry{POWO}
  6 |   %\VignetteEngine{knitr::rmarkdown}
  7 |   %\VignetteEncoding{UTF-8}
  8 | ---
  9 | 
 10 | ```{r, include = FALSE}
 11 | knitr::opts_chunk$set(
 12 |   collapse = TRUE,
 13 |   comment = "#>"
 14 | )
 15 | ```
 16 | 
 17 | [Plants of the World Online (POWO)](http://www.plantsoftheworldonline.org/) is a database of information on the world's flora. It curates information from published floras and other sources of floristic information. POWO covers much of the same ground as WCVP - you can get taxonomic information for names from it. But it also has more general information about taxa, like descriptions, lifeforms, images, and distributions.
 18 | 
 19 | ```{r setup}
 20 | library(kewr)
 21 | library(dplyr)
 22 | library(tidyr)
 23 | ```
 24 | 
 25 | 
 26 | ## Searching POWO for taxa
 27 | 
 28 | Searching POWO works by exact matching, so any misspellings or unrecognised orthographic variants will not be matched.
 29 | 
 30 | For example, searching `Ulex europeaus` will not return any results:
 31 | ```{r}
 32 | results <- search_powo("Ulex europeaus")
 33 | results
 34 | ```
 35 | 
 36 | But searching `Ulex europaeus` will:
 37 | 
 38 | ```{r}
 39 | results <- search_powo("Ulex europaeus")
 40 | results
 41 | ```
 42 | 
 43 | ## Pagination
 44 | 
 45 | By default, `search_powo` limits the number of results returned to 50. If you are expecting more results, you can increase this limit.
 46 | 
 47 | ```{r}
 48 | results <- search_powo("Ulex", limit=100)
 49 | results
 50 | ```
 51 | 
 52 | However, for searches with a large number of results, this may not be practical. In this case, or if you just don't know how many results to expect, you can request the next page of results.
 53 | 
 54 | ```{r}
 55 | ulex_page1 <- search_powo("Ulex")
 56 | ulex_page2 <- request_next(ulex_page1)
 57 | 
 58 | bind_rows(
 59 |   tidy(ulex_page1),
 60 |   tidy(ulex_page2)
 61 | )
 62 | ```
 63 | 
 64 | ## Keyword searches and filtering
 65 | 
 66 | You can perform more complicated searches using keywords and filters.
 67 | For example, you can search for all accepted species in a particular family.
 68 | 
 69 | ```{r}
 70 | results <- search_powo(list(family="Ephedraceae"), 
 71 |                        filters=c("accepted", "species"))
 72 | results
 73 | ```
 74 | 
 75 | As well as taxonomic keywords, it is possible to search by other characteristics.
 76 | 
 77 | For example, finding all accepted legumes with blue flowers.
 78 | 
 79 | ```{r}
 80 | blue_flowers <- search_powo(list(flower="blue", family="Fabaceae"),
 81 |                             filters=c("accepted"))
 82 | blue_flowers
 83 | ```
 84 | 
 85 | Another use is to get a checklist of accepted species in a country.
 86 | 
 87 | ```{r}
 88 | checklist <- search_powo(list(distribution="Mexico"),
 89 |                          filters=c("accepted"))
 90 | 
 91 | checklist
 92 | ```
 93 | 
 94 | A full list of keywords and filters can be retrieved with the `get_keywords` and `get_filters` functions, as described in the help page for `search_powo`.
 95 | 
 96 | ## Looking up information about a taxon
 97 | 
 98 | As well as searching by keyword, it is possible to use the IPNI ID of a taxon to look up its record in POWO. To get the IPNI ID, you can search WCVP, POWO, or IPNI, or use KNMS to find matches for the name.
 99 | 
100 | ```{r}
101 | ipni_id <- "119003-2"
102 | 
103 | record <- lookup_powo(ipni_id)
104 | record
105 | ```
106 | 
107 | Optionally, you can choose to request the distribution information for the taxon as well.
108 | 
109 | ```{r}
110 | record <- lookup_powo(ipni_id, distribution=TRUE)
111 | 
112 | tidied <- tidy(record)
113 | 
114 | tidied %>%
115 |   select(fqId, distribution) %>%
116 |   unnest(cols=distribution) %>%
117 |   select(-introduced) %>%
118 |   unnest(cols=natives)
119 | ```
120 | 


--------------------------------------------------------------------------------
/vignettes/IPNI.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: "IPNI"
  3 | output: rmarkdown::html_vignette
  4 | vignette: >
  5 |   %\VignetteIndexEntry{IPNI}
  6 |   %\VignetteEngine{knitr::rmarkdown}
  7 |   %\VignetteEncoding{UTF-8}
  8 | ---
  9 | 
 10 | ```{r, include = FALSE}
 11 | knitr::opts_chunk$set(
 12 |   collapse = TRUE,
 13 |   comment = "#>"
 14 | )
 15 | ```
 16 | 
 17 | ```{r setup}
 18 | library(kewr)
 19 | library(dplyr)
 20 | ```
 21 | 
 22 | The [International Plant Names Index (IPNI)](https://www.ipni.org/) is a service that provides nomenclatural information for vascular plant names.
 23 | 
 24 | It provides information on published plant names, as well as authors and publications. The type of information that it holds for plant names includes when it was published, the publication, the author, as well as any nomenclatural remarks.
 25 | 
 26 | ## Searching IPNI for a record
 27 | 
 28 | Searching IPNI operates on exact matching.
 29 | 
 30 | For example, `Poa anua` will not return any results:
 31 | 
 32 | ```{r}
 33 | results <- search_ipni("Poa anua")
 34 | results
 35 | ```
 36 | But `Poa annua` will:
 37 | 
 38 | ```{r}
 39 | results <- search_ipni("Poa annua")
 40 | results
 41 | ```
 42 | 
 43 | The exact matching does, however, work on fragments of a name. For instance, `Poa` will return results for everything that includes that term:
 44 | 
 45 | ```{r}
 46 | results <- search_ipni("Poa")
 47 | results
 48 | ```
 49 | It should be noted that this will return results for authors and publications as well as taxon names. We can see the first result for this search is the publication "Poa de France, Belgique et Suisse".
 50 | 
 51 | ## Pagination
 52 | 
 53 | Looking at the search above, we can see that there were a total of `r results$total` records found but only the first `r results$limit` were returned. There are two possible ways to get the rest of the search results.
 54 | 
 55 | The first is to increase the record limit to ensure we get everything:
 56 | 
 57 | ```{r}
 58 | results <- search_ipni("Poa", limit=1000)
 59 | results
 60 | ```
 61 | However, IPNI caps the limit at a maximum of 1000 records. Also, we often don't know how many records a search will return, or whether it will return a large number of records.
 62 | 
 63 | In those cases, we can ask IPNI for the next page of results.
 64 | 
 65 | ```{r}
 66 | query <- "Poa"
 67 | page1 <- search_ipni(query)
 68 | page2 <- request_next(page1)
 69 | 
 70 | bind_rows(
 71 |   tidy(page1),
 72 |   tidy(page2)
 73 | )
 74 | ```
 75 | 
 76 | ## Keywords and filtering
 77 | 
 78 | You can perform more complicated searches using keywords and filters.
 79 | For example, you can search for all genera names published for a particular family.
 80 | 
 81 | ```{r}
 82 | results <- search_ipni(list(family="Ephedraceae"), 
 83 |                        filters="genera")
 84 | results
 85 | ```
 86 | 
 87 | Or for all species names published in a particular year.
 88 | 
 89 | ```{r}
 90 | results <- search_ipni(list(published=1989),
 91 |                        filters=c("species"))
 92 | results
 93 | ```
 94 | 
 95 | You can also use the keywords to search for author records.
 96 | 
 97 | ```{r}
 98 | results <- search_ipni(list(author_surname="Gardiner"))
 99 | 
100 | results
101 | ```
102 | And for publication records.
103 | 
104 | ```{r}
105 | results <- search_ipni(list(title="Bulletin"))
106 | results
107 | ```
108 | 
109 | 
110 | A full list of keywords and filters can be found in the help page for `search_ipni`.
111 | 
112 | ## Looking up a specific record
113 | 
114 | Specific records for taxon names, authors, and publications can be looked up using the IPNI ID. These IDs are specific to each individual type of record, so the type needs to be provided.
115 | 
116 | ```{r}
117 | name <- lookup_ipni("385169-1", type="taxon")
118 | name
119 | ```
120 | 
121 | ```{r}
122 | author <- lookup_ipni("20028192-1", type="author")
123 | author
124 | ```
125 | 
126 | ```{r}
127 | pub <- lookup_ipni("12471-2", type="publication")
128 | pub
129 | ```
130 | 


--------------------------------------------------------------------------------
/tests/testthat/test-wcvp.R:
--------------------------------------------------------------------------------
  1 | test_that("search URL returns status 200", {
  2 |   url <- wcvp_search_url_()
  3 |   response <- httr::GET(url)
  4 | 
  5 |   expect_equal(httr::status_code(response), 200)
  6 | })
  7 | 
  8 | test_that("search URL response is json", {
  9 |   url <- wcvp_search_url_()
 10 |   response <- httr::GET(url)
 11 | 
 12 |   expect_equal(httr::http_type(response), "application/json")
 13 | })
 14 | 
 15 | test_that("taxon URL response is json", {
 16 |   url <- wcvp_taxon_url_("30001404-2")
 17 |   response <- httr::GET(url)
 18 | 
 19 |   expect_equal(httr::http_type(response), "application/json")
 20 | })
 21 | 
 22 | test_that("taxon URL returns 404 for bad ID", {
 23 |   url <- wcvp_taxon_url_("bad id")
 24 |   response <- httr::GET(url)
 25 |   expect_equal(status_code(response), 404)
 26 | })
 27 | 
 28 | test_that("raises error for unimplemented keyword", {
 29 |   query <- list(name="Myrcia guianensis")
 30 | 
 31 |   expect_error(search_wcvp(query),
 32 |                "Query keyword.+ not recognised")
 33 | })
 34 | 
 35 | test_that("raises error for bad query input type", {
 36 |   query <- c("this", "is", "a", "bad", "query")
 37 | 
 38 |   expect_error(search_wcvp(query))
 39 | })
 40 | 
 41 | test_that("accepted filter only returns accepted names", {
 42 |   query <- "Myrcia"
 43 |   filters <- c("accepted")
 44 | 
 45 |   results <- search_wcvp(query, filters)
 46 |   all_accepted <- purrr::every(results$results,
 47 |                                ~.x$accepted)
 48 | 
 49 |   expect_true(all_accepted)
 50 | })
 51 | 
 52 | test_that("specific filter only returns species", {
 53 |   query <- "Myrcia"
 54 |   filters <- c("species")
 55 | 
 56 |   results <- search_wcvp(query, filters)
 57 |   all_species <- purrr::every(results$results,
 58 |                                ~.x$rank == "Species")
 59 | 
 60 |   expect_true(all_species)
 61 | })
 62 | 
 63 | test_that("generic filter only returns genera", {
 64 |   query <- "Myrcia"
 65 |   filters <- c("genera")
 66 | 
 67 |   results <- search_wcvp(query, filters)
 68 |   all_genera <- purrr::every(results$results,
 69 |                               ~.x$rank == "Genus")
 70 | 
 71 |   expect_true(all_genera)
 72 | })
 73 | 
 74 | test_that("infraspecific filter only returns infraspecifics", {
 75 |   infra_ranks <- c("Variety", "Subspecies", "Form")
 76 | 
 77 |   query <- "Poa annua"
 78 |   filters <- c("infraspecies")
 79 | 
 80 |   results <- search_wcvp(query, filters)
 81 |   all_infra <- purrr::every(results$results,
 82 |                              ~.x$rank %in% infra_ranks)
 83 | 
 84 |   expect_true(all_infra)
 85 | })
 86 | 
 87 | test_that("family filter only returns families", {
 88 | 
 89 |   query <- "poaceae"
 90 |   filters <- c("families")
 91 | 
 92 |   results <- search_wcvp(query, filters)
 93 |   all_families <- purrr::every(results$results,
 94 |                                ~.x$rank == "Family")
 95 | 
 96 |   expect_true(all_families)
 97 | })
 98 | 
 99 | test_that("tidy search results returns tibble", {
100 |   results <- search_wcvp("Poa annua", filters=c("species"))
101 |   tidied <- tidy(results)
102 | 
103 |   expect_s3_class(tidied, "tbl_df")
104 | })
105 | 
106 | test_that("tidy lookup results returns tibble", {
107 |   results <- lookup_wcvp("30001404-2")
108 |   tidied <- tidy(results)
109 | 
110 |   expect_s3_class(tidied, "tbl_df")
111 | })
112 | 
113 | test_that("wcvp download link is a zip file", {
114 |   download_link <- wcvp_download_url_()
115 | 
116 |   expect_true(endsWith(download_link, "zip"))
117 | })
118 | 
119 | test_that("wcvp download link returns right version", {
120 |   download_link <- wcvp_download_url_(2)
121 | 
122 |   expect_true(stringr::str_detect(download_link, "2"))
123 | })
124 | 
125 | test_that("wcvp download link errors for unimplemented version", {
126 |   expect_error(wcvp_download_url_(3000),
127 |                "Not a recognised version")
128 | })
129 | 
130 | test_that("cursor returns next page of results", {
131 |   query <- list(genus="Ulex")
132 | 
133 |   page1 <- search_wcvp(query)
134 |   page2 <- search_wcvp(query, cursor=page1$cursor)
135 | 
136 |   expect_false(page1$results[[1]]$fqId == page2$results[[1]]$fqId)
137 | })
138 | 


--------------------------------------------------------------------------------
/R/tidy.R:
--------------------------------------------------------------------------------
  1 | # wcvp ----
  2 | #' @importFrom purrr map_dfr
  3 | #'
  4 | #' @export
  5 | tidy.wcvp_search <- function(x, ...) {
  6 |   map_dfr(x$results, parse_nested_list_)
  7 | }
  8 | 
  9 | #' @export
 10 | tidy.wcvp_taxon <- function(x, ...) {
 11 |   x <- x[! names(x) %in% c("response", "queryId")]
 12 | 
 13 |   parse_nested_list_(x)
 14 | }
 15 | 
 16 | # powo ----
 17 | 
 18 | #' @importFrom purrr map_dfr
 19 | #'
 20 | #' @export
 21 | tidy.powo_search <- function(x, ...) {
 22 |   map_dfr(x$results, parse_nested_list_)
 23 | }
 24 | 
 25 | #' @export
 26 | tidy.powo_taxon <- function(x, field=c("none", "accepted", "synonyms", "classification", "basionym", "distribution", "distributionEnvelope"), ...) {
 27 |   x <- x[! names(x) %in% c("response", "queryId")]
 28 | 
 29 |   parse_nested_list_(x)
 30 | }
 31 | 
 32 | # ipni ----
 33 | 
 34 | #' @importFrom purrr map_dfr
 35 | #'
 36 | #' @export
 37 | tidy.ipni_search <- function(x, ...) {
 38 |   map_dfr(x$results, parse_nested_list_)
 39 | }
 40 | 
 41 | #' @export
 42 | tidy.ipni_citation <- function(x, ...) {
 43 |   x <- x[! names(x) %in% c("response", "queryId")]
 44 | 
 45 |   parse_nested_list_(x)
 46 | }
 47 | 
 48 | #' @export
 49 | tidy.ipni_author <- function(x, ...) {
 50 |   x <- x[! names(x) %in% c("response", "queryId")]
 51 | 
 52 |   parse_nested_list_(x)
 53 | }
 54 | 
 55 | #' @export
 56 | tidy.ipni_publication <- function(x, ...) {
 57 |   x <- x[! names(x) %in% c("response", "queryId")]
 58 | 
 59 |   parse_nested_list_(x)
 60 | }
 61 | 
 62 | # tol ----
 63 | #' @importFrom purrr map_dfr
 64 | #'
 65 | #' @export
 66 | tidy.tol_search <- function(x, ...) {
 67 |   map_dfr(x$results, parse_nested_list_)
 68 | }
 69 | 
 70 | #' @export
 71 | tidy.tol_specimen <- function(x, ...) {
 72 |   x <- x[! names(x) %in% c("response", "queryId")]
 73 | 
 74 |   parse_nested_list_(x)
 75 | }
 76 | 
 77 | #' @export
 78 | tidy.tol_gene <- function(x, ...) {
 79 |   x <- x[! names(x) %in% c("response", "queryId")]
 80 | 
 81 |   parse_nested_list_(x)
 82 | }
 83 | 
 84 | # knms ----
 85 | 
 86 | #' @importFrom purrr map_lgl map_dfr pluck
 87 | #' @importFrom tidyr fill
 88 | #' @importFrom rlang .data
 89 | #'
 90 | #' @export
 91 | tidy.knms_match <- function(x, ...) {
 92 |   parsed <- map_dfr(x$results, parse_knms_line_)
 93 | 
 94 |   tidied <- fill(parsed, .data$submitted, .data$matched)
 95 |   tidied$matched <- tidied$matched %in% c("true", "multiple_matches")
 96 | 
 97 |   tidied
 98 | }
 99 | 
100 | # krs ----
101 | 
# Tidy KRS match results: each entry of `x$results` is parsed with
# `parse_nested_list_()` and the parsed entries are row-bound.
#' @importFrom purrr map_dfr
#'
#' @export
tidy.krs_match <- function(x, ...) {
  matches <- x$results

  map_dfr(matches, parse_nested_list_)
}
108 | 
109 | # utils ----
110 | 
#' Simple utility to wrap nested lists into a tibble.
#'
#' Nested lists are also converted to tibbles and inserted in list
#' columns. Three shapes of input are handled:
#'
#' * an unnamed list whose elements are all character: returned as a
#'   one-element list holding the flattened character vector;
#' * any other unnamed list: each element is parsed recursively and the
#'   results are row-bound;
#' * a named list: `NULL` entries become `NA`, list entries are parsed
#'   recursively and wrapped so they end up in list columns, and the
#'   whole thing is turned into a one-row tibble.
#'
#' @param l A (possibly nested) list.
#'
#' @importFrom purrr map map_chr map_dfr map_lgl flatten_chr every
#' @importFrom tibble as_tibble_row
#'
#' @noRd
parse_nested_list_ <- function(l) {
  is_unnamed <- is.null(names(l))

  # `&&` rather than `&`: this is a scalar condition, and short-circuiting
  # skips the `every()` scan for named lists.
  if (is_unnamed && every(l, is.character)) {
    return(list(flatten_chr(l)))
  }

  if (is_unnamed) {
    return(map_dfr(l, parse_nested_list_))
  }

  # Replace NULL entries with NA so they are kept as single values.
  null_cols <- map_lgl(l, is.null)
  l[null_cols] <- NA

  # Nested lists are parsed recursively; the extra `list()` keeps each
  # parsed value as a single element of its column.
  list_cols <- map_lgl(l, is.list)
  l[list_cols] <- map(l[list_cols], ~list(parse_nested_list_(.x)))

  as_tibble_row(l)
}
137 | 
#' Parse a single match result from KNMS.
#'
#' Builds a one-row tibble with the columns `submitted`, `matched`,
#' `ipni_id` and `matched_record` from one result line.
#'
#' @param line One KNMS result; elements are read by position. The first
#'   two are required, the third and fourth are optional.
#'
#' @importFrom stringr str_extract
#' @importFrom dplyr na_if
#' @importFrom tibble tibble
#'
#' @noRd
parse_knms_line_ <- function(line) {
  # Empty strings in the first two fields are recorded as NA.
  submitted_name <- na_if(line[[1]], "")
  match_status <- na_if(line[[2]], "")

  n_fields <- length(line)

  # The ID is the trailing run of digits and hyphens after "names:" in
  # the third field, when that field is present.
  id <- NA_character_
  if (n_fields > 2) {
    id <- str_extract(line[[3]], "(?<=names:)[0-9\\-]+$")
  }

  record <- if (n_fields > 3) line[[4]] else NA_character_

  tibble(
    submitted=submitted_name,
    matched=match_status,
    ipni_id=id,
    matched_record=record
  )
}
166 | 


--------------------------------------------------------------------------------
/man/search_tol.Rd:
--------------------------------------------------------------------------------
  1 | % Generated by roxygen2: do not edit by hand
  2 | % Please edit documentation in R/tol.R
  3 | \name{search_tol}
  4 | \alias{search_tol}
  5 | \title{Search Kew's Tree of Life for specimens or genes.}
  6 | \usage{
  7 | search_tol(query = "", genes = FALSE, limit = 50, page = 1, .wait = 0.2)
  8 | }
  9 | \arguments{
 10 | \item{query}{The string to query the database with.}
 11 | 
 12 | \item{genes}{Set to TRUE to download results for genes instead of specimens.}
 13 | 
 14 | \item{limit}{An integer specifying the number of results
 15 | to return.}
 16 | 
 17 | \item{page}{An integer specifying the page of results to request.}
 18 | 
 19 | \item{.wait}{Time to wait before making a request, to help
 20 | rate limiting.}
 21 | }
 22 | \value{
 23 | Returns an object of class \code{tol_search} that is a simple
 24 | structure with slots for:
 25 | \itemize{
 26 | \item \code{total}: the total number of results held in ToL for the query.
 27 | \item \code{page}: the page of results requested.
 28 | \item \code{limit}: the maximum number of results requested from the API.
 29 | \item \code{results}: the query results parsed into a list.
 30 | \item \code{query}: the query string submitted to the API.
 31 | \item \code{response}: the \link[httr:response]{httr response object}.
 32 | }
 33 | }
 34 | \description{
 35 | Query Kew's Tree of Life for specimens that have
 36 | been sampled for sequencing.
 37 | }
 38 | \details{
 39 | The \href{https://treeoflife.kew.org/}{Tree of Life} is a database
 40 | of specimens sequenced as part of Kew's efforts to build
 41 | a comprehensive evolutionary tree of life for flowering plants.
 42 | 
 43 | The search API allows users to query the database for specimens
 44 | based on their taxonomic information. Filtering and keyword-search
 45 | are not currently implemented. All searches are based on taxonomic
 46 | information, so \code{Myrcia} and \code{Myrtales} will return results, but
 47 | \code{Brummitt} will not.
 48 | 
 49 | The search API also allows users to download information about sequenced
 50 | genes. There is currently no ability to search within the results for genes,
 51 | but a table of all genes can be accessed using keyword argument \code{genes=TRUE}.
 52 | }
 53 | \examples{
 54 | # get the first 50 of all sequenced specimens
 55 | search_tol(limit=50)
 56 | 
 57 | # search for all sequenced Myrcia specimens
 58 | search_tol("Myrcia")
 59 | 
 60 | # get all sequenced specimens
 61 | search_tol(limit=5000)
 62 | 
 63 | # search for a species name and print the results
 64 | r <- search_tol("Myrcia guianensis")
 65 | print(r)
 66 | 
 67 | # simplify search results to a `tibble`
 68 | r <- search_tol("Myrcia")
 69 | tidy(r)
 70 | 
 71 | # gene stats are nested in the results
 72 | r <- search_tol("Myrcia")
 73 | tidied <- tidy(r)
 74 | tidyr::unnest(tidied, cols=gene_stats)
 75 | 
 76 | # species names are nested in the results
 77 | r <- search_tol("Myrcia")
 78 | tidied <- tidy(r)
 79 | tidyr::unnest(tidied, cols=species, names_sep="_")
 80 | 
 81 | # as is higher taxonomy
 82 | r <- search_tol("Myrcia")
 83 | tidied <- tidy(r)
 84 | tidyr::unnest(tidied, cols=species, names_sep="_")
 85 | 
 86 | # search for all gene entries and print results
 87 | r <- search_tol(genes=TRUE, limit=500)
 88 | print(r)
 89 | 
 90 | # tidy the returned genes
 91 | tidy(r)
 92 | 
 93 | }
 94 | \references{
 95 | Baker W.J., Bailey P., Barber V., Barker A., Bellot S., Bishop D., Botigue L.R., Brewer G., Carruthers T., Clarkson J.J., Cook J., Cowan R.S., Dodsworth S., Epitawalage N., Francoso E., Gallego B., Johnson M., Kim J.T., Leempoel K., Maurin O., McGinnie C., Pokorny L., Roy S., Stone M., Toledo E., Wickett N.J., Zuntini A.R., Eiserhardt W.L., Kersey P.J., Leitch I.J. & Forest F. 2021. A Comprehensive Phylogenomic Platform for Exploring the Angiosperm Tree of Life. Systematic Biology, 2021; syab035, https://doi.org/10.1093/sysbio/syab035
 96 | }
 97 | \concept{ToL functions
 98 | \itemize{
 99 | \item \code{\link[=lookup_tol]{lookup_tol()}} to lookup information about a sequenced specimen
100 | using a valid ToL ID.
101 | \item \code{\link[=download_tol]{download_tol()}} to download a file from the ToL SFTP server.
102 | \item \code{\link[=load_tol]{load_tol()}} load a file from the ToL SFTP server.
103 | }}
104 | 


--------------------------------------------------------------------------------
/tests/testthat/test-powo.R:
--------------------------------------------------------------------------------
  1 | test_that("search URL returns status 200", {
  2 |   url <- powo_search_url_()
  3 | 
  4 |   Sys.sleep(0.1)
  5 |   response <- httr::GET(url)
  6 | 
  7 |   expect_equal(httr::status_code(response), 200)
  8 | })
  9 | 
 10 | test_that("search URL response is json", {
 11 |   url <- powo_search_url_()
 12 | 
 13 |   Sys.sleep(0.1)
 14 |   response <- httr::GET(url)
 15 | 
 16 |   expect_equal(httr::http_type(response), "application/json")
 17 | })
 18 | 
 19 | test_that("taxon URL response is json", {
 20 |   url <- powo_taxon_url_("30001404-2")
 21 | 
 22 |   Sys.sleep(0.1)
 23 |   response <- httr::GET(url)
 24 | 
 25 |   expect_equal(httr::http_type(response), "application/json")
 26 | })
 27 | 
 28 | test_that("taxon URL returns 404 for bad ID", {
 29 |   url <- powo_taxon_url_("bad id")
 30 | 
 31 |   Sys.sleep(0.1)
 32 |   response <- httr::GET(url)
 33 | 
 34 |   expect_equal(status_code(response), 400)
 35 | })
 36 | 
 37 | test_that("raises error for unimplemented keyword", {
 38 |   query <- list(published="1920")
 39 |   expect_error(search_powo(query, .wait=0.15),
 40 |                "Query keyword.+ not recognised")
 41 | })
 42 | 
 43 | test_that("accepted filter only returns accepted names", {
 44 |   query <- "Myrcia"
 45 |   filters <- c("accepted")
 46 | 
 47 |   results <- search_powo(query, filters, .wait=0.15)
 48 |   all_accepted <- purrr::every(results$results,
 49 |                                ~.x$accepted)
 50 | 
 51 |   expect_true(all_accepted)
 52 | })
 53 | 
 54 | test_that("specific filter only returns species", {
 55 |   query <- "Myrcia"
 56 |   filters <- c("species")
 57 | 
 58 |   results <- search_powo(query, filters, .wait=0.15)
 59 |   all_species <- purrr::every(results$results,
 60 |                               ~.x$rank == "Species")
 61 | 
 62 |   expect_true(all_species)
 63 | })
 64 | 
 65 | test_that("generic filter only returns genera", {
 66 |   query <- "Myrcia"
 67 |   filters <- c("genera")
 68 | 
 69 |   results <- search_powo(query, filters, .wait=0.15)
 70 |   all_genera <- purrr::every(results$results,
 71 |                              ~.x$rank == "Genus")
 72 | 
 73 |   expect_true(all_genera)
 74 | })
 75 | 
 76 | test_that("infraspecific filter only returns infraspecifics", {
 77 |   infra_ranks <- c("Variety", "Subspecies", "Form")
 78 | 
 79 |   query <- "Poa annua"
 80 |   filters <- c("infraspecies")
 81 | 
 82 |   results <- search_powo(query, filters, .wait=0.15)
 83 |   all_infra <- purrr::every(results$results,
 84 |                             ~.x$rank %in% infra_ranks)
 85 | 
 86 |   expect_true(all_infra)
 87 | })
 88 | 
 89 | test_that("family filter only returns families", {
 90 | 
 91 |   query <- "poaceae"
 92 |   filters <- c("families")
 93 | 
 94 |   results <- search_powo(query, filters, .wait=0.15)
 95 |   all_families <- purrr::every(results$results,
 96 |                             ~.x$rank == "Family")
 97 | 
 98 |   expect_true(all_families)
 99 | })
100 | 
101 | test_that("image filter only returns things with images", {
102 | 
103 |   query <- "Myrcia"
104 |   filters <- c("has_images")
105 | 
106 |   results <- search_powo(query, filters, .wait=0.15)
107 |   all_images <- purrr::every(results$results,
108 |                              ~length(.x$images) > 0)
109 | 
110 |   expect_true(all_images)
111 | })
112 | 
113 | test_that("lookup with distribution returns distribution field", {
114 |   taxonid <- "320035-2"
115 | 
116 |   results <- lookup_powo(taxonid, distribution=TRUE, .wait=0.15)
117 | 
118 |   expect_true("distribution" %in% names(results))
119 | })
120 | 
121 | test_that("tidy search results returns tibble", {
122 |   results <- search_powo("Poa annua", filters=c("species"), .wait=0.15)
123 |   tidied <- tidy(results)
124 | 
125 |   expect_s3_class(tidied, "tbl_df")
126 | })
127 | 
128 | test_that("tidy lookup results returns tibble", {
129 |   results <- lookup_powo("30001404-2", .wait=0.15)
130 |   tidied <- tidy(results)
131 | 
132 |   expect_s3_class(tidied, "tbl_df")
133 | })
134 | 
135 | test_that("cursor returns next page of results", {
136 |   query <- list(genus="Ulex")
137 | 
138 |   page1 <- search_powo(query, .wait=0.15)
139 |   page2 <- search_powo(query, cursor=page1$cursor, .wait=0.15)
140 | 
141 |   expect_false(page1$results[[1]]$fqId == page2$results[[1]]$fqId)
142 | })
143 | 


--------------------------------------------------------------------------------
/R/query-formatting.R:
--------------------------------------------------------------------------------
  1 | #' Format filters for search APIs.
  2 | #'
  3 | #' Checks the filters are valid before joining them
  4 | #' together with as a comma-separated string.
  5 | #'
  6 | #' @param filters A character vector of filter names.
  7 | #'
  8 | #' @noRd
  9 | format_filters_ <- function(filters, resource) {
 10 |   if (is.null(filters)) {
 11 |     return(NULL)
 12 |   }
 13 | 
 14 |   filter_map <- get_filters_(resource)
 15 |   valid_filters <- names(filter_map)
 16 |   bad_filters <- setdiff(filters, valid_filters)
 17 | 
 18 |   if (length(bad_filters) > 0) {
 19 |     stop(
 20 |       sprintf(
 21 |         "Filters for [%s] must be one of [%s]\n[%s] are not recognised.",
 22 |         resource,
 23 |         paste(valid_filters, collapse=","),
 24 |         paste(bad_filters, collapse=",")
 25 |       )
 26 |     )
 27 |   }
 28 | 
 29 |   paste(filter_map[filters], collapse=",")
 30 | }
 31 | 
 32 | #' Format queries for search APIs.
 33 | #'
 34 | #' Checks if query is valid and then formats it correctly.
 35 | #'
 36 | #' @param query A string or list specifying the query.
 37 | #' @param resource A string specifying the resource being queried.
 38 | #'
 39 | #' @importFrom glue glue
 40 | #'
 41 | #' @noRd
 42 | format_query_ <- function(query, resource) {
 43 | 
 44 |   if (! is.list(query) & length(query) > 1) {
 45 |     message <- glue("{resource} search query must be a named list or a string.",
 46 |                     "Provided query type: {typeof(query)}",
 47 |                     "Provided query length: {query_length}",
 48 |                     "",
 49 |                     .sep="\n", .trim=FALSE)
 50 | 
 51 |     stop(message, call.=FALSE)
 52 |   }
 53 | 
 54 |   if (is.list(query)) {
 55 |     keywords <- names(query)
 56 |     keyword_map <- get_keywords_(resource)
 57 |     valid_keywords <- names(keyword_map)
 58 |     bad_keywords <- setdiff(keywords, valid_keywords)
 59 | 
 60 |     if (length(bad_keywords) > 0) {
 61 |       stop(
 62 |         sprintf(
 63 |           "Query keywords for [%s] must be one of [%s]\n[%s] are not recognised.",
 64 |           resource,
 65 |           paste(valid_keywords, collapse=","),
 66 |           paste(bad_keywords, collapse=",")
 67 |         )
 68 |       )
 69 |     }
 70 |   }
 71 | 
 72 |   if(is.list(query)) {
 73 |     names(query) <- keyword_map[keywords]
 74 |     query
 75 |   } else if(resource == "krs") {
 76 |     list(query=query)
 77 |   } else {
 78 |     list(q=query)
 79 |   }
 80 | }
 81 | 
 82 | #' Format query for an Open Refine API.
 83 | #'
 84 | #' Checks if query is valid, formats the keywords correctly, and makes it
 85 | #' a JSON string.
 86 | #'
 87 | #' @param query A string or list specifying the query.
 88 | #' @param resource A string specifying the resource being queried.
 89 | #'
 90 | #' @importFrom glue glue
 91 | #' @importFrom jsonlite toJSON
 92 | #' @importFrom purrr map2
 93 | #'
 94 | #' @noRd
 95 | format_refine_query_ <- function(query, resource) {
 96 |   query <- format_query_(query, resource)
 97 | 
 98 |   properties <- query[names(query) != "query"]
 99 |   properties <- map2(names(properties), properties, format_refine_property_)
100 | 
101 |   q <- query$query
102 |   query <- list(properties=properties)
103 |   if(!is.null(q)) {
104 |     query$query <- q
105 |   }
106 | 
107 |   query <- toJSON(query, auto_unbox=TRUE)
108 |   list(query=query)
109 | }
110 | 
#' Format an Open Refine property for an API request.
#'
#' @param name The property name; used for both the `p` and `pid` entries.
#' @param value The property value; stored under `v`.
#'
#' @return A list with the entries `p`, `pid` and `v`.
#'
#' @noRd
format_refine_property_ <- function(name, value) {
  setNames(list(name, name, value), c("p", "pid", "v"))
}
117 | 
118 | 
#' Format body for a POST request.
#'
#' The body of a POST request must be a list.
#' So far, only KNMS uses POST requests. Names
#' for matching might be input as a character vector,
#' so we need to ensure the body is a list and coerce it
#' if not.
#'
#' @param body The raw body as input: a list or a character vector.
#'
#' @return `body` unchanged if it is already a list, otherwise
#'   `as.list(body)`. Any other input type raises an error.
#'
#' @importFrom glue glue
#'
#' @noRd
format_body_ <- function(body) {

  # Scalar condition, so `&&` rather than the vectorised `&`.
  if (!is.list(body) && !is.character(body)) {
    # Report the type of `body`; the message previously interpolated
    # `typeof(query)`, but no `query` exists in this function.
    msg <- glue("The body of a POST request must be either a list or character vector.",
                "Provided body type: {typeof(body)}",
                "",
                .sep="\n", .trim=FALSE)

    stop(msg, call.=FALSE)
  }

  if (!is.list(body)) {
    body <- as.list(body)
  }

  body
}
147 | 


--------------------------------------------------------------------------------
/vignettes/KRS.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: "KRS"
  3 | output: rmarkdown::html_vignette
  4 | vignette: >
  5 |   %\VignetteIndexEntry{KRS}
  6 |   %\VignetteEngine{knitr::rmarkdown}
  7 |   %\VignetteEncoding{UTF-8}
  8 | ---
  9 | 
 10 | ```{r, include = FALSE}
 11 | knitr::opts_chunk$set(
 12 |   collapse = TRUE,
 13 |   comment = "#>"
 14 | )
 15 | ```
 16 | 
 17 | The [Kew Reconciliation Service (KRS)](http://data1.kew.org/reconciliation/about/IpniName) allows a user to submit a taxon for matching against [IPNI](https://ipni.org/).
 18 | 
 19 | The reconciliation service is an Open Refine style API that allows matching of a single name against IPNI. The matching is done by a series of transformations configured to botanical names
 20 | in IPNI. These transformations are detailed [here](https://github.com/RBGKew/String-Transformers#string-transformers), I think.
 21 | 
 22 | It appears that KRS is the service that sits behind [KNMS](KNMS.html). KNMS allows matching of
 23 | batches of names in one request but does not allow matching to different parts of a name. If you have a set of names to match and just want to do simple matching, I'd use KNMS. But if you want
 24 | to specify which parts of the names to match on, I'd use KRS.
 25 | 
 26 | 
 27 | ```{r setup}
 28 | library(kewr)
 29 | library(dplyr)
 30 | library(tidyr)
 31 | ```
 32 | 
 33 | ## Matching names
 34 | 
 35 | To use KRS, you can just submit a single name for matching.
 36 | 
 37 | ```{r}
 38 | match <- match_krs("Solanum sanchez-vegae S.Knapp")
 39 | match
 40 | ```
 41 | 
 42 | This also works without the author string included:
 43 | 
 44 | ```{r}
 45 | match <- match_krs("Solanum sanchez-vegae")
 46 | match
 47 | ```
 48 | 
 49 | The match results can be converted to a table for easier inspection.
 50 | 
 51 | ```{r}
 52 | tidy(match)
 53 | ```
 54 | 
 55 | ## Matching parts of a name
 56 | 
 57 | The reconciliation service provides a specification for matching to different parts
 58 | of a botanical name. This is described in detail [here](http://data1.kew.org/reconciliation/about/IpniName).
 59 | 
 60 | For example, if we want to match to the genus name *Myrcia*, we could submit a
 61 | simple request like before.
 62 | 
 63 | ```{r}
 64 | match <- match_krs("Myrcia")
 65 | match
 66 | ```
 67 | 
 68 | But this has returned more than one result. We can be more specific by matching 
 69 | to the genus and the author.
 70 | 
 71 | ```{r}
 72 | match <- match_krs(list(genus="Myrcia", author="DC"))
 73 | match
 74 | ```
 75 | 
 76 | Which has narrowed it down more.
 77 | 
 78 | We can specify a match for every part of a name like this.
 79 | 
 80 | ```{r}
 81 | match <- match_krs(list(genus="Myrcia", species="magnolifolia", infra="latifolia",
 82 |                         author="Berg"))
 83 | match
 84 | ```
 85 | 
 86 | This match has worked even though there's a minor misspelling of the specific epithet
 87 | and the author string. Matching to the taxon name works by a set of pre-configured
 88 | string transformations that catch some common mistakes in botanical names. The
 89 | matching to author strings is also slightly fuzzy.
 90 | 
 91 | This matching also handles different taxonomic ranks using ordered epithets, where
 92 | the highest rank is specified as `epithet_1` down to `epithet_3`.
 93 | 
 94 | ```{r}
 95 | match <- match_krs(list(epithet_1="Solanaceae"))
 96 | match
 97 | ```
 98 | 
 99 | This also works for infrageneric names.
100 | 
101 | ```{r}
102 | match <- match_krs(list(epithet_1="Acacia", epithet_2="Aculeiferum", author="Vassal"))
103 | match
104 | ```
105 | 
106 | It should be noted that these last two examples give a score lower than 100,
107 | because they return more than one match.
108 | 
109 | ## Matching more than one name
110 | 
111 | If you want to do simple matching to more than one name, it might be easier to use
112 | KNMS.
113 | 
114 | If you want to match the individual parts of multiple names, you can apply the
115 | matching function to the rows of a data frame, using `dplyr::rowwise`.
116 | 
117 | ```{r}
118 | names <- tibble(
119 |   genus=c("Poa", "Myrcia", "Solanum"),
120 |   species=c("annua", "almasensis", "sanchez-vegae"),
121 |   author=c("L.", "NicLugh.", "S.Knapp")
122 | )
123 | 
124 | 
125 | matches <-
126 |   names %>%
127 |   rowwise() %>%
128 |   mutate(match=list(match_krs(list(genus=genus, species=species, author=author)))) %>%
129 |   mutate(match=list(tidy(match))) %>%
130 |   unnest(cols=c(match))
131 | 
132 | matches
133 | ```
134 | 
135 | 


--------------------------------------------------------------------------------
/man/search_wcvp.Rd:
--------------------------------------------------------------------------------
  1 | % Generated by roxygen2: do not edit by hand
  2 | % Please edit documentation in R/wcvp.R
  3 | \name{search_wcvp}
  4 | \alias{search_wcvp}
  5 | \title{Search WCVP for a taxon.}
  6 | \usage{
  7 | search_wcvp(query, filters = NULL, cursor = "*", limit = 50, .wait = 0.1)
  8 | }
  9 | \arguments{
 10 | \item{query}{The taxon string to search WCVP for. If using keywords,
 11 | the query must be formatted as a list.}
 12 | 
 13 | \item{filters}{Filter to apply to search results.
 14 | Multiple filters must be supplied as a character vector.}
 15 | 
 16 | \item{cursor}{A cursor returned by a previous search.
 17 | If used, the query and filter must be exactly the same.}
 18 | 
 19 | \item{limit}{An integer specifying the maximum number of results
 20 | to return.}
 21 | 
 22 | \item{.wait}{Time to wait before making a request, to help
 23 | rate limiting.}
 24 | }
 25 | \value{
 26 | Returns an object of class \code{wcvp_search} that is a simple
 27 | structure with slots for:
 28 | \itemize{
 29 | \item \code{total}: the total number of results held in WCVP for the query
 30 | \item \code{cursor}: a cursor to retrieve the next page of results from the API.
 31 | \item \code{limit}: the maximum number of results requested from the API.
 32 | \item \code{results}: the query results parsed into a list.
 33 | \item \code{query}: the query string submitted to the API.
 34 | \item \code{filter}: the filter strings submitted to the API.
 35 | \item \code{response}: the \link[httr:response]{httr response object}.
 36 | }
 37 | }
 38 | \description{
 39 | Query the World Checklist of Vascular Plants search API
 40 | for a taxon string.
 41 | }
 42 | \details{
 43 | The \href{https://wcvp.science.kew.org/}{World Checklist of Vascular Plants (WCVP)}
 44 | is a global consensus view of all known vascular plant species.
 45 | It has been compiled by staff at RBG Kew in consultation with plant
 46 | group experts.
 47 | 
 48 | The search API allows users to query the checklist for plant names.
 49 | Currently, it does not support partial or fuzzy matching.
 50 | In order to get a result, the user must supply a valid name string.
 51 | For example, 'Myrcia' and 'Myrcia guianensis' will return results,
 52 | but 'M' or 'Myr' will not.
 53 | 
 54 | There is some support for querying using keyword arguments. The API is
 55 | not currently documented, so only keywords that are definitely there have
 56 | been implemented. Use the \code{get_keywords} function to view a list of all implemented keywords.
 57 | 
 58 | The API will return taxonomic information (the family, authority, status, and rank)
 59 | of all names matching the query. These results can be limited, for example to accepted species,
 60 | using filters. Use the \code{get_filters} function to view a list of all implemented filters.
 61 | }
 62 | \examples{
 63 | # search for all entries containing a genus name
 64 | search_wcvp("Myrcia")
 65 | 
 66 | # search for all accepted species within a genus
 67 | search_wcvp("Myrcia", filters=c("species", "accepted"))
 68 | 
 69 | # search for up to 10,000 species in a genus
 70 | search_wcvp("Poa", filters=c("species"), limit=10000)
 71 | 
 72 | # search for all names in a family
 73 | search_wcvp(list(family="Myrtaceae"))
 74 | 
 75 | # search for genera within a family
 76 | search_wcvp(list(family="Myrtaceae"), filters=c("genera"))
 77 | 
 78 | # search for all names with a specific epithet
 79 | search_wcvp(list(species="guianensis"))
 80 | 
 81 | # search for a species name and print the results
 82 | r <- search_wcvp("Myrcia guianensis", filters=c("species"))
 83 | print(r)
 84 | 
 85 | # simplify search results to a `tibble`
 86 | r <- search_wcvp("Poa", filters=c("species"))
 87 | tidy(r)
 88 | 
 89 | # accepted name info is nested inside the records for synonyms
 90 | # simplify accepted name info to the name ID
 91 | r <- search_wcvp("Poa", filters=c("species"))
 92 | tidied <- tidy(r)
 93 | tidyr::unnest(tidied, cols=synonymOf, names_sep="_")
 94 | 
 95 | }
 96 | \references{
 97 | WCVP (2020). World Checklist of Vascular Plants, version 2.0. Facilitated by the Royal Botanic Gardens, Kew. Published on the Internet; http://wcvp.science.kew.org/
 98 | }
 99 | \seealso{
100 | \itemize{
101 | \item \code{\link[=lookup_wcvp]{lookup_wcvp()}} to lookup information about a taxon name
102 | using a valid IPNI ID.
103 | \item \code{\link[=download_wcvp]{download_wcvp()}} to download the entire WCVP.
104 | }
105 | 
106 | Other WCVP functions: 
107 | \code{\link{download_wcvp}()},
108 | \code{\link{lookup_wcvp}()}
109 | }
110 | \concept{WCVP functions}
111 | 


--------------------------------------------------------------------------------
/vignettes/articles/retrieve-all-query-results.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: "Retrieving all results for a query"
  3 | ---
  4 | 
  5 | ```{r, include = FALSE}
  6 | knitr::opts_chunk$set(
  7 |   collapse = TRUE,
  8 |   comment = "#>"
  9 | )
 10 | ```
 11 | 
 12 | To reduce the load on the servers, some of the Kew resources limit the number of results returned for a query. This tutorial will demonstrate how to download all the results for a query in a way that (hopefully) shouldn't upset the servers.
 13 | 
 14 | ```{r setup, message=FALSE, warning=FALSE}
 15 | library(kewr)
 16 | library(dplyr)
 17 | ```
 18 | 
 19 | ## Increasing the maximum number of results returned
 20 | 
 21 | Possibly the simplest option is to just tell the resource that you want more results.
 22 | 
 23 | By default, the search functions in *kewr* set the maximum number of results to 50. You can increase this to whatever you want, to make sure you get all the results you want.
 24 | 
 25 | For instance, I know for sure that there are fewer than 2000 accepted species in the genus *Myrcia*. If I want to get a list of all these species from WCVP, I can, therefore, increase the maximum number of results to 2000.
 26 | 
 27 | ```{r max-limit-search}
 28 | results <- search_wcvp(query=list(genus="Myrcia"),
 29 |                        filters=c("accepted", "species"),
 30 |                        limit=2000)
 31 | results
 32 | ```
 33 | 
 34 | We can see from the results object that we have a single page of results that contains the entries for all 748 accepted species in the genus.
 35 | 
 36 | However, this only really works when two things are true:
 37 | 
 38 | 1. You know for sure there aren't more results than a certain number.
 39 | 2. That number isn't too big.
 40 | 
 41 | This strategy worked in this case because I knew there definitely weren't more than 2000 accepted species, and 2000 is a relatively small number as things go. If there are more results than I expected, I run the risk of missing some entries. If my expected number of results was too big, say 20,000 or even 200,000, the request might time out before I get anything back.
 42 | 
 43 | ### Advantages:
 44 | 
 45 | * You only have to make one request.
 46 | 
 47 | ### Disadvantages:
 48 | 
 49 | * You could miss some entries if there are more than you expect.
 50 | * You might not get any results back if you ask for too many.
 51 | 
 52 | ## Making multiple requests to get multiple pages of results
 53 | 
 54 | The other way to get all of your results is to iterate over all the pages of your request.
 55 | 
 56 | Making multiple smaller requests avoids the request hanging because you asked for too much data. However, some resources could have rate-limiting enabled, which means they will block you if you make too many requests in a certain time period. Therefore, you need to balance the size of each request with the number of requests that you're making.
 57 | 
 58 | One way to make multiple requests is with a `for` loop.
 59 | 
 60 | To get started, we'll make our first request outside of the for loop. This way, we can see how many pages we need to loop over. I've chosen a limit of 100 results per page here.
 61 | 
 62 | ```{r paged-initial-search}
 63 | query <- list(genus="Myrcia")
 64 | filters <- c("accepted", "species")
 65 | 
 66 | r <- search_wcvp(query, filters=filters, limit=100)
 67 | r
 68 | ```
 69 | 
 70 | Before we get the rest of the results in a `for` loop, it's worth tidying our first result into a data frame, which we'll use to add all our subsequent results to.
 71 | 
 72 | ```{r}
 73 | results <- tidy(r)
 74 | ```
 75 | 
 76 | Now we can loop through and get the rest of our query results. 
 77 | 
 78 | **IMPORTANT**: making too many requests in a short period of time to POWO can cause problems for the server. By default,
 79 | the `request_next` function adds in a little waiting period before making a new request. But you might get back an error if
 80 | you're asking for lots of things one after the other.
 81 | 
 82 | ```{r paged-loop-search}
 83 | for (i in 2:r$pages) {
 84 |   r <- request_next(r)
 85 |   
 86 |   new_results <- tidy(r)
 87 |   results <- bind_rows(results, new_results)
 88 | }
 89 | 
 90 | head(results)
 91 | ```
 92 | 
 93 | We can check we have all the results by looking at the length of
 94 | our results data frame:
 95 | 
 96 | ```{r check-results-length}
 97 | nrow(results)
 98 | ```
 99 | 
100 | ### Advantages:
101 | 
102 | * Smaller requests are less likely to time-out.
103 | * You don't have to know how many results you expect before you start.
104 | 
105 | ### Disadvantages
106 | 
107 | * Making too many requests could overload the server and get you blocked.
108 | 


--------------------------------------------------------------------------------
/vignettes/WCVP.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: "WCVP"
  3 | output: rmarkdown::html_vignette
  4 | vignette: >
  5 |   %\VignetteIndexEntry{WCVP}
  6 |   %\VignetteEngine{knitr::rmarkdown}
  7 |   %\VignetteEncoding{UTF-8}
  8 | ---
  9 | 
 10 | ```{r, include = FALSE}
 11 | knitr::opts_chunk$set(
 12 |   collapse = TRUE,
 13 |   comment = "#>"
 14 | )
 15 | ```
 16 | 
 17 | ```{r setup, warning=FALSE, message=FALSE}
 18 | library(kewr)
 19 | library(here)
 20 | library(dplyr)
 21 | ```
 22 | 
 23 | The [World Checklist of Vascular Plants (WCVP)](https://wcvp.science.kew.org/) is a global consensus view of all known vascular plant species. It has been compiled by staff at RBG Kew in consultation with plant group experts.
 24 | 
 25 | The WCVP is a taxonomic database, and as such contains information like the taxonomic status and synonymy of plant names. It can be used for a number of tasks, including searching for all records with a particular name, getting a list of all accepted species in a genus, or looking up the accepted name for a species.
 26 | 
 27 | ## Searching WCVP for a taxon name
 28 | 
 29 | Searching in WCVP works by exact matching. This means that a misspelled name will not return any results.
 30 | 
 31 | For instance, searching for `Poa anua` gets nothing:
 32 | 
 33 | ```{r}
 34 | results <- search_wcvp("Poa anua")
 35 | results
 36 | ```
 37 | 
 38 | But searching for the correctly spelled name will give a result:
 39 | 
 40 | ```{r}
 41 | results <- search_wcvp("Poa annua")
 42 | results
 43 | ```
 44 | 
 45 | Similarly, searching with partial matching does not work:
 46 | 
 47 | ```{r}
 48 | results <- search_wcvp("Ulex e")
 49 | results
 50 | ```
 51 | 
 52 | But searching for a genus name will return all taxa within that genus as well:
 53 | 
 54 | ```{r}
 55 | results <- search_wcvp("Ulex")
 56 | results
 57 | ```
 58 | 
 59 | With this search result, we can see that there are 92 records for `Ulex` but the API has only returned the first 50.
 60 | 
 61 | To get all results for this search, we can either increase the number of returned results:
 62 | 
 63 | ```{r}
 64 | ulex_all <- search_wcvp("Ulex", limit=100)
 65 | ulex_all
 66 | ```
 67 | 
 68 | Or advance the search by one page:
 69 | 
 70 | ```{r}
 71 | ulex_page1 <- search_wcvp("Ulex")
 72 | ulex_page2 <- request_next(ulex_page1)
 73 | ulex_page2
 74 | ```
 75 | 
 76 | In both cases, we can tidy the results into a tibble to make subsequent analysis easier:
 77 | 
 78 | ```{r}
 79 | tidy(ulex_all)
 80 | ```
 81 | 
 82 | ```{r}
 83 | bind_rows(
 84 |   tidy(ulex_page1),
 85 |   tidy(ulex_page2)
 86 | )
 87 | ```
 88 | 
 89 | ## Getting a list of accepted names in a genus
 90 | 
 91 | The search function also accepts a number of keywords and filters that can be used to narrow down the results returned. A full list of keywords and filters can be found in the function documentation.
 92 | 
 93 | An example use of these filters is retrieving a checklist of accepted species in a genus. In the search below, the `genus` keyword is used to retrieve all records in the genus *Myrcia*, while the filters `accepted` and `species` narrow these records down to just accepted species:
 94 | 
 95 | ```{r}
 96 | checklist <- search_wcvp(list(genus="Myrcia"), 
 97 |                          filters=c("accepted", "species"),
 98 |                          limit=1000)
 99 | 
100 | checklist
101 | ```
102 | 
103 | ## Looking up accepted names
104 | 
105 | As well as searching by name, the WCVP API can be used to look up taxonomic information for a known IPNI ID. The returned information can then be used to find all synonyms for a name, find the accepted name for a taxon, or find the parent/child taxa.
106 | 
107 | The IPNI ID can be found a few different ways, for example using the `KNMS` API to match a name to an IPNI ID. In this case we will find it using WCVP.
108 | 
109 | ```{r}
110 | search_result <- search_wcvp("Calyptranthes acevedoi", 
111 |                              filters=c("species"))
112 | 
113 | ipni_id <- search_result$results[[1]]$id
114 | ipni_id
115 | ```
116 | 
117 | With this, we can look the record up.
118 | 
119 | ```{r}
120 | taxon_info <- lookup_wcvp(ipni_id)
121 | taxon_info
122 | ```
123 | From this record, we can see that the name we had is a Homotypic Synonym. This record also contains the taxonomic information for the current accepted name:
124 | 
125 | ```{r}
126 | tidied <- tidy(taxon_info)
127 | tidied$accepted[[1]]
128 | ```
129 | 
130 | ## Downloading the whole WCVP
131 | 
132 | There are cases where it might be easier to download all of the WCVP, rather than make lots of requests to it. The `download_wcvp` function lets you download the whole WCVP to whatever directory you want.
133 | 
134 | ```{r, eval=FALSE}
135 | save_path <- here()
136 | download_wcvp(save_path)
137 | ```
138 | Previous versions of the WCVP are also available, for posterity and reproducibility. These can be downloaded by specifying which version you want.
139 | 
140 | ```{r, eval=FALSE}
141 | download_wcvp(save_path, version=1)
142 | ```
143 | 
144 | 


--------------------------------------------------------------------------------
/vignettes/articles/building-checklist.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: "Building a species checklist"
  3 | ---
  4 | 
  5 | ```{r, include = FALSE}
  6 | knitr::opts_chunk$set(
  7 |   collapse = TRUE,
  8 |   comment = "#>"
  9 | )
 10 | ```
 11 | 
 12 | A common task that Plants of the World Online (POWO) can be used for is to build a checklist of native species for a country.
 13 | 
 14 | In this demonstration, we will:
 15 | 
 16 | 1. Request a list of all accepted species that occur in a country.
 17 | 2. Get the native distribution of all those species.
 18 | 3. Narrow our checklist down to native species.
 19 | 4. Build another checklist for endemic species.
 20 | 
 21 | ## Setup 
 22 | 
 23 | In addition to *kewr*, we'll load:
 24 | 
 25 | * *dplyr* to manipulate the data
 26 | * *tidyr* to reshape data frames
 27 | * *purrr* to map functions across items in a list
 28 | * *progress* to add a progress bar
 29 | * *stringr* to extract some data from strings
 30 | 
 31 | ```{r setup, message=FALSE, warning=FALSE}
 32 | library(kewr)
 33 | library(dplyr)
 34 | library(tidyr)
 35 | library(purrr)
 36 | library(progress)
 37 | library(stringr)
 38 | ```
 39 | 
 40 | ## 1. Requesting a list of accepted species
 41 | 
 42 | We'll get our list of accepted species for Iceland, using the POWO search API.
 43 | 
 44 | ```{r iceland-species-query}
 45 | query <- list(distribution="Iceland")
 46 | filters <- c("accepted", "species")
 47 | 
 48 | iceland_species <- search_powo(query, filters=filters, limit=1000)
 49 | ```
 50 | 
 51 | In total, we have this many accepted species in Iceland:
 52 | 
 53 | ```{r display-total}
 54 | iceland_species$total
 55 | ```
 56 | 
 57 | ## 2. Get the native distribution of all the species
 58 | 
 59 | To get the native distribution for all our species, we need to use POWO's lookup API for every single one.
 60 | 
 61 | First we'll extract a list of IDs from our results, using the `map` function from *purrr*.
 62 | 
 63 | ```{r extract-species-id}
 64 | ids <- map(iceland_species$results, ~str_extract(.x$fqId, "[\\d\\-]+$"))
 65 | ```
 66 | 
 67 | Then we need to make all of our requests. To make things easier, we'll define a simple function that just accepts a species ID, and makes use of a progress bar to track our requests!
 68 | 
 69 | ```{r download-distributions}
 70 | pb <- progress_bar$new(
 71 |   format="  requesting [:bar] :current/:total (:percent)",
 72 |   total=length(ids)
 73 | )
 74 | 
 75 | fcn <- function(id) {
 76 |   pb$tick()
 77 |   
 78 |   lookup_powo(id, distribution=TRUE)
 79 | }
 80 | 
 81 | iceland_records <- map(ids, fcn)
 82 | ```
 83 | 
 84 | Now we have all the records for our species, we can tidy them as a data frame to make subsequent analysis a bit easier.
 85 | 
 86 | ```{r tidy-results}
 87 | iceland_checklist <- map_dfr(iceland_records, tidy)
 88 | iceland_checklist
 89 | ```
 90 | 
 91 | ## 3. Narrow the checklist to native species
 92 | 
 93 | To narrow our species down, we'll add an extra column to indicate if a species is native to Iceland or not. This will let us filter our data using that column.
 94 | 
 95 | I've done this below in a single, chained command by using the pipe (`%>%`) operator from *dplyr*. I've also taken advantage of the `rowwise` feature in the newer versions of *dplyr*.
 96 | 
 97 | ```{r filter-native}
 98 | check_native <- function(dist, country="Iceland") {
 99 |   native_dist <- dist$natives[[1]]
100 |   
101 |   country %in% native_dist$name
102 | }
103 | 
104 | iceland_checklist <-
105 |   iceland_checklist %>%
106 |   rowwise() %>%
107 |   mutate(is_native=check_native(distribution)) %>%
108 |   ungroup() %>%
109 |   filter(is_native)
110 | ```
111 | 
112 | Now all we have to do is tidy up our data frame by removing any columns we don't want anymore.
113 | 
114 | ```{r tidy-native-checklist}
115 | iceland_checklist <-
116 |   iceland_checklist %>%
117 |   select(fqId, name, authors, taxonomicStatus, plantae, fungi,
118 |          kingdom, phylum, family, genus, species)
119 | 
120 | iceland_checklist
121 | ```
122 | 
123 | ## 4. Build a checklist of endemic species
124 | 
125 | We can use our results from before to narrow the list down further to just species that are endemic to Iceland.
126 | 
127 | ```{r filter-endemic}
128 | check_endemic <- function(dist, country="Iceland") {
129 |   native_dist <- dist$natives[[1]]
130 |   
131 |   native <- country %in% native_dist$name
132 |   endemic <- length(native_dist$name) == 1
133 |   
134 |   native & endemic
135 | }
136 | 
137 | iceland_endemics <- map_dfr(iceland_records, tidy)
138 | 
139 | iceland_endemics <-
140 |   iceland_endemics %>%
141 |   rowwise() %>%
142 |   mutate(is_endemic=check_endemic(distribution)) %>%
143 |   ungroup() %>%
144 |   filter(is_endemic) %>%
145 |   select(fqId, name, authors, taxonomicStatus, plantae, fungi,
146 |          kingdom, phylum, family, genus, species)
147 | ```
148 | 
149 | How does the number of species in each list compare?
150 | 
151 | ```{r compare-checklist-lengths}
152 | paste("native species: ", nrow(iceland_checklist))
153 | paste("endemic species: ", nrow(iceland_endemics))
154 | ```
155 | 


--------------------------------------------------------------------------------
/vignettes/ToL.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: "ToL"
  3 | output: rmarkdown::html_vignette
  4 | vignette: >
  5 |   %\VignetteIndexEntry{ToL}
  6 |   %\VignetteEngine{knitr::rmarkdown}
  7 |   %\VignetteEncoding{UTF-8}
  8 | ---
  9 | 
 10 | ```{r, include = FALSE}
 11 | knitr::opts_chunk$set(
 12 |   collapse = TRUE,
 13 |   comment = "#>"
 14 | )
 15 | ```
 16 | 
 17 | ```{r setup, warning=FALSE, message=FALSE}
 18 | library(kewr)
 19 | library(dplyr)
 20 | library(tidyr)
 21 | ```
 22 | 
 23 | 
 24 | The [Tree of Life](https://treeoflife.kew.org/) is a database of specimens sequenced as part of Kew's efforts to build a comprehensive evolutionary tree of life for flowering plants.
 25 | 
 26 | This package accesses data from the Tree of Life Explorer, an output of the Plant and Fungal Trees of Life Project (PAFTOL). The data in the Tree of Life is generated by target sequence capture using the universal Angiosperm353 probe set.
 27 | 
 28 | The Tree of Life contains information about specimens that have been sequenced and genes recovered in the process. It lets you download sequence data for the specimens, as well as alignments and trees for the genes.
 29 | 
 30 | ## Viewing the Tree of Life
 31 | 
 32 | The [Tree of Life Explorer](https://treeoflife.kew.org/tree-of-life) lets users view the tree of life constructed from the current dataset of samples.
 33 | 
 34 | You can view it using `kewr` by loading it in:
 35 | 
 36 | ```{r}
 37 | tree <- load_tol()
 38 | tree
 39 | ```
 40 | 
 41 | This reads it as a single string, so you need to use other packages to parse it and view it (e.g., [ape](http://ape-package.ird.fr/)).
 42 | 
 43 | ## Searching ToL for specimens
 44 | 
 45 | The Tree of Life contains information about the specimens that have been sequenced to construct the tree. The long-term aim is to sample at least one species from every flowering plant genus. This means that, typically, there will be one specimen per species.
 46 | 
 47 | You can search this information using the `search_tol` function. There is no filtering or keyword-search functionality, so queries are just the name of an order/family/genus/species. For example, to get all specimens for the genus *Myrcia*:
 48 | 
 49 | ```{r}
 50 | specimens <- search_tol("Myrcia")
 51 | specimens
 52 | ```
 53 | 
 54 | This searching works by exact matching, and the taxonomy follows [WCVP](https://wcvp.science.kew.org/) so only accepted names will work. For example, if we misspell *Myrcia* we get nothing:
 55 | 
 56 | ```{r}
 57 | search_tol("Mercya")
 58 | ```
 59 | 
 60 | And if we search for an outdated synonym we get nothing:
 61 | 
 62 | ```{r}
 63 | search_tol("Gomidesia")
 64 | ```
 65 | 
 66 | But searching using higher taxonomy will work:
 67 | 
 68 | ```{r}
 69 | specimens <- search_tol("Myrtaceae")
 70 | specimens
 71 | ```
 72 | 
 73 | To get all these results, we can either increase the limit in the search function:
 74 | 
 75 | ```{r}
 76 | myrts_all <- search_tol("Myrtaceae", limit=500)
 77 | myrts_all
 78 | ```
 79 | Or do paged searching:
 80 | 
 81 | ```{r}
 82 | myrts1 <- search_tol("Myrtaceae")
 83 | myrts2 <- request_next(myrts1)
 84 | myrts2
 85 | ```
 86 | 
 87 | And we can tidy our results into a dataframe:
 88 | 
 89 | ```{r}
 90 | tidied <- tidy(myrts_all)
 91 | tidied
 92 | ```
 93 | 
 94 | Some information is nested inside the tidied dataframe, but we can get to it by unnesting:
 95 | 
 96 | ```{r}
 97 | tidied %>%
 98 |   select(id, raw_reads, taxonomy) %>%
 99 |   unnest(col=c(taxonomy, raw_reads), names_sep="_")
100 | ```
101 | 
102 | ## Getting gene information
103 | 
104 | The Tree of Life also contains information about the genes captured during sequencing. These can be accessed using the `search_tol` function:
105 | 
106 | ```{r}
107 | genes_all <- search_tol(genes=TRUE, limit=500)
108 | tidy(genes_all)
109 | ```
110 | 
111 | But they cannot currently be queried, so the best bet is just to grab all of them.
112 | 
113 | ## Looking up a record
114 | 
115 | Information about a single specimen or gene can be looked up using their ID:
116 | 
117 | ```{r}
118 | specimen <- lookup_tol("2660")
119 | specimen
120 | ```
121 | ```{r}
122 | gene <- lookup_tol("51", type="gene")
123 | gene
124 | ```
125 | ## Loading data
126 | 
127 | Records returned by `search_tol` and `lookup_tol` contain links to data files on an SFTP server. You can load these into R using the `load_tol` function. As you saw at the top of this vignette, if you don't provide any URL to `load_tol`, it will load the whole Tree of Life tree file.
128 | 
129 | To load a sequence file for a particular specimen:
130 | 
131 | ```{r}
132 | load_tol(specimen$fasta_file_url)
133 | ```
134 | 
135 | To load a sequence file for a gene:
136 | ```{r}
137 | load_tol(gene$fasta_file_url)
138 | ```
139 | 
140 | Or the alignment file:
141 | ```{r}
142 | load_tol(gene$alignment_file_url)
143 | ```
144 | 
145 | Or the gene tree:
146 | ```{r}
147 | load_tol(gene$tree_file_url)
148 | ```
149 | 
150 | All files are returned as strings, so you will need to parse them to use them downstream.
151 | 
152 | If you want to download these files directly, you can use the `download_tol` function.
153 | 


--------------------------------------------------------------------------------
/README.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | output: github_document
  3 | ---
  4 | 
  5 | <!-- README.md is generated from README.Rmd. Please edit that file -->
  6 | 
  7 | ```{r, include = FALSE}
  8 | knitr::opts_chunk$set(
  9 |   collapse = TRUE,
 10 |   comment = "#>",
 11 |   fig.path = "man/figures/README-",
 12 |   out.width = "100%"
 13 | )
 14 | ```
 15 | 
 16 | # kewr
 17 | 
 18 | <!-- badges: start -->
 19 | [![R build status](https://github.com/barnabywalker/kewr/workflows/R-CMD-check/badge.svg)](https://github.com/barnabywalker/kewr/actions)
 20 | <!-- badges: end -->
 21 | 
 22 | An R package to access data from RBG Kew's APIs.
 23 | 
 24 | ## Overview
 25 | 
 26 | kewr is meant to make accessing data from RBG Kew easier and to provide a consistent interface to their public APIs.
 27 | 
 28 | This package should cover:
 29 | 
 30 | - [x] [World Checklist of Vascular Plants](https://wcvp.science.kew.org/)
 31 | - [x] [Plants of the World Online](http://powo.science.kew.org/)
 32 | - [x] [International Plant Names Index](https://www.ipni.org/)
 33 | - [x] [Kew Names Matching Service](http://namematch.science.kew.org/)
 34 | - [x] [Kew's Tree of Life](https://treeoflife.kew.org)
 35 | - [x] [Kew Reconciliation Service](http://data1.kew.org/reconciliation/about/IpniName)
 36 | 
 37 | New sources will be added as they come up.
 38 | 
 39 | ## Installation
 40 | 
 41 | kewr is not on CRAN yet but you can install the latest development version from GitHub:
 42 | 
 43 | ``` r
 44 | # install.packages("devtools")
 45 | devtools::install_github("barnabywalker/kewr")
 46 | ```
 47 | 
 48 | ## Usage
 49 | 
 50 | Functions in this package all start with a prefix specifying what action you want to perform and a suffix referring to the resource.
 51 | 
 52 | Four of the resources (POWO, WCVP, IPNI, and ToL) are databases storing flora, taxonomic, nomenclatural, or genetic information. These four resources all have a `search_*` and a `lookup_*` function.
 53 | 
 54 | ### Retrieving records
 55 | 
 56 | The `lookup_` functions can be used to retrieve a particular record by its unique IPNI ID:
 57 | 
 58 | ``` r
 59 | lookup_powo("320035-2")
 60 | lookup_wcvp("320035-2")
 61 | lookup_ipni("320035-2")
 62 | ```
 63 | 
 64 | IPNI contains records for authors and publications, which can also be retrieved using the `lookup_ipni` function:
 65 | 
 66 | ``` r
 67 | lookup_ipni("20885-1", type="author")
 68 | lookup_ipni("987-2", type="publication")
 69 | ```
 70 | 
 71 | The ToL uses its own ID system. These IDs can be found by first searching the database.
 72 | 
 73 | ``` r
 74 | lookup_tol("2717")
 75 | ```
 76 | 
 77 | ### Searching databases
 78 | 
 79 | All four of these databases can be searched as well:
 80 | 
 81 | ``` r
 82 | search_powo("Poa annua")
 83 | search_wcvp("Poa annua")
 84 | search_ipni("Poa annua")
 85 | search_tol("Poa annua")
 86 | ```
 87 | And all, except the ToL, use filters and keywords for more advanced searches:
 88 | 
 89 | ``` r
 90 | search_powo(list(genus="Poa", distribution="Madagascar"), 
 91 |             filters=c("accepted", "species"))
 92 | search_wcvp(list(genus="Poa"), filters=c("accepted", "species"))
 93 | search_ipni(list(genus="Poa", published=1920),
 94 |             filters=c("species"))
 95 | ```
 96 | 
 97 | The number of search results returned are determined by the `limit` keyword:
 98 | 
 99 | ```r
100 | search_powo(list(genus="Poa"), limit=20)
101 | search_wcvp(list(genus="Poa"), limit=20)
102 | search_ipni(list(genus="Poa"), limit=20)
103 | search_tol("Poa", limit=20)
104 | ```
105 | 
106 | The next page for a set of search results can be requested using the `request_next` function:
107 | 
108 | ```r
109 | results <- search_powo(list(genus="Poa"))
110 | request_next(results)
111 | ```
112 | 
113 | ### Loading data from ToL
114 | 
115 | Tree and gene data can be loaded directly from ToL into R.
116 | 
117 | For instance, you can load the whole Tree of Life.
118 | ``` r
119 | load_tol()
120 | ```
121 | 
122 | Or a gene tree for a particular gene.
123 | ``` r
124 | gene_info <- lookup_tol("51", type="gene")
125 | load_tol(gene_info$tree_file_url)
126 | ```
127 | 
128 | Or a FASTA file for a specimen.
129 | ``` r
130 | specimen_info <- lookup_tol("1296")
131 | load_tol(specimen_info$fasta_file_url)
132 | ```
133 | 
134 | ### Downloading from the ToL
135 | 
136 | The corresponding files can also be downloaded for use later or in other programmes.
137 | 
138 | ``` r
139 | specimen_info <- lookup_tol("1296")
140 | download_tol(specimen_info$fasta_file_url)
141 | ```
142 | 
143 | ### Downloading the WCVP
144 | 
145 | The whole of WCVP can be downloaded to a directory using:
146 | 
147 | ``` r
148 | download_wcvp()
149 | ```
150 | 
151 | ### Matching names
152 | 
153 | The KNMS resource is only used for matching names to records in POWO/WCVP:
154 | 
155 | ```r
156 | match_knms(c("Poa annua", "Magnolia grandifolia", "Bulbophyllum sp."))
157 | ```
158 | 
159 | Single names can also be matched to IPNI using the KRS resource.
160 | 
161 | ``` r
162 | match_krs("Poa annua")
163 | ```
164 | 
165 | KRS is slower for matching many names, as a request needs to be made for each one.
166 | But it has the advantage of allowing more complex matching:
167 | 
168 | ```r
169 | match_krs(list(genus="Solanum", species="sanchez-vegae", author="S.Knapp"))
170 | ```
171 | 
172 | ### Tidying results
173 | 
174 | Each function in this package returns an object that stores the original response as well as the content of the response parsed into a list. This is to give the user as much flexibility as possible and to make debugging things a bit easier.
175 | 
176 | But this can be hard to use, so all the results objects can be tidied as a `tibble`:
177 | 
178 | ``` r
179 | results <- search_powo("Poa annua")
180 | tidy(results)
181 | ```
182 | 
183 | ## Citing
184 | 
185 | You can get information about how to cite `kewr` by using:
186 | 
187 | ```r
188 | citation("kewr")
189 | ```
190 | 
191 | You can also get the citation to use for each data service using the different results objects:
192 | 
193 | ```r
194 | r <- search_wcvp("Poa")
195 | kew_citation(r)
196 | ```
197 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | 
  2 | <!-- README.md is generated from README.Rmd. Please edit that file -->
  3 | 
  4 | # kewr
  5 | 
  6 | <!-- badges: start -->
  7 | 
  8 | [![R build
  9 | status](https://github.com/barnabywalker/kewr/workflows/R-CMD-check/badge.svg)](https://github.com/barnabywalker/kewr/actions)
 10 | <!-- badges: end -->
 11 | 
 12 | An R package to access data from RBG Kew’s APIs.
 13 | 
 14 | ## Overview
 15 | 
 16 | kewr is meant to make accessing data from RBG Kew easier and to
 17 | provide a consistent interface to their public APIs.
 18 | 
 19 | This package should cover:
 20 | 
 21 | -   [x] [World Checklist of Vascular
 22 |     Plants](https://wcvp.science.kew.org/)
 23 | -   [x] [Plants of the World Online](http://powo.science.kew.org/)
 24 | -   [x] [International Plant Names Index](https://www.ipni.org/)
 25 | -   [x] [Kew Names Matching Service](http://namematch.science.kew.org/)
 26 | -   [x] [Kew’s Tree of Life](https://treeoflife.kew.org)
 27 | -   [x] [Kew Reconciliation
 28 |     Service](http://data1.kew.org/reconciliation/about/IpniName)
 29 | 
 30 | New sources will be added as they come up.
 31 | 
 32 | ## Installation
 33 | 
 34 | kewr is not on CRAN yet but you can install the latest development
 35 | version from GitHub:
 36 | 
 37 | ``` r
 38 | # install.packages("devtools")
 39 | devtools::install_github("barnabywalker/kewr")
 40 | ```
 41 | 
 42 | ## Usage
 43 | 
 44 | Functions in this package all start with a prefix specifying what action
 45 | you want to perform and a suffix referring to the resource.
 46 | 
 47 | Four of the resources (POWO, WCVP, IPNI, and ToL) are databases storing
 48 | flora, taxonomic, nomenclatural, or genetic information. These four
 49 | resources all have a `search_*` and a `lookup_*` function.
 50 | 
 51 | ### Retrieving records
 52 | 
 53 | The `lookup_` functions can be used to retrieve a particular record by
 54 | its unique IPNI ID:
 55 | 
 56 | ``` r
 57 | lookup_powo("320035-2")
 58 | lookup_wcvp("320035-2")
 59 | lookup_ipni("320035-2")
 60 | ```
 61 | 
 62 | IPNI contains records for authors and publications, which can also be
 63 | retrieved using the `lookup_ipni` function:
 64 | 
 65 | ``` r
 66 | lookup_ipni("20885-1", type="author")
 67 | lookup_ipni("987-2", type="publication")
 68 | ```
 69 | 
 70 | The ToL uses its own ID system. These IDs can be found by first
 71 | searching the database.
 72 | 
 73 | ``` r
 74 | lookup_tol("2717")
 75 | ```
 76 | 
 77 | ### Searching databases
 78 | 
 79 | All four of these databases can be searched as well:
 80 | 
 81 | ``` r
 82 | search_powo("Poa annua")
 83 | search_wcvp("Poa annua")
 84 | search_ipni("Poa annua")
 85 | search_tol("Poa annua")
 86 | ```
 87 | 
 88 | And all, except the ToL, use filters and keywords for more advanced
 89 | searches:
 90 | 
 91 | ``` r
 92 | search_powo(list(genus="Poa", distribution="Madagascar"), 
 93 |             filters=c("accepted", "species"))
 94 | search_wcvp(list(genus="Poa"), filters=c("accepted", "species"))
 95 | search_ipni(list(genus="Poa", published=1920),
 96 |             filters=c("species"))
 97 | ```
 98 | 
 99 | The number of search results returned is determined by the `limit`
100 | keyword:
101 | 
102 | ``` r
103 | search_powo(list(genus="Poa"), limit=20)
104 | search_wcvp(list(genus="Poa"), limit=20)
105 | search_ipni(list(genus="Poa"), limit=20)
106 | search_tol("Poa", limit=20)
107 | ```
108 | 
109 | The next page for a set of search results can be requested using the
110 | `request_next` function:
111 | 
112 | ``` r
113 | results <- search_powo(list(genus="Poa"))
114 | request_next(results)
115 | ```
116 | 
117 | ### Loading data from ToL
118 | 
119 | Tree and gene data can be loaded directly from ToL into R.
120 | 
121 | For instance, you can load the whole Tree of Life.
122 | 
123 | ``` r
124 | load_tol()
125 | ```
126 | 
127 | Or a gene tree for a particular gene.
128 | 
129 | ``` r
130 | gene_info <- lookup_tol("51", type="gene")
131 | load_tol(gene_info$tree_file_url)
132 | ```
133 | 
134 | Or a FASTA file for a specimen.
135 | 
136 | ``` r
137 | specimen_info <- lookup_tol("1296")
138 | load_tol(specimen_info$fasta_file_url)
139 | ```
140 | 
141 | ### Downloading from the ToL
142 | 
143 | The corresponding files can also be downloaded for use later or in other
144 | programmes.
145 | 
146 | ``` r
147 | specimen_info <- lookup_tol("1296")
148 | download_tol(specimen_info$fasta_file_url)
149 | ```
150 | 
151 | ### Downloading the WCVP
152 | 
153 | The whole of WCVP can be downloaded to a directory using:
154 | 
155 | ``` r
156 | download_wcvp()
157 | ```
158 | 
159 | ### Matching names
160 | 
161 | The KNMS resource is only used for matching names to records in
162 | POWO/WCVP:
163 | 
164 | ``` r
165 | match_knms(c("Poa annua", "Magnolia grandifolia", "Bulbophyllum sp."))
166 | ```
167 | 
168 | Single names can also be matched to IPNI using the KRS resources.
169 | 
170 | ``` r
171 | match_krs("Poa annua")
172 | ```
173 | 
174 | KRS is slower for matching many names, as a request needs to be made for
175 | each one. But it has the advantage of allowing more complex matching:
176 | 
177 | ``` r
178 | match_krs(list(genus="Solanum", species="sanchez-vegae", author="S.Knapp"))
179 | ```
180 | 
181 | ### Tidying results
182 | 
183 | Each function in this package returns an object that stores the original
184 | response as well as the content of the response parsed into a list. This
185 | is to give the user as much flexibility as possible and to make
186 | debugging things a bit easier.
187 | 
188 | But this can be hard to use, so all the results objects can be tidied as
189 | a `tibble`:
190 | 
191 | ``` r
192 | results <- search_powo("Poa annua")
193 | tidy(results)
194 | ```
195 | 
196 | ## Citing
197 | 
198 | You can get information about how to cite `kewr` by using:
199 | 
200 | ``` r
201 | citation("kewr")
202 | ```
203 | 
204 | You can also get the citation to use for each data service using the
205 | different results objects:
206 | 
207 |     r <- search_wcvp("Poa")
208 |     kew_citation(r)
209 | 


--------------------------------------------------------------------------------
/R/ipni.R:
--------------------------------------------------------------------------------
  1 | #' Search IPNI.
  2 | #'
  3 | #' Query the International Plant Names Index for nomenclatural information.
  4 | #'
  5 | #' The [International Plant Names Index (IPNI)](https://www.ipni.org/)
  6 | #' is a service that provides nomenclatural information for vascular plant names.
  7 | #'
  8 | #' The search API allows users to query the database for plant names,
  9 | #' as well as authors and publications. There may be limited support for
 10 | #' some sort of fuzzy matching.
 11 | #'
 12 | #' There is some support for querying using keyword arguments. Documentation for
 13 | #' the API is currently available in the [`pykew` package](https://github.com/RBGKew/pykew/blob/master/pykew/ipni_terms.py),
 14 | #' so keywords have been copied across from there. There are sets of keywords
 15 | #' relating to plants, authors, and publications. A full list of keywords can be viewed
 16 | #' using the `get_keywords` function.
 17 | #'
 18 | #' The API will return nomenclatural information (publication date, nomenclatural status, author, etc.)
 19 | #' of all names matching the query. These results can be limited, for example to only family names,
 20 | #' using filters. Use the `get_filters` function to get a list of all implemented filters.
 21 | #'
 22 | #' @param query The string to query IPNI with. If using keywords, the query
 23 | #'  must be formatted as a list.
 24 | #' @param filters Filter to apply to search results.
 25 | #' @param cursor A cursor returned by a previous search.
 26 | #'  If used, the query and filter must be exactly the same.
 27 | #' @param limit The maximum number of records to return. This has a maximum of 1000.
 28 | #' @param .wait Time to wait before making a request, to help
 29 | #'  rate limiting.
 30 | #'
 31 | #' @return
 32 | #' Returns an object of class `ipni_search` that is a simple
 33 | #' structure with slots for:
 34 | #'
 35 | #'  * `total`: the total number of results held in IPNI for the query
 36 | #'  * `pages`: the total number of results pages for the query.
 37 | #'  * `limit`: the maximum number of results requested from the API, per page.
 38 | #'  * `cursor`: a cursor to retrieve the next page of results from the API.
 39 | #'  * `results`: the query results parsed into a list.
 40 | #'  * `query`: the query string submitted to the API.
 41 | #'  * `response`: the [httr response object][httr::response].
 42 | #'
 43 | #' @examples
 44 | #' # search for all names containing Poa annua
 45 | #' results <- search_ipni("Poa annua")
 46 | #'
 47 | #' # tidy search results in a table
 48 | #' tidy(results)
 49 | #'
 50 | #' # extract author team information for the search results
 51 | #' results_tbl <- tidy(results)
 52 | #' tidyr::unnest(results_tbl, cols=c(authorTeam), names_sep="_")
 53 | #'
 54 | #' # filter results to only species names
 55 | #' species_results <- search_ipni("Poa annua", filters="species")
 56 | #' tidy(species_results)
 57 | #'
 58 | #' # search for species from Mexico published in 1989
 59 | #' q <- list(published="1989", distribution="Mexico")
 60 | #' f <- "species"
 61 | #' results <- search_ipni(q, filters=f)
 62 | #' tidy(results)
 63 | #'
 64 | #' # search for an author by surname
 65 | #' author_results <- search_ipni(list(author_surname="Gardiner"))
 66 | #' tidy(author_results)
 67 | #'
 68 | #' @family IPNI functions
 69 | #' @seealso
 70 | #'  * [lookup_ipni()] to look up a name using an IPNI ID.
 71 | #'
 72 | #' @export
 73 | search_ipni <- function(query, filters=NULL, cursor="*", limit=50, .wait=0.1) {
 74 |   endpoint <- ipni_search_url_()
 75 | 
 76 |   # keep the query as supplied, so it can be stored in the result object
 77 |   submitted <- query
 78 | 
 79 |   # request parameters: the formatted query plus paging and filter settings
 80 |   params <- format_query_(submitted, "ipni")
 81 |   params$perPage <- limit
 82 |   params$cursor <- cursor
 83 |   params$f <- format_filters_(filters, "ipni")
 84 | 
 85 |   reply <- make_request_(endpoint, params, .wait=.wait)
 86 |   payload <- reply$content
 87 | 
 88 |   found <- list(
 89 |     total=payload$totalResults,
 90 |     pages=payload$totalPages,
 91 |     cursor=payload$cursor,
 92 |     limit=payload$perPage,
 93 |     results=payload$results,
 94 |     query=submitted,
 95 |     filters=filters,
 96 |     response=reply$response
 97 |   )
 98 |   class(found) <- c("ipni_search", "ipni")
 99 |   found
100 | }
101 | 
102 | #' Look up a name in IPNI.
103 | #'
104 | #' Request the record for a taxon, author, or publication name in IPNI,
105 | #' using the IPNI ID.
106 | #'
107 | #' The [International Plant Names Index (IPNI)](https://www.ipni.org/)
108 | #' is a service that provides nomenclatural information for vascular plant names.
109 | #'
110 | #' The name lookup API allows users to retrieve information for
111 | #' a specific taxon, author, or publication name using the unique IPNI ID. If this is not known,
112 | #' it can be found out using the [IPNI search API][kewr::search_ipni].
113 | #'
114 | #' @param id A string containing a valid IPNI ID.
115 | #' @param type The type of record to look up. Either `taxon`, `author`, or `publication`.
116 | #' @param .wait Time to wait before making a request, to help
117 | #'  rate limiting.
118 | #'
119 | #' @return An `ipni_taxon` object, which is a simple structure with fields
120 | #'   for each of the fields returned by the lookup API, as well as the [httr response object][httr::response].
121 | #'
122 | #' @examples
123 | #'
124 | #' # retrieve nomenclatural information for a taxon name
125 | #' name <- lookup_ipni("271445-2", "taxon")
126 | #' print(name)
127 | #'
128 | #' # tidy the results in a table
129 | #' tidy(name)
130 | #'
131 | #' # retrieve nomenclatural information for an author
132 | #' name <- lookup_ipni("20028192-1", type="author")
133 | #' tidy(name)
134 | #'
135 | #' # retrieve nomenclatural information for a publication
136 | #' name <- lookup_ipni("987-2", type="publication")
137 | #' tidy(name)
138 | #'
139 | #' @family IPNI functions
140 | #' @seealso
141 | #'  * [search_ipni()] to search IPNI using a taxon name.
142 | #'
143 | #' @export
144 | lookup_ipni <- function(id, type=c("taxon", "author", "publication"), .wait=0.1) {
145 |   type <- match.arg(type)
146 |   endpoint <- ipni_lookup_url_(id, type)
147 | 
148 |   reply <- make_request_(endpoint, query=NULL, .wait=.wait)
149 | 
150 |   # The parsed record is returned as-is (fields are not listed explicitly),
151 |   # with the raw response and the requested ID attached alongside them.
152 |   entry <- reply$content
153 |   entry$response <- reply$response
154 |   entry$queryId <- id
155 | 
156 |   # the first class is built from the record's `recordType` field
157 |   class(entry) <- c(paste0("ipni_", entry$recordType), "ipni")
158 | 
159 |   entry
160 | }
161 | 
162 | #' Make the IPNI search URL.
163 | #'
164 | #' @noRd
165 | ipni_search_url_ <- function() {
166 |   # the search endpoint sits directly under the IPNI base URL
167 |   endpoint <- paste0(get_url_("ipni"), "/search")
168 |   endpoint
169 | }
170 | 
171 | #' Make the IPNI name lookup URL.
172 | #'
173 | #' @param id A valid IPNI ID.
174 | #' @param type The type of name to lookup.
175 | #'
176 | #' @noRd
177 | #'
178 | #' @importFrom glue glue
179 | ipni_lookup_url_ <- function(id, type) {
180 |   # each record type maps onto a single-letter path segment;
181 |   # the names `base`, `type`, and `id` are kept because the
182 |   # `glue` template below interpolates them by name
183 |   type <- switch(type, taxon="n", author="a", publication="p")
184 |   base <- get_url_("ipni")
185 | 
186 |   endpoint <- glue("{base}/{type}/{id}")
187 |   endpoint
188 | }
189 | 


--------------------------------------------------------------------------------
/R/requests.R:
--------------------------------------------------------------------------------
  1 | #' Get the names of valid keywords for querying a resource.
  2 | #'
  3 | #' @param resource The resource being queried.
  4 | #'
  5 | #' @return A named character vector of keywords.
  6 | #'
  7 | #' @importFrom glue glue
  8 | #'
  9 | #' @noRd
 10 | get_keywords_ <- function(resource=c("wcvp", "powo", "ipni", "tol", "krs")) {
 11 |   resource <- match.arg(resource)
 12 | 
 13 |   # keyword-based search is not implemented for the Tree of Life
 14 |   if (resource == "tol") {
 15 |     stop(glue("Keyword-based search not implemented for resource: {resource}"))
 16 |   }
 17 | 
 18 |   # One named vector per resource: the element names are the keywords offered,
 19 |   # the values presumably the terms the resource's API expects (TODO confirm).
 20 |   keywords <- list(
 21 |     wcvp=c(family="family", genus="genus", species="species"),
 22 |     powo=c(
 23 |       # name
 24 |       name="name",
 25 |       common_name="common name",
 26 |       family="family",
 27 |       genus="genus",
 28 |       species="species",
 29 |       author="author",
 30 |       # characteristics
 31 |       characteristic="characteristic",
 32 |       summary="summary",
 33 |       appearance="appearance",
 34 |       flower="flower",
 35 |       fruit="fruit",
 36 |       leaf="leaf",
 37 |       inflorescence="inflorescence",
 38 |       seed="seed",
 39 |       cloning="cloning",
 40 |       use="use",
 41 |       # geography
 42 |       distribution="location"
 43 |     ),
 44 |     ipni=c(
 45 |       # name
 46 |       added="added",
 47 |       author="name author",
 48 |       basionym="basionym",
 49 |       basionym_author="basionym author",
 50 |       bibliographic_reference="bibliographic reference",
 51 |       citation_type="citation type",
 52 |       collection_number="collection number",
 53 |       collectors="collector team",
 54 |       distribution="distribution",
 55 |       family="family",
 56 |       full_name="full name",
 57 |       genus="genus",
 58 |       in_powo="in powo",
 59 |       infrafamily="infrafamily",
 60 |       infragenus="infragenus",
 61 |       infraspecies="infraspecies",
 62 |       modified="modified",
 63 |       name_status="name status",
 64 |       published="published",
 65 |       published_in="published in",
 66 |       publishing_author="publishing author",
 67 |       rank="rank",
 68 |       scientific_name="scientific name",
 69 |       species="species",
 70 |       species_author="species author",
 71 |       version="version",
 72 |       # author
 73 |       author_forename="author forename",
 74 |       author_full_name="author name",
 75 |       author_std_form="author std",
 76 |       author_surname="author surname",
 77 |       # publication
 78 |       pub_std_form="publication std",
 79 |       bph_number="bph number",
 80 |       pub_date="date",
 81 |       isbn="isbn",
 82 |       issn="issn",
 83 |       lc_number="lc number",
 84 |       preceded_by="preceded by",
 85 |       superceded_by="superceded by",
 86 |       title="publication title",
 87 |       tl2_author="tl2 author",
 88 |       tl2_number="tl2 number"
 89 |     ),
 90 |     krs=c(
 91 |       query="query",
 92 |       genus="epithet_1",
 93 |       species="epithet_2",
 94 |       infra="epithet_3",
 95 |       epithet_1="epithet_1",
 96 |       epithet_2="epithet_2",
 97 |       epithet_3="epithet_3",
 98 |       author="publishing_author",
 99 |       full_name="full_name",
100 |       basionym_author="basionym_author"
101 |     )
102 |   )
103 | 
104 |   keywords[[resource]]
105 | }
106 | 
107 | #' Get the names of valid filters for a resource.
108 | #'
109 | #' @param resource The resource being queried.
110 | #'
111 | #' @return A character vector of filter names.
112 | #'
113 | #' @importFrom glue glue
114 | #'
115 | #' @noRd
116 | get_filters_ <- function(resource=c("wcvp", "powo", "ipni", "tol")) {
117 |   resource <- match.arg(resource)
118 | 
119 |   # filters are not implemented for the Tree of Life
120 |   if (resource == "tol") {
121 |     stop(glue("Filters not implemented for resource: {resource}"))
122 |   }
123 | 
124 |   # The element names are the filters offered for each resource; the values
125 |   # are presumably the codes the resource's API expects (TODO confirm).
126 |   if (resource == "wcvp") {
127 |     filters <- c(accepted="accepted", families="family",
128 |                  genera="generic", species="specific",
129 |                  infraspecies="infraspecific")
130 |   } else if (resource == "powo") {
131 |     filters <- c(accepted="accepted_names", has_images="has_images",
132 |                  families="family_f", genera="genus_f", species="species_f",
133 |                  infraspecies="infraspecific_f")
134 |   } else {
135 |     # "ipni" is the only choice left once `match.arg` and the check above pass
136 |     filters <- c(families="f_familial", genera="f_generic",
137 |                  species="f_specific", infraspecies="f_infraspecific",
138 |                  infragenera="f_infrageneric",
139 |                  infrafamilies="f_infrafamilial")
140 |   }
141 | 
142 |   filters
143 | }
144 | 
145 | #' Get the base URL for a particular resource.
146 | #'
147 | #' @param resource Name of a Kew resource.
148 | #' @return The base URL for the requested resource.
149 | #'
150 | #' @noRd
151 | get_url_ <- function(resource=c("wcvp", "powo", "knms", "ipni", "tol", "krs")) {
152 |   resource <- match.arg(resource)
153 |   urls <- c(wcvp="https://wcvp.science.kew.org/api/v1",
154 |             powo="http://www.plantsoftheworldonline.org/api/2",
155 |             knms="http://namematch.science.kew.org/api/v2/powo/match",
156 |             ipni="https://www.ipni.org/api/1",
157 |             tol="https://treeoflife.kew.org/api",
158 |             krs="http://data1.kew.org/reconciliation/reconcile/IpniName")
159 |   # `match.arg` has already restricted `resource` to one of the names above
160 |   urls[[resource]]
161 | }
162 | 
163 | #' Build the user agent that identifies this package in requests.
164 | #'
165 | #' @return The value of [httr::user_agent()] for the package's GitHub URL.
166 | #' @noRd
167 | #' @importFrom httr user_agent
168 | get_user_agent_ <- function() {
169 |   user_agent("https://github.com/barnabywalker/kewr")
170 | }
171 | 
172 | #' Make a request to a Kew resource.
173 | #'
174 | #' @param url The URL for the resource API.
175 | #' @param query A list specifying a query.
176 | #' @param body A list specifying an optional body.
177 | #' @param json Whether to expect a json response or not, default TRUE.
178 | #' @param method The request method to make, e.g. 'GET' or 'POST'.
179 | #' @param .wait The time to wait before making the request,
180 | #'  to help with rate limiting.
181 | #' @param .retries The max number of times to try a request before throwing
182 | #'  an error.
183 | #'
184 | #' @return A list containing the returned response object and
185 | #'   the response content parsed into a list.
186 | #'
187 | #' @noRd
188 | #'
189 | #' @import httr
190 | #' @importFrom jsonlite fromJSON
191 | make_request_ <- function(url, query=NULL, body=FALSE, json=TRUE, method="GET", .wait=0.1, .retries=1) {
192 |   user_agent <- get_user_agent_()
193 |   # wait before making the request, to help with rate limiting
194 |   Sys.sleep(.wait)
195 | 
196 |   # `RETRY()` takes its attempt limit as `times`, so `.retries` is passed there
197 |   response <- RETRY(method, url, user_agent, query=query, body=body,
198 |                     times=.retries, encode="json", quiet=TRUE)
199 | 
200 |   if (http_error(response)) {
201 |     status <- http_status(response)
202 |     code <- status_code(response)
203 |     message <- status$message
204 |     stop(
205 |       glue("Request to '{url}' failed with code {code}: {message}"),
206 |       call.=FALSE
207 |     )
208 |   }
209 | 
210 |   # scalar condition, so `&&`; the content type only matters if json is expected
211 |   if (json && http_type(response) != "application/json") {
212 |     stop("API did not return json", call.=FALSE)
213 |   }
214 | 
215 |   parsed <- content(response, "text")
216 |   if (json) {
217 |     parsed <- fromJSON(parsed, simplifyVector=FALSE)
218 |   }
219 |   list(response=response, content=parsed)
220 | }
221 | 


--------------------------------------------------------------------------------
/R/powo.R:
--------------------------------------------------------------------------------
  1 | #' Search POWO.
  2 | #'
  3 | #' Query Plants of the World Online for taxon information.
  4 | #'
  5 | #' [Plants of the World Online (POWO)](http://www.plantsoftheworldonline.org/)
  6 | #' is a database of information on the world's flora. It curates information from
  7 | #' published floras and other sources of floristic information.
  8 | #'
  9 | #' The search API allows users to query the database using plant names,
 10 | #' geographic terms, and floristic characters. These can be queried using
 11 | #' keyword arguments. Use the `get_keywords` function for a list of all implemented keywords.
 12 | #'
 13 | #' The API returns taxonomic information as well as species descriptions and
 14 | #' image locations if available. These results can be limited, for example to accepted species,
 15 | #' using filters. Use the `get_filters` function to get a list of all implemented filters.
 16 | #'
 17 | #' Distributions in POWO are categorised using the [World Geographical Scheme for
 18 | #' Recording Plant Distributions (WGSRPD)](https://www.tdwg.org/standards/wgsrpd/).
 19 | #' Users can query POWO using distributions listed under WGSRPD levels 1 (continents),
 20 | #' 2 (regions), and 3 (botanical countries).
 21 | #'
 22 | #' @param query The string to query POWO with. If using keywords,
 23 | #'  the query must be formatted as a list.
 24 | #' @param filters Filter to apply to search results.
 25 | #'  Multiple filters must be supplied as a character vector.
 26 | #' @param cursor A cursor returned by a previous search.
 27 | #'  If used, the query and filter must be exactly the same.
 28 | #' @param limit The maximum number of records to return.
 29 | #' @param .wait Time to wait before making a request, to help
 30 | #'  rate limiting.
 31 | #'
 32 | #' @return
 33 | #' Returns an object of class `powo_search` that is a simple
 34 | #' structure with slots for:
 35 | #'
 36 | #'  * `total`: the total number of results held in POWO for the query
 37 | #'  * `pages`: the total number of results pages for the query.
 38 | #'  * `limit`: the maximum number of results requested from the API, per page.
 39 | #'  * `cursor`: a cursor to retrieve the next page of results from the API.
 40 | #'  * `results`: the query results parsed into a list.
 41 | #'  * `query`: the query string submitted to the API.
 42 | #'  * `response`: the [httr response object][httr::response].
 43 | #'
 44 | #' @examples
 45 | #' # search for all entries containing a genus name
 46 | #' search_powo("Myrcia")
 47 | #'
 48 | #' # search for all accepted species within a genus
 49 | #' search_powo("Myrcia", filters=c("species", "accepted"))
 50 | #'
 51 | #' # search for up to 100 species in a genus
 52 | #' search_powo("Poa", filters=c("species"), limit=100)
 53 | #'
 54 | #' # search for all names in a family
 55 | #' search_powo(list(family="Myrtaceae"))
 56 | #'
 57 | #' # search for all accepted species with blue flowers
 58 | #' search_powo(list(flower="blue"), filters=c("accepted", "species"))
 59 | #'
 60 | #' # search for all accepted genera in Mexico
 61 | #' search_powo(list(distribution="Mexico"), filters=c("accepted", "genera"))
 62 | #'
 63 | #' # search for a species name and print the results
 64 | #' r <- search_powo("Myrcia guianensis", filters=c("species"))
 65 | #' print(r)
 66 | #'
 67 | #' # simplify search results to a `tibble`
 68 | #' r <- search_powo("Poa", filters=c("species"))
 69 | #' tidy(r)
 70 | #'
 71 | #' @family POWO functions
 72 | #' @seealso
 73 | #'  * [lookup_powo()] to look up a taxon in POWO using the IPNI ID.
 74 | #'
 75 | #' @export
 76 | search_powo <- function(query, filters=NULL, cursor=NULL, limit=50, .wait=0.2) {
 77 |   endpoint <- powo_search_url_()
 78 | 
 79 |   # keep the query as supplied, so it can be stored in the result object
 80 |   submitted <- query
 81 | 
 82 |   # request parameters: the formatted query plus paging and filter settings
 83 |   params <- format_query_(submitted, "powo")
 84 |   params$perPage <- limit
 85 |   params$cursor <- cursor
 86 |   params$f <- format_filters_(filters, "powo")
 87 | 
 88 |   reply <- make_request_(endpoint, params, .wait=.wait)
 89 |   payload <- reply$content
 90 | 
 91 |   found <- list(
 92 |     total=payload$totalResults,
 93 |     pages=payload$totalPages,
 94 |     limit=payload$perPage,
 95 |     cursor=payload$cursor,
 96 |     results=payload$results,
 97 |     query=submitted,
 98 |     filters=filters,
 99 |     response=reply$response
100 |   )
101 |   class(found) <- c("powo_search", "powo")
102 |   found
103 | }
104 | 
105 | #' Look up a taxon in POWO.
106 | #'
107 | #' Request the record for a taxon in Plants of the World Online (POWO)
108 | #' using the IPNI ID.
109 | #'
110 | #' [Plants of the World Online (POWO)](http://www.plantsoftheworldonline.org/)
111 | #' is a database of information on the world's flora. It curates information from
112 | #' published floras and other sources of floristic information.
113 | #'
114 | #' The taxon lookup API allows users to retrieve information about
115 | #' a specific taxon name using the unique IPNI ID. If this is not known,
116 | #' it can be found out using the [POWO search API][kewr::search_powo].
117 | #'
118 | #' @param taxonid A string containing a valid IPNI ID.
119 | #' @param distribution Include distribution in results (default `FALSE`).
120 | #' @param .wait Time to wait before making a request, to help
121 | #'  rate limiting.
122 | #'
123 | #' @return A `powo_taxon` object, which is a simple structure with fields
124 | #'   for each of the fields returned by the lookup API, as well as the [httr response object][httr::response].
125 | #'
126 | #' @examples
127 | #' # retrieve information for a taxon name
128 | #' lookup_powo("271445-2")
129 | #'
130 | #' # print a summary of the returned information
131 | #' r <- lookup_powo("271445-2")
132 | #' print(r)
133 | #'
134 | #' # tidy returned record into a tibble
135 | #' r <- lookup_powo("271445-2")
136 | #' tidy(r)
137 | #'
138 | #' # tidy the returned list of synonyms into a tibble
139 | #' r <- lookup_wcvp("60447743-2")
140 | #' tidied <- tidy(r)
141 | #' tidyr::unnest(tidied, cols=synonyms, names_sep="_")
142 | #'
143 | #' # tidy the returned list of children into a tibble
144 | #' r <- lookup_wcvp("30000055-2")
145 | #' tidied <- tidy(r)
146 | #' tidyr::unnest(tidied, cols=children, names_sep="_")
147 | #'
148 | #' @family POWO functions
149 | #' @seealso
150 | #'  * [search_powo()] to search POWO using a taxon name.
151 | #'
152 | #' @export
153 | lookup_powo <- function(taxonid, distribution=FALSE, .wait=0.2) {
154 |   endpoint <- powo_taxon_url_(taxonid)
155 | 
156 |   # the `fields` parameter is only sent when the distribution is requested;
157 |   # otherwise the request is made without any query parameters
158 |   params <- if (distribution) list(fields="distribution") else NULL
159 | 
160 |   reply <- make_request_(endpoint, query=params, .wait=.wait)
161 | 
162 |   # The parsed record is returned as-is (fields are not listed explicitly),
163 |   # with the raw response and the requested ID attached alongside them.
164 |   taxon <- reply$content
165 |   taxon$response <- reply$response
166 |   taxon$queryId <- taxonid
167 | 
168 |   # S3 classes of the returned record
169 |   class(taxon) <- c("powo_taxon", "powo")
170 | 
171 |   taxon
172 | }
173 | 
174 | #' @noRd
175 | powo_search_url_ <- function() {
176 |   # the search endpoint sits directly under the POWO base URL
177 |   endpoint <- paste0(get_url_("powo"), "/search")
178 |   endpoint
179 | }
180 | 
181 | #' @noRd
182 | powo_taxon_url_ <- function(taxonid) {
183 |   base <- get_url_("powo")  # interpolated by name in the template below
184 |   endpoint <- glue("{base}/taxon/urn:lsid:ipni.org:names:{taxonid}")
185 |   endpoint
186 | }
187 | 


--------------------------------------------------------------------------------
/R/kew_citation.R:
--------------------------------------------------------------------------------
  1 | #' Get citation for Kew data resource.
  2 | #'
  3 | #' Given the result of a query to a Kew data resource, get the appropriate
  4 | #' citation.
  5 | #'
  6 | #' @param x Result of a call to [search_powo()], [lookup_powo()], [search_wcvp()],
  7 | #'  [lookup_wcvp()], [search_ipni()], [lookup_ipni()], [search_tol()], [lookup_tol()], [load_tol()],
  8 | #'  [match_knms()], or [match_krs()]
  9 | #'
 10 | #' @return A citation object with a print method for nice display.
 11 | #'
 12 | #' @examples
 13 | #' r <- search_powo(list(characteristic="poison"))
 14 | #' kew_citation(r)
 15 | #'
 16 | #' r <- lookup_powo("320035-2")
 17 | #' kew_citation(r)
 18 | #'
 19 | #' r <- search_wcvp(list(genus="Poa"))
 20 | #' kew_citation(r)
 21 | #'
 22 | #' r <- lookup_wcvp("320035-2")
 23 | #' kew_citation(r)
 24 | #'
 25 | #' r <- search_ipni(list(publishing_author="L."))
 26 | #' kew_citation(r)
 27 | #'
 28 | #' r <- lookup_ipni("12653-1")
 29 | #' kew_citation(r)
 30 | #'
 31 | #' r <- search_tol("Poa")
 32 | #' kew_citation(r)
 33 | #'
 34 | #' r <- lookup_tol(2717)
 35 | #' kew_citation(r)
 36 | #'
 37 | #' tree <- load_tol()
 38 | #' kew_citation(tree)
 39 | #'
 40 | #' match <- match_knms("Poa annua")
 41 | #' kew_citation(match)
 42 | #'
 43 | #' match <- match_krs("Poa annua")
 44 | #' kew_citation(match)
 45 | #'
 46 | #'
 47 | #' @export
 48 | kew_citation <- function(x) {
 49 |   UseMethod("kew_citation")  # S3 generic: dispatches on the class of `x`
 50 | }
 51 | 
 52 | 
 53 | #' @importFrom glue glue
 54 | #'
 55 | #' @export
 56 | kew_citation.powo <- function(x) {
 57 |   info <- list(
 58 |     title="Plants of the World Online.",
 59 |     author="POWO",
 60 |     year="2021",
 61 |     note="Facilitated by the Royal Botanic Gardens, Kew.",
 62 |     accessed=Sys.Date(),
 63 |     url="http://www.plantsoftheworldonline.org/"
 64 |   )
 65 |   # plain-text form of the citation; the templates refer to `info` by name
 66 |   citation_text <- glue(
 67 |     "{info$author} ({info$year}).",
 68 |     "{info$title} {info$note} {info$url}.",
 69 |     "Accessed {info$accessed}",
 70 |     .sep=" "
 71 |   )
 72 | 
 73 |   entry <- bibentry(
 74 |     bibtype="Manual",
 75 |     textVersion=citation_text,
 76 |     header="To cite POWO in publication please use:",
 77 |     other=info
 78 |   )
 79 |   class(entry) <- c("citation", "bibentry")
 80 |   entry
 81 | }
 82 | 
 83 | 
 84 | #' @importFrom glue glue
 85 | #' @importFrom  utils bibentry
 86 | #'
 87 | #' @export
 88 | kew_citation.wcvp <- function(x) {
 89 |   # first reference: the checklist itself
 90 |   info <- list(
 91 |     title="World Checklist of Vascular Plants",
 92 |     author="WCVP",
 93 |     year="2021",
 94 |     version="2.0",
 95 |     note="Facilitated by the Royal Botanic Gardens, Kew.",
 96 |     accessed=Sys.Date(),
 97 |     url="http://wcvp.science.kew.org/"
 98 |   )
 99 | 
100 |   checklist_text <- glue(
101 |     "{info$author} ({info$year}).",
102 |     "{info$title}, version {info$version}.",
103 |     "{info$note} {info$url}.",
104 |     "Accessed {info$accessed}",
105 |     .sep=" "
106 |   )
107 |   checklist_ref <- bibentry(bibtype="Manual", textVersion=checklist_text, other=info)
108 | 
109 |   # second reference: the Scientific Data article; `info` is reused
110 |   # because the `glue` templates refer to it by name
111 |   info <- list(
112 |     title="The World Checklist of Vascular Plants, a continuously updated resource for exploring global plant diversity",
113 |     author="Govaerts, R., Nic Lughadha, E., Black, N., Turner, R. and Paton, A.",
114 |     year="2021",
115 |     journal="Scientific Data",
116 |     volume=8,
117 |     number=215,
118 |     url="https://doi.org/10.1038/s41597-021-00997-6"
119 |   )
120 | 
121 |   paper_text <- glue(
122 |     "{info$author} ({info$year}).",
123 |     "{info$title}. {info$journal},",
124 |     "{info$volume}({info$number}), {info$url}",
125 |     .sep=" "
126 |   )
127 |   paper_ref <- bibentry(bibtype="Article", textVersion=paper_text, other=info)
128 | 
129 |   # combine both entries under a single header
130 |   header <- "To cite WCVP in publication please use:"
131 |   both <- c(checklist_ref, paper_ref)
132 |   attr(both, "mheader") <- paste(header, collapse="\n")
133 |   class(both) <- c("citation", "bibentry")
134 | 
135 |   both
136 | }
137 | 
#' @importFrom glue glue
#' @importFrom utils bibentry
#'
#' @export
kew_citation.ipni <- function(x) {
  # Fields for the citation; the glue template below reads them from `info`
  # and the same list is passed on as the bibentry fields.
  info <- list(
    title="International Plant Names Index",
    author="IPNI",
    year="2021",
    note="The Royal Botanic Gardens, Kew, Harvard University Herbaria & Libraries and Australian National Botanic Gardens",
    accessed=Sys.Date(),
    url="https://ipni.org/"
  )

  # plain-text form of the citation
  citation_text <- glue("{info$author} ({info$year}).",
                        "{info$title}. {info$note}. {info$url}.",
                        "Accessed {info$accessed}",
                        .sep=" ")

  entry <- bibentry(
    bibtype="Manual",
    textVersion=citation_text,
    header="To cite IPNI in publication please use:",
    other=info
  )

  class(entry) <- c("citation", "bibentry")
  entry
}
168 | 
#' @importFrom glue glue
#' @importFrom utils bibentry
#'
#' @export
kew_citation.tol <- function(x) {
  # `x` is only used for S3 dispatch; the citation does not depend on it.
  header <- "To cite ToL in publication please use:"

  # Fields of the article describing the Tree of Life platform. No `note`
  # field is set: the institutional note used for the IPNI citation does
  # not belong to this article and would otherwise end up in the entry.
  info <- list(
    title="A Comprehensive Phylogenomic Platform for Exploring the Angiosperm Tree of Life",
    author="Baker W.J., Bailey P., Barber V., Barker A., Bellot S., Bishop D., Botigue L.R., Brewer G., Carruthers T., Clarkson J.J., Cook J., Cowan R.S., Dodsworth S., Epitawalage N., Francoso E., Gallego B., Johnson M., Kim J.T., Leempoel K., Maurin O., McGinnie C., Pokorny L., Roy S., Stone M., Toledo E., Wickett N.J., Zuntini A.R., Eiserhardt W.L., Kersey P.J., Leitch I.J. and Forest F.",
    year="2021",
    journal="Systematic Biology",
    volume="syab035",
    url="https://doi.org/10.1093/sysbio/syab035"
  )

  ref <- bibentry(
    bibtype="Article",
    textVersion=glue("{info$author} ({info$year}).",
                     "{info$title}. {info$journal},",
                     "{info$volume}, {info$url}",
                     .sep=" "),
    other=info,
    header=header
  )

  # add the `citation` class on top of `bibentry`, as the other
  # kew_citation methods do
  class(ref) <- c("citation", "bibentry")

  ref
}
200 | 
#' @importFrom glue glue
#' @importFrom utils bibentry
#'
#' @export
kew_citation.knms_match <- function(x) {
  # Fields for the citation; the glue template below reads them from `info`.
  info <- list(
    title="Kew Names Matching Service",
    author="KNMS",
    year="2021",
    accessed=Sys.Date(),
    url="http://namematch.science.kew.org/"
  )

  # plain-text form of the citation
  citation_text <- glue("{info$author} ({info$year}).",
                        "{info$title}. {info$url}.",
                        "Accessed {info$accessed}",
                        .sep=" ")

  entry <- bibentry(
    bibtype="Manual",
    textVersion=citation_text,
    header="To cite KNMS in publication please use:",
    other=info
  )

  class(entry) <- c("citation", "bibentry")
  entry
}
230 | 
231 | 
#' @importFrom glue glue
#' @importFrom utils bibentry
#'
#' @export
kew_citation.krs_match <- function(x) {
  # Fields for the citation; the glue template below reads them from `info`.
  info <- list(
    title="Kew Reconciliation Service",
    author="KRS",
    year="2016",
    accessed=Sys.Date(),
    url="http://data1.kew.org/reconciliation/"
  )

  # plain-text form of the citation
  citation_text <- glue("{info$author} ({info$year}).",
                        "{info$title}. {info$url}.",
                        "Accessed {info$accessed}",
                        .sep=" ")

  entry <- bibentry(
    bibtype="Manual",
    textVersion=citation_text,
    header="To cite KRS in publication please use:",
    other=info
  )

  class(entry) <- c("citation", "bibentry")
  entry
}
261 | 
262 | 


--------------------------------------------------------------------------------
/vignettes/articles/mapping-assessed-species.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: "Mapping assessed species"
  3 | ---
  4 | 
  5 | ```{r, include = FALSE}
  6 | knitr::opts_chunk$set(
  7 |   collapse = TRUE,
  8 |   comment = "#>"
  9 | )
 10 | ```
 11 | 
 12 | This is a demonstration of one use for some of the kewr resources, in calculating the proportion of species that have been assessed in a country.
 13 | 
 14 | To do this, we'll follow these steps:
 15 | 
 16 | 1. Retrieve a list of assessed species in one country from the IUCN Red List of Threatened Species.
 17 | 2. Match names to an IPNI ID using the Kew Names Matching Service.
 18 | 3. Resolve assessments to accepted names using the World Checklist of Vascular Plants.
 19 | 4. Get a list of all accepted vascular plant species in the country from Plants of the World Online.
 20 | 5. Calculate the proportion of assessed and threatened species in our country of interest.
 21 | 
 22 | ## Setup 
 23 | 
 24 | In addition to *kewr*, we'll load:
 25 | 
 26 | * *dplyr* to manipulate the data
 27 | * *tidyr* to reshape data frames
 28 | * *stringr* to extract some data from strings
 29 | * *ggplot2* to make some plots of our final data
 30 | 
 31 | ```{r setup, message=FALSE, warning=FALSE}
 32 | library(kewr)
 33 | library(dplyr)
 34 | library(tidyr)
 35 | library(stringr)
 36 | library(ggplot2)
 37 | ```
 38 | 
 39 | ## 1. Getting assessment information
 40 | 
 41 | The first step in this problem is to get a list of assessed species for a country.
 42 | 
 43 | The most authoritative list of global species assessments is the IUCN Red List of Threatened Species. For this package, I used the `rl_sp_country` function from *rredlist* to request all assessments for taxa found in Denmark from the IUCN Red List API. I then used the `rl_search` function to request full assessment information for each taxon.
 44 | 
 45 | You can do the same by [installing the *rredlist* package](https://github.com/ropensci/rredlist). You'll need an API key to use the IUCN Red List API, which you can [register for here](https://apiv3.iucnredlist.org/api/v3/token). Alternatively, you can download the information you need directly from [the IUCN Red List website](https://www.iucnredlist.org/).
 46 | 
 47 | I've bundled up the Danish plant assessments in this package to make things easier for this analysis.
 48 | 
 49 | ```{r view-danish-plants}
 50 | head(danish_plants)
 51 | ```
 52 | 
 53 | In total, there are assessments for `r format(nrow(danish_plants), big.mark=",")` vascular plants from Denmark.
 54 | 
 55 | ## 2. Match names to an IPNI ID
 56 | 
To match these names to an IPNI ID, we'll use the Kew Names Matching Service. The first thing we'll do is join the taxonomic authority to the scientific name, which should reduce the number of matches returned for each name.
 58 | 
 59 | ```{r make-full-names}
 60 | danish_plants <-
 61 |   danish_plants %>%
 62 |   unite("full_name", scientific_name, authority, sep=" ", remove=FALSE)
 63 | ```
 64 | 
 65 | Then we put the full names through KNMS.
 66 | 
 67 | ```{r match-full-names}
 68 | full_matches <- match_knms(danish_plants$full_name)
 69 | full_matches
 70 | ```
 71 | 
Most of our names had matches, which makes things simpler. But we also got some names that returned multiple matches.
 73 | 
 74 | First we'll try putting just the scientific names of our missing matches through KNMS again.
 75 | 
 76 | ```{r match-partial-names}
# flatten the KNMS response so the `matched` and `submitted` columns
# can be used below
full_matches <- tidy(full_matches)

# names that KNMS could not match when submitted with their authority
unmatched <- filter(full_matches, !matched)

# recover the original rows for those names so we can resubmit
# the scientific name on its own
to_match <- 
  danish_plants %>%
  filter(full_name %in% unmatched$submitted)

part_matches <- match_knms(to_match$scientific_name)
part_matches
 87 | ```
 88 | Almost everything returned a match. We could attempt to manually match the final name, but as it's just one we'll leave it out.
 89 | 
 90 | Now we can join all of our matches together, link them to the IUCN taxon ID, and resolve any synonyms.
 91 | 
 92 | ```{r resolve-matches}
part_matches <- tidy(part_matches)

# matches made on the full name (scientific name + authority):
# join back to the assessments on the string that was submitted
full_matches <- 
  full_matches %>%
  filter(matched) %>%
  left_join(
    danish_plants %>% select(taxonid, full_name, category),
    by=c("submitted"="full_name")
  )
  
# matches made on the scientific name alone: same join, but the
# submitted string is the scientific name this time
part_matches <- 
  part_matches %>%
  filter(matched) %>%
  left_join(
    danish_plants %>% select(taxonid, scientific_name, category),
    by=c("submitted"="scientific_name")
  )

# stack both sets of matches into one table
matched_names <- bind_rows(full_matches, part_matches)

head(matched_names)
114 | ```
115 | 
116 | ## 3. Resolve assessments to accepted names
117 | 
118 | Now that we have an IPNI ID attached to each assessment, we can look up the record for the taxa in WCVP. This will let us find out the taxonomic status of each name - the first step in resolving any synonymy issues.
119 | 
120 | ```{r resolve-synonyms}
# wrap the lookup_wcvp result in a list so the whole record can be
# stored in a single cell of a list-column
f <- function(taxonid) {
  list(lookup_wcvp(taxonid))
}

# nest_by() gives one row per taxonid/ipni_id pair and works row-wise,
# so each lookup is made with a single IPNI ID
resolved_names <- 
  matched_names %>%
  nest_by(taxonid, ipni_id) %>%
  mutate(wcvp_record=f(ipni_id))

# pull the taxonomic status out of each stored record
resolved_names <-
  resolved_names %>%
  mutate(status=wcvp_record$status)
134 | ```
135 | 
With the taxonomic status, we'll first remove any taxa that are unplaced. We'll also remove any non-homotypic synonyms: even if we resolved these to accepted species, we couldn't be sure that the assessment is valid for the new concept, so we can't count that accepted species as assessed.
137 | 
138 | ```{r remove-non-homotypic}
139 | resolved_names <-
140 |   resolved_names %>%
141 |   filter(status %in% c("accepted", "homotypic synonym"))
142 | ```
143 | 
144 | Next we'll check if there are any taxa that still have multiple matches in WCVP.
145 | 
146 | ```{r check-multiple-matches}
147 | resolved_names %>%
148 |   ungroup() %>%
149 |   add_count(taxonid) %>%
150 |   summarise(multiple_matches=sum(n > 1))
151 | ```
152 | 
153 | There are not.
154 | 
155 | So the final step is to find the accepted names for all homotypic synonyms and remove anything that is a lower rank than species.
156 | 
157 | ```{r find-accepted-names}
# for synonyms, take the details of the accepted name nested inside the
# record; for accepted names, take the details of the record itself
resolved_names <-
  resolved_names %>%
  mutate(accepted_id=ifelse(status != "accepted",
                              wcvp_record$accepted$id,
                              wcvp_record$id),
         accepted_name=ifelse(status != "accepted",
                              wcvp_record$accepted$name,
                              wcvp_record$name),
         accepted_author=ifelse(status != "accepted",
                              wcvp_record$accepted$author,
                              wcvp_record$authors),
         accepted_rank=ifelse(status != "accepted",
                              wcvp_record$accepted$rank,
                              wcvp_record$rank))

# drop the stored records, expand the nested assessment data back out,
# and rename the ID we matched on to keep it apart from accepted_id
resolved_names <- 
  resolved_names %>%
  select(-wcvp_record) %>%
  unnest(cols=c(data)) %>%
  rename(match_id=ipni_id)

# keep only names whose accepted taxon is at species rank
accepted_species <-
  resolved_names %>%
  filter(accepted_rank == "Species") %>%
  ungroup()

nrow(accepted_species)
185 | ```
186 | 
187 | We are now left with `r nrow(accepted_species)` accepted species from Denmark with assessments.
188 | 
189 | ## 4. Get a list of all species in the country
190 | 
To calculate the proportion of species that are assessed in Denmark, we need a checklist of all accepted species.
192 | 
193 | We can get this from Plants of the World Online.
194 | 
195 | ```{r download-powo-checklist}
196 | checklist <- search_powo(list(distribution="Denmark"),
197 |                          filters=c("accepted", "species"),
198 |                          limit=5000)
199 | checklist
200 | ```
201 | 
202 | Now we have this, we just need to join our assessments to our checklist.
203 | 
204 | ```{r join-assessments-checklist}
checklist <-
  tidy(checklist) %>%
  # take the first run of digits and hyphens in fqId as the IPNI ID
  mutate(ipni_id=str_extract(fqId, "[0-9\\-]+")) %>%
  select(ipni_id, family, name, author) %>%
  # keep a single row per ID
  distinct(ipni_id, .keep_all=TRUE) %>%
  # attach the assessment category; species without an assessment get NA
  left_join(
    accepted_species %>% select(accepted_id, category),
    by=c("ipni_id"="accepted_id")
  )
214 | ```
215 | 
216 | ## 5. Calculating the proportion of assessed species
217 | 
218 | And now we can calculate the proportion of species assessed in Denmark!
219 | 
220 | ```{r calculate-proportions}
221 | checklist %>%
222 |   summarise(p_assessed=mean(! is.na(category)))
223 | ```
224 | 
225 | And make a simple bar chart of the number of species in each category.
226 | 
227 | ```{r plot-bars}
# fill colour for each Red List category; the order of the names also
# sets the order of the categories on the axis
iucn_colours <- c("NE"="#ffffff",
                  "DD"="#d1d1d6",
                  "LC"="#60c659",
                  "NT"="#cce226",
                  "VU"="#f9e814",
                  "EN"="#fc7f3f",
                  "CR"="#d81e05",
                  "EW"="#542344",
                  "EX"="#000000")
checklist %>%
  # species with no assessment are shown as Not Evaluated
  replace_na(list(category="NE")) %>%
  mutate(category=factor(category, levels=names(iucn_colours),
                         ordered=TRUE)) %>%
  # outline only the white NE bar so it stays visible
  ggplot(mapping=aes(y=category, fill=category,
                     colour=category == "NE")) +
  geom_bar() +
  scale_fill_manual(values=iucn_colours, drop=FALSE) +
  scale_colour_manual(values=c(`TRUE`="black", `FALSE`=NA)) +
  scale_y_discrete(drop=FALSE) +
  # "none" replaces the deprecated `FALSE` for hiding a legend
  guides(fill="none", colour="none") +
  labs(x="Number of species", y="IUCN Red List category")
249 | ```
250 | 
251 | 
252 | 


--------------------------------------------------------------------------------
/R/print.R:
--------------------------------------------------------------------------------
  1 | # wcvp ----
  2 | 
  3 | #' @importFrom glue glue glue_collapse
  4 | #' @importFrom utils str head
  5 | #'
  6 | #' @export
  7 | print.wcvp_search <- function(x, ...) {
  8 |   if (! is.null(x$filters)) {
  9 |     filters <- glue_collapse(x$filters, sep=", ")
 10 |   } else {
 11 |     filters <- "none"
 12 |   }
 13 | 
 14 |   if (! is.null(names(x$query))) {
 15 |     query <- glue("{names(x$query)}='{x$query}'")
 16 |     query <- glue_collapse(query, sep=", ")
 17 |   } else {
 18 |     query <- glue("'{x$query}'")
 19 |   }
 20 | 
 21 |   message <- glue("<WCVP search: {query} filters: '{filters}'>",
 22 |                   "total results: {x$total}",
 23 |                   "returned results: {length(x$results)}",
 24 |                   "total pages: {x$pages}",
 25 |                   "current page: {x$page}",
 26 |                   "",
 27 |                   .sep="\n", .trim=FALSE, .null="missing")
 28 | 
 29 |   cat(message)
 30 |   if (! is.null(x$results)) {
 31 |     str(head(x$results, 1), max.level=2)
 32 |   }
 33 |   invisible()
 34 | }
 35 | 
 36 | #' @importFrom glue glue
 37 | #' @importFrom utils str
 38 | #' @export
 39 | print.wcvp_taxon <- function(x, ...) {
 40 |   accepted_id <- ifelse(is.null(x$accepted), x$id, x$accepted$id)
 41 | 
 42 |   message <- glue("<WCVP taxon id: {x$queryId}>",
 43 |                   "Name: {x$name}",
 44 |                   "Authors: {x$authors}",
 45 |                   "Status: {x$status}",
 46 |                   "Rank: {x$rank}",
 47 |                   "Accepted taxon ID: {accepted_id}",
 48 |                   "Synonyms: {length(x$synonyms)}",
 49 |                   "",
 50 |                   .sep="\n", .trim=FALSE, .null="missing")
 51 | 
 52 |   cat(message)
 53 |   invisible()
 54 | }
 55 | 
 56 | # powo ----
 57 | 
 58 | #' @importFrom glue glue glue_collapse
 59 | #' @importFrom utils str head
 60 | #'
 61 | #' @export
 62 | print.powo_search <- function(x, ...) {
 63 |   if (! is.null(x$filters)) {
 64 |     filters <- glue_collapse(x$filters, sep=", ")
 65 |   } else {
 66 |     filters <- "none"
 67 |   }
 68 | 
 69 |   if (! is.null(names(x$query))) {
 70 |     query <- glue("{names(x$query)}='{x$query}'")
 71 |     query <- glue_collapse(query, sep=", ")
 72 |   } else {
 73 |     query <- glue("'{x$query}'")
 74 |   }
 75 | 
 76 |   message <- glue("<POWO search: {query} filters: '{filters}'>",
 77 |                   "total results: {x$total}",
 78 |                   "returned results: {length(x$results)}",
 79 |                   "total pages: {x$pages}",
 80 |                   "First result:",
 81 |                   "",
 82 |                   .sep="\n", .trim=FALSE, .null="missing")
 83 | 
 84 |   cat(message)
 85 |   if (! is.null(x$results)) {
 86 |     str(head(x$results, 1), max.level=2)
 87 |   }
 88 |   invisible()
 89 | }
 90 | 
 91 | #' @importFrom glue glue
 92 | #' @importFrom stringr str_extract
 93 | #' @importFrom utils str
 94 | #'
 95 | #' @export
 96 | print.powo_taxon <- function(x, ...) {
 97 |   if ("accepted" %in% names(x)) {
 98 |     accepted_id <- str_extract(x$accepted$fqId,
 99 |                                "(?<=names\\:)[0-9\\-]+$")
100 |   } else if (x$taxonomicStatus == "Accepted") {
101 |     accepted_id <- x$queryId
102 |   } else {
103 |     accepted_id <- "UNDEFINED"
104 |   }
105 | 
106 |   has_distribution <- "distribution" %in% names(x)
107 | 
108 |   message <- glue("<POWO taxon id: {x$queryId}>",
109 |                   "Name: {x$name}",
110 |                   "Authors: {x$authors}",
111 |                   "Status: {x$taxonomicStatus}",
112 |                   "Rank: {x$rank}",
113 |                   "Accepted taxon ID: {accepted_id}",
114 |                   "Synonyms: {length(x$synonyms)}",
115 |                   "Includes distribution: {has_distribution}",
116 |                   "",
117 |                   .sep="\n", .trim=FALSE, .null="missing")
118 | 
119 |   cat(message)
120 |   invisible()
121 | }
122 | 
123 | # ipni ----
124 | 
#' @importFrom glue glue glue_collapse
#' @importFrom utils str head
#'
#' @export
print.ipni_search <- function(x, ...) {
  # comma-separated list of filters, or "none" for an unfiltered search
  if (! is.null(x$filters)) {
    filters <- glue_collapse(x$filters, sep=", ")
  } else {
    filters <- "none"
  }

  # keyword queries are shown as key='value' pairs, plain queries as 'query'
  if (! is.null(names(x$query))) {
    query <- glue("{names(x$query)}='{x$query}'")
    query <- glue_collapse(query, sep=", ")
  } else {
    query <- glue("'{x$query}'")
  }

  summary_text <- glue("<IPNI search: {query}, filters: '{filters}'>",
                       "total results: {x$total}",
                       "returned results: {length(x$results)}",
                       "total pages: {x$pages}",
                       "current page: {x$page}",
                       "",
                       .sep="\n", .trim=FALSE, .null="missing")

  cat(summary_text)
  # preview the structure of the first result only
  if (! is.null(x$results)) {
    str(head(x$results, 1), max.level=2)
  }

  # return the object itself, as print methods are expected to
  invisible(x)
}
157 | 
#' @importFrom glue glue
#' @importFrom utils str
#' @export
print.ipni_citation <- function(x, ...) {
  # one line per field; fields absent from the record print as "missing"
  summary_text <- glue("<IPNI name id: {x$queryId}, type: {x$recordType}>",
                       "Name: {x$name}",
                       "Authors: {x$authors}",
                       "Publication: {x$publication}",
                       "Publication Year: {x$publicationYear}",
                       "Reference: {x$reference}",
                       "Rank: {x$rank}",
                       "In POWO: {x$inPowo}",
                       "",
                       .sep="\n", .trim=FALSE, .null="missing")

  cat(summary_text)

  # return the object itself, as print methods are expected to
  invisible(x)
}
176 | 
#' @importFrom glue glue
#' @importFrom utils str
#' @export
print.ipni_author <- function(x, ...) {
  # one line per field; fields absent from the record print as "missing"
  summary_text <- glue("<IPNI name id: {x$queryId}, type: {x$recordType}>",
                       "Name: {x$forename} {x$surname}",
                       "Standard form: {x$standardForm}",
                       "Dates: {x$dates}",
                       "Focal groups: {x$taxonGroups}",
                       "Example taxon: {x$examples}",
                       "",
                       .sep="\n", .trim=FALSE, .null="missing")

  cat(summary_text)

  # return the object itself, as print methods are expected to
  invisible(x)
}
193 | 
#' @importFrom glue glue
#' @importFrom utils str
#' @export
print.ipni_publication <- function(x, ...) {
  # A missing LC number is printed as an empty string; every other absent
  # field prints as "missing".
  summary_text <- glue("<IPNI name id: {x$queryId}, type: {x$recordType}>",
                       "Title: {x$title}",
                       "Abbreviation: {x$abbreviation}",
                       "LC Number: {ifelse(is.null(x$lcNumber), '', x$lcNumber)}",
                       "BPH Number: {x$bphNumber}",
                       "",
                       .sep="\n", .trim=FALSE, .null="missing")

  cat(summary_text)

  # return the object itself, as print methods are expected to
  invisible(x)
}
209 | 
210 | # tol -----
211 | 
#' @importFrom glue glue glue_collapse
#' @importFrom utils str head
#'
#' @export
print.tol_search <- function(x, ...) {
  # keyword queries are shown as key='value' pairs, plain queries as 'query'
  if (! is.null(names(x$query))) {
    query <- glue("{names(x$query)}='{x$query}'")
    query <- glue_collapse(query, sep=", ")
  } else {
    query <- glue("'{x$query}'")
  }

  summary_text <- glue("<ToL search: {query}>",
                       "total results: {x$total}",
                       "returned results: {length(x$results)}",
                       "total pages: {x$pages}",
                       "current page: {x$page}",
                       "",
                       .sep="\n", .trim=FALSE, .null="missing")

  cat(summary_text)
  # preview the structure of the first result only
  if (! is.null(x$results)) {
    str(head(x$results, 1), max.level=2)
  }

  # return the object itself, as print methods are expected to
  invisible(x)
}
238 | 
#' @importFrom glue glue
#' @importFrom utils str
#' @export
print.tol_specimen <- function(x, ...) {

  # Only the first set of raw reads is summarised. A specimen with no
  # reads is allowed: taking `[[1]]` of an empty list would be an error.
  raw_reads <- if (length(x$raw_reads) > 0) x$raw_reads[[1]] else NULL
  taxonomy <- x$taxonomy

  # Format the count only when there is one. format() on a missing count
  # yields a zero-length string, and a zero-length input would make glue
  # return nothing at all; a NULL instead prints as "missing".
  reads_count <- raw_reads$reads_count
  if (! is.null(reads_count)) {
    reads_count <- format(reads_count, big.mark=",")
  }

  summary_text <- glue("<ToL specimen id: {x$queryId}>",
                       "Species: {taxonomy$species}",
                       "Family: {taxonomy$family}",
                       "Order: {taxonomy$order}",
                       "Collector: {x$collector}",
                       "Project: {x$project$data_source$name}",
                       "No. of reads: {reads_count}",
                       "Sequencing platform: {raw_reads$sequence_platform}",
                       "Suspicious placement: {x$is_suspicious_placement}",
                       "",
                       .sep="\n", .trim=FALSE, .null="missing")

  cat(summary_text)

  # return the object itself, as print methods are expected to
  invisible(x)
}
262 | 
#' @importFrom glue glue
#' @importFrom utils str
#' @export
print.tol_gene <- function(x, ...) {
  # one line per field; fields absent from the record print as "missing"
  summary_text <- glue("<ToL gene id: {x$queryId}>",
                       "Exemplar name: {x$exemplar_name}",
                       "Exemplar source species: {x$exemplar_species}",
                       "No. species: {x$species_count}",
                       "No. genera: {x$genera_count}",
                       "Avg. recovered length: {x$average_contig_length}",
                       "Avg. % recovered: {x$average_contig_length_percent}",
                       "",
                       .sep="\n", .trim=FALSE, .null="missing")

  cat(summary_text)

  # return the object itself, as print methods are expected to
  invisible(x)
}
284 | 
#' @importFrom glue glue
#' @importFrom utils str
#' @export
print.tol_tree <- function(x, ...) {

  # show where the tree came from and only its first 100 characters
  summary_text <- glue("<ToL tree url: {x$response$url}>",
                       "Preview:",
                       substr(x$content, 1, 100),
                       "",
                       .sep="\n", .trim=FALSE)

  cat(summary_text)

  # return the object itself, as print methods are expected to
  invisible(x)
}
299 | 
#' @importFrom glue glue
#' @importFrom utils str
#' @export
print.tol_fasta <- function(x, ...) {

  # show where the file came from and only its first 100 characters
  summary_text <- glue("<ToL fasta url: {x$response$url}>",
                       "Preview:",
                       substr(x$content, 1, 100),
                       "",
                       .sep="\n", .trim=FALSE)

  cat(summary_text)

  # return the object itself, as print methods are expected to
  invisible(x)
}
314 | 
315 | # knms ----
316 | 
#' @importFrom glue glue
#' @importFrom utils str head
#' @export
print.knms_match <- function(x, ...) {
  # counts of submitted, matched, multiply-matched and unmatched names
  summary_text <- glue("<KNMS match: {length(x$submitted)} names submitted>",
                       "Matches returned: {x$matched}",
                       "Multiple matches: {x$multiple_matches}",
                       "Unmatched names: {x$unmatched}",
                       "",
                       .sep="\n", .trim=FALSE, .null="missing")

  cat(summary_text)
  # preview the structure of the first result only
  str(head(x$results, 1))

  # return the object itself, as print methods are expected to
  invisible(x)
}
331 | 
332 | # krs ----
333 | 
#' @importFrom glue glue glue_collapse
#' @importFrom utils str head
#' @export
print.krs_match <- function(x, ...) {
  # keyword queries are shown as key='value' pairs, plain queries as 'query'
  if (! is.null(names(x$query))) {
    query <- glue("{names(x$query)}='{x$query}'")
    query <- glue_collapse(query, sep=", ")
  } else {
    query <- glue("'{x$query}'")
  }

  summary_text <- glue("<KRS match: {length(x$results)} names matched to {query}>",
                       "",
                       .sep="\n", .trim=FALSE)

  cat(summary_text)
  # preview the structure of the first result only
  str(head(x$results, 1))

  # return the object itself, as print methods are expected to
  invisible(x)
}
352 | 


--------------------------------------------------------------------------------
/R/wcvp.R:
--------------------------------------------------------------------------------
  1 | #' Search WCVP for a taxon.
  2 | #'
  3 | #' Query the World Checklist of Vascular Plants search API
  4 | #' for a taxon string.
  5 | #'
  6 | #' The [World Checklist of Vascular Plants (WCVP)](https://wcvp.science.kew.org/)
  7 | #' is a global consensus view of all known vascular plant species.
  8 | #' It has been compiled by staff at RBG Kew in consultation with plant
  9 | #' group experts.
 10 | #'
 11 | #' The search API allows users to query the checklist for plant names.
 12 | #' Currently, it does not support partial or fuzzy matching.
 13 | #' In order to get a result, the user must supply a valid name string.
 14 | #' For example, 'Myrcia' and 'Myrcia guianensis' will return results,
 15 | #' but 'M' or 'Myr' will not.
 16 | #'
 17 | #' There is some support for querying using keyword arguments. The API is
 18 | #' not currently documented, so only keywords that are definitely there have
 19 | #' been implemented. Use the `get_keywords` function to view a list of all implemented keywords.
 20 | #'
 21 | #' The API will return taxonomic information (the family, authority, status, and rank)
 22 | #' of all names matching the query. These results can be limited, for example to accepted species,
 23 | #' using filters. Use the `get_filters` function to view a list of all implemented filters.
 24 | #'
 25 | #' @param query The taxon string to search WCVP for. If using keywords,
 26 | #'  the query must be formatted as a list.
 27 | #' @param filters Filter to apply to search results.
 28 | #'  Multiple filters must be supplied as a character vector.
 29 | #' @param cursor A cursor returned by a previous search.
 30 | #'  If used, the query and filter must be exactly the same.
 31 | #' @param limit An integer specifying the maximum number of results
 32 | #'  to return.
 33 | #' @param .wait Time to wait before making a request, to help
 34 | #'  rate limiting.
 35 | #'
 36 | #' @return Returns an object of class `wcvp_search` that is a simple
 37 | #' structure with slots for:
 38 | #'
#'  * `total`: the total number of results held in WCVP for the query
#'  * `pages`: the total number of pages of results, calculated from
#'    `total` and `limit`.
#'  * `cursor`: a cursor to retrieve the next page of results from the API.
#'  * `limit`: the maximum number of results requested from the API.
#'  * `results`: the query results parsed into a list.
#'  * `query`: the query string submitted to the API.
#'  * `filters`: the filter strings submitted to the API.
#'  * `response`: the [httr response object][httr::response].
 46 | #'
 47 | #' @examples
 48 | #' # search for all entries containing a genus name
 49 | #' search_wcvp("Myrcia")
 50 | #'
 51 | #' # search for all accepted species within a genus
 52 | #' search_wcvp("Myrcia", filters=c("species", "accepted"))
 53 | #'
 54 | #' # search for up to 10,000 species in a genus
 55 | #' search_wcvp("Poa", filters=c("species"), limit=10000)
 56 | #'
 57 | #' # search for all names in a family
 58 | #' search_wcvp(list(family="Myrtaceae"))
 59 | #'
 60 | #' # search for genera within a family
 61 | #' search_wcvp(list(family="Myrtaceae"), filters=c("genera"))
 62 | #'
 63 | #' # search for all names with a specific epithet
 64 | #' search_wcvp(list(species="guianensis"))
 65 | #'
 66 | #' # search for a species name and print the results
 67 | #' r <- search_wcvp("Myrcia guianensis", filters=c("species"))
 68 | #' print(r)
 69 | #'
 70 | #' # simplify search results to a `tibble`
 71 | #' r <- search_wcvp("Poa", filters=c("species"))
 72 | #' tidy(r)
 73 | #'
 74 | #' # accepted name info is nested inside the records for synonyms
 75 | #' # simplify accepted name info to the name ID
 76 | #' r <- search_wcvp("Poa", filters=c("species"))
 77 | #' tidied <- tidy(r)
 78 | #' tidyr::unnest(tidied, cols=synonymOf, names_sep="_")
 79 | #'
 80 | #' @references
 81 | #' WCVP (2020). World Checklist of Vascular Plants, version 2.0. Facilitated by the Royal Botanic Gardens, Kew. Published on the Internet; http://wcvp.science.kew.org/
 82 | #'
 83 | #' @family WCVP functions
 84 | #' @seealso
 85 | #'  * [lookup_wcvp()] to lookup information about a taxon name
 86 | #'   using a valid IPNI ID.
 87 | #'  * [download_wcvp()] to download the entire WCVP.
 88 | #'
 89 | #' @export
 90 | search_wcvp <- function(query, filters=NULL, cursor="*", limit=50, .wait=0.1) {
 91 |   url <- wcvp_search_url_()
 92 | 
 93 |   # keeping a copy of this to return in the result object
 94 |   original_query <- query
 95 | 
 96 |   query <- format_query_(query, "wcvp")
 97 | 
 98 |   query$limit <- limit
 99 |   query$cursor <- cursor
100 |   query$f <- format_filters_(filters, "wcvp")
101 | 
102 |   results <- make_request_(url, query, .wait=.wait)
103 | 
104 |   # calculate total number of pages, because it isn't returned
105 |   total_pages <- ceiling(results$content$total / results$content$limit)
106 | 
107 |   structure(
108 |     list(
109 |       total=results$content$total,
110 |       pages=total_pages,
111 |       cursor=results$content$cursor,
112 |       limit=results$content$limit,
113 |       results=results$content$results,
114 |       query=original_query,
115 |       filters=filters,
116 |       response=results$response
117 |     ),
118 |     class=c("wcvp_search", "wcvp")
119 |   )
120 | }
121 | 
122 | #' Look up a taxon in WCVP.
123 | #'
124 | #' Request the record for a taxon in the World Checklist of
125 | #' Vascular Plants (WCVP) using the IPNI ID.
126 | #'
127 | #' The [World Checklist of Vascular Plants (WCVP)](https://wcvp.science.kew.org/)
128 | #' is a global consensus view of all known vascular plant species.
129 | #' It has been compiled by staff at RBG Kew in consultation with plant
130 | #' group experts.
131 | #'
132 | #' The taxon lookup API allows users to retrieve taxonomic information for
133 | #' a specific taxon name using the unique IPNI ID. If this is not known,
134 | #' it can be found out using the [WCVP search API][kewr::search_wcvp].
135 | #'
136 | #' @param taxonid A string containing a valid IPNI ID.
137 | #' @param .wait Time to wait before making a request, to help
138 | #'  rate limiting.
139 | #'
140 | #' @return A `wcvp_taxon` object, which is a simple structure with fields
141 | #'   for each of the fields returned by the lookup API, as well as the [httr response object][httr::response].
142 | #'
143 | #' @examples
144 | #'
145 | #' # retrieve taxonomic information for a taxon name
146 | #' lookup_wcvp("271445-2")
147 | #'
148 | #' # print a summary of the returned information
149 | #' r <- lookup_wcvp("271445-2")
150 | #' print(r)
151 | #'
152 | #' # tidy into a tibble
153 | #' r <- lookup_wcvp("271445-2")
154 | #' tidy(r)
155 | #'
156 | #' # tidy the returned list of synonyms into a tibble
157 | #' r <- lookup_wcvp("60447743-2")
158 | #' tidied <- tidy(r)
159 | #' tidyr::unnest(tidied, cols=synonyms, names_sep="_")
160 | #'
161 | #' # expand the child entries returned for each entry
162 | #' r <- lookup_wcvp("30000055-2")
163 | #' tidied <- tidy(r)
164 | #' tidyr::unnest(tidied, cols=children, names_sep="_")
165 | #'
166 | #' @family WCVP functions
167 | #' @seealso
168 | #'  * [search_wcvp()] to search WCVP using a taxon name.
169 | #'  * [download_wcvp()] to download the entire WCVP.
170 | #'
171 | #' @references
172 | #' WCVP (2020). World Checklist of Vascular Plants, version 2.0. Facilitated by the Royal Botanic Gardens, Kew. Published on the Internet; http://wcvp.science.kew.org/
173 | #'
174 | #' @export
lookup_wcvp <- function(taxonid, .wait=0.1) {
  url <- wcvp_taxon_url_(taxonid)

  result <- make_request_(url, query=NULL, .wait=.wait)

  # the record is the parsed response content, with the raw response
  # and the requested ID attached alongside the returned fields
  # (this might be better if things were explicitly listed)
  record <- result$content
  record$response <- result$response
  record$queryId <- taxonid

  # a record that comes back without a status is labelled as unplaced;
  # a plain `if` is used because this is a single NULL check, and
  # ifelse() would strip attributes from the value it passes through
  if (is.null(record$status)) {
    record$status <- "unplaced"
  }

  # make sure the author string is never NULL
  if (is.null(record$authors)) {
    record$authors <- NA_character_
  }

  structure(
    record,
    class=c("wcvp_taxon", "wcvp")
  )
}
198 | 
199 | #' Download the whole of the WCVP.
200 | #'
201 | #' Download the latest or a specific version of the World
202 | #' Checklist of Vascular Plants (WCVP).
203 | #'
204 | #' The [World Checklist of Vascular Plants (WCVP)](https://wcvp.science.kew.org/)
205 | #' is a global consensus view of all known vascular plant species.
206 | #' It has been compiled by staff at RBG Kew in consultation with plant
207 | #' group experts.
208 | #'
209 | #' Versioned downloads of the whole WCVP are provided on the website.
210 | #' This function allows the user to download the latest or a specific
211 | #' version of the WCVP.
212 | #'
213 | #' @param save_dir A string specifying the folder to save the download in. If
214 | #'   no value is provided, \link[here]{here} will be used.
215 | #' @param version An integer version number to download. The latest
216 | #'   version will be downloaded by default.
217 | #'
218 | #' @examples
219 | #' \dontrun{
220 | #'  # download the latest version
221 | #'  download_wcvp()
222 | #'
223 | #'  # download version 1
224 | #'  download_wcvp(version=1)
225 | #' }
226 | #'
227 | #' @family WCVP functions
228 | #' @seealso
229 | #'  * [lookup_wcvp()] to lookup information about a taxon name
230 | #'   using a valid IPNI ID.
231 | #'  * [search_wcvp()] to search WCVP using a taxon name.
232 | #'
233 | #' @references
234 | #' WCVP (2020). World Checklist of Vascular Plants, version 2.0. Facilitated by the Royal Botanic Gardens, Kew. Published on the Internet; http://wcvp.science.kew.org/
235 | #'
236 | #' @importFrom here here
237 | #' @importFrom glue glue
238 | #' @importFrom stringr str_extract
239 | #' @importFrom utils download.file
240 | #'
241 | #' @export
download_wcvp <- function(save_dir=NULL, version=NULL) {
  # fall back to the directory given by here() when none is supplied
  target_dir <- if (is.null(save_dir)) here() else save_dir

  # the link is resolved from the version exactly as passed in
  link <- wcvp_download_url_(version)
  zip_name <- str_extract(link, "(?<=/)wcvp.+\\.zip$")
  destination <- file.path(target_dir, zip_name)

  # only the progress message needs a label for the default version
  version_label <- if (is.null(version)) "latest" else version

  progress <- glue("Downloading WCVP version {version}",
                   "to: {save_path}\n",
                   version=version_label,
                   save_path=destination,
                   .sep=" ", .trim=FALSE)
  cat(progress)

  download.file(link, destination)

  invisible()
}
265 | 
#' Build the URL for a WCVP taxon lookup.
#'
#' Appends `/taxon/` and the supplied ID to the WCVP base URL
#' returned by `get_url_()`.
#'
#' @param taxonid A valid IPNI ID.
#'
#' @return The lookup URL, as returned by `glue()`.
#'
#' @noRd
#'
#' @importFrom glue glue
wcvp_taxon_url_ <- function(taxonid) {
  # the base URL is handed to glue() as a temporary variable
  glue("{base}/taxon/{taxonid}", base=get_url_("wcvp"))
}
278 | 
#' Build the URL for the WCVP search endpoint.
#'
#' @return The WCVP base URL returned by `get_url_()` with `/search`
#'   appended.
#'
#' @noRd
wcvp_search_url_ <- function() {
  paste(get_url_("wcvp"), "search", sep="/")
}
287 | 
#' Get a WCVP download URL.
#'
#' Reads the WCVP download directory listing, collects the links to
#' zip files, parses the version number out of each file name, and
#' returns the full URL for the requested version.
#'
#' @param version The version number to find a download link for. If
#'   `NULL`, the highest version number found in the listing is used.
#'
#' @return The download URL(s) of the zip file(s) carrying the requested
#'   version number. An error is raised if the listing holds no versioned
#'   zip files, or if the requested version is not among them.
#'
#' @importFrom httr GET content
#' @importFrom rvest html_nodes html_attr
#' @importFrom stringr str_detect str_extract
#' @importFrom glue glue
#'
#' @noRd
wcvp_download_url_ <- function(version=NULL) {
  base <- "http://sftp.kew.org/pub/data-repositories/WCVP/"
  response <- GET(base)

  page <- content(response)
  link_nodes <- html_nodes(page, "a")
  links <- html_attr(link_nodes, "href")

  # anchors without an href come back as NA, which would otherwise
  # survive the logical subsetting as NA entries
  download_links <- links[!is.na(links) & str_detect(links, "\\.zip$")]

  # compare versions as numbers: as strings, "9" sorts above "10"
  versions <- as.integer(str_extract(download_links, "(?<=_v)\\d+"))

  # drop zip files whose name carries no version number
  has_version <- !is.na(versions)
  download_links <- download_links[has_version]
  versions <- versions[has_version]

  if (length(versions) == 0) {
    stop(glue("No versioned WCVP downloads found at: {base}"), call.=FALSE)
  }

  if (is.null(version)) {
    version <- max(versions)
  }

  if (! version %in% versions) {
    available <- sort(unique(versions))
    message <- glue("Not a recognised version of WCVP: {version}",
                    "Available versions: {paste0(available, collapse=',')}",
                    "",
                    .sep="\n", .trim=FALSE)

    stop(message, call.=FALSE)
  }

  # match the parsed version exactly; a pattern match on "_v1" would
  # also pick up "_v10", "_v11", ...
  download_link <- download_links[versions == version]
  paste0(base, download_link)
}
323 | 
324 | 
325 | 


--------------------------------------------------------------------------------
/R/tol.R:
--------------------------------------------------------------------------------
  1 | #' Search Kew's Tree of Life for specimens or genes.
  2 | #'
  3 | #' Query Kew's Tree of Life for specimens that have
  4 | #' been sampled for sequencing.
  5 | #'
  6 | #' The [Tree of Life](https://treeoflife.kew.org/) is a database
  7 | #' of specimens sequenced as part of Kew's efforts to build
  8 | #' a comprehensive evolutionary tree of life for flowering plants.
  9 | #'
 10 | #' The search API allows users to query the database for specimens
 11 | #' based on their taxonomic information. Filtering and keyword-search
 12 | #' are not currently implemented. All searches are based on taxonomic
 13 | #' information, so `Myrcia` and `Myrtales` will return results, but
 14 | #' `Brummitt` will not.
 15 | #'
 16 | #' The search API also allows users to download information about sequenced
 17 | #' genes. There is currently no ability to search within the results for genes,
 18 | #' but a table of all genes can be accessed using keyword argument `genes=TRUE`.
 19 | #'
 20 | #' @param query The string to query the database with.
 21 | #' @param genes Set to TRUE to download results for genes instead of specimens.
 22 | #' @param limit An integer specifying the number of results
 23 | #'  to return.
 24 | #' @param page An integer specifying the page of results to request.
 25 | #' @param .wait Time to wait before making a request, to help
 26 | #'  rate limiting.
 27 | #'
 28 | #' @return Returns an object of class `tol_search` that is a simple
 29 | #'  structure with slots for:
 30 | #'
 31 | #'  * `total`: the total number of results held in ToL for the query.
 32 | #'  * `page`: the page of results requested.
 33 | #'  * `limit`: the maximum number of results requested from the API.
 34 | #'  * `results`: the query results parsed into a list.
 35 | #'  * `query`: the query string submitted to the API.
 36 | #'  * `response`: the [httr response object][httr::response].
 37 | #'
 38 | #' @examples
 39 | #' # get the first 50 of all sequenced specimens
 40 | #' search_tol(limit=50)
 41 | #'
 42 | #' # search for all sequenced Myrcia specimens
 43 | #' search_tol("Myrcia")
 44 | #'
 45 | #' # get all sequenced specimens
 46 | #' search_tol(limit=5000)
 47 | #'
 48 | #' # search for a species name and print the results
 49 | #' r <- search_tol("Myrcia guianensis")
 50 | #' print(r)
 51 | #'
 52 | #' # simplify search results to a `tibble`
 53 | #' r <- search_tol("Myrcia")
 54 | #' tidy(r)
 55 | #'
 56 | #' # gene stats are nested in the results
 57 | #' r <- search_tol("Myrcia")
 58 | #' tidied <- tidy(r)
 59 | #' tidyr::unnest(tidied, cols=gene_stats)
 60 | #'
 61 | #' # species names are nested in the results
 62 | #' r <- search_tol("Myrcia")
 63 | #' tidied <- tidy(r)
 64 | #' tidyr::unnest(tidied, cols=species, names_sep="_")
 65 | #'
 66 | #' # as is higher taxonomy
 67 | #' r <- search_tol("Myrcia")
 68 | #' tidied <- tidy(r)
 69 | #' tidyr::unnest(tidied, cols=species, names_sep="_")
 70 | #'
 71 | #' # search for all gene entries and print results
 72 | #' r <- search_tol(genes=TRUE, limit=500)
 73 | #' print(r)
 74 | #'
 75 | #' # tidy the returned genes
 76 | #' tidy(r)
 77 | #'
 78 | #' @references
 79 | #' Baker W.J., Bailey P., Barber V., Barker A., Bellot S., Bishop D., Botigue L.R., Brewer G., Carruthers T., Clarkson J.J., Cook J., Cowan R.S., Dodsworth S., Epitawalage N., Francoso E., Gallego B., Johnson M., Kim J.T., Leempoel K., Maurin O., McGinnie C., Pokorny L., Roy S., Stone M., Toledo E., Wickett N.J., Zuntini A.R., Eiserhardt W.L., Kersey P.J., Leitch I.J. & Forest F. 2021. A Comprehensive Phylogenomic Platform for Exploring the Angiosperm Tree of Life. Systematic Biology, 2021; syab035, https://doi.org/10.1093/sysbio/syab035
 80 | #' @family ToL functions
 81 | #' @seealso
 82 | #'  * [lookup_tol()] to lookup information about a sequenced specimen
 83 | #'    using a valid ToL ID.
 84 | #'  * [download_tol()] to download a file from the ToL SFTP server.
 85 | #'  * [load_tol()] load a file from the ToL SFTP server.
 86 | #'
 87 | #' @export
 88 | search_tol <- function(query="", genes=FALSE, limit=50, page=1, .wait=0.2) {
 89 |   if (genes) {
 90 |     url <- tol_search_url_(type="genes")
 91 |     query <- "genes"
 92 |   } else {
 93 |     url <- tol_search_url_()
 94 |   }
 95 | 
 96 |   original_query <- query
 97 |   query <- format_query_(query, "tol")
 98 | 
 99 |   query$per_page <- limit
100 |   query$page <- page
101 |   results <- make_request_(url, query, .wait=.wait)
102 | 
103 |   # calculate total number of pages, because it isn't returned
104 |   total_pages <- ceiling(results$content$total / limit)
105 | 
106 |   structure(
107 |     list(
108 |       total=results$content$total,
109 |       pages=total_pages,
110 |       page=results$content$page,
111 |       limit=limit,
112 |       results=results$content$items,
113 |       query=original_query,
114 |       response=results$response
115 |     ),
116 |     class=c("tol_search", "tol")
117 |   )
118 | }
119 | 
120 | #' Look up a sequenced specimen or gene in ToL.
121 | #'
122 | #' Request the record for a sequenced specimen or gene in ToL using
123 | #' its ToL ID.
124 | #'
125 | #' The [Tree of Life](https://treeoflife.kew.org/) is a database
126 | #' of specimens sequenced as part of Kew's efforts to build
127 | #' a comprehensive evolutionary tree of life for flowering plants.
128 | #'
129 | #' The lookup API allows users to retrieve taxonomic and sequencing
130 | #' information for a specific sequenced specimen or gene using the unique ToL ID.
131 | #' If this is not known, it can be found out using the [ToL search API][kewr::search_tol].
132 | #'
133 | #' @param id A string containing a valid ToL ID.
134 | #' @param type The type of record to lookup, either `specimen` or `gene`.
135 | #' @param .wait Time to wait before making a request, to help
136 | #'  rate limiting.
137 | #'
138 | #' @return A `tol_{type}` object, which is a simple structure with fields
139 | #'   for each of the fields returned by the lookup API,
140 | #'   as well as the [httr response object][httr::response].
141 | #'
142 | #' @examples
143 | #'
144 | #' # retrieve information for a particular specimen
145 | #' lookup_tol("1296")
146 | #'
147 | #' # print a summary of the returned information
148 | #' r <- lookup_tol("1296")
149 | #' print(r)
150 | #'
151 | #' # tidy into a tibble
152 | #' r <- lookup_tol("1296")
153 | #' tidy(r)
154 | #'
155 | #' # extract the returned gene stats for the specimen
156 | #' r <- lookup_tol("1296")
157 | #' tidied <- tidy(r)
158 | #' tidied$gene_stats
159 | #'
160 | #' # expand the taxonomy info
161 | #' r <- lookup_tol("1296")
162 | #' tidied <- tidy(r)
163 | #' tidyr::unnest(tidied, cols=taxonomy, names_sep="_")
164 | #'
165 | #' # retrieve information for a particular gene
166 | #' lookup_tol("51", type="gene")
167 | #'
168 | #' # print a summary of the returned information
169 | #' r <- lookup_tol("51", type="gene")
170 | #' print(r)
171 | #'
172 | #' # tidy into a tibble
173 | #' r <- lookup_tol("51", type="gene")
174 | #' tidy(r)
175 | #'
176 | #' @family ToL functions
177 | #' @seealso
178 | #'  * [search_tol()] to search ToL using taxonomic information.
179 | #'  * [download_tol()] to download a file from the ToL SFTP server.
180 | #'  * [load_tol()] load a file from the ToL SFTP server.
181 | #'
182 | #' @references
183 | #' Baker W.J., Bailey P., Barber V., Barker A., Bellot S., Bishop D., Botigue L.R., Brewer G., Carruthers T., Clarkson J.J., Cook J., Cowan R.S., Dodsworth S., Epitawalage N., Francoso E., Gallego B., Johnson M., Kim J.T., Leempoel K., Maurin O., McGinnie C., Pokorny L., Roy S., Stone M., Toledo E., Wickett N.J., Zuntini A.R., Eiserhardt W.L., Kersey P.J., Leitch I.J. & Forest F. 2021. A Comprehensive Phylogenomic Platform for Exploring the Angiosperm Tree of Life. Systematic Biology, 2021; syab035, https://doi.org/10.1093/sysbio/syab035
184 | #'
185 | #' @export
lookup_tol <- function(id, type=c("specimen", "gene"), .wait=0.1) {
  # resolve the record type to one of the allowed choices
  type <- match.arg(type)
  endpoint <- tol_lookup_url_(id, type)

  reply <- make_request_(endpoint, query=NULL, .wait=.wait)

  # the entry is the parsed response content, with the raw response
  # and the requested ID attached alongside the returned fields
  entry <- reply$content
  entry$response <- reply$response
  entry$queryId <- id

  # the first class records which type of record was looked up
  class(entry) <- c(paste0("tol_", type), "tol")

  entry
}
202 | 
203 | #' Load the Tree of Life or another file from ToL.
204 | #'
205 | #' Request a tree file for the whole ToL or an alignment,
206 | #' sequence, or gene tree for a particular specimen or gene.
207 | #'
208 | #' The [Tree of Life](https://treeoflife.kew.org/) is a database
209 | #' of specimens sequenced as part of Kew's efforts to build
210 | #' a comprehensive evolutionary tree of life for flowering plants.
211 | #'
212 | #' Newick tree, alignment, and sequence files are held on an SFTP server
213 | #' for download. The URLs to access these are stored in entries for specimens
214 | #' and genes in the ToL database. These can be accessed by either using [search_tol()]
215 | #' to get all specimens for a particular order, family, genus, or species or by
216 | #' looking up a specific specimen or gene using [lookup_tol()]. If no URL is specified,
217 | #' this will load the ToL tree.
218 | #'
219 | #' @param url URL pointing to a file on the ToL SFTP server.
220 | #' @param .wait Time to wait before making a request, to help
221 | #'  rate limiting.
222 | #'
223 | #' @examples
224 | #'  # load the ToL
225 | #'  load_tol()
226 | #'
227 | #'  # load a specimen fasta file
228 | #'  specimen_info <- lookup_tol("1296")
229 | #'  load_tol(specimen_info$fasta_file_url)
230 | #'
231 | #'  # load a gene alignment file
232 | #'  gene_info <- lookup_tol("51", type="gene")
233 | #'  load_tol(gene_info$alignment_file_url)
234 | #'
235 | #'  # load the gene tree
236 | #'  load_tol(gene_info$tree_file_url)
237 | #'
238 | #' @family ToL functions
239 | #'
240 | #' @seealso
241 | #'  * [lookup_tol()] to lookup information about a sequenced specimen
242 | #'   using a valid ToL ID.
243 | #'  * [search_tol()] to search ToL using taxonomic info.
244 | #'  * [download_tol()] to save a file on the ToL SFTP server to file.
245 | #'
246 | #' @references
247 | #' Baker W.J., Bailey P., Barber V., Barker A., Bellot S., Bishop D., Botigue L.R., Brewer G., Carruthers T., Clarkson J.J., Cook J., Cowan R.S., Dodsworth S., Epitawalage N., Francoso E., Gallego B., Johnson M., Kim J.T., Leempoel K., Maurin O., McGinnie C., Pokorny L., Roy S., Stone M., Toledo E., Wickett N.J., Zuntini A.R., Eiserhardt W.L., Kersey P.J., Leitch I.J. & Forest F. 2021. A Comprehensive Phylogenomic Platform for Exploring the Angiosperm Tree of Life. Systematic Biology, 2021; syab035, https://doi.org/10.1093/sysbio/syab035
248 | #'
249 | #' @importFrom glue glue
250 | #' @importFrom stringr str_extract
251 | #'
252 | #' @export
load_tol <- function(url=NULL, .wait=0.1) {
  # with no URL supplied, fall back to the ToL tree download URL
  target <- if (is.null(url)) tol_download_url_() else url

  # the request is made with json=FALSE
  fetched <- make_request_(target, query=NULL, json=FALSE, .wait=.wait)

  # the trailing run of lower-case letters in the URL names the subclass
  suffix <- str_extract(target, "[a-z]+$")

  loaded <- list(
    content=fetched$content,
    response=fetched$response
  )
  class(loaded) <- c(paste0("tol_", suffix), "tol")

  loaded
}
273 | 
274 | #' Download a file from the ToL SFTP server.
275 | #'
276 | #' Download an alignment, sequence, or tree file from the ToL
277 | #' SFTP server.
278 | #'
279 | #' The [Tree of Life](https://treeoflife.kew.org/) is a database
280 | #' of specimens sequenced as part of Kew's efforts to build
281 | #' a comprehensive evolutionary tree of life for flowering plants.
282 | #'
283 | #' Sequence, alignment, and Newick tree files are held on an SFTP server
284 | #' for download. The URLs to access these are stored in entries for specimens
285 | #' and genes in the ToL database. These can be accessed by either using [search_tol()]
286 | #' to get all specimens for a particular order, family, genus, or species or by
287 | #' looking up a specific specimen or gene using [lookup_tol()].
288 | #'
289 | #' @param download_link A string specifying the URL to download the file from.
290 | #'  You can get a download URL for a particular specimen or gene using [lookup_tol()].
291 | #' @param save_dir A string specifying the folder to save the download in. If
292 | #'   no value is provided, \link[here]{here} will be used.
293 | #'
294 | #' @examples
295 | #' \dontrun{
296 | #'  # download a specimen fasta file
297 | #'  specimen_info <- lookup_tol("1296")
298 | #'  download_tol(specimen_info$fasta_file_url)
299 | #'
300 | #'  # download a gene alignment file
301 | #'  gene_info <- lookup_tol("51", type="gene")
302 | #'  download_tol(gene_info$alignment_file_url)
303 | #'
304 | #'  # download the gene tree
305 | #'  download_tol(gene_info$tree_file_url)
306 | #' }
307 | #'
308 | #' @family ToL functions
309 | #' @seealso
310 | #'  * [lookup_tol()] to lookup information about a sequenced specimen
311 | #'   using a valid ToL ID.
312 | #'  * [search_tol()] to search ToL using taxonomic info.
313 | #'  * [load_tol()] load a file from the ToL SFTP server.
314 | #'
315 | #' @references
316 | #' Baker W.J., Bailey P., Barber V., Barker A., Bellot S., Bishop D., Botigue L.R., Brewer G., Carruthers T., Clarkson J.J., Cook J., Cowan R.S., Dodsworth S., Epitawalage N., Francoso E., Gallego B., Johnson M., Kim J.T., Leempoel K., Maurin O., McGinnie C., Pokorny L., Roy S., Stone M., Toledo E., Wickett N.J., Zuntini A.R., Eiserhardt W.L., Kersey P.J., Leitch I.J. & Forest F. 2021. A Comprehensive Phylogenomic Platform for Exploring the Angiosperm Tree of Life. Systematic Biology, 2021; syab035, https://doi.org/10.1093/sysbio/syab035
317 | #'
318 | #' @importFrom here here
319 | #' @importFrom glue glue
320 | #' @importFrom stringr str_extract
321 | #' @importFrom utils download.file
322 | #'
323 | #' @export
download_tol <- function(download_link=NULL, save_dir=NULL) {
  # fall back to the directory given by here() when none is supplied
  target_dir <- if (is.null(save_dir)) here() else save_dir

  # with no link supplied, fall back to the ToL tree download URL
  link <- if (is.null(download_link)) tol_download_url_() else download_link

  # the file name is everything after the last slash of the link
  file_name <- str_extract(link, "(?<=/)[^/]+$")
  destination <- file.path(target_dir, file_name)

  progress <- glue("Downloading file {filename}",
                   "to: {save_path}\n",
                   filename=file_name,
                   save_path=destination,
                   .sep=" ", .trim=FALSE)
  cat(progress)

  download.file(link, destination)

  invisible()
}
346 | 
#' Build the URL for a ToL record lookup.
#'
#' Appends the pluralised record type and the supplied ID to the ToL
#' base URL returned by `get_url_()`.
#'
#' @param id A valid ToL ID.
#' @param type The type of record to look up, either `specimen`
#'   or `gene`.
#'
#' @return The lookup URL, as returned by `glue()`.
#'
#' @noRd
#'
#' @importFrom glue glue
tol_lookup_url_ <- function(id, type=c("specimen", "gene")) {
  type <- match.arg(type)

  # the base URL is handed to glue() as a temporary variable
  glue("{base}/{type}s/{id}", base=get_url_("tol"))
}
360 | 
#' Build the URL for a Tree of Life search.
#'
#' Appends the record type to the ToL base URL returned by `get_url_()`.
#'
#' @param type The type of record to search for, either `specimens`
#'   or `genes`.
#'
#' @return The search URL, as returned by `glue()`.
#'
#' @importFrom glue glue
#'
#' @noRd
tol_search_url_ <- function(type=c("specimens", "genes")) {
  type <- match.arg(type)

  # the base URL is handed to glue() as a temporary variable
  glue("{base}/{type}", base=get_url_("tol"))
}
372 | 
#' Build the download URL for the Tree of Life.
#'
#' @return The ToL base URL returned by `get_url_()` with `/tree`
#'   appended, as returned by `glue()`.
#'
#' @importFrom glue glue
#'
#' @noRd
tol_download_url_ <- function() {
  # the base URL is handed to glue() as a temporary variable
  glue("{base}/tree", base=get_url_("tol"))
}
383 | 


--------------------------------------------------------------------------------