├── .Rbuildignore ├── .github ├── .gitignore └── workflows │ ├── R-CMD-check.yaml │ └── pkgdown.yaml ├── .gitignore ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── DESCRIPTION ├── LICENSE ├── NAMESPACE ├── NEWS.md ├── R ├── as.metadata.R ├── as.traitdata.R ├── cast.traitdata.R ├── data.R ├── get_gbif_taxonomy.R ├── glossary.R ├── mutate.traitdata.R ├── rbind.traitdata.R ├── standardize.R ├── standardize.exploratories.R ├── sysdata.rda ├── thesaurus.R ├── traitdataform-package.R └── utils.R ├── README.md ├── _pkgdown.yml ├── codemeta.json ├── cran-comments.md ├── data ├── arthropodtraits.rda ├── carabids.rda ├── heteroptera.rda └── heteroptera_raw.rda ├── draft ├── csv2xml.R └── minimalexample.R ├── inst ├── CITATION └── extdata │ ├── amniota.R │ ├── amphibio.R │ ├── arthropodtraits.R │ ├── carabids.R │ ├── eltontraits.R │ ├── heteroptera.R │ ├── heteroptera_raw.R │ ├── mammaldiet.R │ └── pantheria.R ├── man ├── amniota.Rd ├── amphibio.Rd ├── arthropodtraits.Rd ├── as.metadata.Rd ├── as.thesaurus.Rd ├── as.trait.Rd ├── as.traitdata.Rd ├── as_factor_clocale.Rd ├── carabids.Rd ├── cast.traitdata.Rd ├── fixlogical.Rd ├── get_gbif_taxonomy.Rd ├── glossary.Rd ├── heteroptera_raw.Rd ├── mammaldiet.Rd ├── mutate.traitdata.Rd ├── pantheria.Rd ├── pulldata.Rd ├── rbind.traitdata.Rd ├── read.service.Rd ├── standardize.Rd ├── standardize.exploratories.Rd ├── standardize_taxa.Rd ├── standardize_traits.Rd └── traitdataform-package.Rd ├── tests ├── testthat.R └── testthat │ ├── test.columns_to_ETS.R │ ├── test.map_gbif_taxonomy.R │ └── test.standardize.R └── vignettes ├── .gitignore ├── div05.bib ├── mee.csl └── traitdataform.Rmd /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^CRAN-RELEASE$ 2 | ^Meta$ 3 | ^doc$ 4 | ^\.travis\.yml$ 5 | ^CODE_OF_CONDUCT\.md$ 6 | ^CONTRIBuTING\.md$ 7 | ^cran-comments\.md$ 8 | ^codemeta\.json$ 9 | ^docs$ 10 | ^draft$ 11 | ^data_test$ 12 | ^.travis.yml$ 13 | ^.*\.Rproj$ 14 | ^\.Rproj\.user$ 15 | 
^_pkgdown\.yml$ 16 | ^pkgdown$ 17 | ^\.github$ 18 | -------------------------------------------------------------------------------- /.github/.gitignore: -------------------------------------------------------------------------------- 1 | *.html 2 | -------------------------------------------------------------------------------- /.github/workflows/R-CMD-check.yaml: -------------------------------------------------------------------------------- 1 | # Workflow derived from https://github.com/r-lib/actions/tree/master/examples 2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help 3 | on: 4 | push: 5 | branches: [main, master] 6 | pull_request: 7 | branches: [main, master] 8 | 9 | name: R-CMD-check 10 | 11 | jobs: 12 | R-CMD-check: 13 | runs-on: ubuntu-latest 14 | env: 15 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 16 | R_KEEP_PKG_SOURCE: yes 17 | steps: 18 | - uses: actions/checkout@v2 19 | 20 | - uses: r-lib/actions/setup-r@v1 21 | with: 22 | use-public-rspm: true 23 | 24 | - uses: r-lib/actions/setup-r-dependencies@v1 25 | with: 26 | extra-packages: rcmdcheck 27 | 28 | - uses: r-lib/actions/check-r-package@v1 29 | -------------------------------------------------------------------------------- /.github/workflows/pkgdown.yaml: -------------------------------------------------------------------------------- 1 | # Workflow derived from https://github.com/r-lib/actions/tree/master/examples 2 | # Need help debugging build failures? 
Start at https://github.com/r-lib/actions#where-to-find-help 3 | on: 4 | push: 5 | branches: [main, master] 6 | release: 7 | types: [published] 8 | workflow_dispatch: 9 | 10 | name: pkgdown 11 | 12 | jobs: 13 | pkgdown: 14 | runs-on: ubuntu-latest 15 | env: 16 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 17 | steps: 18 | - uses: actions/checkout@v2 19 | 20 | - uses: r-lib/actions/setup-pandoc@v1 21 | 22 | - uses: r-lib/actions/setup-r@v1 23 | with: 24 | use-public-rspm: true 25 | 26 | - uses: r-lib/actions/setup-r-dependencies@v1 27 | with: 28 | extra-packages: pkgdown 29 | needs: website 30 | 31 | - name: Deploy package 32 | run: | 33 | git config --local user.name "$GITHUB_ACTOR" 34 | git config --local user.email "$GITHUB_ACTOR@users.noreply.github.com" 35 | Rscript -e 'pkgdown::deploy_to_branch(new_process = FALSE)' 36 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | Meta 2 | doc 3 | docs 4 | draft 5 | .Rproj.user 6 | .Rhistory 7 | .RData 8 | traits.Rproj 9 | inst/doc 10 | docs/ 11 | CRAN-RELEASE 12 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Code of Conduct 2 | 3 | As contributors and maintainers of this project, we pledge to respect all people who 4 | contribute through reporting issues, posting feature requests, updating documentation, 5 | submitting pull requests or patches, and other activities. 6 | 7 | We are committed to making participation in this project a harassment-free experience for 8 | everyone, regardless of level of experience, gender, gender identity and expression, 9 | sexual orientation, disability, personal appearance, body size, race, ethnicity, age, or religion. 
10 | 11 | Examples of unacceptable behavior by participants include the use of sexual language or 12 | imagery, derogatory comments or personal attacks, trolling, public or private harassment, 13 | insults, or other unprofessional conduct. 14 | 15 | Project maintainers have the right and responsibility to remove, edit, or reject comments, 16 | commits, code, wiki edits, issues, and other contributions that are not aligned to this 17 | Code of Conduct. Project maintainers who do not follow the Code of Conduct may be removed 18 | from the project team. 19 | 20 | Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by 21 | opening an issue or contacting one or more of the project maintainers. 22 | 23 | This Code of Conduct is adapted from the Contributor Covenant 24 | (http://contributor-covenant.org), version 1.0.0, available at 25 | http://contributor-covenant.org/version/1/0/0/ 26 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # CONTRIBUTING # 2 | 3 | ### Fixing typos 4 | 5 | Small typos or grammatical errors in documentation may be edited directly using 6 | the GitHub web interface, so long as the changes are made in the _source_ file. 7 | 8 | * YES: you edit a roxygen comment in a `.R` file below `R/`. 9 | * NO: you edit an `.Rd` file below `man/`. 10 | 11 | ### Prerequisites 12 | 13 | Before you make a substantial pull request, you should always file an issue and 14 | make sure someone from the team agrees that it’s a problem. If you’ve found a 15 | bug, create an associated issue and illustrate the bug with a minimal 16 | [reprex](https://www.tidyverse.org/help/#reprex). 17 | 18 | ### Pull request process 19 | 20 | * We recommend that you create a Git branch for each pull request (PR). 21 | * Look at the Travis and AppVeyor build status before and after making changes (visible in README). 
22 | * We recommend the tidyverse [style guide](http://style.tidyverse.org). 23 | * We use [roxygen2](https://cran.r-project.org/package=roxygen2). 24 | * We use [testthat](https://cran.r-project.org/package=testthat). Contributions 25 | with test cases included are easier to accept. 26 | * For user-facing changes, add a bullet to the top of `NEWS.md` below the 27 | current development version header describing the changes made followed by your 28 | GitHub username, and links to relevant issue(s)/PR(s). 29 | 30 | ### Code of Conduct 31 | 32 | Please note that the 'traitdataform' project is released with a 33 | [Contributor Code of Conduct](CODE_OF_CONDUCT.md). By contributing to this 34 | project you agree to abide by its terms. 35 | 36 | ### See rOpenSci [contributing guide](https://ropensci.github.io/dev_guide/contributingguide.html) 37 | for further details. 38 | 39 | ### Discussion forum 40 | 41 | Check out our [discussion forum](https://discuss.ropensci.org) if you think your issue requires a longer form discussion. 42 | 43 | ### Prefer to Email? 44 | 45 | Email the person listed as maintainer in the `DESCRIPTION` file of this repo. 46 | 47 | Though note that private discussions over email don't help others - of course email is totally warranted if it's a sensitive problem of any kind. 48 | 49 | ### Thanks for contributing! 
50 | 51 | This contributing guide is adapted from the tidyverse contributing guide available at https://raw.githubusercontent.com/r-lib/usethis/master/inst/templates/tidy-contributing.md 52 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: traitdataform 2 | Type: Package 3 | Title: Formatting and Harmonizing Ecological Trait-Data 4 | Version: 0.6.8 5 | Date: 2022-05-25 6 | Authors@R: c(person("Florian D.", "Schneider",, "florian.dirk.schneider@gmail.com", role = c("aut", "cre"), 7 | comment = c(ORCID = "0000-0002-1494-5684")), 8 | person("Caterina", "Penone", role = c("ctb"), comment = c(ORCID = "0000-0002-8170-6659")), 9 | person("Andreas", "Ostrowski", role = c("ctb"), comment = c(ORCID = "0000-0002-2033-779X")), 10 | person("Dennis", "Heimann", role = c("ctb")), 11 | person("Felix","Neff", role = c("ctb")), 12 | person("Markus J.", "Ankenbrand", role = c("ctb")), 13 | person("Jessica", "Burnett", role = c("ctb")), 14 | person("Iñaki","Ucar", role = c("ctb")), 15 | person("Matthias", "Grenié", role = c("ctb"))) 16 | Description: Assistance for handling ecological trait data and applying the 17 | Ecological Trait-Data Standard terminology (Schneider et al. 2019 18 | <doi:10.1111/2041-210X.13288>). There are two major use cases: (1) preparation of 19 | own trait datasets for publication, and (2) harmonizing 20 | trait datasets from different sources by re-formatting them into a unified 21 | format. See 'traitdataform' website for full documentation. 
22 | License: MIT + file LICENSE 23 | LazyData: TRUE 24 | Depends: 25 | R (>= 2.10) 26 | Imports: 27 | data.table, 28 | stats, 29 | reshape2, 30 | taxize, 31 | getPass, 32 | units, 33 | suppdata, 34 | curl, 35 | XML, 36 | RCurl, 37 | plyr 38 | URL: https://ecologicaltraitdata.github.io/traitdataform/, 39 | https://github.com/ecologicaltraitdata/traitdataform 40 | BugReports: https://github.com/ecologicaltraitdata/traitdataform/issues 41 | VignetteBuilder: knitr 42 | RoxygenNote: 7.2.0 43 | Encoding: UTF-8 44 | Roxygen: list(markdown = TRUE) 45 | Suggests: 46 | knitr, 47 | rmarkdown, 48 | testthat 49 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | YEAR: 2022 2 | COPYRIGHT HOLDER: F.D. Schneider -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | S3method(print,metadata) 4 | S3method(print,thesaurus) 5 | S3method(print,trait) 6 | S3method(print,traitdata) 7 | S3method(rbind,traitdata) 8 | export(as.metadata) 9 | export(as.thesaurus) 10 | export(as.trait) 11 | export(as.traitdata) 12 | export(cast.traitdata) 13 | export(get_gbif_taxonomy) 14 | export(mutate.traitdata) 15 | export(pulldata) 16 | export(standardise_taxa) 17 | export(standardise_traits) 18 | export(standardize) 19 | export(standardize.exploratories) 20 | export(standardize.taxonomy) 21 | export(standardize.traits) 22 | export(standardize_taxa) 23 | export(standardize_traits) 24 | import(RCurl) 25 | import(XML) 26 | import(curl) 27 | import(getPass) 28 | import(stats) 29 | import(suppdata) 30 | import(taxize) 31 | import(units) 32 | importFrom(data.table,as.data.table) 33 | importFrom(data.table,rbindlist) 34 | importFrom(plyr,rbind.fill) 35 | importFrom(reshape2,dcast) 36 | importFrom(reshape2,melt) 
37 | -------------------------------------------------------------------------------- /NEWS.md: -------------------------------------------------------------------------------- 1 | # traitdataform 0.6.8 2 | 3 | - fix test value for changed dependencies: new taxon names on gbif.org. 4 | - add suppdata to NAMESPACE 5 | 6 | # traitdataform 0.6.7 7 | 8 | Dependency on external data has been revised to comply with CRAN policy: 9 | - demo data are now delivered with the package, to enable offline package testing, examples and vignette building; also more detailed documentation on these data was added. 10 | - calls on external data for research purposes now fail with an informative message without causing errors or warnings when the resource is not available 11 | - download URLs for research data are resolved from DOIs using package 'suppdata' (where possible) to provide higher stability for external resources 12 | 13 | # traitdataform 0.6.6 14 | 15 | - fix external URLs to Dryad (pull request #45 by @Rekyt) 16 | 17 | # traitdataform 0.6.5 18 | 19 | - update Rd documentation pages to include return value 20 | 21 | # traitdataform 0.6.4 22 | 23 | - fix tidyverse URL in README 24 | 25 | # traitdataform 0.6.3 26 | 27 | - fix: reference to doi in Rd documentation syntax 28 | 29 | # traitdataform 0.6.2 30 | 31 | - update call to package `units` (pull request #42 by Iñaki Ucar) 32 | - update new taxon names in tests 33 | 34 | # traitdataform 0.6.1 35 | 36 | - fix to comply to CRAN policy: Packages which use Internet resources should fail gracefully with an informative message if the resource is not available or has changed (and not give a check warning nor error). 37 | 38 | 39 | # traitdataform 0.6.0 40 | 41 | Major changes: 42 | 43 | - update internal terms to v0.10 of the Ecological Trait-data Standard (ETS). 
44 | In ETS v0.10 the assignment of user-defined and standardised measurement values and trait and taxon assignments has been updated, to match labelling practice in DwC and provide a more intuitive content for those fields. Accordingly 45 | - the v0.9.1 terms `scientificName`, `traitName`, `traitValue`, and `traitUnit` have been renamed into `verbatimScientificName`, `verbatimTraitName`, `verbatimTraitValue`, and `verbatimTraitUnit` with new identifiers. 46 | - the terms `scientificNameStd`, `traitNameStd`, `traitValueStd`, and `traitUnitStd` have been renamed into `scientificName`, `traitName`, `traitValue`, and `traitUnit` and now form the default suggested content for trait data complying with the ETS. Definitions have been harmonized. The original *Std identifiers now resolve to these new terms. 47 | 48 | This causes the output file to differ from earlier outputs. To maintain compatibility with earlier versions, an option `conformsTo` has been added to `as.traitdata()` to return the data table in terms of version 0.9. By default, output is returned in ETS version 0.10. Data releases created by `traitdataform` should always be labelled with the version of ETS that applies to the data! 49 | 50 | 51 | # traitdataform 0.5.7 52 | 53 | - Preparing for R 4.0.0 change to read.table() and data.frame(): handling of factor data (trait and taxon names) is now locale-independent 54 | - updating download links for Data Dryad 55 | 56 | # traitdataform 0.5.6 57 | 58 | - fix broken data calls to datadryad.org 59 | 60 | # traitdataform 0.5.5 61 | 62 | - upgrading vignette to knitr v1.23 upon request of CRAN 63 | 64 | # traitdataform 0.5.4 65 | 66 | - enforce UTF-8 environment in Latin-1 locale now works for 67 | 68 | # traitdataform 0.5.3 (released on CRAN 2019-04-16) 69 | 70 | - character issues in strict Latin-1 locale have been resolved (apostrophe and en-dash) 71 | - a dependency on an external resource (ETS.csv) is now static. 
72 | 73 | # traitdataform 0.5.2 (released on CRAN 30.12.2018) 74 | 75 | - solve minor compliance issues with CRAN: use LICENSE template 76 | 77 | # traitdataform 0.5.1 78 | 79 | - thorough spell-checking 80 | - set heavy examples to 'dontrun' 81 | 82 | # traitdataform 0.5.0 (released 04.12.2018) 83 | 84 | [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.1921175.svg)](https://doi.org/10.5281/zenodo.1921175) 85 | 86 | stable, for submission on CRAN. From this point on, backwards compatibility will be maintained. 87 | 88 | minor changes (since v0.4): 89 | 90 | - complete package vignette 91 | - improved unit handling 92 | - solid demo data import 93 | 94 | --- 95 | 96 | # traitdataform 0.4.4 97 | 98 | - finalize vignette (include section on writing data) 99 | 100 | # traitdataform 0.4.3 101 | 102 | - update rbind.traitdataform() to specify metadata handling 103 | 104 | # traitdataform 0.4.2 105 | 106 | - finalize package vignette 107 | - simplify demo data calls 108 | - fix handling of measurementID as factor 109 | - fix return of output in standardize() 110 | 111 | # traitdataform 0.4.1 112 | 113 | - update package vignette 114 | - fix print methods 115 | - fix thesaurus function 116 | - set helper functions internal (not exported) 117 | - fix file import for carabids 118 | 119 | # traitdataform 0.4.0 (released 26.11.2018) 120 | 121 | ## bugfixes 122 | 123 | - fix locale issues with pulldata() on linux (still not solved on Mac OS X) 124 | - eliminate dependency on 'reshape' package 125 | 126 | ## Minor changes 127 | 128 | - the functions cast.traitdata() and mutate.traitdata() now support units. 
129 | 130 | # traitdataform 0.3.4 131 | 132 | - update as.trait() and as.thesaurus() as well as print methods 133 | - update dependencies on units() 134 | 135 | # traitdataform 0.3.3 136 | 137 | - rename and update standardize_taxa() <- standardize.taxonomy() 138 | - rename standardize_traits() <- standardize.traits() 139 | - fix environment issues with pulldata() 140 | 141 | # traitdataform 0.3.1 142 | 143 | - clean rewrite of get_gbif_taxonomy() 144 | - test_that implementation 145 | - Travis CI implementation 146 | - add Code of Conduct and CONTRIBUTING 147 | 148 | # traitdataform 0.3.0 149 | 150 | [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.1489528.svg)](https://doi.org/10.5281/zenodo.1489528) 151 | 152 | - solve all errors, warnings and notes on R CHECK 153 | - complete man pages 154 | - fix matching bug for get_gbif_taxonomy() 155 | 156 | --- 157 | 158 | # traitdataform 0.2.6 159 | 160 | - fix function mutate.traitdata() (remove method dependency to dplyr) 161 | - update documentation for as.trait() 162 | - successful build of vignette 163 | 164 | # traitdataform 0.2.5 165 | 166 | - fix export of get_gbif_taxonomy() 167 | - handle subspecies and superspecies cases 168 | - handle warnings in taxon mapping output 169 | - update documentation on taxon mapping 170 | 171 | # traitdataform 0.2.4 172 | 173 | - fix export of `standardize.exploratories()` to include all columns by default. 174 | - start fixing install warnings 175 | 176 | # traitdataform 0.2.3 177 | 178 | - fixing broken NAMESPACE export 179 | - update documentation website 180 | 181 | # traitdataform 0.2.2 182 | 183 | - fix bug in `standardize.taxonomy()`: reset row.names. 184 | - update data attributes in dataset `carabids` for handling in `as.traitdata()`. 
185 | 186 | # traitdataform 0.2.1 187 | 188 | - add function `as.metadata()` for templating of metadata into an object class, including method `print.metadata()` 189 | - update as.traitdata, print.traitdata, rbind.traitdata to make use of `as.metadata` 190 | - provide metadata and thesaurus as attributes in data object 'carabids' 191 | - update `as.traitdata()`: enable forwarding of attributes of input object (for metadata etc. ) 192 | 193 | # traitdataform 0.2 194 | 195 | ## Major changes 196 | 197 | - updated terms to traitdata standard v0.6. 198 | - datasets are not loaded by default, but wait for a prompt using `data()`. Also added documentation for all datasets and added more datasets. 199 | - adding function method `rbind.traitdata()` for merging standardised datasets into a unified table. includes handling of metadata object in attributes 200 | - metadata can be stored in the attributes of a dataset and will be preserved when updating the dataset. Information will be handled when merging or combining multiple datasets. 201 | - trait thesauri can be added more easily from a data frame or from a list of 'trait' objects. 202 | 203 | 204 | ## Minor changes 205 | 206 | - update vignette: add section on merging and combining data. 207 | - added print methods for traits and thesaurus objects. 208 | 209 | 210 | # traitdataform 0.1.7 211 | 212 | ## Major changes 213 | 214 | - add function `standardize.exploratories()` to extract georeference data for a plotID. 215 | - add function `read.service()` (provided by Dennis Heimann & Andreas Ostrowski) for access of datasets on BExIS. I added a secure masked prompt using the 'getPass' package. 
## Bug fixes

- Spelling of 'occurrence'

--------------------------------------------------------------------------------
/R/as.metadata.R:
--------------------------------------------------------------------------------
#' Create metadata for trait dataset
#'
#' Function to create a defined object containing metadata parameters according
#' to a pre-defined template.
#'
#' @param ... named objects of any class. Names must be the same as given by
#'   template or they will be ignored when producing the metadata object.
#'   Alternatively, the first argument may be an existing 'metadata' object or
#'   an unnamed plain list; its elements are then used as the named input, and
#'   any further named arguments take precedence over them.
#' @param template The default template contains the elements as defined by the
#'   Ecological Traitdata Standard. An existing 'metadata' object can be given
#'   here to update it.
#'
#' @return an object of class 'metadata'. If the first argument is a list whose
#'   first element is a 'metadata' object, the arguments are returned
#'   unchanged, wrapped in a plain list.
#' @export
#'
#' @examples
#'
#' # set metadata object
#' a <- as.metadata(author = "Martin Gossner", datasetName = "Heteroptera morphometric traits")
#'
#' # update an existing metadata object
#' a1 <- as.metadata(datasetID = "heteroptera", template = a)
#'

as.metadata <- function(...,
                        template = list(
                          datasetID = NULL,
                          datasetName = NULL,
                          author = NULL,
                          rightsHolder = NULL,
                          bibliographicCitation = NULL,
                          license = NULL,
                          version = NULL,
                          comments = NULL,
                          description = NULL,
                          region = NULL,
                          conformsTo = NULL
                        )
) {

  # Capture the arguments once. Unlike `..1`, this is safe when the function
  # is called without any argument.
  metadata_in <- list(...)

  if (length(metadata_in) > 0L && inherits(metadata_in[[1L]], "list")) {

    first <- metadata_in[[1L]]
    first_name <- names(metadata_in)[1L]
    first_is_unnamed <- is.null(first_name) || is.na(first_name) ||
      !nzchar(first_name)

    if (inherits(first, "metadata")) {
      # Use the fields of the existing object as input. Re-dispatching on the
      # object itself would recurse without end. Explicit arguments are placed
      # first so that they win the name lookup below.
      metadata_in <- c(metadata_in[-1L], unclass(first))
    } else if (length(first) > 0L && inherits(first[[1L]], "metadata")) {
      # A list of metadata objects is passed through, wrapped in a plain list.
      return(metadata_in)
    } else if (first_is_unnamed) {
      # A plain list given positionally is read as a set of metadata fields.
      metadata_in <- c(metadata_in[-1L], first)
    }
    # Otherwise the list is the value of a named field (e.g. several authors)
    # and is handled like any other named argument.
  }

  # Fill every template field from the input, falling back to the template's
  # own value. Input names that are not part of the template are ignored.
  fields <- names(template)
  metadata_out <- lapply(fields, function(field) {
    if (field %in% names(metadata_in)) metadata_in[[field]] else template[[field]]
  })
  names(metadata_out) <- fields

  class(metadata_out) <- c("metadata", "list")
  metadata_out
}


# Print method for 'metadata' objects: writes a short header line followed by
# the citation, license and conformance notes (each only if the field is set).
# Returns `x` invisibly, as print methods conventionally do.
#' @export
print.metadata <- function(x, ...) {

  # header: "<datasetID> : <datasetName> (<version>) by <author>."
  if (!is.null(x$datasetID)) {
    cat("\n", x$datasetID, ": ")
  } else {
    cat("\n[ ] : ")
  }

  if (!is.null(x$datasetName)) {
    cat(x$datasetName)
  }
  if (!is.null(x$version)) {
    cat(" (", x$version, ") ")
  }
  if (!is.null(x$author)) {
    cat(" by", x$author, ".\n")
  }

  # scalar condition, hence `||` rather than the elementwise `|`
  if (!is.null(x$bibliographicCitation) || !is.null(x$license)) {
    cat("\n When using these data, you must acknowledge the following usage policies: \n")
  }

  # cite as:
  if (!is.null(x$bibliographicCitation)) {
    cat("\n Cite this trait dataset as: \n")
    print(x$bibliographicCitation)
  }

  # published under
  if (!is.null(x$license)) {
    cat("\n Published under:", x$license, "\n")
  }

  # conforms to standard:
  if (!is.null(x$conformsTo)) {
    cat("\n This dataset conforms to: ")
    print(x$conformsTo)
    cat("\n")
  }

  invisible(x)
}
--------------------------------------------------------------------------------
/R/cast.traitdata.R:
--------------------------------------------------------------------------------
#' Cast long-table trait data into wide-table format
#'
#' Function to reformat trait data from the long-table into a matrix/wide-table
#' or occurrence table format.
#'
#' @param .data dataset of class 'traitdata' to be cast into wide-table format.
#' @param traits the column name to be kept for parsing into wide-table (default
#'   is `verbatimTraitName`).
#'   Note that any duplicate column that contains trait names,
#'   e.g. `traitName` will be omitted.
#' @param units the column name containing the units of numerical values
#'   (default is `verbatimTraitUnit`).
#' @param values the column name containing the trait values to be used to fill
#'   the matrix (default is `verbatimTraitValue`). Duplicate columns (e.g.
#'   `traitValue`) will be omitted. See notes.
#' @param fun.aggregate option for [reshape2::dcast()] to define method of
#'   aggregation.
#'
#' @details The wide-table will be composed while preserving the detail given in
#'   the dataset (occurrence level or taxa level). The cells will be filled
#'   with the values from 'verbatimTraitValue'.
#'
#'   If several entries fall into the same cell (e.g. when taxa should be
#'   summarized), provide a function for summarizing in parameter
#'   `fun.aggregate`. This can be any function that takes a vector and returns a
#'   single value for **both numerical and factorial/character input vectors!**.
#'   If `fun.aggregate` is left at `NULL`, it is passed on unchanged to
#'   [reshape2::dcast()], which then falls back to counting the entries
#'   (`length`) and reports this in a message. For heterogeneous factorial or
#'   character input, user action is required for homogenizing the data before
#'   calling `cast.traitdata()`.
#'
#' @section Duplicate columns: The function is currently not able to handle
#'   multiple columns of trait data (incl. names and units). Those are currently
#'   omitted from the output and may be added manually. You can alter the
#'   columns to be used to construct the matrix by specifying those in
#'   parameters `traits`, `values`, and `units`. Automatic handling of the `Std`
#'   columns might be added at a later stage.
#'
#' @return a wide-table data.frame object containing all taxa (and other
#'   differentiating parameters) in rows and all traits (extracted from column
#'   'verbatimTraitName') in columns.
#'
#' @export
#' @importFrom reshape2 dcast
#' @import units
#'
#' @examples
#'
#' pulldata("arthropodtraits")
#' head(arthropodtraits)
#' dataset3 <- as.traitdata(arthropodtraits,
#'                          taxa = "SpeciesID",
#'                          traits = c("Body_Size", "Dispersal_ability",
#'                                     "Feeding_guild","Feeding_guild_short",
#'                                     "Feeding_mode", "Feeding_specialization",
#'                                     "Feeding_tissue", "Feeding_plant_part",
#'                                     "Endophagous_lifestyle", "Stratum_use",
#'                                     "Stratum_use_short"),
#'                          units = c(Body_Size = "mm"),
#'                          keep = c(measurementRemark = "Remark"),
#'                          metadata = as.metadata(
#'                            license = "http://creativecommons.org/publicdomain/zero/1.0/"
#'                          )
#' )
#'
#' head(dataset3)
#'
#' dd3 <-cast.traitdata(dataset3)
#' head(dd3)
#'
#'

cast.traitdata <- function(.data,
                           values = "verbatimTraitValue",
                           traits = "verbatimTraitName",
                           units = "verbatimTraitUnit",
                           fun.aggregate = NULL
                           ) {

  has_units <- units %in% names(.data)

  # Identifier columns: everything that is not the trait name, value, unit or
  # measurement ID. setdiff() works on the names only, so it stays correct
  # when a single column remains (no drop to a vector) or when nothing matches.
  columns <- setdiff(names(.data), c(traits, values, units, "measurementID"))

  # Units of each trait, grouped by trait name.
  if (has_units) unit_list <- split(.data[, units], f = .data[, traits])

  # dcast() receives the casting formula as a character string:
  # "<id columns joined by +> ~ <trait column>"
  out <- reshape2::dcast(.data,
                         paste(paste(columns, collapse = " + "), "~", traits),
                         value.var = values,
                         fun.aggregate = fun.aggregate,
                         fill = NA)

  if (has_units) {
    # Iterate over the traits that actually became columns of the wide table.
    # Using the names of the split (rather than the factor levels) also covers
    # character trait columns and skips unused factor levels.
    for (i in intersect(names(unit_list), names(out))) {

      # a trait with one consistent unit is reduced to that single unit
      trait_units <- unit_list[[i]]
      if (length(unique(trait_units)) == 1L) {
        trait_units <- unique(trait_units)
      }
      trait_units <- as.character(trait_units)

      # NOTE(review): is_unit() is defined elsewhere in the package; it is
      # assumed to return a single TRUE/FALSE per unit string -- confirm.
      if (all(vapply(trait_units, is_unit, logical(1)))) {
        out[, i] <- as.numeric(as.character(out[, i])) * units::as_units(trait_units)
      } else if (!all(is.na(trait_units))) {
        message(paste0("Provided unit for '", i, "' is not recognised and will be dropped!"))
      }

    }
  }

  out
}
--------------------------------------------------------------------------------
/R/glossary.R:
--------------------------------------------------------------------------------
#' Ecological Trait-data Standard vocabulary (ETS)
#'
#' @description The terms and concepts as defined by the ETS (https://terminologies.gfbio.org/terms/ets/pages/)
"glossary"

# static fallback list of terms of ETS v0.10
fallback <- data.frame(columnName = c(
  "traitID", "scientificName", "traitName", "traitValue", "traitUnit",
  "verbatimScientificName", "verbatimTraitName", "verbatimTraitValue",
  "verbatimTraitUnit", "taxonID", "measurementID", "occurrenceID", "warnings",
  "taxonRank", "kingdom", "phylum", "class", "order", "family", "genus",
  "basisOfRecord", "basisOfRecordDescription", "references",
  "measurementResolution", "measurementMethod", "measurementDeterminedBy",
  "measurementDeterminedDate", "measurementRemarks", "aggregateMeasure",
  "individualCount", "dispersion", "measurementValue_min",
  "measurementValue_max", "measurementAccuracy", "statisticalMethod", "sex",
  "lifeStage", "age", "morphotype", "eventID", "preparations",
  "samplingProtocol", "year", "month", "day", "eventDate", "locationID",
  "habitat", "decimalLongitude", "decimalLatitude", "elevation",
  "geodeticDatum", "verbatimLocality", "country", "countryCode",
  "occurrenceRemarks", "datasetID", "datasetName", "datasetDescription",
  "author", "issued", "version", "bibliographicCitation", "conformsTo",
  "rightsHolder", "rights", "license", "OriginBE", "BEPlotID", "Exploratory",
  "BEType", "identifier", "trait", "broaderTerm", "narrowerTerm",
  "relatedTerm", "valueType", "expectedUnit", "factorLevels",
  "maxAllowedValue", "minAllowedValue", "traitDescription", "comments",
  "source", "scientificName", "traitName", "traitID", "traitValue",
  "traitUnit", "scientificNameStd", "traitNameStd", "traitValueStd",
  "traitUnitStd", "measurementID", "occurrenceID", "basisOfRecord",
  "basisOfRecordDescription", "identifier", "comments", "scientificNameStd",
  "traitNameStd", "traitValueStd", "traitUnitStd"))

# Try to read the current term list from the ETS repository. Both warnings and
# errors raised by the download lead to the static fallback, so that an
# unavailable resource never aborts loading.
glossary <- local({
  use_fallback <- function(cond) {
    message("Direct call to ETS failed. A simplified list of terms has been provided. Please check internet connectivity and re-load package!")
    fallback
  }
  tryCatch(read.csv("https://raw.githubusercontent.com/EcologicalTraitData/ETS/v0.10/ETS.csv"),
           warning = use_fallback,
           error = use_fallback)
})
--------------------------------------------------------------------------------
/R/mutate.traitdata.R:
--------------------------------------------------------------------------------
#' Mutate traits within a traitdata object.
#'
#' This function allows to transform, factorize, or combine trait measurements
#' into compound measurements or update factor levels into binaries.
#'
#' @param .data the traitdata object to transform
#' @param ... named parameters giving definitions of new columns.
#' @param traits (NOT TESTED) the column name to be kept for parsing into
#'   wide-table (default is `verbatimTraitName`). Note that any duplicate column that
#'   contains trait names, e.g. `traitName` will be omitted.
#' @param units (NOT TESTED) the column name containing the units of numerical
#'   values (default is `verbatimTraitUnit`).
13 | #' @param values (NOT TESTED) the column name containing the trait values to be 14 | #' used to fill the matrix (default is `verbatimTraitValue`). Duplicate columns (e.g. 15 | #' `traitValue`) will be omitted. See notes. 16 | #' 17 | #' @return an updated traitdata object with the new trait measures or facts 18 | #' appended to the original table. If the given trait name has been refined, 19 | #' it will be replaced. 20 | #' 21 | #' @details The function handles units for numerical traits and returns the new 22 | #' unit of the computed value in column `verbatimTraitUnit`, if units of input 23 | #' variables were specified according to the units package. Handling of other 24 | #' columns than `verbatimTraitName` and `verbatimTraitValue` is not advised at present. 25 | #' 26 | #' It is advised to mutate traits before applying `standardize.traits()`! If 27 | #' the mutate function is applied to a standardised dataset, the new trait 28 | #' will not be mapped automatically to the provided thesaurus. (automated 29 | #' re-mapping might be added in later versions of the package.) 
#'
#' @export mutate.traitdata
#' @importFrom reshape2 melt
#' @importFrom plyr rbind.fill
#'
#' @examples
#'
#' \dontrun{
#' pulldata("arthropodtraits")
#' dataset3 <- as.traitdata(arthropodtraits,
#'                          taxa = "SpeciesID",
#'                          traits = c("Body_Size", "Dispersal_ability",
#'                                     "Feeding_guild","Feeding_guild_short",
#'                                     "Feeding_mode", "Feeding_specialization",
#'                                     "Feeding_tissue", "Feeding_plant_part",
#'                                     "Endophagous_lifestyle", "Stratum_use",
#'                                     "Stratum_use_short"),
#'                          units = c(Body_Size = "mm", Dispersal_ability = "unitless"),
#'                          keep = c(measurementRemark = "Remark"),
#'                          metadata = list(
#'                            license = "http://creativecommons.org/publicdomain/zero/1.0/"
#'                          )
#' )
#' head(dataset3)
#'
#' updated <- mutate.traitdata(dataset3, predator = Feeding_guild == "c" )
#'
#' head(updated[updated$verbatimTraitName == "predator",])
#'
#' levels(updated$verbatimTraitName)
#'
#' ##
#'
#' pulldata("heteroptera_raw")
#' dataset2 <- as.traitdata(heteroptera_raw,
#'   traits = c("Body_length", "Body_width", "Body_height", "Thorax_length",
#'     "Thorax_width", "Head_width", "Eye_width", "Antenna_Seg1",
#'     "Antenna_Seg2", "Antenna_Seg3", "Antenna_Seg4", "Antenna_Seg5",
#'     "Front.Tibia_length", "Mid.Tibia_length", "Hind.Tibia_length",
#'     "Front.Femur_length", "Hind.Femur_length", "Front.Femur_width",
#'     "Hind.Femur_width", "Rostrum_length", "Rostrum_width",
#'     "Wing_length", "Wing_width"),
#'   taxa = "SpeciesID",
#'   occurrences = "ID"
#' )
#' updated <- mutate.traitdata(dataset2,
#'   Body_shape = Body_length/Body_width,
#'   Body_volume = Body_length*Body_width*Body_height,
#'   Wingload = Wing_length*Wing_width/Body_volume)
#'
#' head(updated[updated$verbatimTraitName %in% c( "Body_volume"),])
#' }

mutate.traitdata <- function(.data,
                             ...,
                             values = "verbatimTraitValue",
                             traits = "verbatimTraitName",
                             units = "verbatimTraitUnit"
                             ) {

  # `verbatimTraitName` is used unquoted inside subset() below; this dummy
  # binding only exists to keep R CMD check from flagging an undefined global.
  verbatimTraitName <- NULL

  stopifnot(is.data.frame(.data) || is.list(.data) || is.environment(.data))

  # Trait names present in the input. Factor levels are used when available;
  # character columns fall back to their distinct non-missing values, because
  # levels() would return NULL for them.
  trait_column <- .data[[traits]]
  if (is.factor(trait_column)) {
    in_traits <- levels(trait_column)
  } else {
    in_traits <- unique(as.character(trait_column))
    in_traits <- in_traits[!is.na(in_traits)]
  }

  # wide table: one column per trait
  temp <- cast.traitdata(.data, traits = traits, values = values, units = units)

  # extract mutations to apply into list (unnamed arguments are ignored)
  out_traits <- as.list(substitute(list(...))[-1])
  out_traits <- out_traits[names(out_traits) != ""]

  # evaluate each definition inside the wide table; later definitions can
  # refer to columns created by earlier ones
  for (col in names(out_traits)) {
    temp[[col]] <- eval(out_traits[[col]], temp, parent.frame())
  }

  # look up the unit carried by every (old and new) trait column, NA if none
  out_units <- data.frame(verbatimTraitName = c(in_traits, names(out_traits)))
  out_units$verbatimTraitUnit <- sapply(
    as.character(out_units$verbatimTraitName),
    function(t) {
      if (inherits(temp[[t]], "units")) {
        as.character(units(temp[[t]]))
      } else {
        NA_character_
      }
    }
  )

  class(temp) <- "data.frame"

  # Identifier columns are all columns that are neither an input trait nor a
  # newly computed one. Taking the names directly (instead of subsetting the
  # data frame first) keeps this correct when only one id column exists.
  id_columns <- names(temp)[!names(temp) %in% c(in_traits, names(out_traits))]

  # back to long format, new traits only
  out <- suppressWarnings(reshape2::melt(temp,
                        measure.vars = names(out_traits),
                        id.vars = id_columns,
                        variable.name = "verbatimTraitName",
                        na.rm = TRUE
                        ))

  names(out)[names(out) == "variable"] <- "verbatimTraitName"
  names(out)[names(out) == "value"] <- "verbatimTraitValue"

  out <- merge(out, out_units, by = "verbatimTraitName")

  # Replace previous entries of re-defined traits and append the new ones.
  # NOTE(review): the output column names are hard-coded to the verbatim*
  # terms even though `traits`/`values`/`units` are parameters (documented as
  # not tested).
  out <- plyr::rbind.fill(
    subset(.data, !verbatimTraitName %in% names(out_traits)),
    subset(out, verbatimTraitName %in% names(out_traits))
  )
  out$verbatimTraitUnit <- as.factor(out$verbatimTraitUnit)

  # sort columns according to glossary of terms
  out <- out[, order(match(names(out), glossary$columnName)), drop = FALSE]

  # carry over the attributes of the input (class, metadata, ...)
  attribs <- attributes(.data)
  attribs$names <- names(out)
  attribs$row.names <- seq_len(nrow(out))
  attributes(out) <- attribs

  return(out)
}


# --------------------------------------------------------------------------------
# /R/rbind.traitdata.R:
# --------------------------------------------------------------------------------
#'Combine trait datasets
#'
#'Method for function `rbind()` to append objects of class 'traitdata' to each
#'other.
#'
#'@param ... two or more objects of class traitdata.
#'@param metadata a list of metadata entries which are to be added as
#'  dataset-level information.
#'@param datasetID a vector of the same length as number of objects. If `NULL`
#'  (default), object names will be returned as ID.
#'@param metadata_as_columns logical (defaults to FALSE) or vector of columns to
#'  return. If TRUE , the output will contain the "author", "license",
#'  "datasetName" and (autogenerated) "datasetID" name, if those are provided.
#'  If character vector, the output will contain the listed columns.
#'@param drop FALSE by default. If true, columns that are not present in all
#'  datasets will be dropped.
#'
#'@details Metadata are ideally already included in the datasets as attributes
#'  (see `?as.traitdata`). The function `rbind.traitdata()` takes a list of
#'  lists as its metadata argument. The outer list must have the same length as
#'  the provided objects to combine, with each entry containing objects
#'  according to the terms of the Ecological Traitdata Standard
#'  (http://ecologicaltraitdata.github.io/ETS/#metadata-vocabulary).
#'
#'  A lookup table for dataset details will be appended as an attribute to the
#'  output dataset, linked to each entry via the field `datasetID`. It can be
#'  accessed by calling `attributes()$datasets`.
28 | #' 29 | #'@export 30 | #'@importFrom data.table rbindlist as.data.table 31 | #' 32 | #' @examples 33 | #' 34 | #' pulldata("carabids") 35 | #' 36 | #' dataset1 <- as.traitdata(carabids, 37 | #' taxa = "name_correct", 38 | #' traits = c("body_length", "antenna_length", "metafemur_length"), 39 | #' units = "mm", 40 | #' keep = c(datasetID = "source_measurement", measurementRemarks = "note"), 41 | #' metadata = as.metadata( 42 | #' bibliographicCitation = c( 43 | #' "van der Plas et al. (2017) Methods in Ecol. & Evol., doi: 10.1111/2041-210x.12728" 44 | #' ), 45 | #' author = "Fons van der Plas", 46 | #' license = "http://creativecommons.org/publicdomain/zero/1.0/" 47 | #' ) 48 | #' ) 49 | #' 50 | #' 51 | #' traits1 <- as.thesaurus( 52 | #' body_length = as.trait("body_length", 53 | #' expectedUnit = "mm", valueType = "numeric", 54 | #' identifier = "http://t-sita.cesab.org/BETSI_vizInfo.jsp?trait=Body_length"), 55 | #' antenna_length = as.trait("antenna_length", 56 | #' expectedUnit = "mm", valueType = "numeric", 57 | #' identifier = "http://t-sita.cesab.org/BETSI_vizInfo.jsp?trait=Antenna_length"), 58 | #' metafemur_length = as.trait("metafemur_length", 59 | #' expectedUnit = "mm", valueType = "numeric", 60 | #' identifier = "http://t-sita.cesab.org/BETSI_vizInfo.jsp?trait=Femur_length") 61 | #') 62 | #' 63 | #' dataset1Std <- standardize.traits(dataset1, thesaurus = traits1) 64 | #' 65 | #' # occurrence table: 66 | #' 67 | #' pulldata("heteroptera_raw") 68 | #' 69 | #' dataset2 <- as.traitdata(heteroptera_raw, 70 | #' taxa = "SpeciesID", 71 | #' traits = c("Body_length", "Antenna_Seg1", "Antenna_Seg2", 72 | #' "Antenna_Seg3", "Antenna_Seg4", "Antenna_Seg5", "Hind.Femur_length"), 73 | #' units = "mm", 74 | #' keep = c(sex = "Sex", references = "Source", lifeStage = "Wing_development"), 75 | #' metadata = as.metadata( 76 | #' bibliographicCitation = "Gossner et al. (2015) Ecology, 96:1154. 
#'       doi: 10.1890/14-2159.1",
#'     author = "Martin Gossner",
#'     license = "http://creativecommons.org/publicdomain/zero/1.0/"
#'   )
#' )
#'
#'dataset2 <- mutate.traitdata(dataset2,
#'   antenna_length = Antenna_Seg1 + Antenna_Seg2 + Antenna_Seg3 + Antenna_Seg4 + Antenna_Seg5
#'   )
#'
#'
#' traits2 <- as.thesaurus(
#'    Body_length = as.trait("body_length",
#'      expectedUnit = "mm", valueType = "numeric",
#'      identifier = "http://t-sita.cesab.org/BETSI_vizInfo.jsp?trait=Body_length"),
#'    antenna_length = as.trait("antenna_length",
#'      expectedUnit = "mm", valueType = "numeric",
#'      identifier = "http://t-sita.cesab.org/BETSI_vizInfo.jsp?trait=Antenna_length"),
#'    Hind.Femur_length = as.trait("metafemur_length",
#'      expectedUnit = "mm", valueType = "numeric",
#'      identifier = "http://t-sita.cesab.org/BETSI_vizInfo.jsp?trait=Femur_length")
#')
#'
#' dataset2Std <- standardize_traits(dataset2, thesaurus = traits2)
#'
#' database <- rbind(dataset1Std, dataset2Std,
#'                 datasetID = c("vanderplas17", "gossner15"),
#'                 metadata_as_columns = c("author"))
#' head(database)

rbind.traitdata <- function(...,
                            metadata = NULL,
                            datasetID = NULL,
                            metadata_as_columns = FALSE,
                            drop = NULL # drop columns that are not present in all datasets
                            ) {

  # Object names of the inputs, used as fallback dataset IDs. Each expression
  # in `...` is deparsed on its own, so names containing "x" or "(" stay
  # intact and long argument lists are not truncated.
  input_names <- vapply(
    as.list(substitute(list(...)))[-1],
    function(expr) gsub(" ", "", paste(deparse(expr), collapse = "")),
    character(1)
  )
  input_names <- as.factor(unname(input_names))

  # compose list of input objects
  input <- list(...)

  # Dataset-level metadata: use the list supplied by the caller (one entry per
  # dataset), otherwise fall back to the 'metadata' attribute of each input.
  if (!is.null(metadata) && length(metadata) != length(input)) {
    warning("'metadata' must be a list with one entry per dataset; using the 'metadata' attributes of the datasets instead.",
            call. = FALSE)
    metadata <- NULL
  }
  if (is.null(metadata)) {
    metadata <- lapply(input, function(x) attributes(x)$metadata)
  }

  # add datasetID value as column in core data
  for (i in seq_along(input)) {
    # get datasetID from metadata, provided vector, or object names (in that order)
    if (is.null(metadata[[i]]$datasetID)) {
      if (!is.null(datasetID) && length(datasetID) == length(input)) {
        metadata[[i]]$datasetID <- datasetID[i]
      } else {
        metadata[[i]]$datasetID <- input_names[[i]]
      }
    }
    input[[i]]$datasetID <- metadata[[i]]$datasetID
  }
  dataset_ids <- vapply(metadata, function(x) as.character(x$datasetID)[1], character(1))
  names(metadata) <- dataset_ids
  names(input) <- dataset_ids


  ##### check for compatibility of used terms in datasets

  terms_used <- lapply(input, colnames)

  traits_standardized <- vapply(terms_used, function(x) "traitNameStd" %in% x, logical(1))
  taxa_standardized <- vapply(terms_used, function(x) "scientificNameStd" %in% x, logical(1))

  # distinct entries of a column (factor levels, or unique values otherwise)
  distinct_values <- function(column) {
    if (is.factor(column)) return(levels(column))
    found <- unique(as.character(column))
    found[!is.na(found)]
  }
  # TRUE if no value occurs in more than one dataset
  no_overlap <- function(value_list) {
    all_values <- unlist(value_list)
    length(all_values) == length(unique(all_values))
  }

  # Check if trait names are compatible
  traits <- lapply(input, function(x) distinct_values(x$traitName))
  if (all(traits_standardized)) {
    traitsStd <- lapply(input, function(x) distinct_values(x$traitNameStd))
  } else {
    traitsStd <- NA
  }

  if (no_overlap(traits) && no_overlap(traitsStd)) {
    warning("There seems to be no overlap in trait names of the provided datasets. \nIt is recommended to map 'traitNameStd' of each dataset to the same thesaurus or ontology!")
  }

  # check if taxon names are compatible (ETS terms start with a lower-case 's')
  taxa <- lapply(input, function(x) distinct_values(x$scientificName))
  if (all(taxa_standardized)) {
    taxaStd <- lapply(input, function(x) distinct_values(x$scientificNameStd))
  } else {
    taxaStd <- NA
  }

  if (no_overlap(taxa) && no_overlap(taxaStd)) {
    warning("There seems to be no overlap in taxon names of the provided datasets!\nIt is recommended to map 'scientificNameStd' of each dataset to the same thesaurus or ontology!")
  }

  # rbind data objects in input list
  input <- lapply(input, data.table::as.data.table)

  out <- data.table::rbindlist(input, use.names = TRUE, fill = TRUE)

  out <- as.data.frame(out)

  # optionally keep only the columns that all datasets have in common
  if (isTRUE(drop)) {
    shared_columns <- Reduce(intersect, lapply(input, names))
    out <- out[, names(out) %in% shared_columns, drop = FALSE]
  }

  # match metadata attributes according to datasetID

  add_metadata_columns <- isTRUE(metadata_as_columns) ||
    (is.character(metadata_as_columns) &&
       any(metadata_as_columns %in% names(metadata[[1]])))

  if (add_metadata_columns) {

    ##### make metadata lookup table according to input in metadata_as_column

    if (is.character(metadata_as_columns)) {
      lookup_columns <- unique(c("datasetID", metadata_as_columns))
    } else {
      lookup_columns <- c("datasetID", "datasetName", "author", "license")
    }

    # one row per dataset; entries missing from the metadata become "NA"
    attr_table <- do.call(rbind.data.frame, lapply(metadata, function(x) {
      entries <- lapply(lookup_columns, function(key) {
        value <- x[[key]]
        if (is.null(value)) "NA" else value
      })
      names(entries) <- lookup_columns
      entries
    }))

    # merge lookup table

    out <- merge(out, attr_table, by = "datasetID")

  }

  # sort columns according to glossary of terms
  out <- out[, order(match(names(out), glossary$columnName)), drop = FALSE]

  # maintain attributes (taken from the first dataset)
  attribs <- attributes(..1)
  attribs$names <- names(out)
  attribs$row.names <- seq_len(nrow(out))

  attributes(out) <- attribs
  attr(out, "metadata") <- metadata

  return(out)
}

# --------------------------------------------------------------------------------
# /R/standardize.exploratories.R:
# --------------------------------------------------------------------------------
#' Standardize Georeference from Biodiversity Exploratories Plot ID
#'
#' @description Adds columns of georeference to trait-data table if measurements
#'   relate to specimens from the Biodiversity Exploratories plots or regions.
#'
#'   This function requires valid credentials for the Biodiversity Exploratories
#'   Information System (BExIS)!
#'
#' @param x A traitdata table of class 'traitdata'.
#' @param plots Name of column containing the plot IDs. Must match the
#'   Exploratories EP_PlotID scheme (e.g. AEG1, AEG12, HEW21, ...).
#' @param ... If input is a rawdata table of type species-trait matrix or
#'   occurence table (wide table) then provide parameters according to
#'   `as.traitdata()`.
#' @param user User name for Biodiversity Exploratories Information System
#'   (BExIS, https://www.bexis.uni-jena.de); required for download of exact
#'   geolocation.
#' @param pswd password for above request. Will be prompted for if not provided.
#' @param getdata logical; if `FALSE` it suppresses the extraction of location
#'   data from BExIS, and will not ask for user credentials. Instead, public
#'   data of less precision will be used.
#' @param fillall if TRUE (default), the output will contain all terms suggested
#'   by the glossary and fill empty columns with NA. This is required for an
#'   upload of the data to BExIS.
#' @param verbose logical; if `FALSE` all messages will be suppressed.
#'
#'
#' @return A traitdata object with harmonized location data from the context of the Biodiversity Exploratories project.
#' @export
#'
#' @import getPass
#' @examples
#'
#' \dontrun{
#' moths <- read.service(21247, dec = ",")
#'
#' dataset1 <- as.traitdata(moths, taxa = "species", traits = c(body_mass =
#'   "weight", wing_length = "wing_length", wing_width = "wing_width", wing_area =
#'   "wing_area", wing_loading = "wing_loading"),
#'   keep = c(locationID = "plot"))
#'
#' dataset1Std <- standardize.exploratories(dataset1)
#' }

standardize.exploratories <- function(x,
                          plots = "locationID",
                          user = NULL,
                          pswd = NULL,
                          getdata = !is.null(user),
                          fillall = TRUE,
                          ...,
                          verbose = NULL) {

  # Dummy bindings for names used unquoted in subset() below (R CMD check).
  # They also make `-EP_PlotID0` in the final subset() a harmless no-op when
  # the plot table has no such column.
  EP_PlotID <- EP_PlotID0 <- MIP <- Plot_ID <- Plotlevel <- SoilTypeWRB <- VIP <- NULL

  if (inherits(x, "data.frame") && !inherits(x, "traitdata")) x <- as.traitdata(x, ...)

  # The scheme detection below works on factor levels; without this coercion a
  # character column would have no levels and every check would pass vacuously.
  if (!is.factor(x[, plots])) x[, plots] <- as.factor(x[, plots])

  if (getdata) {

    if (is.null(user)) user <- readline("BExIS user name: ")
    if (is.null(pswd)) pswd <- getPass::getPass("BExIS password: ", noblank = FALSE)

    #basic <- read.service(1000, user = user, pswd = pswd) # trying to access open data with rounded location data; but no webservice is supported.
    basic_ep <- read.service(20826, user = user, pswd = pswd)
    gridplots <- read.service(20907, user = user, pswd = pswd)

    # do not keep credentials around longer than needed
    rm(user, pswd)


    BEplots <- merge(subset(gridplots, select = c("Plot_ID",
                                                  "Plotlevel",
                                                  "Exploratory",
                                                  "Landuse",
                                                  "Longitude_Dec_Plotcenter",
                                                  "Latitude_Dec_Plotcenter")
                            ),
                     subset(basic_ep, select = c("EP_PlotID",
                                                 "PlotID",
                                                 "VIP",
                                                 "MIP",
                                                 "LocalName",
                                                 "SoilTypeWRB",
                                                 "Elevation")
                            ),
                     by.x = "Plot_ID", by.y = "PlotID", all.x = TRUE)

    names(BEplots) <- c("Plot_ID", "Plotlevel", "Exploratory", "BEType",
                        "decimalLongitude", "decimalLatitude", "EP_PlotID",
                        "VIP", "MIP", "verbatimLocality", "SoilTypeWRB",
                        "elevation")


  }
  # NOTE(review): when `getdata` is FALSE, `BEplots` is not defined in this
  # function; presumably it is shipped as internal package data
  # (R/sysdata.rda) -- TODO confirm.

  # Detect which plot ID scheme is used in `x`; a later match overrides an
  # earlier one.
  temp <- NULL

  if (all(levels(x[, plots]) %in% BEplots$EP_PlotID0)) {
    temp <- subset(BEplots, EP_PlotID0 %in% x[, plots])
    temp$BEPlotID <- temp$EP_PlotID

    # translate each *level* (not each row) to its EP_PlotID
    levels(x[, plots]) <- BEplots$EP_PlotID[match(levels(x[, plots]), BEplots$EP_PlotID0)]

  }

  if (all(levels(x[, plots]) %in% BEplots$EP_PlotID)) {
    temp <- subset(BEplots, EP_PlotID %in% x[, plots])
    temp$BEPlotID <- temp$EP_PlotID

  }

  if (all(levels(x[, plots]) %in% BEplots$Plot_ID)) {
    temp <- subset(BEplots, Plot_ID %in% x[, plots])
    temp$BEPlotID <- temp$Plot_ID
  }

  if (is.null(temp)) {
    stop("The plot IDs in column '", plots,
         "' do not match any known Biodiversity Exploratories plot ID scheme.",
         call. = FALSE)
  }

  temp$OriginBE <- TRUE
  temp$habitat <- c("forest", "grassland")[temp$BEType]
  temp$geodeticDatum <- "WGS84"
  temp$country <- "Germany"
  temp$countryCode <- "DE"


  out <- merge.data.frame(x, subset(temp, select = c(-Plot_ID, -Plotlevel, -EP_PlotID, -VIP, -MIP, -SoilTypeWRB, -EP_PlotID0)),
               by.x = plots, by.y = "BEPlotID")
  names(out)[1] <- "BEPlotID"

  if (fillall) {
    # add all glossary terms to table and fill empty ones with NA

    term_names <- glossary$columnName[glossary$Namespace != "Traitlist"]
    glossarynames <- as.data.frame(t(data.frame(term_names, row.names = term_names)))[0, ]
    out <- data.table::rbindlist(list(glossarynames, out), fill = TRUE)
    out <- as.data.frame(out)

  } else {

    # sort columns according to glossary of terms
    out <- out[, order(match(names(out), glossary$columnName)), drop = FALSE]

  }

  # keep attributes of x; names and row names must describe the merged table
  attribs <- attributes(x)
  attribs$names <- names(out)
  attribs$row.names <- seq_len(nrow(out))
  attributes(out) <- attribs
  attr(out, "exploratories") <- temp

  return(out)
}

# --------------------------------------------------------------------------------
# /R/sysdata.rda:
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/EcologicalTraitData/traitdataform/a1a0b4656a404bddcd229ee2d42d430908be9d94/R/sysdata.rda
# --------------------------------------------------------------------------------
# /R/thesaurus.R:
# --------------------------------------------------------------------------------
#' Create a list of trait definitions (a thesaurus of traits)
#'
#' Create a trait thesaurus object for use as a reference object/lookup table of
#' traits within function `standardize.traits()`.
#'
#' @param ... multiple objects of class 'trait' (produced by function
#'   `as.trait()`) or a data.frame containing columns according to the terms
#'   provided by https://ecologicaltraitdata.github.io/ETS/#terms-for-thesauri.
#' @param replace named character vector, with new names as values, and old names as names.
#'
#' @return a list of formalized objects of class 'trait', as returned by
#'   function as.trait().
#'
#' @details the object class 'trait' comprises necessary information to map a
#'   trait name to a trait definition, a target unit and a globally unique
#'   identifier. The thesaurus will be used in function 'standardize.traits()'
#'   to apply unit conversion and factor level harmonization.
#'
#' @export
#'
#' @examples
#'
#' # provide traitlist by defining individual traits using function `as.trait()`:
#'
#' traitlist <- as.thesaurus(body_length = as.trait("body_length", expectedUnit = "mm",
#'                     valueType = "numeric",
#'                     identifier = "http://t-sita.cesab.org/BETSI_vizInfo.jsp?trait=Body_length"),
#'           antenna_length = as.trait("antenna_length", expectedUnit = "mm",
#'                     valueType = "numeric",
#'                     identifier = "http://t-sita.cesab.org/BETSI_vizInfo.jsp?trait=Antenna_length"),
#'           metafemur_length = as.trait("metafemur_length", expectedUnit = "mm",
#'                     valueType = "numeric",
#'                     identifier = "http://t-sita.cesab.org/BETSI_vizInfo.jsp?trait=Femur_length")
#' )
#'
#' # provide traitlist from data frame object:
#'
#' as.thesaurus(data.frame(
#'   trait = c("body_length", "antenna_length", "metafemur_length", "eyewidth_corr"),
#'   expectedUnit = "mm",
#'   valueType = "numeric",
#'   traitDescription = c("body length in mm", "length of antenna in mm",
#'                        "length of metafemur in mm", "eye width in mm"),
#'   identifier = c("http://t-sita.cesab.org/BETSI_vizInfo.jsp?trait=Body_length",
#'                  "http://t-sita.cesab.org/BETSI_vizInfo.jsp?trait=Antenna_length",
#'                  "http://t-sita.cesab.org/BETSI_vizInfo.jsp?trait=Femur_length",
#'                  "http://t-sita.cesab.org/BETSI_vizInfo.jsp?trait=Eye_diameter"
#'                  )
#'   )
#' )
#'
#' # provide traitlist from remote archive with renaming
#' # (pulls from https://github.com/EcologicalTraitData/TraitDataList)
#'
#' \dontrun{
#' traits1 <- as.thesaurus(read.csv("https://git.io/fpsj5"),
#'                         replace = c(traitID = "identifier",
#'                                     traitName = "trait",
#'                                     traitUnit = "expectedUnit",
#'                                     Comments = "comments")
#'                         )
#' }

as.thesaurus <- function(...,
                         replace = NULL
                         ) {

  from_table <- inherits(..1, "data.frame")
  from_traits <- inherits(..1, "trait")

  # validate before anything else, so that invalid input produces this message
  # rather than a failure further down
  if (!from_traits && !from_table) stop("no valid input for creating a thesaurus")

  if (from_table) {
    input <- ..1

    # rename columns: names of `replace` are the old column names, its values
    # the new ones
    if (!is.null(replace)) {
      replacement <- replace[names(input)]
      names(input)[!is.na(replacement)] <- replacement[!is.na(replacement)]
    }

    # one single-row table per trait, converted to a plain list of arguments
    # (factors as character) and passed on to as.trait()
    input <- split(input, f = as_factor_clocale(input$trait))

    input <- lapply(input, function(d) {

      lapply(d, function(x) if (is.factor(x)) as.character(x) else x)

    })

    out <- lapply(input, function(y) do.call(as.trait, y))

  }

  if (from_traits) {
    out <- list(...)
  }

  # entries that were passed without a name are named after their own trait
  entry_names <- names(out)
  if (is.null(entry_names)) entry_names <- rep("", length(out))
  unnamed <- is.na(entry_names) | entry_names == ""
  entry_names[unnamed] <- vapply(out[unnamed],
                                 function(entry) as.character(entry$trait)[1],
                                 character(1))
  names(out) <- entry_names

  # the list names define the trait names used for matching
  traitNames <- as_factor_clocale(names(out))
  names(out) <- traitNames
  for (i in seq_along(out)) out[[i]]$trait <- traitNames[[i]]

  class(out) <- c("thesaurus", "list")

  return(out)
}

#' @export
print.thesaurus <- function(x, ...) {
  # print every trait definition with its own print method
  for (entry in x) print(entry)
  #out <- do.call(rbind, lapply(x,data.frame))
  #print(out)
  invisible(x)
}

#' Create a trait definition
#'
#' Creating an object containing a standardised trait definition according to
#' the Ecological Traitdata Standard. Parameters correspond to the definition at
#' \url{https://ecologicaltraitdata.github.io/ETS/#terms-for-trait-definitions}.
#'
#' @param trait A character string, providing an intuitive, human-readable trait
#'   name.
#' @param identifier Unique identifier for the trait, ideally unique and stable
#'   URI which identify the source of the trait definition.
#' @param broaderTerm One or several terms that enclose the trait definition.
#' @param narrowerTerm One or several terms that are enclosed by the trait
#'   definition.
#' @param relatedTerm One or several terms that are related to this term
#'   (ideally given as URI).
127 | #' @param traitDescription A short, unambiguous definition of the trait. May 128 | #' refer to a method of measurement. 129 | #' @param comments Details and Examples for clarification of the trait 130 | #' definition. 131 | #' @param expectedUnit the unit expected for measurement entries. 132 | #' @param maxAllowedValue An upper boundary for accepted numerical values. 133 | #' @param minAllowedValue A lower boundary for accepted numerical values. 134 | #' @param factorLevels A comma separated list of terms comprising the 135 | #' constrained vocabulary for categorical traits or ordinal binary traits. 136 | #' @param replaceFactorLevels A list or vector containing synonymous factor 137 | #' levels to be mapped onto the target factor levels provided in 138 | #' 'factorLevels'. Names of the vector or list entries will be superimposed by 139 | #' entries in 'factorLevels'. 140 | #' @param valueType the type of trait values. Possible entries are 'numeric', 141 | #' 'integer', 'categorical', 'logical', or 'character'. 142 | #' @param source A character string providing a full bibliographic reference to 143 | #' the trait definition (giving title, author, year and publication). 144 | #' @param version A character string containing the version number of the 145 | #' referenced definition (e.g. "v1.2"), if applicable. 146 | #' @param author A character string or object of class 'person' (as created by 147 | #' `as.person()`) attributing the author(s) of the trait definition. 148 | #' @param ... other arguments, passed on to print function. 149 | #' 150 | #' @return A structured data.frame object of class 'trait'. 
#'
#' @export
#'
#' @examples
#'
#' body_length <- as.trait("body_length", expectedUnit = "mm", valueType = "numeric",
#'   identifier = "http://t-sita.cesab.org/BETSI_vizInfo.jsp?trait=Body_length",
#'   traitDescription = "The known longest dimension of the physical structure of organisms",
#'   relationSource = "Maggenti and Maggenti, 2005",
#'   broaderTerm = "http://t-sita.cesab.org/BETSI_vizInfo.jsp?trait=Body_dimension"
#' )
#'
as.trait <- function(
  trait,
  identifier = NA,
  broaderTerm = NA,
  narrowerTerm = NA,
  relatedTerm = NA,
  valueType = NA,
  expectedUnit = NA,
  factorLevels = NA,
  replaceFactorLevels = NA,
  maxAllowedValue = NA,
  minAllowedValue = NA,
  traitDescription = NA,
  comments = NA,
  source = NA,
  version = NA,
  author = NA,
  ...
) {
  # Collect the definition as a plain list; fields that were not supplied stay
  # NA. Anything passed through `...` is accepted but not stored.
  out <- list(trait = trait,
              identifier = identifier,
              broaderTerm = broaderTerm,
              narrowerTerm = narrowerTerm,
              relatedTerm = relatedTerm,
              valueType = valueType,
              expectedUnit = expectedUnit,
              factorLevels = factorLevels,
              replaceFactorLevels = replaceFactorLevels,
              maxAllowedValue = maxAllowedValue,
              minAllowedValue = minAllowedValue,
              traitDescription = traitDescription,
              comments = comments,
              source = source,
              version = version,
              author = author
              )

  class(out) <- c("trait", "list")
  return(out)
}

#' @export
print.trait <- function(x, ...) {

  # A field is shown if it holds at least one non-missing entry. Using all()
  # keeps the condition scalar even for fields with several entries.
  has_value <- function(field) !all(is.na(field))

  cat("\n", as.character(x$trait), ":\n\n")
  if (has_value(x$traitDescription)) cat("\tDefined as:", gsub('(.{1,60})(\\s|$)', '\\1\n\t\t\t', x$traitDescription), "\n")
  if (has_value(x$broaderTerm)) { cat("\tBroader term: "); cat(x$broaderTerm, sep = ";\n\t\t\t"); cat("\n") }
  if (has_value(x$narrowerTerm)) { cat("\tNarrower term: "); cat(x$narrowerTerm, sep = ";\n\t\t\t"); cat("\n") }
  if (has_value(x$relatedTerm)) { cat("\tRelated term: "); cat(x$relatedTerm, sep = ";\n\t\t\t"); cat("\n") }
  if (has_value(x$valueType)) cat("\tValue type: ", x$valueType, "\n")
  if (has_value(x$expectedUnit)) cat("\tExpected unit: ", x$expectedUnit, "\n")
  if (has_value(x$factorLevels)) { cat("\tExpected categories: "); cat(x$factorLevels, sep = "; "); cat("\n") }
  if (has_value(x$comments)) cat("\n\t(", gsub('(.{1,50})(\\s|$)', '\\1\n\t\t\t', x$comments), ")\n")
  if (has_value(x$identifier)) cat("\n\t", x$identifier)
  cat("\n")

  # print methods return their argument invisibly
  invisible(x)
}


# --------------------------------------------------------------------------------
# /R/traitdataform-package.R:
# --------------------------------------------------------------------------------
#' Package 'traitdataform': harmonizing ecological trait data in R
#'
#' This package assists in handling functional trait data and transferring them
#' into the Trait Data Standard (Schneider et al. in preparation).
#'
#' There are two major use cases for the package:
#'
#' - preparation of own trait datasets for upload into public data bases, and
#' - harmonizing trait datasets from different sources by moulding them into a
#'   unified format.
11 | #' 12 | #' The toolset of the package includes 13 | #' 14 | #' - transforming species-trait-matrix or observation table data into a unified 15 | #' long-table format 16 | #' - mapping column names into terms provided in a standard trait vocabulary 17 | #' - matching of species names into GBIF Backbone Taxonomy (taxonomic ontology 18 | #' server) 19 | #' - matching of trait names into a user-provided traitlist, i.e. a thesaurus 20 | #' of traits 21 | #' - unifying trait values into target unit format and legit factor levels 22 | #' - saving trait dataset into a desired format using templates (e.g. for BExIS) 23 | #' 24 | #' 25 | #' A documentation is available online at 26 | #' http://ecologicaltraitdata.github.io/traitdataform/ or offline in the package 27 | #' vignette 'traitdataform'. 28 | #' 29 | #' @keywords internal 30 | #' @name traitdataform-package 31 | NULL 32 | -------------------------------------------------------------------------------- /R/utils.R: -------------------------------------------------------------------------------- 1 | #' BExIS access over a Webservice 2 | #' 3 | #' Read table from a web service. Inherits functionality of `read.table()`. 4 | #' 5 | #' @param datasetid Integer BExIS ID of the requested dataset. 6 | #' @param user User name on BExIS. If not provided, function will prompt input. 7 | #' @param pswd Password on BExIS. If not provided, function will prompt input. 8 | #' @param dec the character used in the file for decimal points. 9 | #' @param na.strings a character vector of strings which are to be interpreted as NA values. Blank fields are also considered to be missing values in logical, integer, numeric and complex fields. 10 | #' @param fill logical. If TRUE then in case the rows have unequal length, blank fields are implicitly added. See 'Details' of `?read.table`. 11 | #' @param sep the field separator character. Values on each line of the file are separated by this character. 
#'   If sep = "" (the default for read.table) the separator is 'white space', that is one or more spaces, tabs, newlines or carriage returns.
#' @param quote the set of quoting characters. To disable quoting altogether, use quote = "". See scan for the behaviour on quotes embedded in quotes. Quoting is only considered for columns read as character, which is all of them unless colClasses is specified.
#' @param fileEncoding character string: if non-empty declares the encoding to be used on a file (not a connection) so the character data can be re-encoded as they are written. See [base::connections()].
#'
#' @author Dennis Heimann, Andreas Ostrowski
#'
#' @return Returns dataset from BExIS as a data.frame.
#'
#' @details `read.service.blocks()` returns a data.frame array as a list of data.frames.
#'
#' @import XML
#' @import RCurl
#' @import getPass
#' @keywords internal
#'


read.service <- function(datasetid,
                         user = NULL, pswd = NULL,
                         dec = ".",
                         na.strings = "NA",
                         fill = FALSE, sep = "\t",
                         quote = if (identical(sep, "\n")) "" else "'\"",
                         fileEncoding = "UTF-8") {

  # Ask interactively for any credential that was not passed in.
  if (is.null(user)) user <- readline("user name: ")
  if (is.null(pswd)) pswd <- getPass::getPass("password: ", noblank = FALSE)

  # NOTE(review): ssl.verifypeer = FALSE switches off TLS certificate
  # verification although user name and password are sent with this request.
  # Left unchanged because the server setup is not visible from here; confirm
  # whether verification can be enabled.
  opts <- RCurl::curlOptions(
    encoding = "CE_UTF8",
    ssl.verifypeer = FALSE,
    httpheader = c(Accept = "text/plain")
  )
  params <- c("datasetId" = datasetid, "username" = user, "password" = pswd)
  data <- RCurl::postForm(
    uri = "https://www.bexis.uni-jena.de/WebServices/DataService.asmx/DownloadData",
    .params = params, .opts = opts, style = "post"
  )

  # The table is delivered as the text value of the first child of the XML root.
  x <- XML::xmlTreeParse(data, asText = TRUE, trim = FALSE)
  txt <- XML::xmlValue(XML::xmlRoot(x)[[1]])

  # Every "#" is replaced by "?" before parsing -- presumably because
  # read.table() would otherwise treat it as a comment character (TODO confirm).
  txt <- gsub("#", "?", txt)

  f <- textConnection(toString(txt))
  # Close the connection even if read.table() fails on malformed content.
  on.exit(close(f), add = TRUE)

  # `f` is a connection, so `fileEncoding` is forwarded unchanged but, as the
  # parameter description says, only applies to files.
  utils::read.table(
    file = f, sep = sep, header = TRUE, dec = dec,
    na.strings = na.strings, fill = fill, quote = quote,
    fileEncoding = fileEncoding
  )
}

#----------------------------------------------------------
#---- Read table with blocks from a web service -----------
#---- returns a data.frame array                -----------
#----------------------------------------------------------

#' @rdname read.service
#' @keywords internal

read.service.blocks <- function(datasetid,
                                user = NULL, pswd = NULL,
                                dec = ".",
                                na.strings = "NA",
                                fill = FALSE,
                                sep = "\t",
                                quote = if (identical(sep, "\n")) "" else "'\"",
                                fileEncoding = "UTF-8") {

  # `user` and `pswd` default to NULL (as in read.service()) so that the
  # interactive prompts below are reachable when they are omitted.
  if (is.null(user)) user <- readline("user name: ")
  if (is.null(pswd)) pswd <- getPass::getPass("password: ", noblank = FALSE)

  # NOTE(review): ssl.verifypeer = FALSE switches off TLS certificate
  # verification although credentials are sent with this request; confirm
  # whether verification can be enabled.
  opts <- RCurl::curlOptions(
    encoding = "CE_LATIN1",
    ssl.verifypeer = FALSE,
    httpheader = c(Accept = "application/x-zip-compressed")
  )
  params <- c(
    "datasetId" = datasetid, "username" = user, "password" = pswd,
    "includeMetadata" = "false"
  )
  zip <- RCurl::postForm(
    uri = "https://www.bexis.uni-jena.de/WebServices/DataService.asmx/DownloadDataText",
    .params = params, .opts = opts, style = "post", binary = TRUE
  )

  # The archive arrives base64-encoded as the text value of the first child of
  # the XML root; decode it and write it to a temporary file.
  x <- XML::xmlTreeParse(zip, trim = FALSE, asText = TRUE)
  bin <- XML::xmlValue(XML::xmlRoot(x)[[1]])
  b <- RCurl::base64Decode(bin, "raw")
  f <- tempfile()
  on.exit(unlink(f), add = TRUE)  # do not leave the downloaded zip behind
  writeBin(b, con = f)

  ex <- utils::unzip(f, exdir = tempdir())
  # Use an "extracted" attribute if the result carries one; otherwise the
  # returned value itself is taken as the vector of extracted file paths.
  files <- attr(ex, "extracted", exact = TRUE)
  if (is.null(files)) files <- ex

  # One data.frame per extracted file.
  lapply(
    files, utils::read.table,
    sep = sep, header = TRUE, dec = dec, na.strings = na.strings,
    fill = fill, quote = quote, fileEncoding = fileEncoding
  )
}


#' Standardise logical variables
#'
#' @param x a vector of two different types of entries (can be of type factor, integer, logical, or character).
#' @param output a switch to set the desired output format. Defaults to "logical", but can be "character", "binary" or "factor".
#' @param categories output target categories for binary/logical traits harmonization if `output` is not set to 'logical'.
#'
#' @keywords internal
#' @return A vector of harmonized logical values.
#'

fixlogical <- function(x, output = "logical", categories = c("No", "Yes")) {

  # Reject unknown output formats up front; switch() would otherwise fall
  # through and silently return NULL.
  output <- match.arg(output, c("logical", "character", "binary", "factor"))

  # Compare case-insensitively.
  x <- factor(tolower(x))

  # ToDo: add test or fuzzy matching algorithm

  # Collapse the known spellings onto the two levels "0" and "1"; entries that
  # match neither list become NA.
  levels(x) <- list(
    "0" = c("0", "no", "n", "nein", "false", "non"),
    "1" = c("1", "yes", "y", "ja", "j", "true", "oui", "si")
  )

  # Relabel "0"/"1" with the requested category names (used by the "factor"
  # and "character" outputs).
  levels(x) <- categories

  # The integer codes of the factor are 1/2, hence the "- 1".
  switch(output,
    factor = x,
    character = as.character(x),
    binary = as.numeric(x) - 1,
    logical = as.logical(as.numeric(x) - 1)
  )
}



#' Check if string is valid unit
#'
#' @param x the value to test; it is passed to `as_units()`.
#' @return `TRUE` if `as_units(x)` succeeds, `FALSE` if it signals an error.
#' @noRd
#' @keywords internal

is_unit <- function(x) {
  # Any error counts as "not a unit", whatever its condition class (the
  # previous check only recognised objects of class "simpleError").
  tryCatch(
    {
      as_units(x)
      TRUE
    },
    error = function(e) FALSE
  )
}



#' Locale-independent factor conversion
#'
#' @param x a vector
#'
#' @keywords internal
#' @return the same vector, but with any factorial content or columns with repeating character strings converted to factors, applying a locale-independent sorting.
#' @note Only for internal application in [as.traitdata()]. This deals with the problem described by Kurt Hornik [(here)](https://developer.r-project.org/Blog/public/2020/02/16/stringsasfactors/index.html) for changes coming in R 4.0.0. In traitdataform, this concerns the locale-dependence of taxon and trait name sorting when calling published trait-datasets. For traits, the sorting order of factor levels will be superimposed by the order given in the thesaurus specification (if provided).
as_factor_clocale <- function(x) {
  # Switch collation to the "C" locale for the duration of this call so the
  # order of the factor levels does not depend on the session locale. The
  # previous setting is restored on exit, also when factor() fails.
  lc_collate <- Sys.getlocale("LC_COLLATE")
  on.exit(Sys.setlocale("LC_COLLATE", lc_collate), add = TRUE)
  Sys.setlocale("LC_COLLATE", "C")

  factor(as.character(x))
}


#' Convert output to ETS v0.9
#'
#' Renames the ETS v0.10 column names to their v0.9 counterparts and updates
#' the `conformsTo` entry of the "metadata" attribute. Objects whose metadata
#' do not declare ETS v0.10 (including objects without metadata) are returned
#' unchanged.
#'
#' @param x an object with column names and, optionally, a "metadata"
#'   attribute containing a `conformsTo` entry.
#' @return `x`, with columns renamed if it declared ETS v0.10.
#'
#' @noRd
#' @import stats
#' @keywords internal

convert.ets0.9 <- function(x) {
  conforms_to <- attr(x, "metadata")$conformsTo

  # isTRUE() keeps the test safe when the attribute or entry is missing
  # (the comparison then has length zero) or NA.
  if (isTRUE(conforms_to == "Ecological Trait-data Standard (ETS) v0.10")) {

    oldnames <- c("scientificName", "verbatimScientificName", "traitName", "verbatimTraitName", "traitUnit", "verbatimTraitUnit", "traitValue", "verbatimTraitValue")
    newnames <- c("scientificNameStd", "scientificName", "traitNameStd", "traitName", "traitUnitStd", "traitUnit", "traitValueStd", "traitValue")

    # Position of every present column in `oldnames` (NA if it is not an ETS
    # v0.10 term); rename only the matched columns, in place.
    idx <- match(names(x), oldnames)
    hit <- !is.na(idx)
    colnames(x)[hit] <- newnames[idx[hit]]

    attr(x, "metadata")$conformsTo <- "Ecological Trait-data Standard (ETS) v0.9"
  }

  x
}

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Package 'traitdataform': Formatting and Harmonizing Ecological Trait-Data

[![lifecycle](https://img.shields.io/badge/lifecycle-stable-brightgreen.svg)](https://lifecycle.r-lib.org/articles/stages.html#stable)
[![R-CMD-check](https://github.com/EcologicalTraitData/traitdataform/workflows/R-CMD-check/badge.svg)](https://github.com/EcologicalTraitData/traitdataform/actions)
[![Project Status: Active - The project has reached a stable, usable state and is being actively developed.](https://www.repostatus.org/badges/latest/active.svg)](https://www.repostatus.org/#active)
[![CRAN status](https://www.r-pkg.org/badges/version/traitdataform)](https://cran.r-project.org/package=traitdataform)
[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.1489527.svg)](https://doi.org/10.5281/zenodo.1489527) 8 | 9 | 10 | Assistance for handling functional trait data and transferring them into the Ecological Trait-Data Standard (Schneider et al. 2019 Towards an Ecological Trait-data Standard, *Methods in Ecology and Evolution* DOI: [10.1111/2041-210X.13288]( https://doi.org/10.1111/2041-210X.13288)). 11 | 12 | There are two major use cases for the package: 13 | 14 | - preparation of own trait datasets for upload into public data bases, and 15 | - harmonizing trait datasets from different sources by moulding them into a unified format. 16 | 17 | The toolset of the package includes 18 | 19 | - transforming species-trait-matrix or occurrence table data into a unified long-table format 20 | - mapping column names into terms provided in a standard trait vocabulary 21 | - matching of species names into GBIF Backbone Taxonomy (taxonomic ontology server) 22 | - matching of trait names into a user-provided trait list, i.e. a thesaurus of traits 23 | - unifying trait values into target unit format and legit factor levels 24 | - saving trait dataset into a desired format using templates (e.g. for BExIS) 25 | 26 | ## Installation 27 | 28 | Install from CRAN via 29 | 30 | ```r 31 | install.packages('traitdataform') 32 | ``` 33 | 34 | The development version of the package can be installed from Github via the 'devtools' package 35 | 36 | ```r 37 | install.packages('devtools') 38 | devtools::install_github('EcologicalTraitData/traitdataform') 39 | ``` 40 | ## System requirements 41 | 42 | The package requires the [udunits library](https://www.unidata.ucar.edu/software/udunits/) for unit conversion to be installed. It should install automatically, but if the package installation throws an error, it might need manual installation (see [this thread for help](https://github.com/r-quantities/units/issues/1)). 
43 | 44 | ## Usage 45 | 46 | ```r 47 | 48 | pulldata("carabids") 49 | 50 | thesaurus <- as.thesaurus( 51 | body_length = as.trait("body_length", 52 | expectedUnit = "mm", 53 | identifier = "length" 54 | ), 55 | antenna_length = as.trait("antenna_length", 56 | expectedUnit = "mm", 57 | identifier = "antenna" 58 | ), 59 | metafemur_length = as.trait("metafemur_length", 60 | expectedUnit = "mm", 61 | identifier = "metafemur" 62 | ), 63 | eyewidth = as.trait("eyewidth_corr", 64 | expectedUnit = "mm", 65 | identifier = "eyewidth" 66 | ) 67 | ) 68 | 69 | traitdataset1 <- standardize(carabids, 70 | thesaurus = thesaurus, 71 | taxa = "name_correct", 72 | units = "mm" 73 | ) 74 | 75 | ``` 76 | ## Documentation 77 | 78 | A documentation is available online at http://ecologicaltraitdata.github.io/traitdataform/ or offline in the package vignette 'traitdataform'. 79 | 80 | ## Datasets 81 | 82 | The traitdataform package links to a couple of datasets, which are used for demo purposes in the vignette and documentation, but can be used for research and production use. 83 | 84 | The datasets have been published by their authors under [Creative Commons 0](https://creativecommons.org/publicdomain/zero/1.0/) or [Creative Commons Attribution](https://creativecommons.org/licenses/by/4.0/) license, which means they can be copied, modified, distributed without asking permission. Attribution must be given according to the dataset license. 85 | 86 | For additional information and interpretation of the data please refer to the help pages of the data objects (e.g. calling `?carabids` in R) and the original data sources given therein. 87 | 88 | If you want further data sources published under Creative Commons Licenses or in the public domain being added to this package, feel free to file a pull-request with a script for download and harmonization in the `data/` directory and a documentation appended to `R/data.R`! 
89 | 90 | 91 | ## Contributing 92 | 93 | The package invites any contributions e.g. minor fixes and spell-checks, improving interoperability with data and other packages or software for trait-data handling. Please see [the Contributing Guidelines](https://ecologicaltraitdata.github.io/traitdataform/CONTRIBUTING.html) for details. 94 | 95 | Please note that the 'traitdataform' project is released with a [Contributor Code of Conduct](https://ecologicaltraitdata.github.io/traitdataform/CODE_OF_CONDUCT.html). By contributing to this project, you agree to abide by its terms. 96 | 97 | ## Future features 98 | 99 | The package is under open source development. You are invited to submit pull-requests with your improvements. 100 | 101 | We are aiming to provide the following features in future iterations of the package: 102 | 103 | - extracting trait definitions and hierarchies from semantic ontologies (e.g. from the OWL files or via APIs), to facilitate analysis of comparable traits across taxa. 104 | - automated matching of user-provided trait names against trait definitions in online resources, by looking up traits from published ontologies. 105 | - harmonization of levels of factorial traits via fuzzy matching (requires lookup tables and ontologies providing legit factor levels). 106 | - managing trait databases locally in R by managing relational data (e.g. on occurrence level or measurement level, sampling event, taxon etc.) in the data.frame attributes. 107 | 108 | ## Cite as 109 | 110 | Schneider, F.D. (2022) traitdataform - harmonizing ecological trait data in R, v0.6.8, DOI: [10.5281/zenodo.1489527](http://dx.doi.org/10.5281/zenodo.1489527) http://ecologicaltraitdata.github.io/traitdataform 111 | 112 | ## License 113 | 114 | Copyright (c) 2022 F.D. 
Schneider, florian.dirk.schneider@gmail.com 115 | 116 | Permission is hereby granted, free of charge, to any person obtaining a copy of 117 | this software and associated documentation files (the "Software"), to deal in 118 | the Software without restriction, including without limitation the rights to 119 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 120 | the Software, and to permit persons to whom the Software is furnished to do so, 121 | subject to the following conditions: 122 | The above copyright notice and this permission notice shall be included in all 123 | copies or substantial portions of the Software. 124 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 125 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 126 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 127 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 128 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 129 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 130 | -------------------------------------------------------------------------------- /_pkgdown.yml: -------------------------------------------------------------------------------- 1 | url: https://ecologicaltraitdata.github.io/traitdataform/ 2 | template: 3 | bootstrap: 5 4 | 5 | -------------------------------------------------------------------------------- /codemeta.json: -------------------------------------------------------------------------------- 1 | { 2 | "@context": "https://doi.org/10.5063/schema/codemeta-2.0", 3 | "@type": "SoftwareSourceCode", 4 | "identifier": "traitdataform", 5 | "description": "Assistance for handling ecological trait data and applying the Ecological Trait-Data Standard terminology (Schneider et al. 2019 ). 
There are two major use cases: (1) preparation of own trait datasets for publication, and (2) harmonizing trait datasets from different sources by re-formatting them into a unified format. See 'traitdataform' website for full documentation. ", 6 | "name": "traitdataform: Formatting and Harmonizing Ecological Trait-Data", 7 | "relatedLink": "https://ecologicaltraitdata.github.io/traitdataform/", 8 | "codeRepository": "https://github.com/ecologicaltraitdata/traitdataform", 9 | "issueTracker": "https://github.com/ecologicaltraitdata/traitdataform/issues", 10 | "license": "https://spdx.org/licenses/MIT", 11 | "version": "0.6.8", 12 | "programmingLanguage": { 13 | "@type": "ComputerLanguage", 14 | "name": "R", 15 | "url": "https://r-project.org" 16 | }, 17 | "runtimePlatform": "R version 4.2.0 (2022-04-22 ucrt)", 18 | "author": [ 19 | { 20 | "@type": "Person", 21 | "givenName": "Florian D.", 22 | "familyName": "Schneider", 23 | "email": "florian.dirk.schneider@gmail.com", 24 | "@id": "https://orcid.org/0000-0002-1494-5684" 25 | } 26 | ], 27 | "contributor": [ 28 | { 29 | "@type": "Person", 30 | "givenName": "Caterina", 31 | "familyName": "Penone", 32 | "@id": "https://orcid.org/0000-0002-8170-6659" 33 | }, 34 | { 35 | "@type": "Person", 36 | "givenName": "Andreas", 37 | "familyName": "Ostrowski", 38 | "@id": "https://orcid.org/0000-0002-2033-779X" 39 | }, 40 | { 41 | "@type": "Person", 42 | "givenName": "Dennis", 43 | "familyName": "Heimann" 44 | }, 45 | { 46 | "@type": "Person", 47 | "givenName": "Felix", 48 | "familyName": "Neff" 49 | }, 50 | { 51 | "@type": "Person", 52 | "givenName": "Markus J.", 53 | "familyName": "Ankenbrand" 54 | }, 55 | { 56 | "@type": "Person", 57 | "givenName": "Jessica", 58 | "familyName": "Burnett" 59 | }, 60 | { 61 | "@type": "Person", 62 | "givenName": "Iñaki", 63 | "familyName": "Ucar" 64 | }, 65 | { 66 | "@type": "Person", 67 | "givenName": "Matthias", 68 | "familyName": "Grenié" 69 | } 70 | ], 71 | "maintainer": [ 72 | { 73 | "@type": 
"Person", 74 | "givenName": "Florian D.", 75 | "familyName": "Schneider", 76 | "email": "florian.dirk.schneider@gmail.com", 77 | "@id": "https://orcid.org/0000-0002-1494-5684" 78 | } 79 | ], 80 | "softwareSuggestions": [ 81 | { 82 | "@type": "SoftwareApplication", 83 | "identifier": "knitr", 84 | "name": "knitr", 85 | "provider": { 86 | "@id": "https://cran.r-project.org", 87 | "@type": "Organization", 88 | "name": "Comprehensive R Archive Network (CRAN)", 89 | "url": "https://cran.r-project.org" 90 | }, 91 | "sameAs": "https://CRAN.R-project.org/package=knitr" 92 | }, 93 | { 94 | "@type": "SoftwareApplication", 95 | "identifier": "rmarkdown", 96 | "name": "rmarkdown", 97 | "provider": { 98 | "@id": "https://cran.r-project.org", 99 | "@type": "Organization", 100 | "name": "Comprehensive R Archive Network (CRAN)", 101 | "url": "https://cran.r-project.org" 102 | }, 103 | "sameAs": "https://CRAN.R-project.org/package=rmarkdown" 104 | }, 105 | { 106 | "@type": "SoftwareApplication", 107 | "identifier": "testthat", 108 | "name": "testthat", 109 | "provider": { 110 | "@id": "https://cran.r-project.org", 111 | "@type": "Organization", 112 | "name": "Comprehensive R Archive Network (CRAN)", 113 | "url": "https://cran.r-project.org" 114 | }, 115 | "sameAs": "https://CRAN.R-project.org/package=testthat" 116 | } 117 | ], 118 | "softwareRequirements": { 119 | "1": { 120 | "@type": "SoftwareApplication", 121 | "identifier": "R", 122 | "name": "R", 123 | "version": ">= 2.10" 124 | }, 125 | "2": { 126 | "@type": "SoftwareApplication", 127 | "identifier": "data.table", 128 | "name": "data.table", 129 | "provider": { 130 | "@id": "https://cran.r-project.org", 131 | "@type": "Organization", 132 | "name": "Comprehensive R Archive Network (CRAN)", 133 | "url": "https://cran.r-project.org" 134 | }, 135 | "sameAs": "https://CRAN.R-project.org/package=data.table" 136 | }, 137 | "3": { 138 | "@type": "SoftwareApplication", 139 | "identifier": "stats", 140 | "name": "stats" 141 | }, 142 | 
"4": { 143 | "@type": "SoftwareApplication", 144 | "identifier": "reshape2", 145 | "name": "reshape2", 146 | "provider": { 147 | "@id": "https://cran.r-project.org", 148 | "@type": "Organization", 149 | "name": "Comprehensive R Archive Network (CRAN)", 150 | "url": "https://cran.r-project.org" 151 | }, 152 | "sameAs": "https://CRAN.R-project.org/package=reshape2" 153 | }, 154 | "5": { 155 | "@type": "SoftwareApplication", 156 | "identifier": "taxize", 157 | "name": "taxize", 158 | "provider": { 159 | "@id": "https://cran.r-project.org", 160 | "@type": "Organization", 161 | "name": "Comprehensive R Archive Network (CRAN)", 162 | "url": "https://cran.r-project.org" 163 | }, 164 | "sameAs": "https://CRAN.R-project.org/package=taxize" 165 | }, 166 | "6": { 167 | "@type": "SoftwareApplication", 168 | "identifier": "getPass", 169 | "name": "getPass", 170 | "provider": { 171 | "@id": "https://cran.r-project.org", 172 | "@type": "Organization", 173 | "name": "Comprehensive R Archive Network (CRAN)", 174 | "url": "https://cran.r-project.org" 175 | }, 176 | "sameAs": "https://CRAN.R-project.org/package=getPass" 177 | }, 178 | "7": { 179 | "@type": "SoftwareApplication", 180 | "identifier": "units", 181 | "name": "units", 182 | "provider": { 183 | "@id": "https://cran.r-project.org", 184 | "@type": "Organization", 185 | "name": "Comprehensive R Archive Network (CRAN)", 186 | "url": "https://cran.r-project.org" 187 | }, 188 | "sameAs": "https://CRAN.R-project.org/package=units" 189 | }, 190 | "8": { 191 | "@type": "SoftwareApplication", 192 | "identifier": "suppdata", 193 | "name": "suppdata", 194 | "provider": { 195 | "@id": "https://cran.r-project.org", 196 | "@type": "Organization", 197 | "name": "Comprehensive R Archive Network (CRAN)", 198 | "url": "https://cran.r-project.org" 199 | }, 200 | "sameAs": "https://CRAN.R-project.org/package=suppdata" 201 | }, 202 | "9": { 203 | "@type": "SoftwareApplication", 204 | "identifier": "curl", 205 | "name": "curl", 206 | "provider": 
{ 207 | "@id": "https://cran.r-project.org", 208 | "@type": "Organization", 209 | "name": "Comprehensive R Archive Network (CRAN)", 210 | "url": "https://cran.r-project.org" 211 | }, 212 | "sameAs": "https://CRAN.R-project.org/package=curl" 213 | }, 214 | "10": { 215 | "@type": "SoftwareApplication", 216 | "identifier": "XML", 217 | "name": "XML", 218 | "provider": { 219 | "@id": "https://cran.r-project.org", 220 | "@type": "Organization", 221 | "name": "Comprehensive R Archive Network (CRAN)", 222 | "url": "https://cran.r-project.org" 223 | }, 224 | "sameAs": "https://CRAN.R-project.org/package=XML" 225 | }, 226 | "11": { 227 | "@type": "SoftwareApplication", 228 | "identifier": "RCurl", 229 | "name": "RCurl", 230 | "provider": { 231 | "@id": "https://cran.r-project.org", 232 | "@type": "Organization", 233 | "name": "Comprehensive R Archive Network (CRAN)", 234 | "url": "https://cran.r-project.org" 235 | }, 236 | "sameAs": "https://CRAN.R-project.org/package=RCurl" 237 | }, 238 | "12": { 239 | "@type": "SoftwareApplication", 240 | "identifier": "plyr", 241 | "name": "plyr", 242 | "provider": { 243 | "@id": "https://cran.r-project.org", 244 | "@type": "Organization", 245 | "name": "Comprehensive R Archive Network (CRAN)", 246 | "url": "https://cran.r-project.org" 247 | }, 248 | "sameAs": "https://CRAN.R-project.org/package=plyr" 249 | }, 250 | "SystemRequirements": null 251 | }, 252 | "fileSize": "964.621KB", 253 | "citation": [ 254 | { 255 | "@type": "SoftwareSourceCode", 256 | "author": [ 257 | { 258 | "@type": "Person", 259 | "givenName": ["Florian", "D."], 260 | "familyName": "Schneider" 261 | } 262 | ], 263 | "name": "Package 'traitdataform'", 264 | "url": "https://ecologicaltraitdata.github.io/traitdataform/" 265 | } 266 | ], 267 | "releaseNotes": "https://github.com/ecologicaltraitdata/traitdataform/blob/master/NEWS.md", 268 | "readme": "https://github.com/EcologicalTraitData/traitdataform/blob/master/README.md", 269 | "contIntegration": 
"https://github.com/EcologicalTraitData/traitdataform/actions", 270 | "developmentStatus": ["https://lifecycle.r-lib.org/articles/stages.html#stable", "https://www.repostatus.org/#active"], 271 | "keywords": ["dataset", "trait-datasets", "harmonization", "r-package", "ecology"] 272 | } 273 | -------------------------------------------------------------------------------- /cran-comments.md: -------------------------------------------------------------------------------- 1 | ## Resubmission 2 | 3 | Dependency on external data has been revised to comply with CRAN policy: 4 | - demo data are now delivered with the package, to enable offline package testing, examples and vignette building; also more detailed documentation on these data was added. 5 | - calls on external resources for research purposes now fail with an informative message without causing errors or warnings when the resource is not available 6 | 7 | ## Test environments 8 | 9 | * Windows 10, R version 4.1.1 on local install (2021-09-20) 10 | * Windows 10, R version 4.1.1 on local install without internet connectivity (2021-09-20) 11 | * Mac OS X 10.15.7, R version 4.1.1 (2021-08-10) on GitHub Actions (2021-09-18) 12 | * Microsoft Windows Server 2019 10.0.17763, R version 4.1.1 (2021-08-10) on GitHub Actions (2021-09-20) 13 | * Ubuntu 20.04.3 LTS, R version 4.1.1 (2021-08-10), R oldrel 4.0.5 (2021-03-31) and R devel (2021-09-18 r80932) on GitHub Actions (2021-09-20) 14 | * Windows Server 2008 (64-bit), R devel (2021-09-17 r80929) on win-builder.r-project.org (2021-09-20) 15 | 16 | ## R CMD check results 17 | 18 | There were no ERRORs or WARNINGs or NOTES. 19 | 20 | ## Downstream dependencies 21 | 22 | There are no downstream dependencies for this package yet.
--------------------------------------------------------------------------------
/data/arthropodtraits.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EcologicalTraitData/traitdataform/a1a0b4656a404bddcd229ee2d42d430908be9d94/data/arthropodtraits.rda
--------------------------------------------------------------------------------
/data/carabids.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EcologicalTraitData/traitdataform/a1a0b4656a404bddcd229ee2d42d430908be9d94/data/carabids.rda
--------------------------------------------------------------------------------
/data/heteroptera.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EcologicalTraitData/traitdataform/a1a0b4656a404bddcd229ee2d42d430908be9d94/data/heteroptera.rda
--------------------------------------------------------------------------------
/data/heteroptera_raw.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EcologicalTraitData/traitdataform/a1a0b4656a404bddcd229ee2d42d430908be9d94/data/heteroptera_raw.rda
--------------------------------------------------------------------------------
/draft/csv2xml.R:
--------------------------------------------------------------------------------
#' Transforms csv glossary of terms into the machine readable traitdata-template in xml format
#'
#' Builds an XML tree with one `value` node per row of `df`; each `value` node
#' gets one child node per column, named after the column and holding that
#' row's entry.
#'
#' @param df a data.frame; rows become `value` nodes, columns become their
#'   child nodes.
#' @param name name of the node that wraps all `value` nodes.
#'
#' @return the tree object created by `XML::xmlTree()`, after all nodes have
#'   been added.
#' @export
#' @import XML
#'
convertToXML <- function(df, name) {
  xml <- XML::xmlTree("Test")
  xml$addNode(name, close = FALSE)

  # seq_len() yields no iterations for an empty data.frame, whereas
  # 1:nrow(df) would visit the non-existent rows 1 and 0.
  for (i in seq_len(nrow(df))) {
    xml$addNode("value", close = FALSE)
    for (j in names(df)) {
      xml$addNode(j, df[i, j])
    }
    xml$closeTag()  # closes the current "value" node
  }

  xml$closeTag()  # closes the wrapping `name` node
  xml
}

template = convertToXML(glossary,"BExIS_template")
XML::saveXML(template$value(), file = "data/template.xml")

--------------------------------------------------------------------------------
/draft/minimalexample.R:
--------------------------------------------------------------------------------
# minimal example

# review raw data
head(carabids)

# bring into longtable format

dataset1 <- as.traitdata(carabids,
  taxa = "name_correct",
  traits = c("body_length", "antenna_length", "metafemur_length", "eyewidth_corr"),
  units = "mm",
  datasetID = "carabidtraits",
  keep = c(measurementDeterminedBy = "source_measurement")
  )

head(dataset1)


# standardize taxonomy

dataset1std <- standardize.taxonomy(dataset1)
head(dataset1std)

# standardize traits

thesaurus <- as.thesaurus(body_length = as.trait("body_length",
    traitUnit = "mm",
    traitUnitStd = "mm",
    traitType = "numeric"),
  antenna_length = as.trait("antenna_length",
    traitUnit = "mm",
    traitUnitStd = "mm",
    traitType = "numeric"),
  metafemur_length = as.trait("metafemur_length",
    traitUnit = "mm",
    traitUnitStd = "mm",
    traitType = "numeric"),
  eyewidth = as.trait("eyewidth_corr",
    traitUnitStd = "mm",
    traitType = "numeric")
)

str(traitmap)

dataset1std2 <- standardize.traits(dataset1std, thesaurus)

head(dataset1std2)

# all-in-one

traitdataset1 <- standardize(carabids,
  thesaurus = thesaurus,
  taxa = "name_correct",
  units = "mm",
  keep = c(measurementDeterminedBy = "source_measurement")
  )



--------------------------------------------------------------------------------
/inst/CITATION:
--------------------------------------------------------------------------------
citHeader("To cite 'traitdataform' in publications use:")

citEntry(entry = "Manual", 4 | title = "Package 'traitdataform'", 5 | author = personList(as.person("Florian D. Schneider")), 6 | url = "https://ecologicaltraitdata.github.io/traitdataform/", 7 | 8 | textVersion = 9 | paste("Florian D. Schneider (2018).", 10 | "Package 'traitdataform'", 11 | "URL https://ecologicaltraitdata.github.io/traitdataform/.") 12 | ) -------------------------------------------------------------------------------- /inst/extdata/amniota.R: -------------------------------------------------------------------------------- 1 | 2 | temp <- tempfile(fileext = ".zip") 3 | utils::download.file("https://ndownloader.figshare.com/files/8067269", temp, method = "auto", quiet = TRUE, mode="wb") 4 | utils::unzip(temp, files = "Data_Files/Amniote_Database_Aug_2015.csv", exdir = ".") 5 | unlink(temp) 6 | rm(temp) 7 | 8 | amniota <- utils::read.csv("Data_Files/Amniote_Database_Aug_2015.csv", 9 | fileEncoding = "UTF-8", 10 | stringsAsFactors = FALSE) 11 | file.remove("Data_Files/Amniote_Database_Aug_2015.csv") 12 | unlink("Data_Files", recursive=TRUE) 13 | 14 | 15 | attr(amniota, 'metadata') <- traitdataform::as.metadata( 16 | datasetName = "Amniote Database", 17 | datasetID = "amniota", 18 | bibliographicCitation = utils::bibentry( 19 | bibtype = "Article", 20 | title = "An amniote life-history database to perform comparative analyses with birds, mammals, and reptiles", 21 | journal = "Ecology", 22 | volume = 96, 23 | issue = 5, 24 | pages = 3109, 25 | author = c(utils::person(given = "Nathan P.", family = "Myhrvold" , email = "nathanm@intven.com"), utils::as.person(c("Elita Baldridge", "Benjamin Chan", "Dhileep Sivam", "Daniel L. Freeman", "S. K. Morgan Ernest")) 26 | ), 27 | year = 2015, 28 | doi = "10.1890/15-0846R.1" 29 | ), 30 | author = "Nathan P. 
Myhrvold", 31 | license = "http://creativecommons.org/publicdomain/zero/1.0/" 32 | ) 33 | -------------------------------------------------------------------------------- /inst/extdata/amphibio.R: -------------------------------------------------------------------------------- 1 | 2 | temp <- tempfile(fileext = ".zip") 3 | utils::download.file("https://ndownloader.figshare.com/files/8828578", temp, method = "auto", quiet = TRUE, mode="wb") 4 | utils::unzip(temp, files = "AmphiBIO_v1.csv", exdir = ".") 5 | unlink(temp) 6 | rm(temp) 7 | 8 | amphibio <- utils::read.csv("AmphiBIO_v1.csv", 9 | stringsAsFactors = FALSE) 10 | file.remove("AmphiBIO_v1.csv") 11 | 12 | attr(amphibio, 'citeAs') <- utils::bibentry( 13 | bibtype = "Article", 14 | title = "AmphiBIO, a global database for amphibian ecological traits", 15 | journal = "Scientific Data", 16 | volume = 4, 17 | pages = 170123, 18 | author = utils::as.person("Brunno Freire Oliveira, Vinícius Avelar São-Pedro, Georgina Santos-Barrera, Caterina Penone, and Gabriel C. Costa"), 19 | year = 2017, 20 | doi = "10.1038/sdata.2017.123" 21 | ) 22 | print({cat("loading dataset 'amphibio' from original data source! \n When using this data, please cite the original publication: \n") 23 | (attributes(amphibio)$citeAs) }) 24 | -------------------------------------------------------------------------------- /inst/extdata/arthropodtraits.R: -------------------------------------------------------------------------------- 1 | # for roxygen2 documentation please edit file R/data.R! 
2 | arthropodtraits <- utils::read.csv(suppdata("10.5061/dryad.53ds2", "ArthropodSpeciesTraits.txt"), 3 | sep = "\t", 4 | stringsAsFactors = FALSE 5 | ) 6 | 7 | 8 | Encoding(arthropodtraits$Author) <- "latin1" 9 | arthropodtraits$Author <- iconv(arthropodtraits$Author, "latin1", "UTF-8") 10 | 11 | 12 | attr(arthropodtraits, 'metadata') <- traitdataform::as.metadata( 13 | datasetName = "Functional Arthropod Traits", 14 | datasetID = "arthropodtraits", 15 | bibliographicCitation = utils::bibentry( 16 | bibtype = "Article", 17 | title = "A summary of eight traits of Coleoptera, Hemiptera, Orthoptera and Araneae, occurring in grasslands in Germany.", 18 | journal = "Scientific Data", 19 | volume = 2, 20 | pages = 150013, 21 | author = c(utils::as.person("Martin M Gossner , Nadja K Simons, R Achtziger, T Blick, WHO Dorow, F Dziock, F Köhler, W Rabitsch, Wolfgang W Weisser") 22 | ), 23 | year = 2015, 24 | doi = "10.1038/sdata.2015.13" 25 | ), 26 | author = "Martin M Gossner", 27 | license = "http://creativecommons.org/publicdomain/zero/1.0/" 28 | ) 29 | 30 | attr(arthropodtraits, 'thesaurus') <- traitdataform:::as.thesaurus( 31 | Body_Size = traitdataform:::as.trait("Body_Size", 32 | expectedUnit = "mm", valueType = "numeric", 33 | traitDescription = "Mean body length [mm]", 34 | identifier = "http://t-sita.cesab.org/BETSI_vizInfo.jsp?trait=Body_length"), 35 | Dispersal_ability = traitdataform:::as.trait("Dispersal_ability", 36 | expectedUnit = "unitless", valueType = "factor", 37 | factorLevels = c('0' = "very_low", '0.25' = "low", '0.5' = "medium", '0.75' = "high", '1.0' = "very_high"), 38 | traitDescription = "Based on wing dimorphism, flying ability, activity ranges, dispersal strategies, individual movement and colonization dynamics, depending on taxon", 39 | identifier = "https://www.nature.com/articles/sdata201513/tables/3#Dispersal_ability"), 40 | Feeding_guild = traitdataform:::as.trait("Feeding_guild", 41 | expectedUnit = "unitless", valueType = "factor", 42 | 
traitDescription = "Fine classification of feeding guild across larval and adult stages; less frequent assignments in brackets", 43 | identifier = "https://www.nature.com/articles/sdata201513/tables/3#Feeding_guild"), 44 | Feeding_guild_short = traitdataform:::as.trait("Feeding_guild_short", 45 | expectedUnit = "unitless", valueType = "factor", 46 | traitDescription = "Coarse classification of feeding guild, indicating main feeding source across larval and adult stages ", 47 | identifier = "https://www.nature.com/articles/sdata201513/tables/3#Feeding_guild_short"), 48 | Feeding_mode = traitdataform:::as.trait("Feeding_mode", 49 | expectedUnit = "unitless", valueType = "factor", 50 | traitDescription = "The way nutrients are ingested", 51 | identifier = "https://www.nature.com/articles/sdata201513/tables/3#Feeding_mode"), 52 | Feeding_specialization = traitdataform:::as.trait("Feeding_specialization", 53 | expectedUnit = "unitless", valueType = "factor", 54 | traitDescription = "Host plant specialization in herbivores", 55 | identifier = "https://www.nature.com/articles/sdata201513/tables/3#Feeding_specialization"), 56 | Feeding_tissue = traitdataform:::as.trait("Feeding_tissue", 57 | expectedUnit = "unitless", valueType = "factor", 58 | traitDescription = "Fine classification on the plant tissues sucking herbivores are feeding on ", 59 | identifier = "https://www.nature.com/articles/sdata201513/tables/3#Feeding_tissue"), 60 | Feeding_plant_part = traitdataform:::as.trait("Feeding_plant_part", 61 | expectedUnit = "unitless", valueType = "factor", 62 | traitDescription = "Fine classification on the plant parts chewing herbivores are feeding on", 63 | identifier = "https://www.nature.com/articles/sdata201513/tables/3#Feeding_plant_part"), 64 | Endophagous_lifestyle = traitdataform:::as.trait("Endophagous_lifestyle", 65 | expectedUnit = "unitless", valueType = "factor", 66 | traitDescription = "Details on endophagously living larvae", 67 | identifier = 
"https://www.nature.com/articles/sdata201513/tables/3#Endophagous_lifestyle"), 68 | Stratum_use = traitdataform:::as.trait("Stratum_use", 69 | expectedUnit = "unitless", valueType = "factor", 70 | traitDescription = "Vertical strata used across larval and adult stages; less frequent assignments in brackets", 71 | identifier = "https://www.nature.com/articles/sdata201513/tables/3#Stratum_use"), 72 | Stratum_use_short = traitdataform:::as.trait("Stratum_use_short", 73 | expectedUnit = "unitless", valueType = "factor", 74 | traitDescription = "Main vertical stratum used across larval and adult stages ", 75 | identifier = "https://www.nature.com/articles/sdata201513/tables/3#Stratum_use_short") 76 | ) 77 | 78 | 79 | attr(arthropodtraits, 'taxa') <- "SpeciesID" 80 | attr(arthropodtraits, 'keep') <- c(measurementRemarks = "Remark") 81 | -------------------------------------------------------------------------------- /inst/extdata/carabids.R: -------------------------------------------------------------------------------- 1 | # for roxygen2 documentation please edit file R/data.R! 
2 | 3 | carabids <- utils::read.delim(suppdata("10.5061/dryad.1fn46", "carabid traits final.txt"), 4 | stringsAsFactors = FALSE 5 | ) 6 | 7 | attr(carabids, 'metadata') <- traitdataform::as.metadata( 8 | datasetName = "Carabid traits", 9 | datasetID = "carabids", 10 | bibliographicCitation = utils::bibentry( 11 | bibtype = "Article", 12 | title = "Sensitivity of functional diversity metrics to sampling intensity", 13 | journal = "Methods in Ecology and Evolution", 14 | author = c(utils::as.person("Fons van der Plas, Roel van Klink, Pete Manning, Han Olff, Markus Fischer") 15 | ), 16 | year = 2017, 17 | doi = "10.1111/2041-210x.12728" 18 | ), 19 | author = "Fons van der Plas", 20 | license = "http://creativecommons.org/publicdomain/zero/1.0/" 21 | ) 22 | 23 | attr(carabids, 'thesaurus') <- traitdataform:::as.thesaurus( 24 | body_length = traitdataform:::as.trait("body_length", 25 | expectedUnit = "mm", valueType = "numeric", 26 | identifier = "http://t-sita.cesab.org/BETSI_vizInfo.jsp?trait=Body_length"), 27 | antenna_length = traitdataform:::as.trait("antenna_length", 28 | expectedUnit = "mm", valueType = "numeric", 29 | identifier = "http://t-sita.cesab.org/BETSI_vizInfo.jsp?trait=Antenna_length"), 30 | metafemur_length = traitdataform:::as.trait("femur_length", 31 | expectedUnit = "mm", valueType = "numeric", 32 | identifier = "http://t-sita.cesab.org/BETSI_vizInfo.jsp?trait=Femur_length"), 33 | eyewidth_corr = traitdataform:::as.trait("eye_diameter", 34 | expectedUnit = "mm", valueType = "numeric", 35 | identifier = "http://t-sita.cesab.org/BETSI_vizInfo.jsp?trait=Eye_diameter") 36 | ) 37 | 38 | attr(carabids, 'taxa') <- "name_correct" 39 | attr(carabids, 'units') <- "mm" 40 | attr(carabids, 'keep') <- c(measurementDeterminedBy = "source_measurement", measurementRemarks = "note") 41 | -------------------------------------------------------------------------------- /inst/extdata/eltontraits.R: 
-------------------------------------------------------------------------------- 1 | # for roxygen2 documentation please edit file R/data.R! 2 | 3 | eltontraits_bird <- utils::read.csv("https://ndownloader.figshare.com/files/5631081", sep = "\t", 4 | stringsAsFactors = FALSE) 5 | 6 | eltontraits_mam <- utils::read.csv("https://ndownloader.figshare.com/files/5631084", sep = "\t", 7 | stringsAsFactors = FALSE) 8 | 9 | 10 | eltontraits_bird[setdiff(names(eltontraits_mam), names(eltontraits_bird))] <- NA 11 | eltontraits_mam[setdiff(names(eltontraits_bird), names(eltontraits_mam))] <- NA 12 | 13 | eltontraits <- rbind(eltontraits_bird, eltontraits_mam) 14 | 15 | rm(eltontraits_bird, eltontraits_mam) 16 | 17 | attr(eltontraits,'metadata') <- traitdataform::as.metadata( 18 | datasetName = "EltonTraits 1.0", 19 | datasetID = "eltontraits", 20 | bibliographicCitation = utils::bibentry( 21 | bibtype = "Article", 22 | title = "EltonTraits 1.0: Species‐level foraging attributes of the world's birds and mammals", 23 | journal = "Ecology", 24 | volume = 95, 25 | pages = 2027, 26 | author = c(utils::as.person( # as.person() takes a single argument: pass all names as one character vector 27 | c("Hamish Wilman", "Jonathan Belmaker", "Jennifer Simpson", "Carolina de la Rosa", "Marcelo M. Rivadeneira")), 28 | utils::person(given = "Walter", family = "Jetz", email = "walter.jetz@yale.edu")), 29 | year = 2014, 30 | doi = "10.1890/13-1917.1" 31 | ), 32 | author = "Walter Jetz", 33 | license = "http://creativecommons.org/publicdomain/zero/1.0/", 34 | description = "Species are characterized by physiological, behavioral, and ecological attributes that are all subject to varying evolutionary and ecological constraints and jointly determine their role and function in ecosystems. Attributes such as diet, foraging strata, foraging time, and body size, in particular, determine a large portion of the “Eltonian” niches of species.
Here we present a global species-level compilation of these key attributes for all 9993 and 5400 extant bird and mammal species derived from key literature sources. Global handbooks and monographs allowed the consistent sourcing of attributes for most species. For diet and foraging stratum we followed a defined protocol to translate the verbal descriptions into standardized, semiquantitative information about relative importance of different categories. Together with body size (continuous) and activity time (categorical) this enables a much finer distinction of species’ foraging ecology than typical categorical guild assignments allow. Attributes lacking information for specific species were flagged, and interpolated values based on taxonomy were provided instead. The presented data set is limited by, among others, these select cases missing observed data, by errors and uncertainty in the expert assessment as presented in the literature, and by the lack of intraspecific information. However, the standardized and transparent nature and complete global coverage of the data set should support an array of potential studies in biogeography, community ecology, macroevolution, global change biology, and conservation. Potential uses include comparative work involving these traits as focal or secondary variables, ecological research on the trait or trophic structure of communities, or conservation science concerned with the loss of function among species or in ecosystems in a changing world. We hope that this publication will spur the sharing, collaborative curation, and extension of data to the benefit of a more integrative, rigorous, and global biodiversity science." 35 | ) -------------------------------------------------------------------------------- /inst/extdata/heteroptera.R: -------------------------------------------------------------------------------- 1 | # for roxygen2 documentation please edit file R/data.R! 
2 | if(!l10n_info()$`UTF-8`) {suppressWarnings(Sys.setlocale("LC_CTYPE", "en_US.UTF-8"))} 3 | 4 | heteroptera<- utils::read.delim(url("https://ndownloader.figshare.com/files/5633880", 5 | encoding = "latin1"), sep = "\t", header = TRUE, 6 | stringsAsFactors=FALSE) 7 | 8 | Encoding(heteroptera$Author) <- "latin1" 9 | heteroptera$Author <- iconv(heteroptera$Author, "latin1", "UTF-8") 10 | 11 | attr(heteroptera, 'citeAs') <- utils::bibentry( 12 | bibtype = "Article", 13 | title = "Morphometric measures of Heteroptera sampled in grasslands across three regions of Germany", 14 | journal = "Ecology", 15 | volume = 96, 16 | issue = 4, 17 | pages = 1154, 18 | author = c(utils::as.person("Martin M. Gossner , Nadja K. Simons, Leonhard Höck, Wolfgang W. Weisser") 19 | ), 20 | year = 2015, 21 | doi = "10.1890/14-2159.1" 22 | ) 23 | 24 | attr(heteroptera, 'units') <- c("mm", "mm3", rep("unitless", 8)) 25 | attr(heteroptera, 'taxa') <- "SpeciesID" 26 | -------------------------------------------------------------------------------- /inst/extdata/heteroptera_raw.R: -------------------------------------------------------------------------------- 1 | # for roxygen2 documentation please edit file R/data.R! 
2 | if(!l10n_info()$`UTF-8`) {suppressWarnings(Sys.setlocale("LC_CTYPE", "en_US.UTF-8"))} 3 | 4 | heteroptera_raw <- utils::read.delim(url("https://ndownloader.figshare.com/files/5633883", 5 | encoding = "windows-1252"), 6 | stringsAsFactors=FALSE) 7 | 8 | heteroptera_raw$Center_Sampling_region <- iconv(as.character(heteroptera_raw$Center_Sampling_region), to = "UTF-8") 9 | Encoding(heteroptera_raw$Author) <- "latin1" 10 | heteroptera_raw$Author <- iconv(heteroptera_raw$Author, "latin1", "UTF-8") 11 | heteroptera_raw$Voucher_ID <- as.factor(heteroptera_raw$Voucher_ID) 12 | Encoding(levels(heteroptera_raw$Voucher_ID)) <- "latin1" 13 | levels(heteroptera_raw$Voucher_ID) <- iconv(levels(heteroptera_raw$Voucher_ID), "latin1", "UTF-8") 14 | heteroptera_raw$Source <- as.factor(heteroptera_raw$Source) 15 | Encoding(levels(heteroptera_raw$Source)) <- "latin1" 16 | levels(heteroptera_raw$Source) <- iconv(levels(heteroptera_raw$Source), to = "UTF-8") 17 | 18 | 19 | # suppdata("E096-102","HeteropteraMorphometricTraitsRAW.txt", "esa_archives", list = TRUE) 20 | # esa archives does not resolve correctly 21 | 22 | # heteroptera_raw <- data.frame() 23 | # heteroptera_raw <- readr::read_delim("http://www.esapubs.org/archive/ecol/E096/102/HeteropteraMorphometricTraitsRAW.txt", 24 | # delim = "\t", locale = readr:::locale(encoding = "windows-1252")) 25 | # heteroptera_raw <- as.data.frame(heteroptera_raw) 26 | 27 | 28 | attr(heteroptera_raw, 'metadata') <- traitdataform::as.metadata( 29 | datasetName = "Heteroptera morphometry traits", 30 | datasetID = "heteroptera", 31 | bibliographicCitation = utils::bibentry( 32 | bibtype = "Article", 33 | title = "Morphometric measures of Heteroptera sampled in grasslands across three regions of Germany", 34 | journal = "Ecology", 35 | volume = 96, 36 | issue = 4, 37 | pages = 1154, 38 | author = c(utils::as.person("Martin M. Gossner , Nadja K. Simons, Leonhard Hoeck, Wolfgang W. 
Weisser")), 39 | year = 2015, 40 | doi = "10.1890/14-2159.1" 41 | ), 42 | author = "Martin M. Gossner", 43 | license = "http://creativecommons.org/publicdomain/zero/1.0/" 44 | ) 45 | 46 | 47 | attr(heteroptera_raw, 'thesaurus') <- traitdataform:::as.thesaurus( 48 | Body_length = traitdataform::as.trait("Body_length", 49 | expectedUnit = "mm", valueType = "numeric", 50 | traitDescription = "From the tip of the head to the end of the abdomen"), 51 | Body_width = traitdataform::as.trait("Body_width", 52 | expectedUnit = "mm", valueType = "numeric", 53 | traitDescription = "Widest part of the body"), 54 | Body_height = traitdataform::as.trait("Body_height", 55 | expectedUnit = "mm", valueType = "numeric", 56 | traitDescription = "Thickest part of the body"), 57 | Thorax_length = traitdataform::as.trait("Thorax_length", 58 | expectedUnit = "mm", valueType = "numeric", 59 | traitDescription = "Longest part of the pronotum"), 60 | Thorax_width = traitdataform::as.trait("Thorax_width", 61 | expectedUnit = "mm", valueType = "numeric", 62 | traitDescription = "Widest part of the pronotum"), 63 | Head_width = traitdataform::as.trait("Head_width", 64 | expectedUnit = "mm", valueType = "numeric", 65 | traitDescription = "Widest part of the head including eyes"), 66 | Eye_width = traitdataform::as.trait("Eye_width", 67 | expectedUnit = "mm", valueType = "numeric", 68 | traitDescription = "Widest part of the left eye"), 69 | Antenna_Seg1 = traitdataform::as.trait("Antenna_Seg1", 70 | expectedUnit = "mm", valueType = "numeric", 71 | traitDescription = "Length of first antenna segment", 72 | broaderTerm = "http://ecologicaltraitdata.github.io/TraitDataList/Antenna_length"), 73 | Antenna_Seg2 = traitdataform::as.trait("Antenna_Seg2", 74 | expectedUnit = "mm", valueType = "numeric", 75 | traitDescription = "Length of second antenna segment", 76 | broaderTerm = "http://ecologicaltraitdata.github.io/TraitDataList/Antenna_length"), 77 | Antenna_Seg3 = 
traitdataform::as.trait("Antenna_Seg3", 78 | expectedUnit = "mm", valueType = "numeric", 79 | traitDescription = "Length of third antenna segment", 80 | broaderTerm = "http://ecologicaltraitdata.github.io/TraitDataList/Antenna_length"), 81 | Antenna_Seg4 = traitdataform::as.trait("Antenna_Seg4", 82 | expectedUnit = "mm", valueType = "numeric", 83 | traitDescription = "Length of fourth antenna segment", 84 | broaderTerm = "http://ecologicaltraitdata.github.io/TraitDataList/Antenna_length"), 85 | Antenna_Seg5 = traitdataform::as.trait("Antenna_Seg5", 86 | expectedUnit = "mm", valueType = "numeric", 87 | traitDescription = "Length of fifth antenna segment (only Pentatomoidea)", 88 | broaderTerm = "http://ecologicaltraitdata.github.io/TraitDataList/Antenna_length"), 89 | Front.Tibia_length = traitdataform::as.trait("Front.Tibia_length", 90 | expectedUnit = "mm", valueType = "numeric", 91 | traitDescription = "Length of the tibia of the foreleg", 92 | broaderTerm = "http://t-sita.cesab.org/BETSI_vizInfo.jsp?trait=Tibia_length"), 93 | Mid.Tibia_length = traitdataform::as.trait("Mid.Tibia_length", 94 | expectedUnit = "mm", valueType = "numeric", 95 | traitDescription = "Length of the tibia of the mid leg", 96 | broaderTerm = "http://t-sita.cesab.org/BETSI_vizInfo.jsp?trait=Tibia_length"), 97 | Hind.Tibia_length = traitdataform::as.trait("Hind.Tibia_length", 98 | expectedUnit = "mm", valueType = "numeric", 99 | traitDescription = "Length of the tibia of the hind leg", 100 | broaderTerm = "http://t-sita.cesab.org/BETSI_vizInfo.jsp?trait=Tibia_length"), 101 | Front.Femur_length = traitdataform::as.trait("Front.Femur_length", 102 | expectedUnit = "mm", valueType = "numeric", 103 | traitDescription = "Length of the femur of the foreleg", 104 | broaderTerm = "http://t-sita.cesab.org/BETSI_vizInfo.jsp?trait=Femur_length"), 105 | Hind.Femur_length = traitdataform::as.trait("Hind.Femur_length", 106 | expectedUnit = "mm", valueType = "numeric", 107 | traitDescription = "Length of 
the femur of the hind leg", 108 | broaderTerm = "http://t-sita.cesab.org/BETSI_vizInfo.jsp?trait=Femur_length"), 109 | Front.Femur_width = traitdataform::as.trait("Front.Femur_width", 110 | expectedUnit = "mm", valueType = "numeric", 111 | traitDescription = "Width of the femur of the foreleg" 112 | ), 113 | Hind.Femur_width = traitdataform::as.trait("Hind.Femur_width", 114 | expectedUnit = "mm", valueType = "numeric", 115 | traitDescription = "Width of the femur of the hind leg"), 116 | Rostrum_length = traitdataform::as.trait("Rostrum_length", 117 | expectedUnit = "mm", valueType = "numeric", 118 | traitDescription = "Length of the rostrum including all segments" 119 | ), 120 | Rostrum_width = traitdataform::as.trait("Rostrum_width", 121 | expectedUnit = "mm", valueType = "numeric", 122 | traitDescription = "Widest part of the rostrum" 123 | ), 124 | Wing_length = traitdataform::as.trait("Wing_length", 125 | expectedUnit = "mm", valueType = "numeric", 126 | traitDescription = "Longest part of the forewing", 127 | broaderTerm = "http://t-sita.cesab.org/BETSI_vizInfo.jsp?trait=Wing"), 128 | Wing_width = traitdataform::as.trait("Wing_width", 129 | expectedUnit = "mm", valueType = "numeric", 130 | traitDescription = "Widest part of the forewing", 131 | broaderTerm = "http://t-sita.cesab.org/BETSI_vizInfo.jsp?trait=Wing") 132 | ) 133 | 134 | attr(heteroptera_raw, 'taxa') <- "SpeciesID" 135 | attr(heteroptera_raw, 'units') <- "mm" 136 | attr(heteroptera_raw, 'occurrences') <- "ID" 137 | attr(heteroptera_raw, 'keep') <- c(sex = "Sex", basisOfRecordDescription = "Source", references = "Voucher_ID", verbatimLocality = "Center_Sampling_region") 138 | -------------------------------------------------------------------------------- /inst/extdata/mammaldiet.R: -------------------------------------------------------------------------------- 1 | # for roxygen2 documentation please edit file R/data.R! 
2 | 3 | mammaldiet <- utils::read.csv(suppdata("10.5061/dryad.6cd0v", "MammalDIET_v1.0.txt"), 4 | sep = "\t", 5 | fileEncoding = "UTF-8", 6 | stringsAsFactors = FALSE 7 | ) 8 | 9 | attr(mammaldiet, 'metadata') <- traitdataform::as.metadata( 10 | datasetName = "MammalDIET", 11 | datasetID = "mammaldiet", 12 | bibliographicCitation = utils::bibentry( 13 | bibtype = "Article", 14 | title = "Establishing macroecological trait datasets: digitalization, extrapolation, and validation of diet preferences in terrestrial mammals worldwide", 15 | journal = "Ecology and Evolution", 16 | volume = 4, 17 | issue = 12, 18 | pages = "2913-2930", # quoted page range: a bare 2913-2930 is evaluated as a subtraction (-17) 19 | author = c(utils::person(given = "Wilm Daniel", family = "Kissling", email = "danielkissling@web.de"), utils::person(given = "Lars", family = "Dalby"), utils::person(given = "Camilla", family = "Fløjgaard"), utils::person(given = "Jonathan", family = "Lenoir"), utils::person(given = "Brody", family = "Sandel"), utils::person(given = "Christopher", family = "Sandom"), utils::person(given = "Kristian", family = "Trøjelsgaard"), utils::person(given = "Jens-Christian", family = "Svenning")), 20 | year = 2014, 21 | doi = "10.1002/ece3.1136" 22 | ), 23 | author = "Wilm Daniel Kissling", 24 | license = "http://creativecommons.org/publicdomain/zero/1.0/", 25 | description = "Ecological trait data are essential for understanding the broad-scale distribution of biodiversity and its response to global change. For animals, diet represents a fundamental aspect of species’ evolutionary adaptations, ecological and functional roles, and trophic interactions. However, the importance of diet for macroevolutionary and macroecological dynamics remains little explored, partly because of the lack of comprehensive trait datasets. We compiled and evaluated a comprehensive global dataset of diet preferences of mammals (“MammalDIET”).
Diet information was digitized from two global and cladewide data sources and errors of data entry by multiple data recorders were assessed. We then developed a hierarchical extrapolation procedure to fill-in diet information for species with missing information. Missing data were extrapolated with information from other taxonomic levels (genus, other species within the same genus, or family) and this extrapolation was subsequently validated both internally (with a jack-knife approach applied to the compiled species-level diet data) and externally (using independent species-level diet information from a comprehensive continentwide data source). Finally, we grouped mammal species into trophic levels and dietary guilds, and their species richness as well as their proportion of total richness were mapped at a global scale for those diet categories with good validation results. The success rate of correctly digitizing data was 94%, indicating that the consistency in data entry among multiple recorders was high. Data sources provided species-level diet information for a total of 2033 species (38% of all 5364 terrestrial mammal species, based on the IUCN taxonomy). For the remaining 3331 species, diet information was mostly extrapolated from genus-level diet information (48% of all terrestrial mammal species), and only rarely from other species within the same genus (6%) or from family level (8%). Internal and external validation showed that: (1) extrapolations were most reliable for primary food items; (2) several diet categories (“Animal,” “Mammal,” “Invertebrate,” “Plant,” “Seed,” “Fruit,” and “Leaf”) had high proportions of correctly predicted diet ranks; and (3) the potential of correctly extrapolating specific diet categories varied both within and among clades. Global maps of species richness and proportion showed congruence among trophic levels, but also substantial discrepancies between dietary guilds. 
MammalDIET provides a comprehensive, unique and freely available dataset on diet preferences for all terrestrial mammals worldwide. It enables broad-scale analyses for specific trophic levels and dietary guilds, and a first assessment of trait conservatism in mammalian diet preferences at a global scale. The digitalization, extrapolation and validation procedures could be transferable to other trait data and taxa.", 26 | taxonomy = "IUCN 2013" 27 | ) 28 | 29 | mammaldiet$scientificName <- paste(mammaldiet$Genus, mammaldiet$Species) 30 | 31 | attr(mammaldiet, 'taxa') <- "scientificName" 32 | attr(mammaldiet, 'traits') <- names(mammaldiet[c(6:21,24:29)]) 33 | attr(mammaldiet, 'keep') <- c(basisOfRecord = "DataSource") 34 | -------------------------------------------------------------------------------- /inst/extdata/pantheria.R: -------------------------------------------------------------------------------- 1 | # for roxygen2 documentation please edit file R/data.R! 2 | 3 | temp <- tempfile(fileext = ".zip") 4 | utils::download.file("https://ndownloader.figshare.com/files/5604752", temp, method = "auto", quiet = TRUE, mode="wb") 5 | utils::unzip(temp, files = "PanTHERIA_1-0_WR05_Aug2008.txt", exdir = ".") 6 | unlink(temp) 7 | rm(temp) 8 | 9 | pantheria <- utils::read.delim("PanTHERIA_1-0_WR05_Aug2008.txt", 10 | fileEncoding = "UTF-8", 11 | stringsAsFactors = FALSE) 12 | file.remove("PanTHERIA_1-0_WR05_Aug2008.txt") 13 | 14 | attr(pantheria,'metadata') <- traitdataform::as.metadata( 15 | datasetName = "PanTHERIA", 16 | datasetID = "pantheria", 17 | bibliographicCitation = utils::bibentry( 18 | bibtype = "Article", 19 | title = "PanTHERIA: a species-level database of life history, ecology, and geography of extant and recently extinct mammals", 20 | journal = "Ecology", 21 | volume = 90, 22 | pages = 2648, 23 | author = c(utils::person(given = "Kate E.", family = "Jones", email = "Kate.Jones@ioz.ac.uk"), utils::as.person("Jon Bielby, Marcel Cardillo, Susanne A. 
Fritz, Justin O'Dell, C. David L. Orme, Kamran Safi, Wes Sechrest, Elizabeth H. Boakes, Chris Carbone, Christina Connolly, Michael J. Cutts, Janine K. Foster, Richard Grenyer, Michael Habib, Christopher A. Plaster, Samantha A. Price, Elizabeth A. Rigby, Janna Rist, Amber Teacher, Olaf R. P. Bininda-Emonds, John L. Gittleman, Georgina M. Mace, and Andy Purvis") 24 | ), 25 | year = 2009, 26 | doi = "10.1890/08-1494.1" 27 | ), 28 | author = "Kate E. Jones", 29 | license = "http://creativecommons.org/publicdomain/zero/1.0/" 30 | 31 | ) 32 | 33 | #AET – Actual Evapotransipration Rate; C – centigrade; d – days; dd – decimal degrees; deg – degrees; EXT – extrapolated; g – grams; GR – geographic range; Grp – Group; HuPopDen – Human Population Density; Indiv – Individual; Isl - Island; Lat – Latitude; Len – Length; Long – Longitude; Max – Maximum; Met – Metabolic; Min – Minimum; mLO2hr – milliliters of O2 per hr; mm – millimeters;n/km2 – Number per km2; PET – Potential Evapotranspiration Rate; Precip – Precipitation; Temp – Temperature; and 5p – 5th percentile 34 | 35 | pantheria[pantheria == -999] <- NA 36 | attr(pantheria, 'taxa') <- "MSW05_Binomial" 37 | attr(pantheria, 'keep') <- c(references = "References", "") 38 | 39 | -------------------------------------------------------------------------------- /man/amniota.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data.R 3 | \name{amniota} 4 | \alias{amniota} 5 | \title{Amniote life-history traits} 6 | \source{ 7 | Cite this dataset as \itemize{ \item P. Myhrvold, Nathan; Baldridge, 8 | Elita; Chan, Benjamin; Sivam, Dhileep; L. Freeman, Daniel; Ernest, S. K. 9 | Morgan (2016): An amniote life-history database to perform comparative 10 | analyses with birds, mammals, and reptiles. 
11 | http://esapubs.org/archive/ecol/E096/269/} 12 | 13 | \href{https://creativecommons.org/publicdomain/zero/1.0/}{Creative Commons 14 | 0}. To the extent possible under law, the authors have waived all copyright 15 | and related or neighboring rights to this data. 16 | } 17 | \value{ 18 | Returns a data object that includes attributes for data standardisation. 19 | } 20 | \description{ 21 | An amniote life-history database to perform comparative analyses 22 | with birds, mammals, and reptiles, Ecological Archives E096-269 23 | } 24 | \details{ 25 | Studying life-history traits within and across taxonomic 26 | classifications has revealed many interesting and important patterns, but 27 | this approach to life history requires access to large compilations of data 28 | containing many different life-history parameters. Currently, life-history 29 | data for amniotes (birds, mammals, and reptiles) is split among a variety 30 | of publicly available databases, data tables embedded in individual papers 31 | and books, and species-specific studies by experts. Using data from this 32 | wide range of sources is a challenge for conducting macroecological studies 33 | because of a lack of standardization in taxonomic classifications, 34 | parameter values, and even in which parameters are reported. In order to 35 | facilitate comparative analyses between amniote life-history data, we 36 | created a database compiled from peer-reviewed studies on individual 37 | species, macroecological studies of multiple species, existing life-history 38 | databases, and other aggregated sources as well as published books and 39 | other compilations. First, we extracted and aggregated the raw data from 40 | the aforementioned sources. Next, we resolved spelling errors and other 41 | formatting inconsistencies in species names through a number of 42 | computational and manual methods. 
Once this was completed, subspecies-level 43 | data and species-level data were shared via a data-sharing algorithm to 44 | accommodate the variety of species transformations (taxonomic promotions, 45 | demotions, merges, divergences, etc.) that have occurred over time. 46 | Finally, in species where multiple raw data points were identified for a 47 | given parameter, we report the median value. Here, we report a normalized 48 | and consolidated database of up to 29 life-history parameters, containing 49 | at least one life-history parameter for 21 322 species of birds, mammals, 50 | and reptiles. 51 | } 52 | \seealso{ 53 | Other rawdata: 54 | \code{\link{amphibio}}, 55 | \code{\link{arthropodtraits}}, 56 | \code{\link{carabids}}, 57 | \code{\link{heteroptera_raw}}, 58 | \code{\link{mammaldiet}}, 59 | \code{\link{pantheria}}, 60 | \code{\link{pulldata}()} 61 | } 62 | \author{ 63 | Nathan P. Myhrvold, Elita Baldridge, Benjamin Chan, Dhileep Sivam, 64 | Daniel L. Freeman, and S. K. Morgan Ernest 65 | } 66 | \concept{rawdata} 67 | -------------------------------------------------------------------------------- /man/amphibio.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data.R 3 | \name{amphibio} 4 | \alias{amphibio} 5 | \title{AmphiBIO, a global database for amphibian ecological traits} 6 | \source{ 7 | Cite as: 8 | \itemize{ 9 | \item Oliveira, B.F., São-Pedro, V.A., Santos-Barrera, G., Penone, C. & Costa, G.C. (2017). AmphiBIO, a global database for amphibian ecological traits. Scientific Data, 4:170123. doi: \doi{10.1038/sdata.2017.123} 10 | } 11 | 12 | Please also cite the data repository on figshare: 13 | \itemize{ 14 | \item Oliveira, Brunno Freire; São-Pedro, Vinícius Avelar; Santos-Barrera, Georgina; Penone, Caterina; C. Costa, Gabriel (2017): AmphiBIO_v1. figshare. 
\doi{10.6084/m9.figshare.4644424.v5} 15 | } 16 | 17 | \href{https://creativecommons.org/licenses/by/4.0/}{Creative Commons BY 4.0}. You must give appropriate credit, provide a link to the license, and indicate if changes were made. You may do so in any reasonable manner, but not in any way that suggests the licensor endorses you or your use. 18 | } 19 | \value{ 20 | This is a data object that provides instructions for \code{pulldata()}. 21 | } 22 | \description{ 23 | A comprehensive database of natural history traits for amphibians worldwide. 24 | } 25 | \details{ 26 | Current ecological and evolutionary research are increasingly moving from species- to trait-based approaches because traits provide a stronger link to organism’s function and fitness. Trait databases covering a large number of species are becoming available, but such data remains scarce for certain groups. Amphibians are among the most diverse vertebrate groups on Earth, and constitute an abundant component of major terrestrial and freshwater ecosystems. They are also facing rapid population declines worldwide, which is likely to affect trait composition in local communities, thereby impacting ecosystem processes and services. In this context, we introduce AmphiBIO, a comprehensive database of natural history traits for amphibians worldwide. The database releases information on 17 traits related to ecology, morphology and reproduction features of amphibians. We compiled data from more than 1,500 literature sources, and for more than 6,500 species of all orders (Anura, Caudata and Gymnophiona), 61 families and 531 genera. This database has the potential to allow unprecedented large-scale analyses in ecology, evolution and conservation of amphibians.
27 | } 28 | \seealso{ 29 | Other rawdata: 30 | \code{\link{amniota}}, 31 | \code{\link{arthropodtraits}}, 32 | \code{\link{carabids}}, 33 | \code{\link{heteroptera_raw}}, 34 | \code{\link{mammaldiet}}, 35 | \code{\link{pantheria}}, 36 | \code{\link{pulldata}()} 37 | } 38 | \author{ 39 | Brunno Freire Oliveira, Vinícius Avelar São-Pedro, Georgina Santos-Barrera, Caterina Penone, and Gabriel C. Costa 40 | } 41 | \concept{rawdata} 42 | -------------------------------------------------------------------------------- /man/arthropodtraits.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data.R 3 | \name{arthropodtraits} 4 | \alias{arthropodtraits} 5 | \title{Functional Arthropod Traits} 6 | \format{ 7 | A data frame of 1230 observations and 17 variables. 8 | \describe{ 9 | \item{Order}{} 10 | \item{Suborder}{} 11 | \item{Family}{} 12 | \item{SpeciesID}{} 13 | \item{Author}{} 14 | \item{Body_Size}{Mean body length (mm)} 15 | \item{Dispersal_ability}{ordinal scale of 0 = very low, 0.25 = low, 0.5 = 16 | medium, 0.75 = high, 1 = very high; based on wing dimorphism, flying 17 | ability, activity ranges, dispersal strategies, individual movement and 18 | colonization dynamics, depending on taxon } 19 | \item{Feeding_guild}{Fine classification of feeding guild across larval and 20 | adult stages; less frequent assignments in brackets.
21 | \describe{ 22 | \item{c}{carnivore} 23 | \item{c-d}{carni-detritivore} 24 | \item{c-d-h}{carni-detriti-herbivore} 25 | \item{c-f}{carni-fungivore} 26 | \item{c-h}{carni-herbivore} 27 | \item{c-(h)}{mainly carnivore, rarely herbivore} 28 | \item{d}{detritivore} 29 | \item{d-f}{detriti-fungivore} 30 | \item{d-h}{detriti-herbivore} 31 | \item{f}{fungivore} 32 | \item{f-h}{fungi-herbivore} 33 | \item{h}{herbivore} 34 | \item{h-(c)}{mainly herbivore, rarely carnivore} 35 | } 36 | } 37 | \item{Feeding_guild_short}{Coarse classification of feeding guild, indicating main feeding source across larval and adult stages} 38 | \item{Feeding_mode}{The way nutrients are ingested} 39 | \item{Feeding_specialization}{Host plant specialization in herbivores} 40 | \item{Feeding_tissue}{Fine classification on the plant tissues sucking herbivores are feeding on} 41 | \item{Feeding_plant_part}{Fine classification on the plant parts chewing herbivores are feeding on} 42 | \item{Endophagous_lifestyle}{Details on endophagously living larvae} 43 | \item{Stratum_use}{Vertical strata used across larval and adult stages; less frequent assignments in brackets} 44 | \item{Stratum_use_short}{Main vertical stratum used across larval and adult stages} 45 | \item{Remark}{Indicates species that occur neither obligatorily nor facultatively in grasslands; * = non-grassland species} 46 | 47 | } 48 | 49 | original description: https://www.nature.com/articles/sdata201513/tables/3 50 | } 51 | \source{ 52 | \doi{10.5061/dryad.53ds2}; 53 | \href{https://creativecommons.org/publicdomain/zero/1.0/}{Creative Commons 54 | 0}. To the extent possible under law, the authors have waived all copyright 55 | and related or neighboring rights to this data. 56 | } 57 | \value{ 58 | This is a data object. It provides instructions for \code{pulldata()}. 59 | } 60 | \description{ 61 | Data from: A summary of eight traits of Coleoptera, Hemiptera, Orthoptera and 62 | Araneae, occurring in grasslands in Germany.
63 | } 64 | \details{ 65 | Analyses of species traits have increased our understanding of how 66 | environmental drivers such as disturbances affect the composition of 67 | arthropod communities and related processes. There are, however, few 68 | studies on which traits in the arthropod community are affected by 69 | environmental changes and which traits affect ecosystem functioning. The 70 | assembly of arthropod traits of several taxa is difficult because of the 71 | large number of species, limited availability of trait databases and 72 | differences in available traits. We sampled arthropod species data from a 73 | total of 150 managed grassland plots in three regions of Germany. These 74 | plots represent the spectrum from extensively used pastures to mown 75 | pastures to intensively managed and fertilized meadows. In this paper, we 76 | summarize information on body size, dispersal ability, feeding guild and 77 | specialization (within herbivores), feeding mode, feeding tissue (within 78 | herbivorous suckers), plant part (within herbivorous chewers), endophagous 79 | lifestyle (within herbivores), and vertical stratum use for 1,230 species 80 | of Coleoptera, Hemiptera (Heteroptera, Auchenorrhyncha), Orthoptera 81 | (Saltatoria: Ensifera, Caelifera), and Araneae, sampled by sweep-netting 82 | between 2008 and 2012. We compiled traits from various literature sources 83 | and complemented data from reliable internet sources and the authors’ 84 | experience. 85 | 86 | The data set comprises literature trait data of species that were sampled 87 | and measured in a project within the Biodiversity Exploratories which 88 | focuses on the effect of land use on arthropod community composition and 89 | related processes (e.g. 
species interactions such as herbivory or 90 | predation) in three regions of Germany 91 | 92 | When using this data, please cite the original publication: 93 | 94 | \itemize{ \item Gossner MM, Simons NK, Achtziger R, Blick T, Dorow WHO, 95 | Dziock F, Köhler F, Rabitsch W, Weisser WW (2015) A summary of eight traits 96 | of Coleoptera, Hemiptera, Orthoptera and Araneae, occurring in grasslands 97 | in Germany. Scientific Data 2: 150013. 98 | \doi{10.1038/sdata.2015.13} } 99 | 100 | Additionally, please cite the Dryad data package: 101 | 102 | \itemize{ \item Gossner MM, Simons NK, Achtziger R, 103 | Blick T, Dorow WHO, Dziock F, Köhler F, Rabitsch W, Weisser WW (2015) Data 104 | from: A summary of eight traits of Coleoptera, Hemiptera, Orthoptera and 105 | Araneae, occurring in grasslands in Germany. Dryad Digital Repository. 106 | \doi{10.5061/dryad.53ds2} } 107 | } 108 | \seealso{ 109 | Other rawdata: 110 | \code{\link{amniota}}, 111 | \code{\link{amphibio}}, 112 | \code{\link{carabids}}, 113 | \code{\link{heteroptera_raw}}, 114 | \code{\link{mammaldiet}}, 115 | \code{\link{pantheria}}, 116 | \code{\link{pulldata}()} 117 | } 118 | \author{ 119 | Gossner MM, Simons NK, Achtziger R, Blick T, Dorow WHO, Dziock F, Köhler F, Rabitsch W, Weisser WW 120 | } 121 | \concept{rawdata} 122 | -------------------------------------------------------------------------------- /man/as.metadata.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/as.metadata.R 3 | \name{as.metadata} 4 | \alias{as.metadata} 5 | \title{Create metadata for trait dataset} 6 | \usage{ 7 | as.metadata( 8 | ..., 9 | template = list(datasetID = NULL, datasetName = NULL, author = NULL, rightsHolder = 10 | NULL, bibliographicCitation = NULL, license = NULL, version = NULL, comments = NULL, 11 | description = NULL, region = NULL, conformsTo = NULL) 12 | ) 13 | } 14 | \arguments{ 15 | 
\item{...}{named objects of any class. Names must be the same as given by template or they will be ignored when producing the metadata object.} 16 | 17 | \item{template}{The default template contains the elements as defined by the Ecological Traitdata Standard.} 18 | } 19 | \value{ 20 | an object of class 'metadata', or - if metadata objects are provided as input - a list object of class 'metadatalist'. 21 | } 22 | \description{ 23 | Function to create a defined object containing metadata parameters according to a pre-defined template. 24 | } 25 | \examples{ 26 | 27 | # set metadata object 28 | a <- as.metadata(author = "Martin Gossner", datasetName = "Heteroptera morphometric traits") 29 | 30 | # update an existing metadata object 31 | a1 <- as.metadata(datasetID = "heteroptera", template = a) 32 | 33 | } 34 | -------------------------------------------------------------------------------- /man/as.thesaurus.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/thesaurus.R 3 | \name{as.thesaurus} 4 | \alias{as.thesaurus} 5 | \title{Create a list of trait definitions (a thesaurus of traits)} 6 | \usage{ 7 | as.thesaurus(..., replace = NULL) 8 | } 9 | \arguments{ 10 | \item{...}{multiple objects of class 'trait' (produced by function 11 | \code{as.trait()}) or a data.frame containing columns according to the terms 12 | provided by https://ecologicaltraitdata.github.io/ETS/#terms-for-thesauri.} 13 | 14 | \item{replace}{named character vector, with new names as values, and old names as names.} 15 | } 16 | \value{ 17 | a list of formalized objects of class 'trait', as returned by 18 | function as.trait(). 19 | } 20 | \description{ 21 | Create a trait thesaurus object for use as a reference object/lookup table of 22 | traits within function \code{standardize_traits()}.
23 | } 24 | \details{ 25 | The object class 'trait' comprises necessary information to map a 26 | trait name to a trait definition, a target unit and a globally unique 27 | identifier. The thesaurus will be used in function 'standardize_traits()' 28 | to apply unit conversion and factor level harmonization. 29 | } 30 | \examples{ 31 | 32 | # provide traitlist by defining individual traits using function `as.trait()`: 33 | 34 | traitlist <- as.thesaurus(body_length = as.trait("body_length", expectedUnit = "mm", 35 | valueType = "numeric", 36 | identifier = "http://t-sita.cesab.org/BETSI_vizInfo.jsp?trait=Body_length"), 37 | antenna_length = as.trait("antenna_length", expectedUnit = "mm", 38 | valueType = "numeric", 39 | identifier = "http://t-sita.cesab.org/BETSI_vizInfo.jsp?trait=Antenna_length"), 40 | metafemur_length = as.trait("metafemur_length", expectedUnit = "mm", 41 | valueType = "numeric", 42 | identifier = "http://t-sita.cesab.org/BETSI_vizInfo.jsp?trait=Femur_length") 43 | ) 44 | 45 | # provide traitlist from data frame object: 46 | 47 | as.thesaurus(data.frame( 48 | trait = c("body_length", "antenna_length", "metafemur_length", "eyewidth_corr"), 49 | expectedUnit = "mm", 50 | valueType = "numeric", 51 | traitDescription = c("body length in mm", "length of antenna in mm", 52 | "length of metafemur in mm", "eye width in mm"), 53 | identifier = c("http://t-sita.cesab.org/BETSI_vizInfo.jsp?trait=Body_length", 54 | "http://t-sita.cesab.org/BETSI_vizInfo.jsp?trait=Antenna_length", 55 | "http://t-sita.cesab.org/BETSI_vizInfo.jsp?trait=Femur_length", 56 | "http://t-sita.cesab.org/BETSI_vizInfo.jsp?trait=Eye_diameter" 57 | ) 58 | ) 59 | ) 60 | 61 | # provide traitlist from remote archive with renaming 62 | # (pulls from https://github.com/EcologicalTraitData/TraitDataList) 63 | 64 | \dontrun{ 65 | traits1 <- as.thesaurus(read.csv("https://git.io/fpsj5"), 66 | replace = c(traitID = "identifier", 67 | traitName = "trait", 68 | traitUnit = "expectedUnit", 69 |
Comments = "comments") 70 | ) 71 | } 72 | } 73 | -------------------------------------------------------------------------------- /man/as.trait.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/thesaurus.R 3 | \name{as.trait} 4 | \alias{as.trait} 5 | \title{Create a trait definition} 6 | \usage{ 7 | as.trait( 8 | trait, 9 | identifier = NA, 10 | broaderTerm = NA, 11 | narrowerTerm = NA, 12 | relatedTerm = NA, 13 | valueType = NA, 14 | expectedUnit = NA, 15 | factorLevels = NA, 16 | replaceFactorLevels = NA, 17 | maxAllowedValue = NA, 18 | minAllowedValue = NA, 19 | traitDescription = NA, 20 | comments = NA, 21 | source = NA, 22 | version = NA, 23 | author = NA, 24 | ... 25 | ) 26 | } 27 | \arguments{ 28 | \item{trait}{A character string, providing an intuitive, human-readable trait 29 | name.} 30 | 31 | \item{identifier}{Unique identifier for the trait, ideally unique and stable 32 | URI which identify the source of the trait definition.} 33 | 34 | \item{broaderTerm}{One or several terms that enclose the trait definition.} 35 | 36 | \item{narrowerTerm}{One or several terms that are enclosed by the trait 37 | definition.} 38 | 39 | \item{relatedTerm}{One or several terms that are related to this term 40 | (ideally given as URI).} 41 | 42 | \item{valueType}{the type of trait values. Possible entries are 'numeric', 43 | 'integer', 'categorical', 'logical', or 'character'.} 44 | 45 | \item{expectedUnit}{the unit expected for measurement entries.} 46 | 47 | \item{factorLevels}{A comma separated list of terms comprising the 48 | constrained vocabulary for categorical traits or ordinal binary traits.} 49 | 50 | \item{replaceFactorLevels}{A list or vector containing synonymous factor 51 | levels to be mapped onto the target factor levels provided in 52 | 'factorLevels'. 
Names of the vector or list entries will be superimposed by 53 | entries in 'factorLevels'.} 54 | 55 | \item{maxAllowedValue}{An upper boundary for accepted numerical values.} 56 | 57 | \item{minAllowedValue}{A lower boundary for accepted numerical values.} 58 | 59 | \item{traitDescription}{A short, unambiguous definition of the trait. May 60 | refer to a method of measurement.} 61 | 62 | \item{comments}{Details and Examples for clarification of the trait 63 | definition.} 64 | 65 | \item{source}{A character string providing a full bibliographic reference to 66 | the trait definition (giving title, author, year and publication).} 67 | 68 | \item{version}{A character string containing the version number of the 69 | referenced definition (e.g. "v1.2"), if applicable.} 70 | 71 | \item{author}{A character string or object of class 'person' (as created by 72 | \code{as.person()}) attributing the author(s) of the trait definition.} 73 | 74 | \item{...}{other arguments, passed on to print function.} 75 | } 76 | \value{ 77 | A structured data.frame object of class 'trait'. 78 | } 79 | \description{ 80 | Creating an object containing a standardised trait definition according to 81 | the Ecological Traitdata Standard. Parameters correspond to the definition at 82 | \url{https://ecologicaltraitdata.github.io/ETS/#terms-for-trait-definitions}. 
83 | } 84 | \examples{ 85 | 86 | body_length <- as.trait("body_length", expectedUnit = "mm", valueType = "numeric", 87 | identifier = "http://t-sita.cesab.org/BETSI_vizInfo.jsp?trait=Body_length", 88 | traitDescription = "The known longest dimension of the physical structure of organisms", 89 | relationSource = "Maggenti and Maggenti, 2005", 90 | broaderTerm = "http://t-sita.cesab.org/BETSI_vizInfo.jsp?trait=Body_dimension" 91 | ) 92 | 93 | } 94 | -------------------------------------------------------------------------------- /man/as.traitdata.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/as.traitdata.R 3 | \name{as.traitdata} 4 | \alias{as.traitdata} 5 | \title{Standardize format of traitdata} 6 | \usage{ 7 | as.traitdata( 8 | x, 9 | traits = attributes(x)$traits, 10 | taxa = attributes(x)$taxa, 11 | occurrences = attributes(x)$occurrences, 12 | datasetID = attributes(x)$datasetID, 13 | measurements = attributes(x)$measurements, 14 | units = attributes(x)$units, 15 | keep = attributes(x)$keep, 16 | drop = attributes(x)$drop, 17 | na.rm = TRUE, 18 | id.vars = names(x)[names(x) \%in\% keep & !names(x) \%in\% drop], 19 | thesaurus = attributes(x)$thesaurus, 20 | metadata = attributes(x)$metadata, 21 | longtable = TRUE, 22 | conformsTo = "v0.10", 23 | ... 24 | ) 25 | } 26 | \arguments{ 27 | \item{x}{data.frame object, containing at least a column of taxa, and one or 28 | more columns of trait measurements.} 29 | 30 | \item{traits}{a vector of column names containing traits.} 31 | 32 | \item{taxa}{the name of the column containing taxon names.} 33 | 34 | \item{occurrences}{either a column name containing identifiers for each 35 | individual specimen on which several traits were measured, i.e. an 36 | occurrence of this taxon, or a vector of occurrence identifiers which must 37 | be of the same length as the number of rows of the table. 
See 'Details'.} 38 | 39 | \item{datasetID}{a unique name for this dataset (optional). Will be prepended 40 | to the occurrence ID and measurement ID.} 41 | 42 | \item{measurements}{either a column name containing identifiers for each 43 | individual measurement, or a vector of measurement identifiers. This 44 | applies if single trait measurements span across multiple columns of data, 45 | e.g. multivariate traits like quantitative measures of chemical compounds, 46 | wavelengths or x-y-z coordinates. In most cases, a measurementID will link 47 | the data across rows in the longtable format. Make sure that the traitnames 48 | given reflect the different dimensions of the trait measurement. If 49 | \code{measurements} remains blank, sequential identifiers will be auto-generated 50 | for each measured value.} 51 | 52 | \item{units}{a single character string or named vector giving the units that 53 | apply to the traits. If only one unit type is given, it will be applied to 54 | all traits.} 55 | 56 | \item{keep}{a vector or named vector containing the names of the input 57 | columns to be kept in the output. Vector names will be used to rename the 58 | columns. It is recommended to use accepted column names of the traitdata 59 | standard for renaming!} 60 | 61 | \item{drop}{a vector acting as the inverse of \code{keep}. All columns listed will 62 | be removed from the output dataset.} 63 | 64 | \item{na.rm}{logical, defaults to \code{TRUE}. If \code{FALSE}, all measured values 65 | containing NA will be kept in the output table. This is not recommended for 66 | most data.} 67 | 68 | \item{id.vars}{a vector of column names to return. Autogenerated from input 69 | column names and 'keep' and 'drop'.} 70 | 71 | \item{thesaurus}{an object of class 'thesaurus' as created by function 72 | \code{as.thesaurus()}. If provided, this will superimpose trait names provided 73 | in argument \code{traits}.
The thesaurus will be appended as an attribute and 74 | can be revisited by calling \code{attributes(x)$thesaurus}.} 75 | 76 | \item{metadata}{a list of class metadata, as created by function 77 | \code{as.metadata()}. Metadata will be added as attributes to the data table. 78 | Possible parameters to the function call are: \code{rightsHolder}, 79 | \code{bibliographicCitation}, \code{license}, \code{author}, \code{datasetID}, \code{datasetName}, 80 | \code{version}. (see 'Details')} 81 | 82 | \item{longtable}{logical, defaults to \code{TRUE}. If \code{FALSE}, data will not be 83 | converted into longtable format, but remain in widetable format as provided. 84 | Note that any columns not indicated in arguments \code{traits}, \code{keep}, \code{units}, 85 | \code{taxa}, \code{occurrences} will be dropped from the output.} 86 | 87 | \item{conformsTo}{version of the Ecological Trait-data Standard to which the 88 | data conform. Default procedures return data conforming to v0.10. If 89 | \code{conformsTo = "v0.9"}, data output will be converted to Ecological 90 | Trait-data Standard v0.9.} 91 | 92 | \item{...}{other arguments, passed on to print function.} 93 | } 94 | \value{ 95 | An object of class 'traitdata'. 96 | } 97 | \description{ 98 | Turns wide-table formats (species-traits matrix and occurrence table) into 99 | long-table format. As input, the function requires information about which 100 | columns contain traits, given as a list of trait-names, and which column 101 | contains the taxon name. For tables containing repeated measurements of 102 | traits within the same taxon, an occurrenceID should be given or will be 103 | created. 104 | } 105 | \details{ 106 | If \code{occurrences} is left blank, the script will check for the 107 | structure of the input table. If several entries are given for the same 108 | taxon, it assumes that input is an occurrence table, i.e. with multiple 109 | observations of a single taxon, and assigns identifiers.
110 | 111 | Metadata will be stored as attributes to the data frame and can be accessed 112 | via \code{attributes()}. It is not necessary but highly recommended to provide 113 | metadata when working with multiple trait data files. When appending 114 | datasets using \code{rbind()}, the metadata information will be added as 115 | additional columns and dataset attribution will be listed in attributes. 116 | } 117 | \examples{ 118 | 119 | \dontrun{ 120 | # species-trait matrix: 121 | 122 | pulldata("carabids") 123 | 124 | dataset1 <- as.traitdata(carabids, 125 | taxa = "name_correct", 126 | traits = c("body_length", "antenna_length", "metafemur_length"), 127 | units = "mm", 128 | keep = c(basisOfRecordDecription = "source_measurement", measurementRemark = "note") 129 | ) 130 | 131 | # occurrence table: 132 | 133 | pulldata("heteroptera_raw") 134 | 135 | dataset2 <- as.traitdata(heteroptera_raw, 136 | taxa = "SpeciesID", 137 | traits = c("Body_length", "Body_width", "Body_height", "Thorax_length", 138 | "Thorax_width", "Head_width", "Eye_width", "Antenna_Seg1", "Antenna_Seg2", 139 | "Antenna_Seg3", "Antenna_Seg4", "Antenna_Seg5", "Front.Tibia_length", 140 | "Mid.Tibia_length", "Hind.Tibia_length", "Front.Femur_length", 141 | "Hind.Femur_length", "Front.Femur_width", "Hind.Femur_width", 142 | "Rostrum_length", "Rostrum_width", "Wing_length", "Wing_widt"), 143 | units = "mm", 144 | keep = c(sex = "Sex", references = "Source", lifestage = "Wing_development"), 145 | metadata = as.metadata( 146 | author = "Gossner MM, Simons NK, Höck L and Weisser WW", 147 | datasetName = "Morphometric traits Heteroptera", 148 | bibliographicCitation = attributes(heteroptera_raw)$citeAs, 149 | license = "http://creativecommons.org/publicdomain/zero/1.0/" 150 | ) 151 | ) 152 | } 153 | 154 | } 155 | -------------------------------------------------------------------------------- /man/as_factor_clocale.Rd: -------------------------------------------------------------------------------- 1 | 
% Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils.R 3 | \name{as_factor_clocale} 4 | \alias{as_factor_clocale} 5 | \title{Locale-independent factor conversion} 6 | \usage{ 7 | as_factor_clocale(x) 8 | } 9 | \arguments{ 10 | \item{x}{a vector} 11 | } 12 | \value{ 13 | the same vector, but with any factorial content or columns with repeating character strings converted to factors, applying a locale-independent sorting. 14 | } 15 | \description{ 16 | Locale-independent factor conversion 17 | } 18 | \note{ 19 | Only for internal application in \code{\link[=as.traitdata]{as.traitdata()}}. This deals with the problem described by Kurt Hornik \href{https://developer.r-project.org/Blog/public/2020/02/16/stringsasfactors/index.html}{(here)} for changes coming in R 4.0.0. In traitdataform, this concerns the locale-dependence of taxon and trait name sorting when calling published trait-datasets. For traits, the sorting order of factor levels will be superimposed by the order given in the thesaurus specification (if provided). 20 | } 21 | \keyword{internal} 22 | -------------------------------------------------------------------------------- /man/carabids.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data.R 3 | \name{carabids} 4 | \alias{carabids} 5 | \title{Carabid morphological traits} 6 | \format{ 7 | A data frame containing the following columns: 8 | \itemize{ 9 | \item name_correct = species name 10 | \item source_measurement = researcher who performed measurement; 11 | \item body_length = body length in mm; 12 | \item antenna_length = antenna length in mm; 13 | \item metafemur_length = length metafemur in mm; 14 | \item eyewidth_corr = eye width in mm; 15 | \item note = note; 16 | \item resid_femur = residual femur length in mm (i.e.
residual from 17 | linear model in which femur length is explained by body length); 18 | \item resid_eye = residual eye length in mm (i.e. residual from linear 19 | model in which eye length is explained by body length) 20 | \item resid_antenna = residual antenna length in mm (i.e. residual 21 | from linear model in which antenna length is explained by body length) 22 | } 23 | } 24 | \source{ 25 | \doi{10.5061/dryad.1fn46}; 26 | \href{https://creativecommons.org/publicdomain/zero/1.0/}{Creative Commons 27 | 0}. To the extent possible under law, the authors have waived all copyright 28 | and related or neighboring rights to this data. 29 | } 30 | \value{ 31 | This is a data object. It provides instructions for \code{pulldata()}. 32 | } 33 | \description{ 34 | Average body measures of 120 Carabid species occurring in the Netherlands. 35 | } 36 | \details{ 37 | When using this data, please cite the original publication: 38 | 39 | \itemize{ \item van der Plas F, van Klink R, Manning P, Olff H, Fischer M 40 | (2017) Sensitivity of functional diversity metrics to sampling intensity. 41 | Methods in Ecology and Evolution 8(9): 1072-1080. 42 | \doi{10.1111/2041-210x.12728} } 43 | 44 | Additionally, please cite the Dryad data package: 45 | 46 | \itemize{ \item van der Plas F, van Klink R, Manning P, Olff H, Fischer M 47 | (2017) Data from: Sensitivity of functional diversity metrics to sampling 48 | intensity. Dryad Digital Repository. \doi{10.5061/dryad.1fn46} } 49 | } 50 | \seealso{ 51 | Other rawdata: 52 | \code{\link{amniota}}, 53 | \code{\link{amphibio}}, 54 | \code{\link{arthropodtraits}}, 55 | \code{\link{heteroptera_raw}}, 56 | \code{\link{mammaldiet}}, 57 | \code{\link{pantheria}}, 58 | \code{\link{pulldata}()} 59 | } 60 | \author{ 61 | Fons van der Plas, R. van Klink, P. Manning, H. Olff, M.
Fischer 62 | } 63 | \concept{rawdata} 64 | -------------------------------------------------------------------------------- /man/cast.traitdata.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/cast.traitdata.R 3 | \name{cast.traitdata} 4 | \alias{cast.traitdata} 5 | \title{Cast long-table trait data into wide-table format} 6 | \usage{ 7 | cast.traitdata( 8 | .data, 9 | values = "verbatimTraitValue", 10 | traits = "verbatimTraitName", 11 | units = "verbatimTraitUnit", 12 | fun.aggregate = NULL 13 | ) 14 | } 15 | \arguments{ 16 | \item{.data}{dataset of class 'traitdata' to be cast into wide-table format.} 17 | 18 | \item{values}{the column name containing the trait values to be used to fill 19 | the matrix (default is \code{verbatimTraitValue}). Duplicate columns (e.g. 20 | \code{traitValue}) will be omitted. See notes.} 21 | 22 | \item{traits}{the column name to be kept for parsing into wide-table (default 23 | is \code{verbatimTraitName}). Note that any duplicate column that contains trait names, 24 | e.g. \code{traitName} will be omitted.} 25 | 26 | \item{units}{the column name containing the units of numerical values 27 | (default is \code{verbatimTraitUnit}).} 28 | 29 | \item{fun.aggregate}{option for \code{\link[reshape2:cast]{reshape2::cast()}} to define method of 30 | aggregation.} 31 | } 32 | \value{ 33 | a wide-table data.frame object containing all taxa (and other 34 | differentiating parameters) in rows and all traits (extracted from column 35 | 'verbatimTraitName') in columns. 36 | } 37 | \description{ 38 | Function to reformat trait data from the long-table into a matrix/wide-table 39 | or occurrence table format. 40 | } 41 | \details{ 42 | The wide-table will be composed while preserving the detail given in 43 | the dataset (occurrence level or taxa level). The cells will be filled 44 | with the values from 'verbatimTraitValue'. 
45 | 46 | If taxa should be summarized, provide a function for summarizing in parameter 47 | 'fun.aggregate'. This can be any function that takes a vector and returns a 48 | single value for \strong{both numerical and factorial/character input vectors!}. 49 | The default for numerical values is to return an arithmetic mean while 50 | including any outliers. For factorial values, the value is accepted if it 51 | is equal in all of the input entries. Otherwise NA is returned. For 52 | heterogeneous factorial or character input, user action is required for 53 | homogenizing the data before calling \code{cast.traitdata()}. 54 | } 55 | \section{Duplicate columns}{ 56 | The function is currently not able to handle 57 | multiple columns of trait data (incl. names and units). Those are currently 58 | omitted from the output and may be added manually. You can alter the 59 | columns to be used to construct the matrix by specifying those in 60 | parameters \code{traits}, \code{values}, and \code{units}. Automatic handling of the \code{Std} 61 | columns might be added at a later stage.
62 | } 63 | 64 | \examples{ 65 | 66 | pulldata("arthropodtraits") 67 | head(arthropodtraits) 68 | dataset3 <- as.traitdata(arthropodtraits, 69 | taxa = "SpeciesID", 70 | traits = c("Body_Size", "Dispersal_ability", 71 | "Feeding_guild","Feeding_guild_short", 72 | "Feeding_mode", "Feeding_specialization", 73 | "Feeding_tissue", "Feeding_plant_part", 74 | "Endophagous_lifestyle", "Stratum_use", 75 | "Stratum_use_short"), 76 | units = c(Body_Size = "mm"), 77 | keep = c(measurementRemark = "Remark"), 78 | metadata = as.metadata( 79 | license = "http://creativecommons.org/publicdomain/zero/1.0/" 80 | ) 81 | ) 82 | 83 | head(dataset3) 84 | 85 | dd3 <-cast.traitdata(dataset3) 86 | head(dd3) 87 | 88 | 89 | } 90 | -------------------------------------------------------------------------------- /man/fixlogical.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils.R 3 | \name{fixlogical} 4 | \alias{fixlogical} 5 | \title{Standardise logical variables} 6 | \usage{ 7 | fixlogical(x, output = "logical", categories = c("No", "Yes")) 8 | } 9 | \arguments{ 10 | \item{x}{a vector of two different types of entries (can be of type factor, integer, logical, or character).} 11 | 12 | \item{output}{a switch to set the desired output format. Defaults to "logical", but can be "character", "binary" or "factor".} 13 | 14 | \item{categories}{output target categories for binary/logical traits harmonization if \code{output} is not set to 'logical'.} 15 | } 16 | \value{ 17 | A vector of harmonized logical values. 
18 | } 19 | \description{ 20 | Standardise logical variables 21 | } 22 | \keyword{internal} 23 | -------------------------------------------------------------------------------- /man/get_gbif_taxonomy.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/get_gbif_taxonomy.R 3 | \name{get_gbif_taxonomy} 4 | \alias{get_gbif_taxonomy} 5 | \title{Get accepted canonical names and taxonomy for a given species name} 6 | \usage{ 7 | get_gbif_taxonomy( 8 | x, 9 | subspecies = TRUE, 10 | higherrank = FALSE, 11 | verbose = FALSE, 12 | fuzzy = TRUE, 13 | conf_threshold = 90, 14 | resolve_synonyms = TRUE 15 | ) 16 | } 17 | \arguments{ 18 | \item{x}{a character string or vector of species names.} 19 | 20 | \item{subspecies}{logical. If TRUE (default), the given name is resolved to 21 | subspecies epithet, otherwise it will be mapped to species level.} 22 | 23 | \item{higherrank}{logical. If FALSE (default), it will not allow remapping of 24 | unknown species names to higher taxon ranks (e.g. genus).} 25 | 26 | \item{verbose}{logical. If FALSE (default), warnings and messages are 27 | suppressed.} 28 | 29 | \item{fuzzy}{logical. Defaults to TRUE to deal with misspelled names. May 30 | produce wrong assignments in case of very similar taxon names. If FALSE, 31 | names are only resolved to exactly matching taxa on GBIF 32 | taxonomy service.} 33 | 34 | \item{conf_threshold}{numerical, ranging from 0 to 100 (default value = 90). 35 | Defines the confidence level of the request to be accepted. To cover for 36 | misspellings and errors, could go as low as 50.} 37 | 38 | \item{resolve_synonyms}{logical.
If TRUE (default), user provided synonyms 39 | are mapped to the accepted names on GBIF taxonomy service.} 40 | } 41 | \value{ 42 | a data.frame mapping the user supplied names to the accepted taxon 43 | names and higher taxonomic information (kingdom, phylum, class, order, 44 | family, genus). 45 | } 46 | \description{ 47 | The function maps user provided names to accepted species names. 48 | } 49 | \details{ 50 | The function relies on package 'taxize' by Scott Chamberlain. It 51 | uses the spell-checking and fuzzy matching algorithms provided by Global 52 | Names Resolver (\code{taxize::gnr_resolve()}) and forwards synonyms to the 53 | accepted names as provided by GBIF Backbone Taxonomy 54 | (\code{taxize::get_gbifid_()}). 55 | 56 | If 'synonym' is returned as TRUE, the user provided name has been 57 | identified as a synonym and was mapped to an accepted name. 58 | 59 | The field confidence reports the confidence of the matching procedure 60 | performed by the function \code{get_gbifid_()} of the package 'taxize'. The 61 | taxonID is a globally valid URI that links to the taxon description of the 62 | GBIF backbone taxonomy. 
63 | } 64 | \examples{ 65 | 66 | get_gbif_taxonomy(c("Chorthippus albomarginatus", "Chorthippus apricarius", 67 | "Chorthippus biguttulus", "Chorthippus dorsatus", "Chorthippus montanus", 68 | "Chorthippus parallelus", "Chrysochraon dispar", "Conocephalus dorsalis", 69 | "Conocephalus fuscus", "Decticus verrucivorus", "Euthystira brachyptera", 70 | "Gomphocerippus rufus", "Gryllus campestris", "Metrioptera roeselii", 71 | "Omocestus viridulus", "Phaneroptera falcata", "Platycleis albopunctata", 72 | "Spec", "Stenobothrus lineatus", "Stenobothrus stigmaticus", 73 | "Stethophyma grossum", "Tetrix kraussi", "Tetrix subulata", 74 | "Tetrix tenuicornis", "Tetrix undulata", "Tettigonia cantans", 75 | "Tettigonia viridissima") 76 | ) 77 | 78 | get_gbif_taxonomy("Vicia") 79 | } 80 | -------------------------------------------------------------------------------- /man/glossary.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/glossary.R 3 | \docType{data} 4 | \name{glossary} 5 | \alias{glossary} 6 | \title{Ecological Trait-data Standard vocabulary (ETS)} 7 | \format{ 8 | An object of class \code{data.frame} with 103 rows and 13 columns. 
9 | } 10 | \usage{ 11 | glossary 12 | } 13 | \description{ 14 | The terms and concepts as defined by the ETS (https://terminologies.gfbio.org/terms/ets/pages/) 15 | } 16 | \keyword{datasets} 17 | -------------------------------------------------------------------------------- /man/heteroptera_raw.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data.R 3 | \name{heteroptera_raw} 4 | \alias{heteroptera_raw} 5 | \alias{heteroptera} 6 | \title{Heteroptera morphometry traits} 7 | \source{ 8 | \url{https://figshare.com/articles/dataset/Data_Paper_Data_Paper/3561936}; 9 | \href{https://creativecommons.org/publicdomain/zero/1.0/}{Creative Commons 10 | 0}. To the extent possible under law, the authors have waived all copyright 11 | and related or neighboring rights to this data. 12 | } 13 | \value{ 14 | The dataset \code{heteroptera_raw} contains multiple observations of 15 | each species (occurrence table). The dataset \code{heteroptera} is a 16 | compiled species-trait matrix. 17 | 18 | Returns a data object that includes attributes for data standardisation. 19 | } 20 | \description{ 21 | Morphometric measures of Heteroptera sampled in grasslands across three 22 | regions of Germany. 23 | } 24 | \details{ 25 | Trait-based approaches have increased significantly in community 26 | ecology during the last decade. This is not least because studies on 27 | biodiversity-ecosystem functioning relationships became a major topic in 28 | ecology. Species' functions in ecosystems are mediated by their traits. For 29 | a better understanding of the relationships between environmental drivers, 30 | the community composition of organisms and ecosystems functioning, it is 31 | crucial to understand how these relationships are mediated by the 32 | communities' trait composition. 
While there are world-wide efforts to set 33 | up trait databases, most have so far focused on plants and species-poorer 34 | taxa such as birds or amphibians. In contrast, for insects, the large 35 | number of species makes the gathering of comparable trait data a 36 | challenging task. In addition, there is the danger that generic trait 37 | information, which is available from common textbooks, may not be 38 | sufficient to detect the response of insect communities to environmental 39 | change or the consequences of trait changes for ecosystem functioning. One 40 | method to overcome this is to take morphometric measurements of species. In 41 | this study we measured morphometric traits of a total of 179 Heteroptera 42 | species that were sampled by sweep-netting on a total of 150 managed 43 | grassland plots across three regions in Germany between 2008 and 2012. 44 | These plots represent the whole range of grassland management intensities 45 | from extensively used pastures to mown pastures to intensively managed and 46 | fertilized meadows. In this paper we provide a database of mean values of 47 | 23 morphometric measures across sex and morphotypes for each sampled 48 | Heteroptera species. Morphological traits are assumed to be related to 49 | their adaptation and function in the environment. Thus the relative 50 | morphometric traits can be used as proxies for ecological features of a 51 | species that may affect its performance or fitness. Our database can be 52 | used by future trait-based studies for developing and testing hypotheses of 53 | the functional significance of these traits. Examples include studying the 54 | functional responses of insect communities to environmental drivers or 55 | studying how the change in trait composition affects ecosystem processes. 56 | } 57 | \section{Citation}{ 58 | Cite this dataset as 59 | \itemize{ 60 | \item Gossner, M. M., N. K. Simons, L. Höck, and W. W. Weisser. 2015. 
61 | Morphometric measures of Heteroptera sampled in grasslands across three 62 | regions of Germany. Ecology 96:1154-1154. 63 | \item Data publication: Gossner, M.M, Simons, N.K., Höck, L., Weisser, W.W., 64 | 2016. Morphometric measures of Heteroptera sampled in grasslands across three 65 | regions of Germany. figshare. \doi{10.6084/m9.figshare.c.3307611.v1} 66 | } 67 | } 68 | 69 | \seealso{ 70 | Other rawdata: 71 | \code{\link{amniota}}, 72 | \code{\link{amphibio}}, 73 | \code{\link{arthropodtraits}}, 74 | \code{\link{carabids}}, 75 | \code{\link{mammaldiet}}, 76 | \code{\link{pantheria}}, 77 | \code{\link{pulldata}()} 78 | 79 | Other rawdata: 80 | \code{\link{amniota}}, 81 | \code{\link{amphibio}}, 82 | \code{\link{arthropodtraits}}, 83 | \code{\link{carabids}}, 84 | \code{\link{mammaldiet}}, 85 | \code{\link{pantheria}}, 86 | \code{\link{pulldata}()} 87 | } 88 | \author{ 89 | Martin M. Gossner , Nadja K. Simons, Leonhard Höck, Wolfgang W. 90 | Weisser 91 | } 92 | \concept{rawdata} 93 | -------------------------------------------------------------------------------- /man/mammaldiet.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data.R 3 | \name{mammaldiet} 4 | \alias{mammaldiet} 5 | \title{Mammal diet database} 6 | \source{ 7 | Cite this dataset as: 8 | \itemize{ 9 | \item Kissling, W.D., Dalby, L., Fløjgaard, C., Lenoir, J., Sandel, B., Sandom, 10 | C., Trøjelsgaard, K., Svenning, J. (2014). Establishing macroecological 11 | trait datasets: digitalization, extrapolation, and validation of diet 12 | preferences in terrestrial mammals worldwide. Ecol Evol, 4, 2913-2930. 
13 | \doi{10.1002/ece3.1136} 14 | } 15 | 16 | Additionally, please cite the Dryad data package: 17 | \itemize{ 18 | \item Kissling WD, Dalby L, Fløjgaard C, Lenoir J, Sandel B, Sandom C, 19 | Trøjelsgaard K, Svenning J-C (2014) Data from: Establishing macroecological 20 | trait datasets: digitalization, extrapolation, and validation of diet 21 | preferences in terrestrial mammals worldwide. Dryad Digital Repository. 22 | \doi{10.5061/dryad.6cd0v} 23 | } 24 | 25 | \href{https://creativecommons.org/publicdomain/zero/1.0/}{Creative Commons 26 | 0}. To the extent possible under law, the authors have waived all copyright 27 | and related or neighboring rights to this data. 28 | } 29 | \value{ 30 | This is a data object that provides instructions for \code{pulldata()}. 31 | } 32 | \description{ 33 | A comprehensive global dataset of diet preferences of mammals 34 | ('MammalDIET'). Diet information was digitized from the literature and 35 | extrapolated for species with missing information. The original and 36 | extrapolated data cover species-level diet information for >99\% of all 37 | terrestrial mammals. 38 | } 39 | \details{ 40 | Ecological trait data are essential for understanding the 41 | broad-scale distribution of biodiversity and its response to global change. 42 | For animals, diet represents a fundamental aspect of species' evolutionary 43 | adaptations, ecological and functional roles, and trophic interactions. 44 | However, the importance of diet for macroevolutionary and macroecological 45 | dynamics remains little explored, partly because of the lack of 46 | comprehensive trait datasets. We compiled and evaluated a comprehensive 47 | global dataset of diet preferences of mammals (“MammalDIET”). Diet 48 | information was digitized from two global and cladewide data sources and 49 | errors of data entry by multiple data recorders were assessed. 
We then 50 | developed a hierarchical extrapolation procedure to fill-in diet 51 | information for species with missing information. Missing data were 52 | extrapolated with information from other taxonomic levels (genus, other 53 | species within the same genus, or family) and this extrapolation was 54 | subsequently validated both internally (with a jack-knife approach applied 55 | to the compiled species-level diet data) and externally (using independent 56 | species-level diet information from a comprehensive continentwide data 57 | source). Finally, we grouped mammal species into trophic levels and dietary 58 | guilds, and their species richness as well as their proportion of total 59 | richness were mapped at a global scale for those diet categories with good 60 | validation results. The success rate of correctly digitizing data was 94\%, 61 | indicating that the consistency in data entry among multiple recorders was 62 | high. Data sources provided species-level diet information for a total of 63 | 2033 species (38\% of all 5364 terrestrial mammal species, based on the IUCN 64 | taxonomy). For the remaining 3331 species, diet information was mostly 65 | extrapolated from genus-level diet information (48\% of all terrestrial 66 | mammal species), and only rarely from other species within the same genus 67 | (6\%) or from family level (8\%). Internal and external validation showed 68 | that: (1) extrapolations were most reliable for primary food items; (2) 69 | several diet categories (“Animal,” “Mammal,” “Invertebrate,” “Plant,” 70 | “Seed,” “Fruit,” and “Leaf”) had high proportions of correctly predicted 71 | diet ranks; and (3) the potential of correctly extrapolating specific diet 72 | categories varied both within and among clades. Global maps of species 73 | richness and proportion showed congruence among trophic levels, but also 74 | substantial discrepancies between dietary guilds. 
MammalDIET provides a 75 | comprehensive, unique and freely available dataset on diet preferences for 76 | all terrestrial mammals worldwide. It enables broad-scale analyses for 77 | specific trophic levels and dietary guilds, and a first assessment of trait 78 | conservatism in mammalian diet preferences at a global scale. The 79 | digitalization, extrapolation and validation procedures could be 80 | transferable to other trait data and taxa. 81 | } 82 | \seealso{ 83 | Other rawdata: 84 | \code{\link{amniota}}, 85 | \code{\link{amphibio}}, 86 | \code{\link{arthropodtraits}}, 87 | \code{\link{carabids}}, 88 | \code{\link{heteroptera_raw}}, 89 | \code{\link{pantheria}}, 90 | \code{\link{pulldata}()} 91 | } 92 | \author{ 93 | Kissling, W.D., Dalby, L., Fløjgaard, C., 94 | Lenoir, J., Sandel, B., Sandom, C., Trøjelsgaard, K., Svenning, J. 95 | } 96 | \concept{rawdata} 97 | -------------------------------------------------------------------------------- /man/mutate.traitdata.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/mutate.traitdata.R 3 | \name{mutate.traitdata} 4 | \alias{mutate.traitdata} 5 | \title{Mutate traits within a traitdata object.} 6 | \usage{ 7 | mutate.traitdata( 8 | .data, 9 | ..., 10 | values = "verbatimTraitValue", 11 | traits = "verbatimTraitName", 12 | units = "verbatimTraitUnit" 13 | ) 14 | } 15 | \arguments{ 16 | \item{.data}{the traitdata object to transform} 17 | 18 | \item{...}{named parameters giving definitions of new columns.} 19 | 20 | \item{values}{(NOT TESTED) the column name containing the trait values to be 21 | used to fill the matrix (default is \code{verbatimTraitValue}). Duplicate columns (e.g. 22 | \code{traitValue}) will be omitted. See notes.} 23 | 24 | \item{traits}{(NOT TESTED) the column name to be kept for parsing into 25 | wide-table (default is \code{verbatimTraitName}). 
Note that any duplicate column that 26 | contains trait names, e.g. \code{traitName} will be omitted.} 27 | 28 | \item{units}{(NOT TESTED) the column name containing the units of numerical 29 | values (default is \code{verbatimTraitUnit}).} 30 | } 31 | \value{ 32 | an updated traitdata object with the new trait measures or facts 33 | appended to the original table. If the given trait name has been refined, 34 | it will be replaced. 35 | } 36 | \description{ 37 | This function allows to transform, factorize, or combine trait measurements 38 | into compound measurements or update factor levels into binaries. 39 | } 40 | \details{ 41 | The function handles units for numerical traits and returns the new 42 | unit of the computed value in column \code{verbatimTraitUnit}, if units of input 43 | variables were specified according to the units package. Handling of other 44 | columns than \code{verbatimTraitName} and \code{verbatimTraitValue} is not advised at present. 45 | 46 | It is advised to mutate traits before applying \code{standardize.traits()}! If 47 | the mutate function is applied to a standardised dataset, the new trait 48 | will not be mapped automatically to the provided thesaurus. (automated 49 | re-mapping might be added in later versions of the package.) 
50 | } 51 | \examples{ 52 | 53 | \dontrun{ 54 | pulldata("arthropodtraits") 55 | dataset3 <- as.traitdata(arthropodtraits, 56 | taxa = "SpeciesID", 57 | traits = c("Body_Size", "Dispersal_ability", 58 | "Feeding_guild","Feeding_guild_short", 59 | "Feeding_mode", "Feeding_specialization", 60 | "Feeding_tissue", "Feeding_plant_part", 61 | "Endophagous_lifestyle", "Stratum_use", 62 | "Stratum_use_short"), 63 | units = c(Body_Size = "mm", Dispersal_ability = "unitless"), 64 | keep = c(measurementRemark = "Remark"), 65 | metadata = list( 66 | license = "http://creativecommons.org/publicdomain/zero/1.0/" 67 | ) 68 | ) 69 | head(dataset3) 70 | 71 | updated <- mutate.traitdata(dataset3, predator = Feeding_guild == "c" ) 72 | 73 | head(updated[updated$verbatimTraitName == "predator",]) 74 | 75 | levels(updated$verbatimTraitName) 76 | 77 | ## 78 | 79 | pulldata("heteroptera_raw") 80 | dataset2 <- as.traitdata(heteroptera_raw, 81 | traits = c("Body_length", "Body_width", "Body_height", "Thorax_length", 82 | "Thorax_width", "Head_width", "Eye_width", "Antenna_Seg1", 83 | "Antenna_Seg2", "Antenna_Seg3", "Antenna_Seg4", "Antenna_Seg5", 84 | "Front.Tibia_length", "Mid.Tibia_length", "Hind.Tibia_length", 85 | "Front.Femur_length", "Hind.Femur_length", "Front.Femur_width", 86 | "Hind.Femur_width", "Rostrum_length", "Rostrum_width", 87 | "Wing_length", "Wing_width"), 88 | taxa = "SpeciesID", 89 | occurrences = "ID" 90 | ) 91 | updated <- mutate.traitdata(dataset2, 92 | Body_shape = Body_length/Body_width, 93 | Body_volume = Body_length*Body_width*Body_height, 94 | Wingload = Wing_length*Wing_width/Body_volume) 95 | 96 | head(updated[updated$verbatimTraitName \%in\% c( "Body_volume"),]) 97 | } 98 | } 99 | -------------------------------------------------------------------------------- /man/pantheria.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data.R 3 | 
\name{pantheria} 4 | \alias{pantheria} 5 | \title{PanTHERIA mammal traits} 6 | \source{ 7 | Cite as: 8 | 9 | E. Jones, Kate; Bielby, Jon; Cardillo, Marcel; A. Fritz, Susanne; O'Dell, 10 | Justin; David L. Orme, C.; Safi, Kamran; Sechrest, Wes; H. Boakes, 11 | Elizabeth; Carbone, Chris; Connolly, Christina; Cutts, Michael J.; Foster, 12 | Janine K.; Grenyer, Richard; Habib, Michael; Plaster, Christopher A.; 13 | Price, Samantha A.; Rigby, Elizabeth A.; Rist, Janna; Teacher, Amber; 14 | Bininda-Emonds, Olaf R. P.; Gittleman, John L.; M. Mace, Georgina; Purvis, 15 | Andy (2016): PanTHERIA: a species-level database of life history, ecology, 16 | and geography of extant and recently extinct mammals. 17 | \doi{10.1890/08-1494.1}; 18 | 19 | \href{https://creativecommons.org/publicdomain/zero/1.0/}{Creative Commons 20 | 0}. To the extent possible under law, the authors have waived all copyright 21 | and related or neighboring rights to this data. 22 | } 23 | \value{ 24 | Returns a data object that includes attributes for data standardisation. 25 | } 26 | \description{ 27 | Here we describe a global species-level data set of key 28 | life-history, ecological and geographical traits of all known extant and 29 | recently extinct mammals (PanTHERIA) developed for a number of 30 | macroecological and macroevolutionary research projects. 31 | } 32 | \details{ 33 | Data were gathered from the literature for 25 types of ecological 34 | and life history information for any extant or recently extinct species 35 | within class Mammalia (100740 data lines): 36 | \enumerate{ 37 | \item Activity Cycle; 2. Age at Eye Opening; 3. Age at First Birth; 4. Average 38 | Lifespan; 5. Body Mass; 6. Diet; 7. Dispersal Age; 8. Adult Limb Length; 9. 39 | Gestation Length; 10. Group Composition & Size; 11. Growth Data; 12. 40 | Habitat Layer; 13. Head-Body Length; 14. Interbirth Interval; 15. Litter 41 | size; 16. Litters Per Year; 17. Maximum Longevity; 18. Metabolic Rate; 19. 
42 | Migratory Behaviour; 20. Mortality Data; 21. Population Density; 22. 43 | Ranging Behaviour; 23. Sexual 44 | Maturity Age; 24. Teat Number; and 25. Weaning Age. 45 | } 46 | 47 | 30 specific variables (see Class IV, Table 1) were extracted from the above 48 | data types for PanTHERIA from a total of 94729 data lines (before error 49 | checking). Additionally, 4 variables were derived from extracted variables 50 | within PanTHERIA and 19 variables were calculated from other spatial data 51 | sources (see Class V, Section C). 52 | 53 | see \doi{10.6084/m9.figshare.c.3301274.v1} for further 54 | information. 55 | } 56 | \seealso{ 57 | Other rawdata: 58 | \code{\link{amniota}}, 59 | \code{\link{amphibio}}, 60 | \code{\link{arthropodtraits}}, 61 | \code{\link{carabids}}, 62 | \code{\link{heteroptera_raw}}, 63 | \code{\link{mammaldiet}}, 64 | \code{\link{pulldata}()} 65 | } 66 | \author{ 67 | Kate E. Jones, Jon Bielby, Marcel Cardillo, Susanne A. Fritz, Justin 68 | O'Dell, C. David L. Orme, Kamran Safi, Wes Sechrest, Elizabeth H. Boakes, 69 | Chris Carbone, Christina Connolly, Michael J. Cutts, Janine K. Foster, 70 | Richard Grenyer, Michael Habib, Christopher A. Plaster, Samantha A. Price, 71 | Elizabeth A. Rigby, Janna Rist, Amber Teacher, Olaf R. P. Bininda-Emonds, 72 | John L. Gittleman, Georgina M. Mace, and Andy Purvis. 73 | } 74 | \concept{rawdata} 75 | -------------------------------------------------------------------------------- /man/pulldata.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data.R 3 | \name{pulldata} 4 | \alias{pulldata} 5 | \title{Pull trait data from the internet} 6 | \usage{ 7 | pulldata(x = NULL) 8 | } 9 | \arguments{ 10 | \item{x}{the dataset to source. 
Leave empty for full list of available datasets.} 11 | } 12 | \value{ 13 | Makes a raw dataset available in R according to instructions provided by dataset function. 14 | } 15 | \description{ 16 | This function sources a recipe for extracting public trait data. 17 | } 18 | \details{ 19 | The package 'traitdataform' comes with a collection of recipes for public trait data. These R-scripts 20 | \enumerate{ 21 | \item define how to read the file from an online source, i.e. a URL to a txt, xlsx, or a ZIP archive 22 | \item assign metadata attributes about authorship, license and original publication 23 | \item provide parameters for the standardize() function, i.e. a trait thesaurus, mappings and units. 24 | } 25 | 26 | New recipes can be suggested as a pull request via the package development page (https://github.com/EcologicalTraitData/traitdataform). 27 | } 28 | \examples{ 29 | 30 | # to get a list of all available data within the package 31 | pulldata() 32 | 33 | # to import a dataset 34 | pulldata("carabids") 35 | } 36 | \seealso{ 37 | Other rawdata: 38 | \code{\link{amniota}}, 39 | \code{\link{amphibio}}, 40 | \code{\link{arthropodtraits}}, 41 | \code{\link{carabids}}, 42 | \code{\link{heteroptera_raw}}, 43 | \code{\link{mammaldiet}}, 44 | \code{\link{pantheria}} 45 | } 46 | \author{ 47 | Florian D. 
Schneider 48 | } 49 | \concept{rawdata} 50 | -------------------------------------------------------------------------------- /man/rbind.traitdata.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/rbind.traitdata.R 3 | \name{rbind.traitdata} 4 | \alias{rbind.traitdata} 5 | \title{Combine trait datasets} 6 | \usage{ 7 | \method{rbind}{traitdata}( 8 | ..., 9 | metadata = NULL, 10 | datasetID = NULL, 11 | metadata_as_columns = FALSE, 12 | drop = NULL 13 | ) 14 | } 15 | \arguments{ 16 | \item{...}{two or more objects of class traitdata.} 17 | 18 | \item{metadata}{a list of metadata entries which are to be added as 19 | dataset-level information.} 20 | 21 | \item{datasetID}{a vector of the same length as number of objects. If \code{NULL} 22 | (default), object names will be returned as ID.} 23 | 24 | \item{metadata_as_columns}{logical (defaults to FALSE) or vector of columns to 25 | return. If TRUE , the output will contain the "author", "license", 26 | "datasetName" and (autogenerated) "datasetID" name, if those are provided. 27 | If character vector, the output will contain the listed columns.} 28 | 29 | \item{drop}{FALSE by default. If true, columns that are not present in all 30 | datasets will be dropped.} 31 | } 32 | \description{ 33 | Method for function \code{rbind()} to append objects of class 'traitdata' to each 34 | other. 35 | } 36 | \details{ 37 | Metadata are ideally already included in the datasets as attributes 38 | (see \code{?as.traitdata}). The function \code{rbind.traitdata()} takes a list of 39 | lists as its metadata argument. The outer list must have the same length as 40 | the provided objects to combine, with each entry containing objects 41 | according to the terms of the Ecological Traitdata Standard 42 | (http://ecologicaltraitdata.github.io/ETS/#metadata-vocabulary). 
43 | 44 | A lookup table for dataset details will be appended as an attribute to the 45 | output dataset, linked to each entry via the field \code{datasetID}. It can be 46 | accessed by calling \verb{attributes()$datasets}. 47 | } 48 | \examples{ 49 | 50 | pulldata("carabids") 51 | 52 | dataset1 <- as.traitdata(carabids, 53 | taxa = "name_correct", 54 | traits = c("body_length", "antenna_length", "metafemur_length"), 55 | units = "mm", 56 | keep = c(datasetID = "source_measurement", measurementRemarks = "note"), 57 | metadata = as.metadata( 58 | bibliographicCitation = c( 59 | "van der Plas et al. (2017) Methods in Ecol. & Evol., doi: 10.1111/2041-210x.12728" 60 | ), 61 | author = "Fons van der Plas", 62 | license = "http://creativecommons.org/publicdomain/zero/1.0/" 63 | ) 64 | ) 65 | 66 | 67 | traits1 <- as.thesaurus( 68 | body_length = as.trait("body_length", 69 | expectedUnit = "mm", valueType = "numeric", 70 | identifier = "http://t-sita.cesab.org/BETSI_vizInfo.jsp?trait=Body_length"), 71 | antenna_length = as.trait("antenna_length", 72 | expectedUnit = "mm", valueType = "numeric", 73 | identifier = "http://t-sita.cesab.org/BETSI_vizInfo.jsp?trait=Antenna_length"), 74 | metafemur_length = as.trait("metafemur_length", 75 | expectedUnit = "mm", valueType = "numeric", 76 | identifier = "http://t-sita.cesab.org/BETSI_vizInfo.jsp?trait=Femur_length") 77 | ) 78 | 79 | dataset1Std <- standardize.traits(dataset1, thesaurus = traits1) 80 | 81 | # occurrence table: 82 | 83 | pulldata("heteroptera_raw") 84 | 85 | dataset2 <- as.traitdata(heteroptera_raw, 86 | taxa = "SpeciesID", 87 | traits = c("Body_length", "Antenna_Seg1", "Antenna_Seg2", 88 | "Antenna_Seg3", "Antenna_Seg4", "Antenna_Seg5", "Hind.Femur_length"), 89 | units = "mm", 90 | keep = c(sex = "Sex", references = "Source", lifeStage = "Wing_development"), 91 | metadata = as.metadata( 92 | bibliographicCitation = "Gossner et al. (2015) Ecology, 96:1154. 
doi: 10.1890/14-2159.1", 93 | author = "Martin Gossner", 94 | license = "http://creativecommons.org/publicdomain/zero/1.0/" 95 | ) 96 | ) 97 | 98 | dataset2 <- mutate.traitdata(dataset2, 99 | antenna_length = Antenna_Seg1 + Antenna_Seg2 + Antenna_Seg3 + Antenna_Seg4 + Antenna_Seg5 100 | ) 101 | 102 | 103 | traits2 <- as.thesaurus( 104 | Body_length = as.trait("body_length", 105 | expectedUnit = "mm", valueType = "numeric", 106 | identifier = "http://t-sita.cesab.org/BETSI_vizInfo.jsp?trait=Body_length"), 107 | antenna_length = as.trait("antenna_length", 108 | expectedUnit = "mm", valueType = "numeric", 109 | identifier = "http://t-sita.cesab.org/BETSI_vizInfo.jsp?trait=Antenna_length"), 110 | Hind.Femur_length = as.trait("metafemur_length", 111 | expectedUnit = "mm", valueType = "numeric", 112 | identifier = "http://t-sita.cesab.org/BETSI_vizInfo.jsp?trait=Femur_length") 113 | ) 114 | 115 | dataset2Std <- standardize_traits(dataset2, thesaurus = traits2) 116 | 117 | database <- rbind(dataset1Std, dataset2Std, 118 | datasetID = c("vanderplas17", "gossner15"), 119 | metadata_as_columns = c("author")) 120 | head(database) 121 | } 122 | -------------------------------------------------------------------------------- /man/read.service.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils.R 3 | \name{read.service} 4 | \alias{read.service} 5 | \alias{read.service.blocks} 6 | \title{BExIS access over a Webservice} 7 | \usage{ 8 | read.service( 9 | datasetid, 10 | user = NULL, 11 | pswd = NULL, 12 | dec = ".", 13 | na.strings = "NA", 14 | fill = FALSE, 15 | sep = "\\t", 16 | quote = if (identical(sep, "\\n")) "" else "'\\"", 17 | fileEncoding = "UTF-8" 18 | ) 19 | 20 | read.service.blocks( 21 | datasetid, 22 | user, 23 | pswd, 24 | dec = ".", 25 | na.strings = "NA", 26 | fill = FALSE, 27 | sep = "\\t", 28 | quote = if (identical(sep, "\\n")) "" else 
"'\\"", 29 | fileEncoding = "UTF-8" 30 | ) 31 | } 32 | \arguments{ 33 | \item{datasetid}{Integer BExIS ID of the requested dataset.} 34 | 35 | \item{user}{User name on BExIS. If not provided, function will prompt input.} 36 | 37 | \item{pswd}{Password on BExIS. If not provided, function will prompt input.} 38 | 39 | \item{dec}{the character used in the file for decimal points.} 40 | 41 | \item{na.strings}{a character vector of strings which are to be interpreted as NA values. Blank fields are also considered to be missing values in logical, integer, numeric and complex fields.} 42 | 43 | \item{fill}{logical. If TRUE then in case the rows have unequal length, blank fields are implicitly added. See 'Details' of \code{?read.table}.} 44 | 45 | \item{sep}{the field separator character. Values on each line of the file are separated by this character. If sep = "" (the default for read.table) the separator is 'white space', that is one or more spaces, tabs, newlines or carriage returns.} 46 | 47 | \item{quote}{the set of quoting characters. To disable quoting altogether, use quote = "". See scan for the behaviour on quotes embedded in quotes. Quoting is only considered for columns read as character, which is all of them unless colClasses is specified.} 48 | 49 | \item{fileEncoding}{character string: if non-empty declares the encoding to be used on a file (not a connection) so the character data can be re-encoded as they are written. See \code{\link[base:connections]{base::connections()}}.} 50 | } 51 | \value{ 52 | Returns dataset from BExIS as a data.frame. 53 | } 54 | \description{ 55 | Read table from a web service. Inherits functionality of \code{read.table()}. 56 | } 57 | \details{ 58 | \code{read.service.blocks()} returns a data.frame array as a list of data.frames. 
59 | } 60 | \author{ 61 | Dennis Heimann, Andreas Ostrowski 62 | } 63 | \keyword{internal} 64 | -------------------------------------------------------------------------------- /man/standardize.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/standardize.R 3 | \name{standardize} 4 | \alias{standardize} 5 | \title{Standardize trait datasets} 6 | \usage{ 7 | standardize(x, ...) 8 | } 9 | \arguments{ 10 | \item{x}{a traitdata object (as returned by \code{as.traitdata()}) or a data table 11 | containing at least the column \code{verbatimScientificName}.} 12 | 13 | \item{...}{parameters as described for \code{standardize.traits()} and 14 | \code{standardize.taxonomy()}.} 15 | } 16 | \value{ 17 | A traitdata object with standardized scientific taxon names according 18 | to GBIF Backbone taxonomy and standardized trait names according to a 19 | thesaurus, if provided. 20 | } 21 | \description{ 22 | Wrapper that applies \code{standardize.taxonomy()} and 23 | \code{standardize.traits()} in one go. 
24 | } 25 | \seealso{ 26 | Other standardize: 27 | \code{\link{standardize_taxa}()}, 28 | \code{\link{standardize_traits}()} 29 | } 30 | \concept{standardize} 31 | -------------------------------------------------------------------------------- /man/standardize.exploratories.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/standardize.exploratories.R 3 | \name{standardize.exploratories} 4 | \alias{standardize.exploratories} 5 | \title{Standardize Georeference from Biodiversity Exploratories Plot ID} 6 | \usage{ 7 | standardize.exploratories( 8 | x, 9 | plots = "locationID", 10 | user = NULL, 11 | pswd = NULL, 12 | getdata = !is.null(user), 13 | fillall = TRUE, 14 | ..., 15 | verbose = NULL 16 | ) 17 | } 18 | \arguments{ 19 | \item{x}{A traitdata table of class 'traitdata'.} 20 | 21 | \item{plots}{Name of column containing the plot IDs. Must match the 22 | Exploratories EP_PlotID scheme (e.g. AEG1, AEG12, HEW21, ...).} 23 | 24 | \item{user}{User name for Biodiversity Exploratories Information System 25 | (BExIS, https://www.bexis.uni-jena.de); required for download of exact 26 | geolocation.} 27 | 28 | \item{pswd}{password for above request. Will be prompted for if not provided.} 29 | 30 | \item{getdata}{logical; if \code{FALSE} it suppresses the extraction of location 31 | data from BExIS, and will not ask for user credentials. Instead, public 32 | data of less precision will be used.} 33 | 34 | \item{fillall}{if TRUE (default), the output will contain all terms suggested 35 | by the glossary and fill empty columns with NA. 
This is required for an 36 | upload of the data to BExIS.} 37 | 38 | \item{...}{If input is a rawdata table of type species-trait matrix or 39 | occurrence table (wide table) then provide parameters according to 40 | \code{as.traitdata()}.} 41 | 42 | \item{verbose}{logical; if \code{FALSE} all messages will be suppressed.} 43 | } 44 | \value{ 45 | A traitdata object with harmonized location data from the context of the Biodiversity Exploratories project. 46 | } 47 | \description{ 48 | Adds columns of georeference to trait-data table if measurements 49 | relate to specimens from the Biodiversity Exploratories plots or regions. 50 | 51 | This function requires valid credentials for the Biodiversity Exploratories 52 | Information System (BExIS)! 53 | } 54 | \examples{ 55 | 56 | \dontrun{ 57 | moths <- read.service(21247, dec = ",") 58 | 59 | dataset1 <- as.traitdata(moths, taxa = "species", traits = c(body_mass = 60 | "weight", wing_length = "wing_length", wing_width = "wing_width", wing_area = 61 | "wing_area", wing_loading = "wing_loading"), 62 | keep = c(locationID = "plot")) 63 | 64 | dataset1Std <- standardize.exploratories(dataset1) 65 | } 66 | } 67 | -------------------------------------------------------------------------------- /man/standardize_taxa.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/standardize.R 3 | \name{standardize_taxa} 4 | \alias{standardize_taxa} 5 | \alias{standardize.taxonomy} 6 | \alias{standardise_taxa} 7 | \title{Standardize scientific names of species} 8 | \usage{ 9 | standardize_taxa( 10 | x, 11 | method = get_gbif_taxonomy, 12 | method_options = c(subspecies = TRUE, higherrank = FALSE, verbose = FALSE, fuzzy = 13 | TRUE, conf_threshold = 90, resolve_synonyms = TRUE), 14 | return = c("kingdom", "phylum", "class", "order", "family"), 15 | ... 
16 | ) 17 | } 18 | \arguments{ 19 | \item{x}{a traitdata object (as returned by \code{as.traitdata()}) or a data table 20 | containing at least the column \code{verbatimScientificName}.} 21 | 22 | \item{method}{default option is \code{get_gbif_taxonomy}. In principle, takes any 23 | function that takes a vector of species names as input to produce a 24 | taxonomy lookup table (i.e. mapping user-provided \code{verbatimScientificName} 25 | to \code{taxonID} and other taxon-level information). Will allow to choose from 26 | different sources of taxonomic reference.} 27 | 28 | \item{method_options}{a named vector of arguments to be passed on to \code{method}. 29 | See \link{get_gbif_taxonomy} for options.} 30 | 31 | \item{return}{a character vector containing the information that should be 32 | extracted into the output. Valid entries are the column names returned by 33 | function \code{get_gbif_taxonomy()}. See 'Details'.} 34 | 35 | \item{...}{parameters to be ignored, forwarded from wrapper function 36 | \code{standardize()}.} 37 | } 38 | \value{ 39 | A traitdata object with standardized scientific taxon names according 40 | to GBIF Backbone taxonomy. 41 | } 42 | \description{ 43 | Adds columns to a traitdata object containing accepted species 44 | names and relates to globally unique taxon identifiers via URI. 45 | } 46 | \details{ 47 | Taxonomic standardisation is an enormous challenge for biodiversity 48 | data management and research. Constant changes in species and higher taxa, 49 | refinements of phylogenetic trees and changing attribution to original 50 | authors, moving species into other genera or difficulties to place species 51 | into the Linnaean nomenclature result in highly fluctuating taxonomic 52 | definitions. 53 | 54 | As a consequence, there is not one reference for accepted species names and 55 | depending on the field of research and taxonomic focus other authorities 56 | will be employed. 
57 | 58 | For reasons of simplicity and because of its high coverage of taxa, the 59 | function \code{standardize_taxa()} uses the GBIF Backbone Taxonomy as its 60 | reference system and resolves all provided species names to the accepted 61 | name according to GBIF (resolving misspellings and synonyms in the 62 | process). We invite pull requests to make this function more general and 63 | enable a choice of a taxonomic reference. 64 | } 65 | \examples{ 66 | 67 | \dontrun{ 68 | 69 | pulldata("carabids") 70 | 71 | dataset1 <- as.traitdata(carabids, 72 | taxa = "name_correct", 73 | traits = c("body_length", "antenna_length", "metafemur_length"), 74 | units = "mm", 75 | keep = c(datasetID = "source_measurement", measurementRemark = "note"), 76 | metadata = list( 77 | bibliographicCitation = attributes(carabids)$citeAs, 78 | author = "Fons van der Plas", 79 | license = "http://creativecommons.org/publicdomain/zero/1.0/" 80 | ) 81 | ) 82 | 83 | dataset1Std <- standardize_taxa(dataset1) 84 | } 85 | } 86 | \seealso{ 87 | Other standardize: 88 | \code{\link{standardize_traits}()}, 89 | \code{\link{standardize}()} 90 | } 91 | \concept{standardize} 92 | -------------------------------------------------------------------------------- /man/standardize_traits.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/standardize.R 3 | \name{standardize_traits} 4 | \alias{standardize_traits} 5 | \alias{standardise_traits} 6 | \alias{standardize.traits} 7 | \title{Standardize trait names and harmonize measured values and reported facts} 8 | \usage{ 9 | standardize_traits( 10 | x, 11 | thesaurus = attributes(x)$thesaurus, 12 | rename = NULL, 13 | categories = c("No", "Yes"), 14 | output = "logical", 15 | ... 
16 | ) 17 | } 18 | \arguments{ 19 | \item{x}{a traitdata object (as returned by \code{as.traitdata()}) or a data table 20 | containing at least the column \code{verbatimScientificName}.} 21 | 22 | \item{thesaurus}{an object of class 'thesaurus' (as returned by 23 | \code{as.thesaurus()}).} 24 | 25 | \item{rename}{a named vector to map user-provided names to thesaurus object 26 | names (see Details).} 27 | 28 | \item{categories}{target categories for binary/logical traits harmonization.} 29 | 30 | \item{output}{behaviour of \code{fixlogical()}. See \code{\link[=fixlogical]{fixlogical()}}.} 31 | 32 | \item{...}{parameters to be ignored, forwarded from wrapper function 33 | \code{standardize()}.} 34 | } 35 | \value{ 36 | A traitdata object with standardized trait names according to a 37 | provided thesaurus. 38 | } 39 | \description{ 40 | Adds columns to a traitdata table with standardized trait names 41 | and relates them to globally unique identifiers via URIs. Optionally 42 | converts units of values and renames factor levels into accepted terms. 43 | } 44 | \details{ 45 | The function matches the trait names provided in 'verbatimTraitName' 46 | to the traits provided in the thesaurus (in field 'trait'). Matching must 47 | be exact (case sensitive). Fuzzy matching may be provided in a later 48 | version of the package. 49 | 50 | The function parameter 'rename' should be provided to map trait names where 51 | user-provided names and thesaurus names are different. In this case, rename 52 | should be a named vector with the target names used in the thesaurus as 53 | names, and the original names as provided in 'verbatimTraitName' as value. 54 | E.g. 
\code{rename = c()} 55 | } 56 | \examples{ 57 | 58 | 59 | pulldata("carabids") 60 | 61 | dataset1 <- as.traitdata(carabids, 62 | taxa = "name_correct", 63 | traits = c("body_length", "antenna_length", "metafemur_length"), 64 | units = "mm", 65 | keep = c(datasetID = "source_measurement", measurementRemark = "note"), 66 | metadata = list( 67 | bibliographicCitation = attributes(carabids)$citeAs, 68 | author = "Fons van der Plas", 69 | license = "http://creativecommons.org/publicdomain/zero/1.0/" 70 | ) 71 | ) 72 | 73 | traitlist <- as.thesaurus( 74 | body_length = as.trait("body_length", expectedUnit = "mm", valueType = "numeric", 75 | identifier = "http://t-sita.cesab.org/BETSI_vizInfo.jsp?trait=Body_length"), 76 | antenna_length = as.trait("antenna_length", expectedUnit = "mm", valueType = "numeric", 77 | identifier = "http://t-sita.cesab.org/BETSI_vizInfo.jsp?trait=Antenna_length"), 78 | metafemur_length = as.trait("metafemur_length", expectedUnit = "mm", valueType = "numeric", 79 | identifier = "http://t-sita.cesab.org/BETSI_vizInfo.jsp?trait=Femur_length") 80 | ) 81 | 82 | dataset1Std <- standardize_traits(dataset1, thesaurus = traitlist) 83 | 84 | 85 | 86 | ## Example: matching of original names to thesaurus 87 | 88 | pulldata("heteroptera_raw") 89 | 90 | dataset2 <- as.traitdata(heteroptera_raw, 91 | taxa = "SpeciesID", 92 | traits = c("Body_length", "Antenna_Seg1", "Antenna_Seg2", 93 | "Antenna_Seg3", "Antenna_Seg4", "Antenna_Seg5", "Hind.Femur_length"), 94 | units = "mm", 95 | keep = c(sex = "Sex", references = "Source", lifestage = "Wing_development"), 96 | metadata = list( 97 | bibliographicCitation = attributes(heteroptera_raw)$citeAs, 98 | license = "http://creativecommons.org/publicdomain/zero/1.0/" 99 | ) 100 | ) 101 | 102 | 103 | traits2 <- as.thesaurus( 104 | Body_length = as.trait("Body_length", 105 | expectedUnit = "mm", valueType = "numeric", 106 | traitDescription = "From the tip of the head to the end of the abdomen"), 107 | Antenna_Seg1 = 
as.trait("Antenna_Seg1", 108 | expectedUnit = "mm", valueType = "numeric", 109 | traitDescription = "Length of first antenna segment", 110 | broaderTerm = "http://ecologicaltraitdata.github.io/TraitDataList/Antenna_length"), 111 | Antenna_Seg2 = as.trait("Antenna_Seg2", 112 | expectedUnit = "mm", valueType = "numeric", 113 | traitDescription = "Length of second antenna segment", 114 | broaderTerm = "http://ecologicaltraitdata.github.io/TraitDataList/Antenna_length"), 115 | Antenna_Seg3 = as.trait("Antenna_Seg3", 116 | expectedUnit = "mm", valueType = "numeric", 117 | traitDescription = "Length of third antenna segment", 118 | broaderTerm = "http://ecologicaltraitdata.github.io/TraitDataList/Antenna_length"), 119 | Antenna_Seg4 = as.trait("Antenna_Seg4", 120 | expectedUnit = "mm", valueType = "numeric", 121 | traitDescription = "Length of fourth antenna segment", 122 | broaderTerm = "http://ecologicaltraitdata.github.io/TraitDataList/Antenna_length"), 123 | Antenna_Seg5 = as.trait("Antenna_Seg5", 124 | expectedUnit = "mm", valueType = "numeric", 125 | traitDescription = "Length of fifth antenna segment (only Pentatomoidea)", 126 | broaderTerm = "http://ecologicaltraitdata.github.io/TraitDataList/Antenna_length"), 127 | Hind.Femur_length = as.trait("Hind.Femur_length", 128 | expectedUnit = "mm", valueType = "numeric", 129 | traitDescription = "Length of the femur of the hind leg", 130 | broaderTerm = "http://t-sita.cesab.org/BETSI_vizInfo.jsp?trait=Femur_length") 131 | ) 132 | 133 | dataset2Std <- standardize_traits(dataset2, 134 | thesaurus = traits2 135 | ) 136 | 137 | } 138 | \seealso{ 139 | Other standardize: 140 | \code{\link{standardize_taxa}()}, 141 | \code{\link{standardize}()} 142 | 143 | Other standardize: 144 | \code{\link{standardize_taxa}()}, 145 | \code{\link{standardize}()} 146 | } 147 | \concept{standardize} 148 | -------------------------------------------------------------------------------- /man/traitdataform-package.Rd: 
-------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/traitdataform-package.R 3 | \name{traitdataform-package} 4 | \alias{traitdataform-package} 5 | \title{Package 'traitdataform': harmonizing ecological trait data in R} 6 | \description{ 7 | This package assists in handling functional trait data and transferring them 8 | into the Trait Data Standard (Schneider et al. in preparation). 9 | } 10 | \details{ 11 | There are two major use cases for the package: 12 | \itemize{ 13 | \item preparation of own trait datasets for upload into public data bases, and 14 | \item harmonizing trait datasets from different sources by moulding them into a 15 | unified format. 16 | } 17 | 18 | The toolset of the package includes 19 | \itemize{ 20 | \item transforming species-trait-matrix or observation table data into a unified 21 | long-table format 22 | \item mapping column names into terms provided in a standard trait vocabulary 23 | \item matching of species names into GBIF Backbone Taxonomy (taxonomic ontology 24 | server) 25 | \item matching of trait names into a user-provided traitlist, i.e. a thesaurus 26 | of traits 27 | \item unifying trait values into target unit format and legit factor levels 28 | \item saving trait dataset into a desired format using templates (e.g. for BExIS) 29 | } 30 | 31 | A documentation is available online at 32 | http://ecologicaltraitdata.github.io/traitdataform/ or offline in the package 33 | vignette 'traitdataform'. 
34 | } 35 | \keyword{internal} 36 | -------------------------------------------------------------------------------- /tests/testthat.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(traitdataform) 3 | 4 | test_check("traitdataform") 5 | -------------------------------------------------------------------------------- /tests/testthat/test.columns_to_ETS.R: -------------------------------------------------------------------------------- 1 | context("trait data structure") 2 | library(traitdataform) 3 | 4 | test_that("encoding and read-in successful", { 5 | skip_if_not( l10n_info()$`UTF-8` ) 6 | 7 | expect_true(exists("arthropodtraits")) 8 | expect_equal(as.character(arthropodtraits[1065,"Author"]),"(Herrich-Schäffer, 1841)") 9 | 10 | 11 | expect_true(exists("heteroptera_raw")) 12 | 13 | expect_true(heteroptera_raw[1,6] == "(Fallen, 1807)") 14 | expect_equal(as.character(heteroptera_raw[13,6]), "(Herrich-Schäffer, 1841)") 15 | 16 | expect_false(is.null(attributes(heteroptera_raw)$taxa)) 17 | expect_true(attributes(heteroptera_raw)$taxa == "SpeciesID") 18 | 19 | expect_false(is.null(attributes(heteroptera_raw)$thesaurus)) 20 | expect_s3_class(attributes(heteroptera_raw)$thesaurus, "thesaurus") 21 | 22 | 23 | }) 24 | 25 | test_that("keep matrix structure", { 26 | 27 | expect_message(as.traitdata(arthropodtraits, traits = c("Body_Size", "Dispersal_ability", "Feeding_guild", "Feeding_guild_short", "Feeding_mode", "Feeding_specialization", "Feeding_tissue", "Feeding_plant_part", "Endophagous_lifestyle", "Stratum_use", "Stratum_use_short"), longtable = FALSE), "data were not converted to longtable!") 28 | 29 | dd1 <- as.traitdata(arthropodtraits, traits = c("Body_Size", "Dispersal_ability", "Feeding_guild", "Feeding_guild_short", "Feeding_mode", "Feeding_specialization", "Feeding_tissue", "Feeding_plant_part", "Endophagous_lifestyle", "Stratum_use", "Stratum_use_short"), longtable = FALSE) 30 | 31 | 
expect_length(dd1$verbatimScientificName,1230) 32 | expect_length(dd1[1,], 14) 33 | expect_s3_class(dd1, "traitdata") 34 | 35 | }) 36 | 37 | test_that("reformat, with trait columns provided as argument", { 38 | 39 | dd1 <- as.traitdata(arthropodtraits, traits = c("Body_Size", "Dispersal_ability", "Feeding_guild", "Feeding_guild_short", "Feeding_mode", "Feeding_specialization", "Feeding_tissue", "Feeding_plant_part", "Endophagous_lifestyle", "Stratum_use", "Stratum_use_short")) 40 | 41 | expect_length(dd1$verbatimScientificName,10238) 42 | expect_length(dd1[1,], 5) 43 | expect_s3_class(dd1, "traitdata") 44 | 45 | dd2 <- as.traitdata(carabids, traits = c("body_length", "antenna_length", "metafemur_length", "eyewidth_corr")) 46 | dd2 <- as.traitdata(carabids) 47 | 48 | expect_length(dd2$verbatimScientificName,480) 49 | expect_length(dd2[1,], 7) 50 | expect_s3_class(dd2, "traitdata") 51 | 52 | } ) 53 | 54 | test_that("reformat species data, with trait columns provided as attributes", { 55 | 56 | expect_message(as.traitdata(arthropodtraits), "Input is taken to be a species -- trait matrix") 57 | dd1 <- as.traitdata(arthropodtraits) 58 | 59 | expect_length(dd1$verbatimScientificName,10238) 60 | expect_length(dd1[1,], 5) 61 | expect_s3_class(dd1, "traitdata") 62 | 63 | 64 | }) 65 | 66 | 67 | test_that("reformat observation data, with trait columns provided as attributes", { 68 | 69 | dd3 <- as.traitdata(heteroptera_raw) 70 | 71 | expect_length(dd3$verbatimScientificName, 9386) 72 | expect_length(dd3[1,], 10) 73 | expect_s3_class(dd3, "traitdata") 74 | 75 | }) 76 | 77 | test_that("reformat, with trait columns provided as thesaurus", { 78 | 79 | expect_message(as.traitdata(arthropodtraits), "Input is taken to be a species -- trait matrix") 80 | dd1 <- as.traitdata(arthropodtraits) 81 | 82 | expect_length(dd1$verbatimScientificName,10238) 83 | expect_length(dd1[1,], 5) 84 | expect_s3_class(dd1, "traitdata") 85 | 86 | }) 87 | 88 | 89 | 90 | test_that("reformat, with 'keep' 
argument", { 91 | 92 | dd2 <- as.traitdata(carabids, traits = c("body_length", "antenna_length", "metafemur_length", "eyewidth_corr"), keep = c(measurementDeterminedBy = "source_measurement", measurementRemarks = "note")) 93 | 94 | expect_length(dd2$verbatimScientificName,480) 95 | expect_length(dd2[1,], 7) 96 | expect_true( all(names(dd2) == c("verbatimScientificName", "verbatimTraitName", "verbatimTraitValue", "verbatimTraitUnit", "measurementID", "measurementDeterminedBy", "measurementRemarks")) ) 97 | expect_s3_class(dd2, "traitdata") 98 | 99 | 100 | } 101 | ) 102 | 103 | 104 | test_that("reformat, with 'drop' argument", { 105 | 106 | dd2 <- as.traitdata(carabids, traits = c("body_length", "antenna_length", "metafemur_length", "eyewidth_corr"), keep = c(measurementDeterminedBy = "source_measurement"), drop = c("note")) 107 | 108 | expect_length(dd2$verbatimScientificName,480) 109 | expect_length(dd2[1,], 6) 110 | expect_true( all(names(dd2) == c("verbatimScientificName", "verbatimTraitName", "verbatimTraitValue", "verbatimTraitUnit", "measurementID", "measurementDeterminedBy")) ) 111 | expect_s3_class(dd2, "traitdata") 112 | 113 | } 114 | ) 115 | 116 | -------------------------------------------------------------------------------- /tests/testthat/test.map_gbif_taxonomy.R: -------------------------------------------------------------------------------- 1 | context("get gbif taxonomy") 2 | 3 | library(traitdataform) 4 | 5 | test_that("gbif taxonomy reachable", { 6 | skip_if_not( curl::has_internet() ) 7 | # digest(get_gbif_taxonomy(carabids$name_correct[1:12])) 8 | expect_known_hash(get_gbif_taxonomy(carabids$name_correct[1:12]), hash = 'd37d3df9bed65e2451367aeaf98866dd') 9 | # digest(get_gbif_taxonomy(arthropodtraits$SpeciesID[1:12])) 10 | expect_known_hash(get_gbif_taxonomy(arthropodtraits$SpeciesID[1:12]), hash = '78fc839027d0fa26b64e6f856ece50b0') 11 | }) 12 | 13 | test_that("mapping misspelled names", { 14 | skip_if_not( curl::has_internet() ) 15 | 
expect_true(get_gbif_taxonomy("Cicindela_silvatica", fuzzy = TRUE)$scientificName == "Cicindela sylvatica") 16 | expect_true(get_gbif_taxonomy("Tetrix krausi", fuzzy = TRUE)$scientificName == "Tetrix kraussi") 17 | expect_true(is.na(get_gbif_taxonomy("Tetrics krausi", fuzzy = TRUE)$scientificName)) 18 | expect_true(get_gbif_taxonomy("Tetrics krausi", conf_threshold = 50, fuzzy = TRUE)$scientificName == "Tetrix kraussi") 19 | 20 | }) 21 | 22 | test_that("mapping synonyms", { 23 | skip_if_not( curl::has_internet() ) 24 | expect_true(get_gbif_taxonomy("Limodromus_assimilis")$scientificName == "Platynus assimilis") 25 | expect_true(get_gbif_taxonomy("Trichocellus_cognatus")$scientificName == "Dicheirotrichus cognatus") 26 | expect_true(get_gbif_taxonomy("Trichocellus_placidus")$scientificName == "Dicheirotrichus placidus") 27 | expect_true(get_gbif_taxonomy("Styloctetor stativus")$scientificName == "Styloctetor compar") 28 | }) 29 | 30 | 31 | test_that("mapping doubtful taxa", { 32 | }) 33 | 34 | test_that("mapping lower or higher taxa", { 35 | skip_if_not( curl::has_internet() ) 36 | expect_true( 37 | get_gbif_taxonomy("Acrocephalus familiaris kingi")$scientificName == "Acrocephalus familiaris kingi") 38 | 39 | expect_true( 40 | get_gbif_taxonomy("Acrocephalus familiaris kingi", subspecies = FALSE)$scientificName == "Acrocephalus familiaris") 41 | 42 | expect_true(get_gbif_taxonomy("Abax")$taxonRank == "genus") 43 | # expect_true(get_gbif_taxonomy("Cidnopus quercus", higherrank = TRUE)$taxonRank == "genus") 44 | }) 45 | 46 | test_that("not matching", { 47 | skip_if_not( curl::has_internet() ) 48 | expect_true(get_gbif_taxonomy("No_species", fuzzy = TRUE)$warnings == " Check spelling or lower confidence threshold!") 49 | expect_true(get_gbif_taxonomy("raoi_sdoi", fuzzy = TRUE)$warnings == "No matching species concept! 
Check spelling or lower confidence threshold!") 50 | }) 51 | 52 | 53 | test_that("big data handling", { 54 | skip_if_not( curl::has_internet() ) 55 | skip_on_cran() 56 | 57 | # digest(get_gbif_taxonomy(levels(traitdataform:::as_factor_clocale(carabids$name_correct))) ) 58 | expect_known_hash(get_gbif_taxonomy(levels(as_factor_clocale(carabids$name_correct))), hash = '6e354098357e48a0ef7132a60e00736d') 59 | 60 | # digest(get_gbif_taxonomy(levels(traitdataform:::as_factor_clocale(heteroptera_raw$SpeciesID))) ) 61 | expect_known_hash(get_gbif_taxonomy(levels(as_factor_clocale(heteroptera_raw$SpeciesID))), hash = 'b9505ad8e3a6c8f79005b30c42fce201') 62 | 63 | # expect_known_hash(get_gbif_taxonomy(levels(arthropodtraits$SpeciesID)), hash = '2efcaaa0e1') 64 | 65 | 66 | }) 67 | 68 | -------------------------------------------------------------------------------- /tests/testthat/test.standardize.R: -------------------------------------------------------------------------------- 1 | context("apply standardize() functions to traitdata") 2 | 3 | library(traitdataform) 4 | 5 | test_that("mapping of taxa works", { 6 | 7 | skip_if_not( curl::has_internet() ) 8 | 9 | dataset1 <- as.traitdata(carabids) 10 | 11 | dd1 <- standardise_taxa(dataset1[c(1,83,166,206,240,286,320,323,361,306,440),]) 12 | expect_equal(as.character(dd1$scientificName), c("Abax parallelepipedus", "Abax parallelepipedus", "Calathus cinctus", "Calathus cinctus", "Dromius angustus", "Platynus assimilis", "Platynus assimilis", NA, NA, "Notiophilus aquaticus", "Dicheirotrichus placidus") ) 13 | expect_true(all(c("scientificName", "taxonID", "warnings") %in% names(dd1))) 14 | 15 | dd1 <- standardise_taxa(dataset1[c(1,83,166,206,240,286,320,323,361,306,440),], method_options = c(resolve_synonyms = FALSE)) 16 | expect_equal(as.character(dd1$scientificName), c("Abax parallelepipedus", "Abax parallelepipedus", "Calathus cinctus", "Calathus cinctus", "Dromius angustus", "Limodromus assimilis", "Limodromus assimilis", NA, 
NA, "Notiophilus aquaticus", "Trichocellus placidus") ) 17 | expect_true(all(c("scientificName", "taxonID", "warnings") %in% names(dd1))) 18 | 19 | 20 | dataset2 <- as.traitdata(arthropodtraits) 21 | 22 | dd2 <- standardise_taxa(dataset2[c(215,476,774,975,1445,1706,3437,3905,4667,5396,5896,8755,8966),]) 23 | expect_equal(as.character(dd2$scientificName), c("Acalypta parvula", "Acanthodelphax denticauda", "Acanthodelphax spinosa", "Acanthodelphax spinosa", "Acanthodelphax spinosa", "Aspidapion radiolus", "Aspidapion radiolus", "Aspidapion radiolus", "Mocyta fungi", "Gymnetron ictericus", "Gymnetron ictericus", "Gymnetron ictericus", "Trigonocranus emmeae") ) 24 | expect_true(all(c("scientificName", "taxonID", "warnings") %in% names(dd2))) 25 | 26 | }) 27 | 28 | 29 | 30 | test_that("mapping of traits based on thesaurus works", { 31 | 32 | dataset2 <- as.traitdata(heteroptera_raw) 33 | 34 | traits2 <- as.thesaurus( 35 | Body_length = as.trait("Body_length", 36 | expectedUnit = "mm", valueType = "numeric", 37 | traitDescription = "From the tip of the head to the end of the abdomen"), 38 | Antenna_Seg1 = as.trait("Antenna_Seg1", 39 | expectedUnit = "mm", valueType = "numeric", 40 | traitDescription = "Length of first antenna segment", 41 | broaderTerm = "http://ecologicaltraitdata.github.io/TraitDataList/Antenna_length"), 42 | Antenna_Seg2 = as.trait("Antenna_Seg2", 43 | expectedUnit = "mm", valueType = "numeric", 44 | traitDescription = "Length of second antenna segment", 45 | broaderTerm = "http://ecologicaltraitdata.github.io/TraitDataList/Antenna_length"), 46 | Antenna_Seg3 = as.trait("Antenna_Seg3", 47 | expectedUnit = "mm", valueType = "numeric", 48 | traitDescription = "Length of third antenna segment", 49 | broaderTerm = "http://ecologicaltraitdata.github.io/TraitDataList/Antenna_length"), 50 | Antenna_Seg4 = as.trait("Antenna_Seg4", 51 | expectedUnit = "mm", valueType = "numeric", 52 | traitDescription = "Length of fourth antenna segment", 53 | broaderTerm = 
"http://ecologicaltraitdata.github.io/TraitDataList/Antenna_length"), 54 | Antenna_Seg5 = as.trait("Antenna_Seg5", 55 | expectedUnit = "mm", valueType = "numeric", 56 | traitDescription = "Length of fifth antenna segment (only Pentatomoidea)", 57 | broaderTerm = "http://ecologicaltraitdata.github.io/TraitDataList/Antenna_length"), 58 | Hind.Femur_length = as.trait("Hind.Femur_length", 59 | expectedUnit = "mm", valueType = "numeric", 60 | traitDescription = "Length of the femur of the hind leg", 61 | broaderTerm = "http://t-sita.cesab.org/BETSI_vizInfo.jsp?trait=Femur_length") 62 | ) 63 | 64 | dataset2Std <- standardize_traits(dataset2, 65 | thesaurus = traits2 66 | ) 67 | expect_equal(dim(dataset2Std), c(2586,14)) 68 | }) 69 | 70 | 71 | test_that("unit conversion works", { 72 | 73 | 74 | }) 75 | 76 | test_that("factor level harmonization works", { 77 | 78 | dd1 <- as.traitdata(arthropodtraits, traits = c("Body_Size", "Dispersal_ability", "Feeding_guild_short", "Feeding_mode"), units =c("mm","","","") ) 79 | dd1Std <- standardize_traits(dd1) 80 | 81 | expect_equal(dim(dd1Std), c(4920,10)) 82 | expect_equal(sum(subset(dd1Std, verbatimTraitName == "Body_Size")$traitValue), 5795.08) 83 | }) -------------------------------------------------------------------------------- /vignettes/.gitignore: -------------------------------------------------------------------------------- 1 | *.html 2 | *.R 3 | --------------------------------------------------------------------------------