├── .Rbuildignore ├── .gitignore ├── .pre-commit-config.yaml ├── .travis.yml ├── .zenodo.json ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── DESCRIPTION ├── LICENSE ├── LICENSE.md ├── MATSS.Rproj ├── NAMESPACE ├── R ├── MATSS-package.R ├── analysis_wrapper.R ├── create_MATSS_compendium.R ├── get_data_bbs.R ├── get_data_biotime.R ├── get_data_cowley.R ├── get_data_gpdd.R ├── get_data_jornada.R ├── get_data_maizuru.R ├── get_data_mtquad.R ├── get_data_portal.R ├── get_data_shortgrass_steppe.R ├── get_data_vegplots-sdl.R ├── get_sanparks_data.R ├── plan_analyses.R ├── plan_data.R ├── plan_references.R ├── summary_stats.R ├── utils-data-checks.R ├── utils-data-processing.R ├── utils-data-summarize.R ├── utils-matssdata.R ├── utils-pipe.R ├── utils-retriever-data.R └── utils-testing.R ├── README.Rmd ├── README.md ├── _pkgdown.yml ├── data └── dragons.rda ├── deploy_compendium.sh ├── inst ├── CITATION ├── biotime_dataset_info.RDS ├── extdata │ ├── cowleylizards.txt │ ├── cowleysnakes.txt │ ├── gpdd_locations.csv │ ├── sanparks │ │ ├── judithk.815.1-815.1.txt │ │ ├── peggym.1049.1-karoo2008.txt │ │ ├── peggym.1050.1-Karoo2006.txt │ │ ├── peggym.1051.1-Karoo09.txt │ │ ├── peggym.111.1-GGHNPSpecieCodes.txt │ │ ├── peggym.112.1-GGHNPTotals.txt │ │ ├── peggym.114.1-KarooNationalParkCensuscodes.txt │ │ └── peggym.116.1-KRNPTotals.txt │ └── subsampled │ │ ├── biotimesql │ │ ├── CITATION │ │ ├── biotimesql_ID_ABUNDANCE.csv │ │ ├── biotimesql_allrawdata.csv │ │ ├── biotimesql_biomass.csv │ │ ├── biotimesql_citation1.csv │ │ ├── biotimesql_contacts.csv │ │ ├── biotimesql_datasets.csv │ │ ├── biotimesql_methods.csv │ │ ├── biotimesql_sample.csv │ │ ├── biotimesql_site.csv │ │ └── biotimesql_species.csv │ │ ├── breed-bird-survey │ │ ├── CITATION │ │ ├── breed_bird_survey_counts.csv │ │ ├── breed_bird_survey_region_codes.csv │ │ ├── breed_bird_survey_routes.csv │ │ ├── breed_bird_survey_species.csv │ │ └── breed_bird_survey_weather.csv │ │ └── mapped-plant-quads-mt │ │ ├── CITATION │ 
│ ├── mapped_plant_quads_mt_allrecords_cover.csv │ │ ├── mapped_plant_quads_mt_allrecords_density.csv │ │ └── mapped_plant_quads_mt_species_list.csv └── templates │ ├── template-README.md │ ├── template-Rprofile │ ├── template-functions.R │ ├── template-pipeline.R │ ├── template-references.bib │ └── template-report.Rmd ├── man ├── MATSS.Rd ├── analysis_wrapper.Rd ├── append_data_citations.Rd ├── append_retriever_citation.Rd ├── build_analyses_plan.Rd ├── build_bbs_datasets_plan.Rd ├── build_biotime_datasets_plan.Rd ├── build_datasets_plan.Rd ├── build_gpdd_datasets_plan.Rd ├── build_references_plan.Rd ├── build_retriever_datasets_plan.Rd ├── check_data_format.Rd ├── check_default_data_path.Rd ├── collect_analyses.Rd ├── combine_bbs_subspecies.Rd ├── correct_biotime_dataset.Rd ├── create_MATSS_compendium.Rd ├── dragons.Rd ├── filter_bbs_species.Rd ├── filter_bbs_ts.Rd ├── get_bbs_route_region_data.Rd ├── get_biotime_data.Rd ├── get_biotime_dataset_ids.Rd ├── get_cowley_lizards.Rd ├── get_cowley_snakes.Rd ├── get_default_data_path.Rd ├── get_effort_from_data.Rd ├── get_gpdd_data.Rd ├── get_jornada_data.Rd ├── get_karoo_data.Rd ├── get_kruger_data.Rd ├── get_maizuru_data.Rd ├── get_mtquad_data.Rd ├── get_portal_rodents.Rd ├── get_sdl_data.Rd ├── get_sgs_data.Rd ├── get_times_from_data.Rd ├── has_integer_times.Rd ├── has_missing_samples.Rd ├── install_retriever_data.Rd ├── interpolate_missing_samples.Rd ├── interpolate_obs.Rd ├── invoke.Rd ├── is_equitimed.Rd ├── make_equitimed.Rd ├── make_integer_times.Rd ├── normalize_obs.Rd ├── pipe.Rd ├── prepare_bbs_ts_data.Rd ├── prepare_datasets.Rd ├── print.matssdata.Rd ├── print.matsssummary.Rd ├── process_bbs_route_region_data.Rd ├── process_biotime_dataset.Rd ├── richness.Rd ├── summarize_df.Rd ├── summarize_vec.Rd ├── summary.matssdata.Rd ├── temp_autocor.Rd ├── to_numeric_vector.Rd ├── ts_summary.Rd └── use_default_data_path.Rd ├── tests ├── testthat.R └── testthat │ ├── setup.R │ ├── teardown.R │ ├── 
test-01-data-checking.R │ ├── test-02-dataset-processing.R │ ├── test-03-retriever-utils.R │ ├── test-04-retriever-data.R │ ├── test-05-build-plans.R │ ├── test-06-build-plans-installed-subsample.R │ ├── test-07-analysis-wrapper.R │ ├── test-08-summary-stats.R │ ├── test-09-summary-stats-utils.R │ ├── test-10-compendium-creation.R │ ├── test-99-dataset-regressions.R │ └── test-999-installed-subsample-dataset-regressions.R └── vignettes ├── .gitignore ├── MATSS.Rmd ├── data-formats.Rmd ├── dataset-summary.Rmd ├── hipergator-install.Rmd └── references.bib /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^_pkgdown\.yml$ 2 | ^LICENSE\.md$ 3 | ^CONTRIBUTING\.md$ 4 | ^CODE_OF_CONDUCT\.md$ 5 | ^.*\.Rproj$ 6 | ^\.Rproj\.user$ 7 | ^.travis.yml$ 8 | ^README\.Rmd$ 9 | ^README-.*\.png$ 10 | ^doc$ 11 | ^docs$ 12 | ^Meta$ 13 | ^.drake 14 | ^.zenodo.json$ 15 | ^README_cache 16 | ^miniconda.sh$ 17 | ^deploy_compendium.sh$ -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .Rproj.user 2 | .Rhistory 3 | .RData 4 | .Ruserdata 5 | .drake/ 6 | .drake_history/ 7 | /output/drake-cache.sqlite 8 | /analysis/*.html 9 | /doc 10 | /docs 11 | Meta 12 | /inst/extdata/subsampled/breed-bird-survey-prepped 13 | /inst/extdata/subsampled/biotime-prepped 14 | /inst/templates/template-README.html 15 | README_cache 16 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/lorenzwalthert/pre-commit-hooks 3 | rev: v0.0.0.9028 4 | hooks: 5 | - id: readme-rmd-rendered 6 | - id: roxygenize 7 | - id: use-tidy-description -------------------------------------------------------------------------------- /.travis.yml: 
-------------------------------------------------------------------------------- 1 | language: r 2 | cache: 3 | - packages 4 | os: linux 5 | dist: "xenial" 6 | warnings_are_errors: false 7 | 8 | r_packages: 9 | - covr 10 | r_github_packages: 11 | - ropensci/rdataretriever 12 | 13 | addons: 14 | apt: 15 | packages: 16 | - libgsl0-dbg 17 | - libgsl0-dev 18 | update: true 19 | 20 | before_install: 21 | - wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh 22 | - bash miniconda.sh -b -p $HOME/miniconda 23 | - export PATH="$HOME/miniconda/bin:$PATH" 24 | - hash -r 25 | - conda config --set always_yes yes --set changeps1 no 26 | - conda update -q conda 27 | # Useful for debugging any issues with conda 28 | - conda info -a 29 | - conda create -q -n test-environment python=3 30 | - source activate test-environment 31 | - conda install git pip 32 | - pip install git+git://github.com/weecology/retriever@master 33 | # - conda install -c conda-forge retriever 34 | - retriever 35 | - python -c 'import retriever;retriever.check_for_updates()' 36 | 37 | jobs: 38 | include: 39 | - r: devel 40 | - r: release 41 | after_success: 42 | - R CMD INSTALL . 43 | - rm README.Rmd 44 | - Rscript -e 'pkgdown::build_site()' 45 | - Rscript -e 'covr::codecov()' 46 | - bash deploy_compendium.sh 47 | deploy: 48 | strategy: git 49 | provider: pages 50 | token: $GITHUB_PAT 51 | keep_history: true 52 | local_dir: docs 53 | skip_cleanup: true 54 | on: 55 | branch: master 56 | -------------------------------------------------------------------------------- /.zenodo.json: -------------------------------------------------------------------------------- 1 | { 2 | "title": "Macroecological Analyses of Time Series Structure", 3 | "creators": [ 4 | { 5 | "affiliation": "University of Florida", 6 | "name": "Hao Ye", 7 | "orcid": "0000-0002-8630-1458" 8 | }, 9 | { 10 | "affiliation": "University of Florida", 11 | "name": "Ellen K. 
Bledsoe", 12 | "orcid": "0000-0002-3629-7235" 13 | }, 14 | { 15 | "affiliation": "University of Florida", 16 | "name": "Renata Diaz", 17 | "orcid": "0000-0003-0803-4734" 18 | }, 19 | { 20 | "affiliation": "University of Florida", 21 | "name": "S. K. Morgan Ernest", 22 | "orcid": "0000-0002-6026-8530" 23 | }, 24 | { 25 | "affiliation": "University of Florida", 26 | "name": "Juniper L. Simonis", 27 | "orcid": "0000-0001-9798-0460" 28 | }, 29 | { 30 | "affiliation": "University of Florida", 31 | "name": "Ethan P. White", 32 | "orcid": "0000-0001-6728-7745" 33 | }, 34 | { 35 | "affiliation": "University of Florida", 36 | "name": "Glenda M. Yenni", 37 | "orcid": "0000-0001-6969-1848" 38 | } 39 | ], 40 | "description": "Support for macroecological analyses of time series. The intent of the package is to enable end users to run analyses on a collection of population and community time series data. Functions are provided to download and import datasets, produce reproducible workflows using the `drake` package, and for generating research compendia with code and reports.", 41 | "keywords": [ 42 | "ecology", 43 | "long-term", 44 | "time-series" 45 | ], 46 | "access_right": "open", 47 | "license": "mit-license", 48 | "upload_type": "software" 49 | } -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | In the interest of fostering an open and welcoming environment, we as contributors and maintainers pledge to making participation in our project and our community a harassment-free experience for everyone, regardless of age, body size, disability, ethnicity, gender identity and expression, level of experience, nationality, personal appearance, race, religion, or sexual identity and orientation. 
6 | 7 | ## Our Standards 8 | 9 | Examples of behavior that contributes to creating a positive environment include: 10 | 11 | * Using welcoming and inclusive language 12 | * Being respectful of differing viewpoints and experiences 13 | * Gracefully accepting constructive criticism 14 | * Focusing on what is best for the community 15 | * Showing empathy towards other community members 16 | 17 | Examples of unacceptable behavior by participants include: 18 | 19 | * The use of sexualized language or imagery and unwelcome sexual attention or advances 20 | * Trolling, insulting/derogatory comments, and personal or political attacks 21 | * Public or private harassment 22 | * Publishing others' private information, such as a physical or electronic address, without explicit permission 23 | * Other conduct which could reasonably be considered inappropriate in a professional setting 24 | 25 | ## Our Responsibilities 26 | 27 | Project maintainers are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behavior. 28 | 29 | Project maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful. 30 | 31 | ## Scope 32 | 33 | This Code of Conduct applies both within project spaces and in public spaces when an individual is representing the project or its community. Examples of representing a project or community include using an official project e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event. Representation of a project may be further defined and clarified by project maintainers. 
34 | 35 | ## Enforcement 36 | 37 | Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project team at [ethan@weecology.org](mailto:ethan@weecology.org). All complaints will be reviewed and investigated and will result in a response that is deemed necessary and appropriate to the circumstances. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. Further details of specific enforcement policies may be posted separately. 38 | 39 | Project maintainers who do not follow or enforce the Code of Conduct in good faith may face temporary or permanent repercussions as determined by other members of the project's leadership. 40 | 41 | ## Attribution 42 | 43 | This Code of Conduct is adapted from the [Contributor Covenant](https://www.contributor-covenant.org), version 1.4, 44 | available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html 45 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | ## Guidelines for Contributing 2 | 3 | Thanks for checking out our project! If you haven't already, please check out the [readme](README.md) for general info about this project. 4 | 5 | * [Contributor Code of Conduct](CONTRIBUTING.md#contributor-code-of-conduct) 6 | * [For the General Public](CONTRIBUTING.md#for-the-general-public) 7 | * [Weecologists](CONTRIBUTING.md#weecologists) 8 | * [Documentation](CONTRIBUTING.md#documentation) 9 | * [Testing](CONTRIBUTING.md#testing) 10 | * [Building](CONTRIBUTING.md#building) 11 | * [Contributing Data](CONTRIBUTING.md#contributing-data) 12 | 13 | 14 | ## Contributor Code of Conduct 15 | All contributors will be expected to follow our [code of conduct](CODE_OF_CONDUCT.md). 
16 | 17 | ## For the General Public 18 | If you're not a member of the Weecology lab, we ask that you use one of the following two methods for contributing: 19 | 20 | 1. Create an issue -- if you spot any typos, bugs, or have general suggestions, etc. You can also use this to participate in ongoing discussions. For more info, please check out this Github [guide](https://guides.github.com/features/issues/). 21 | 22 | 2. Fork and create a pull request -- if you have suggested bugfixes or changes. For more info, please check out this Github [guide](https://help.github.com/articles/about-pull-requests/). We ask that you follow our guidelines below on documentation and testing. 23 | 24 | ## Weecologists 25 | 26 | If you're actively working on this repo, then you should have write access. For anything beyond a minor change to documentation or coding files, please create a branch for any new features or bugfixes and create a pull request when you'd like your work to be merged in. 27 | 28 | If you don't have write access and you would like to, please contact @gmyenni for access. 29 | 30 | ## Documentation 31 | 32 | If you are contributing code to this project, you generally don't need any additional packages, since the documentation will be written as comments in the R scripts. If you are also building the package, see the [section below](#building) for more details. 33 | 34 | In most cases, you'll be creating a new function and then documenting it. You can check the existing functions for examples, but here's a basic template: 35 | ``` 36 | #' @title {this is the heading for the help file} 37 | #' 38 | #' @description {A description of the function} 39 | #' 40 | #' @param {name of a function argument} {what the argument does} 41 | #' @return {what is returned from the function} 42 | #' @examples 43 | #' {R code that is an example use of the function} 44 | #' @export 45 | #' 46 | newfunc <- function() ... 
47 | ``` 48 | 49 | Note that you can also include links to other functions, math formatting, and more. For more details, see the [chapter on documentation ](http://r-pkgs.had.co.nz/man.html) in Hadley Wickham's book for R packages. 50 | 51 | ## Testing 52 | 53 | If you are adding new functionality, please include automated tests to verify that some of the basic functionality is correct. 54 | 55 | Automated testing uses R scripts, that live in the `tests/testthat/` subfolder for the package. If you are adding a new file, please name it as `test-{concept}.R`. 56 | 57 | As a general rule, you don't need to test all possible inputs and outputs for a function, but you should test some important aspects: 58 | * outputs are the correct format (including dimensions and components) 59 | * sample input produces the correct sample output 60 | 61 | You can see the existing tests as examples of how to organize your tests, but note that there are several different kinds of `expect_` functions that test for different things. For more details, see the [chapter on testing ](http://r-pkgs.had.co.nz/tests.html) in Hadley Wickham's book for R packages. 62 | 63 | ## Building 64 | 65 | To fully build the package, including documentation, running-tests, you will need the `roxygen2`, `testthat`, `devtools`, `pkgdown`, and `usethis` R packages. 66 | 67 | Specific operations are then done by calling the appropriate functions from within R, while your working directory is somewhere in the package folder. 68 | 69 | The suggested workflow is: 70 | 1. Write code, documentation, and tests. 71 | - To add dependencies on external packages, use `usethis::use_package("package name")`. This will modify the documentation on *this* package accordingly. If you are adding a dependency to a package that is *NOT* on CRAN, use `usethis::use_dev_package("package name")` instead. 72 | 2. Run `devtools::document()` to generate the documentation files and update the `NAMESPACE` file. 73 | 3. 
Run `devtools::install()` to install the new version of the package. 74 | 4. Run `devtools::test()` to run the test scripts on the new version of the package. 75 | 5. Run `pkgdown::build_site()` to update the pkgdown site built from the documentation. 76 | 77 | If you are also prepping the package as a whole, then you will also want to run `devtools::check()` and/or `devtools::check_cran()` to make sure that the package is complete. 78 | 79 | For more info, see the [GitHub repo](https://github.com/hadley/devtools) for the `devtools` package. 80 | 81 | ## Contributing Data 82 | 83 | We use a specific data structure that handles community data and optional covariates. For more information, please see the related [vignette](https://weecology.github.io/MATSS-pipeline/articles/data-formats.html). -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Type: Package 2 | Package: MATSS 3 | Title: Macroecological Analyses of Time Series Structure 4 | Version: 0.3.2 5 | Authors@R: 6 | c(person(given = "Hao", 7 | family = "Ye", 8 | role = c("aut", "cre"), 9 | email = "hao.ye@weecology.org", 10 | comment = c(ORCID = "0000-0002-8630-1458")), 11 | person(given = "Ellen", 12 | family = "Bledsoe", 13 | role = "aut", 14 | comment = c(ORCID = "0000-0002-3629-7235")), 15 | person(given = "Renata", 16 | family = "Diaz", 17 | role = "aut", 18 | comment = c(ORCID = "0000-0003-0803-4734")), 19 | person(given = "S. K. 
Morgan", 20 | family = "Ernest", 21 | role = "aut", 22 | comment = c(ORCID = "0000-0002-6026-8530")), 23 | person(given = "Juniper", 24 | family = "Simonis", 25 | role = "aut", 26 | comment = c(ORCID = "0000-0001-9798-0460")), 27 | person(given = "Ethan", 28 | family = "White", 29 | role = "aut", 30 | comment = c(ORCID = "0000-0001-6728-7745")), 31 | person(given = "Glenda", 32 | family = "Yenni", 33 | role = "aut", 34 | comment = c(ORCID = "0000-0001-6969-1848"))) 35 | Description: Support for macroecological analyses of time 36 | series. The intent of the package is to enable end users to run 37 | analyses on a collection of population and community time series data. 38 | Functions are provided to download and import datasets, produce 39 | reproducible workflows using the `drake` package, and for generating 40 | research compendia with code and reports. 41 | License: MIT + file LICENSE 42 | URL: https://github.com/weecology/MATSS 43 | BugReports: https://github.com/weecology/MATSS/issues 44 | Depends: 45 | R (>= 3.4.0) 46 | Imports: 47 | dplyr, 48 | drake (>= 7.10.0), 49 | forecast, 50 | fs, 51 | future, 52 | future.batchtools, 53 | ggplot2, 54 | here, 55 | lubridate, 56 | magrittr, 57 | portalr (>= 0.3.4), 58 | purrr, 59 | rdataretriever (>= 2.0.0), 60 | reticulate (>= 1.15), 61 | rlang, 62 | RSQLite, 63 | rstudioapi, 64 | stringr, 65 | tibble, 66 | tidyr, 67 | tidyselect (>= 1.0.0), 68 | usethis, 69 | vctrs 70 | Suggests: 71 | covr, 72 | knitr, 73 | networkD3, 74 | pkgdown, 75 | reticulate, 76 | rmarkdown, 77 | testthat, 78 | visNetwork 79 | VignetteBuilder: 80 | knitr 81 | Encoding: UTF-8 82 | LazyData: true 83 | Roxygen: list(markdown = TRUE) 84 | RoxygenNote: 7.1.0 85 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Code is copyright Weecology (2018-2020) and shared under the MIT License. 
2 | 3 | Datasets are: 4 | 5 | Cowley Lizards 6 | -------------- 7 | This dataset is compiled from Tables 1 & 2 in the open access publication of Wilgers et al. 2006. 8 | 9 | Please cite the following publication if you use this data set: 10 | 11 | Dustin J. Wilgers, Eva A. Horne, Brett K. Sandercock, and Allan W. Volkmann "EFFECTS OF RANGELAND MANAGEMENT ON COMMUNITY DYNAMICS OF THE HERPETOFAUNA OF THE TALLGRASS PRAIRIE," Herpetologica 62(4), 378-388, (1 December 2006). https://doi.org/10.1655/0018-0831(2006)62[378:EORMOC]2.0.CO;2 12 | 13 | Cowley Snakes 14 | -------------- 15 | This dataset is compiled from Tables 1 & 2 in the open access publication of Wilgers et al. 2006. 16 | 17 | Please cite the following publication if you use this data set: 18 | 19 | Dustin J. Wilgers, Eva A. Horne, Brett K. Sandercock, and Allan W. Volkmann "EFFECTS OF RANGELAND MANAGEMENT ON COMMUNITY DYNAMICS OF THE HERPETOFAUNA OF THE TALLGRASS PRAIRIE," Herpetologica 62(4), 378-388, (1 December 2006). https://doi.org/10.1655/0018-0831(2006)62[378:EORMOC]2.0.CO;2 20 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | Copyright (c) 2018 Weecology 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /MATSS.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: Default 4 | SaveWorkspace: Default 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 4 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | 15 | BuildType: Package 16 | PackageUseDevtools: Yes 17 | PackageInstallArgs: --no-multiarch --with-keep.source 18 | PackageRoxygenize: rd,collate,namespace 19 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | S3method(print,matssdata) 4 | S3method(print,matsssummary) 5 | S3method(summary,matssdata) 6 | export("%>%") 7 | export(analysis_wrapper) 8 | export(append_data_citations) 9 | export(append_retriever_citation) 10 | export(build_analyses_plan) 11 | export(build_bbs_datasets_plan) 12 | export(build_biotime_datasets_plan) 13 | export(build_datasets_plan) 14 | export(build_gpdd_datasets_plan) 15 | export(build_references_plan) 16 | export(build_retriever_datasets_plan) 17 | export(check_data_format) 18 | export(check_default_data_path) 19 | export(collect_analyses) 20 | export(combine_bbs_subspecies) 21 | export(correct_biotime_dataset) 22 | 
export(create_MATSS_compendium) 23 | export(download_datasets) 24 | export(filter_bbs_species) 25 | export(filter_bbs_ts) 26 | export(get_bbs_route_region_data) 27 | export(get_biotime_data) 28 | export(get_biotime_dataset_ids) 29 | export(get_cowley_lizards) 30 | export(get_cowley_snakes) 31 | export(get_default_data_path) 32 | export(get_effort_from_data) 33 | export(get_gpdd_data) 34 | export(get_jornada_data) 35 | export(get_karoo_data) 36 | export(get_kruger_data) 37 | export(get_maizuru_data) 38 | export(get_mtquad_data) 39 | export(get_portal_rodents) 40 | export(get_sdl_data) 41 | export(get_sgs_data) 42 | export(get_times_from_data) 43 | export(has_integer_times) 44 | export(has_missing_samples) 45 | export(import_retriever_data) 46 | export(install_retriever_data) 47 | export(interpolate_missing_samples) 48 | export(interpolate_obs) 49 | export(invoke) 50 | export(is_equitimed) 51 | export(is_evenly_sampled) 52 | export(is_fully_sampled) 53 | export(make_equitimed) 54 | export(make_evenly_sampled) 55 | export(make_integer_times) 56 | export(normalize_effort) 57 | export(normalize_obs) 58 | export(normalize_times) 59 | export(prepare_bbs_ts_data) 60 | export(prepare_biotime_data) 61 | export(prepare_datasets) 62 | export(process_bbs_route_region_data) 63 | export(process_biotime_dataset) 64 | export(richness) 65 | export(summarise_df) 66 | export(summarise_vec) 67 | export(summarize_df) 68 | export(summarize_vec) 69 | export(temp_autocor) 70 | export(to_numeric_vector) 71 | export(ts_summary) 72 | export(use_default_data_path) 73 | importFrom(magrittr,"%>%") 74 | importFrom(rlang,.data) 75 | importFrom(utils,read.delim) 76 | -------------------------------------------------------------------------------- /R/MATSS-package.R: -------------------------------------------------------------------------------- 1 | #' @title Macroecological Analyses of Time Series Structure 2 | #' 3 | #' @description Support for macroecological analyses of time series. 
#'   The intent of the package is to enable end users to run analyses on a
#'   collection of population and community time series data. Functions are
#'   provided to download and import datasets, produce reproducible workflows
#'   using the `drake` package, and for generating research compendia with code
#'   and reports.
#'
#' @name MATSS
#'
#' @docType package
#'
#' @keywords package
#'
NULL

# Attach hook. The startup message below is currently disabled, so the hook
# has an empty body and simply returns NULL.
.onAttach <- function(libname, pkgname)
{
    # packageStartupMessage('Please look at our data formats by running `vignette("data-formats")`')
}

#' @importFrom rlang .data

## Register the names that are used through non-standard evaluation elsewhere
## in the package, so that R CMD check does not report them as undefined
## global variables. `utils::globalVariables()` only exists from R 2.15.1 on,
## hence the version guard.
if (getRversion() >= "2.15.1") {
    utils::globalVariables(c(
        "analysis", "citation_text", "combine", "cross", "data",
        "dataset", "file_in", "fun", "location_id", "map", "region", "route",
        "target", "timeperiod_id", "trigger"
    ))
}

#' @title dragons dataset
#'
#' @description A dataset containing example timeseries for some dragons.
#'
#' @format A list with 3 elements:
#' \describe{
#'   \item{abundance}{a data.frame with abundances for 3 dragons}
#'   \item{covariates}{a data.frame with times of observations and effort and precip data}
#'   \item{metadata}{a list with:
#'     `timename` - the name of the time column in covariates,
#'     `period` - the gap between successive observations,
#'     `authors` - the authors of the dataset,
#'     `species_table` - information about the species observed}
#' }
"dragons"
#' @title Invoke an analysis on a dataset
#'
#' @description This function is a helper that, at its core, simply applies the
#'   function to the dataset. The key added functionality is to preserve the
#'   names of the function and the dataset, as well as metadata and any
#'   additional arguments; returning the result in a tibble that is consistent
#'   in format, regardless of what function is actually being invoked.
#'
#' @details The recorded names are taken from the unevaluated call: the last
#'   variable name that appears in the `data` (respectively `fun`) argument is
#'   used, so `invoke(ts_summary, sgs_data)` records `"sgs_data"` and
#'   `"ts_summary"`. If an argument expression contains no variable name at
#'   all (e.g. a dataset built inline with `list(...)`), the deparsed
#'   expression is recorded instead, so that the returned tibble always has
#'   exactly one row.
#'
#' @param fun the analysis function
#' @param data the dataset
#' @param ... additional arguments to pass to `fun`
#'
#' @return a tibble with these columns:
#'   \tabular{ll}{
#'     \code{results} \tab the output of `fun(data)`\cr
#'     \code{metadata} \tab the metadata component of the original dataset\cr
#'     \code{dataset} \tab the name of the dataset\cr
#'     \code{method} \tab the name of the analysis function\cr
#'     \code{args} \tab a list of optional args to `method`\cr
#'   }
#'
#' @examples
#' \dontrun{
#'   sgs_data <- MATSS::get_sgs_data()
#'   invoke(ts_summary, sgs_data)
#' }
#'
#' @export
#'
invoke <- function(fun, data, ...)
{
    # Turn an unevaluated argument expression into a single label.
    # A zero-length label must be avoided: tibble() recycles the length-1
    # columns below to the size of a length-0 column, which would yield a
    # 0-row tibble and silently discard the results.
    label_from_expr <- function(expr)
    {
        vars <- all.vars(expr)
        if (length(vars) > 0) {
            return(utils::tail(vars, 1))
        }
        paste(deparse(expr), collapse = " ")
    }

    # Get the name of the dataset and method
    this_call <- match.call()
    dataset_name <- label_from_expr(this_call$data)
    method_name <- label_from_expr(this_call$fun)

    # Extract the metadata from the original dataset
    metadata <- data$metadata

    results <- fun(data, ...)

    # Return the combined results and metadata; list-columns keep arbitrary
    # outputs, metadata and arguments in a single row
    tibble::tibble(results = list(results),
                   metadata = list(metadata),
                   dataset = dataset_name,
                   method = method_name,
                   args = list(list(...)))
}
56 | #' 57 | #' @param fun the analysis function 58 | #' 59 | #' @return a function that takes in a `dataset` and optional arguments, and 60 | #' returns a data.frame or tibble with the combined results, and an "id" 61 | #' column with the name of the species 62 | #' 63 | #' @examples 64 | #' \dontrun{ 65 | #' sgs_data <- MATSS::get_sgs_data() 66 | #' summarize_dataset <- analysis_wrapper(ts_summary) 67 | #' summarize_dataset(sgs_data) 68 | #' } 69 | #' 70 | #' @export 71 | #' 72 | analysis_wrapper <- function(fun) 73 | { 74 | # Make sure `fun` is availabe in the returned function 75 | force(fun) 76 | 77 | function(dataset, ...) 78 | { 79 | # apply the analysis to each abundance time series 80 | raw_results <- lapply(dataset$abundance, fun, ...) 81 | 82 | # check output types to see if we need conversion 83 | # - if all data.frames, then bind_rows 84 | if (all(purrr::map_lgl(raw_results, ~ "data.frame" %in% class(.)))) 85 | { 86 | results <- dplyr::bind_rows(raw_results, .id = "id") 87 | 88 | # - if all vectors, then convert to data.frames and bind_rows 89 | } else if (all(purrr::map_lgl(raw_results, is.vector))) { 90 | results <- purrr::map(raw_results, ~ tibble::as_tibble(as.list(.), 91 | .name_repair = "unique")) %>% 92 | dplyr::bind_rows(.id = "id") 93 | 94 | # - otherwise, store as a tibble, with output in a list-column 95 | } else { 96 | results <- tibble::tibble("id" = names(raw_results), 97 | "value" = unname(raw_results)) 98 | } 99 | 100 | return(results) 101 | } 102 | } 103 | 104 | -------------------------------------------------------------------------------- /R/get_data_cowley.R: -------------------------------------------------------------------------------- 1 | #' @title Read in the cowley lizard community data from a txt file 2 | #' 3 | #' Import cowley lizard data from data files 4 | #' 5 | #' @return list of two dataframes (one with abundance data, the other with 6 | #' covariate data) and one list of metadata. 
#'
#' @export
#'
get_cowley_lizards <- function()
{
    get_cowley_data("cowleylizards.txt")
}

#' @title Read in the cowley snake community data from a txt file
#'
#' Import cowley snake data from data files
#'
#' @return list of two dataframes (one with abundance data, the other with
#'   covariate data) and one list of metadata.
#'
#' @export
#'
get_cowley_snakes <- function()
{
    get_cowley_data("cowleysnakes.txt")
}

#' Shared reader for the Cowley County data files shipped in `inst/extdata`.
#'
#' Reads the tab-delimited file, recodes `-99` in numeric columns to `NA`,
#' drops rows whose 4th column is `NA`, and assembles a `matssdata` object
#' (abundance, covariates, metadata).
#'
#' @param file name of the file within the package's `extdata` folder
#'
#' @return a list of class `matssdata`
#'
#' @noRd
get_cowley_data <- function(file = "cowleylizards.txt")
{
    # mustWork = TRUE: fail immediately if the file is not installed
    path <- system.file("extdata", file,
                        package = "MATSS", mustWork = TRUE)

    # -99 is recoded to NA in every numeric column; rows with NA in the 4th
    # column are then dropped. The filter condition is a plain logical vector
    # (`.[[4]]`), not `is.na()` of a one-column data.frame, which would hand
    # dplyr::filter() a matrix (deprecated in recent dplyr).
    raw_data <- read.delim(path) %>%
        dplyr::mutate_if(is.numeric, list(~dplyr::na_if(., -99))) %>%
        dplyr::filter(!is.na(.[[4]]))

    # every column other than the identifiers and the total is a species
    abundance <- dplyr::select(raw_data, -tidyselect::all_of(c("Year", "Site", "Total")))

    # survey-level covariates are hard-coded here, one entry per survey year
    covariates <- tibble::tibble(Year = c(1989, 1990, 1992, 1993, 1994, 1995, 1996,
                                          1997, 1998, 1999, 2000, 2001, 2002, 2003),
                                 collection_date = as.Date(c("1989-04-22", "1990-04-28",
                                                             "1992-04-18", "1993-04-24",
                                                             "1994-04-24", "1995-04-29",
                                                             "1996-04-20", "1997-04-26",
                                                             "1998-04-25", "1999-04-24",
                                                             "2000-04-29", "2001-04-21",
                                                             "2002-04-27", "2003-04-26")),
                                 burn_status = factor(c("no", "yes", "no", "yes",
                                                        "no", "yes", "no", "yes",
                                                        "yes", "yes", "yes", "yes",
                                                        "no", "yes")),
                                 air_temp = c(27, 17, 20, 19, 24, 15, 20,
                                              16, 31, 12, 22, 22, 17, 29),
                                 cloud_cover = factor(c("no", "no", "yes", "yes",
                                                        "no", "yes", "no", "yes",
                                                        "no", "yes", "yes", "yes",
                                                        "yes", "no")))

    metadata <- list(timename = "Year", effort = NULL, site = "CowleyCounty",
                     period = 1,
                     is_community = TRUE,
                     # species names are the column names with "_" -> " "
                     species_table = data.frame(id = colnames(abundance),
                                                stringsAsFactors = FALSE) %>%
                         dplyr::mutate(species_name = gsub("_", " ", .data$id)),
                     location = c("latitude" = 37.25, "longitude" = -(96+43/60)),
                     citation = paste("Wilgers, DJ, Horne, EA, Sandercock, BK, Volkmann, AW, 2006.",
                                      "EFFECTS OF RANGELAND MANAGEMENT ON COMMUNITY DYNAMICS OF THE",
                                      "HERPETOFAUNA OF THE TALLGRASS PRAIRIE, Herpetologica, 62(4)."),
                     source_url = paste0("https://bioone.org/journals/Herpetologica/volume-62/",
                                         "issue-4/0018-0831(2006)62[378:EORMOC]2.0.CO;2/EFFECTS-OF-RANGELAND-",
                                         "MANAGEMENT-ON-COMMUNITY-DYNAMICS-OF-THE-HERPETOFAUNA/10.1655/0018-",
                                         "0831(2006)62[378:EORMOC]2.0.CO;2.full"))

    out <- list(abundance = abundance,
                covariates = covariates,
                metadata = metadata)
    attr(out, "class") <- "matssdata"

    return(out)
}
-------------------------------------------------------------------------------- /R/get_data_gpdd.R: --------------------------------------------------------------------------------
#' @title Create GPDD population time-series data
#'
#' @description Selects sites containing at least `min_num_yrs` of data
#'   samples during that period.
#'
#' @param min_num_yrs minimum number of years of data
#' @param location_id Location code of data to return
#' @param timeperiod_id Sampling timescale code of data to return
#'   (some datasets provide at more than one scale)
#' @inheritParams get_mtquad_data
#'
#' @return list of abundance, covariates, and metadata
#'
#' @examples
#' \dontrun{
#'   get_gpdd_data(location_id=83, timeperiod_id=408)
#' }
#' @export

get_gpdd_data <- function(path = get_default_data_path(),
                          location_id = 83, timeperiod_id = 408, min_num_yrs = 10)
{

    # load the retriever tables and the dataset-level citation text
    gpdd_data_tables <- import_retriever_data("global-population-dynamics", path = path)
    citation_file <- file.path(path, "global-population-dynamics", "CITATION")
    citation_text <- readLines(citation_file, warn = FALSE)

    # join observations to their series ("main") and location records, keep
    # only the requested location / time-period codes and long-enough series,
    # prefix taxon ids with "sp", and derive a date string from the decimal year
    gpdd_data <- gpdd_data_tables$global_population_dynamics_data %>%
        dplyr::left_join(gpdd_data_tables$global_population_dynamics_main, by = "mainid") %>%
        dplyr::left_join(gpdd_data_tables$global_population_dynamics_location, by = "locationid") %>%
        dplyr::filter(.data$locationid == location_id, .data$timeperiodid %in% timeperiod_id,
                      .data$datasetlength >= min_num_yrs) %>%
        dplyr::mutate(taxonid = paste0("sp", .data$taxonid),
                      date = format(lubridate::date_decimal(.data$decimalyearbegin), "%Y-%m-%d")) %>%
        dplyr::arrange(.data$date) %>%
        dplyr::select(-tidyselect::all_of(c("siblyfittedtheta", "siblythetacilower", "siblythetaciupper", "siblyextremeneffect",
                                            "siblyreturnrate", "siblycarryingcapacity", "population", "generation",
                                            "spatialdensity", "spatialaccuracy")))

    # one row per date, one column per taxon (missing combinations -> 0)
    summary_by_date_and_taxonid <- gpdd_data %>%
        dplyr::select(.data$date, .data$sampleyear, .data$decimalyearbegin,
                      .data$decimalyearend, .data$taxonid, .data$populationuntransformed) %>%
        dplyr::group_by(.data$date, .data$taxonid) %>%
        dplyr::summarize(total = sum(.data$populationuntransformed),
                         sampleyear = mean(.data$sampleyear),
                         decimalyearbegin = min(.data$decimalyearbegin),
                         decimalyearend = max(.data$decimalyearend)) %>%
        tidyr::spread(key = .data$taxonid, value = .data$total, fill = 0) %>%
        dplyr::ungroup()

    # taxon columns all carry the "sp" prefix added above
    abundance <- summary_by_date_and_taxonid %>%
        dplyr::select(dplyr::starts_with("sp"))

    # everything else is a covariate; `date` is converted from string to Date
    covariates <- summary_by_date_and_taxonid %>%
        dplyr::mutate_at("date", as.Date) %>%
        dplyr::select(-dplyr::starts_with("sp"))

    # distinct location record(s) for the selected data
    # ("notes.x" / "notes.y" are the suffixed `notes` columns from the joins)
    location <- gpdd_data %>%
        dplyr::select_at(c("biotopeid", "locationid", "exactname", "townname", "countystateprovince",
                           "country", "continent", "ocean", "longitudedegrees", "longitudeminutes", "eorw",
                           "latitudedegrees", "latitudeminutes", "nors", "longdd", "latdd",
                           "north", "east", "south", "area", "notes.y", "locationextent")) %>%
        dplyr::rename(latitude = .data$latdd,
                      longitude = .data$longdd) %>%
        dplyr::distinct()

    # distinct sampling-protocol record(s)
    samples <- gpdd_data %>%
        dplyr::select_at(c("samplingfrequency", "startyear", "endyear", "samplingunits",
                           "samplingprotocol", "reliability", "datasetlength", "notes.x", "notes.y")) %>%
        dplyr::distinct()

    # data source record(s), used to build the citation line below
    source <- gpdd_data %>%
        dplyr::select(.data$datasourceid) %>%
        dplyr::distinct() %>%
        dplyr::left_join(gpdd_data_tables$global_population_dynamics_datasource, by = "datasourceid")

    # taxon table restricted to the taxa present in `abundance`
    species_table <- gpdd_data_tables$global_population_dynamics_taxon %>%
        dplyr::rename(id = .data$taxonid) %>%
        dplyr::mutate(id = paste0("sp", .data$id)) %>%
        dplyr::filter(.data$id %in% colnames(abundance))

    citation_line <- paste(source$author, source$year, source$title, source$reference, sep = ". ")

    metadata <- list(timename = "date",
                     species_table = species_table,
                     effort = NULL,
                     location = location,
                     samples = samples,
                     source = source,
                     is_community = FALSE,
                     citation = c(citation_line, citation_text))

    out <- list("abundance" = abundance,
                "covariates" = covariates,
                "metadata" = metadata)
    attr(out, "class") <- "matssdata"

    return(out)
}
-------------------------------------------------------------------------------- /R/get_data_jornada.R: --------------------------------------------------------------------------------
#' @title get Jornada rodent data
#'
#' @inheritParams get_mtquad_data
#' @return list of abundance, covariates, and metadata
#'
#' @export
get_jornada_data <- function(path = file.path(get_default_data_path(),
                                              "jornada-lter-rodent"))
{
    # read in Jornada rodent data
    data_tables <- import_retriever_data(path = path)
    jornada <- data_tables$jornada_lter_rodent_smes_rodent_trapping

    # select key columns
    # filter out unknown species and recaptures
    jornada_rodents <- jornada %>%
        dplyr::select_at(c("year", "season", "spp", "recap")) %>%
        dplyr::filter(.data$recap != "Y",
                      !.data$spp %in% c("DIPO1", "PERO1", "NA", "."),
                      !is.na(.data$spp))

    # get data into wide format
    # summarize counts for each species in each period
    jornada_abundances <- jornada_rodents %>%
        dplyr::count(.data$year, .data$season, .data$spp) %>%
        tidyr::spread(key = .data$spp, value = .data$n, fill = 0)

    # build a numeric time index: year, plus 0.5 for rows with season "F"
    season <- rep(0, nrow(jornada_abundances))
    season[which(jornada_abundances$season == "F")] <- 0.5
    jornada_abundances$time <- jornada_abundances$year + season

    # split into two dataframes and save
    covariates <- dplyr::select_at(jornada_abundances, c("year", "season", "time"))
    abundance <- dplyr::select_at(jornada_abundances, dplyr::vars(-c("year", "season", "time")))
    # hard-coded lookup of species code -> genus / species
    species_table = tibble::tibble(id = c("CHPE",
                                          "DIME",
                                          "DIOR",
                                          "DISP",
                                          "MUMU",
                                          "NEAL",
                                          "NEMI",
                                          "ONAR",
                                          "ONLE",
                                          "PEBO",
                                          "PEER",
                                          "PELE",
                                          "PEMA",
                                          "PGFL",
                                          "REME",
                                          "SIHI",
                                          "SPSP"),
                                   genus = c("Chaetodipus",
                                             "Dipodomys",
                                             "Dipodomys",
                                             "Dipodomys",
                                             "Mus",
                                             "Neotoma",
                                             "Neotoma",
                                             "Onychomys",
                                             "Onychomys",
                                             "Peromyscus",
                                             "Peromyscus",
                                             "Peromyscus",
                                             "Peromyscus",
                                             "Perognathus",
                                             "Reithrodontomys",
                                             "Sigmodon",
                                             "Spermophilus"),
                                   species = c("penicillatus",
                                               "merriami",
                                               "ordii",
                                               "spectabilis",
                                               "musculus",
                                               "albigula",
                                               "micropus",
                                               "arenicola",
                                               "leucogaster",
                                               "boylii",
                                               "eremicus",
                                               "leucopus",
                                               "maniculatus",
                                               "flavus",
                                               "megalotis",
                                               "hispidus",
                                               "spilosoma"))
    metadata <- list(timename = "time", period = 0.5, effort = NULL,
                     species_table = species_table,
                     is_community = TRUE,
                     location = c("latitude" = 32.6,
                                  "longitude" = -106.7))

    # the class attribute is set after append_retriever_citation() has run
    out <- list(abundance = abundance,
                covariates = covariates,
                metadata = metadata) %>%
        append_retriever_citation(path)
    attr(out, "class") <- "matssdata"

    return(out)
}
-------------------------------------------------------------------------------- /R/get_data_maizuru.R: --------------------------------------------------------------------------------
#' @title get the maizuru community data
#'
#' @inheritParams get_mtquad_data
#' @return list of abundance, covariates, and metadata
#'
#' @export
get_maizuru_data <- function(path = file.path(get_default_data_path(),
                                              "ushio-maizuru-fish-community"))
{
    data_tables <- import_retriever_data(path = path)

    # build a Date column by pasting the y, m, d columns as "y-m-d"
    raw_data <- data_tables$ushio_maizuru_fish_community_maizuru
    raw_data$date <- dplyr::select(raw_data, .data$y, .data$m, .data$d) %>%
        apply(1, paste, collapse = "-") %>%
        as.Date()

    # hard-coded lookup of column id -> genus / species
    species_table <- tibble::tibble(id = c("aurelia_sp",
                                           "engraulis_japonicus",
                                           "plotosus_japonicus",
                                           "sebastes_inermis",
                                           "trachurus_japonicus",
                                           "girella_punctata",
                                           "pseudolabrus_sieboldi",
                                           "parajulis_poecilepterus",
                                           "halichoeres_tenuispinis",
                                           "chaenogobius_gulosus",
                                           "pterogobius_zonoleucus",
                                           "tridentiger_trigonocephalus",
                                           "siganus_fuscescens",
                                           "sphyraena_pinguis",
                                           "rudarius_ercodes"),
                                    genus = c("Aurelia",
                                              "Engraulis",
                                              "Plotosus",
                                              "Sebastes",
                                              "Trachurus",
                                              "Girella",
                                              "Pseudolabrus",
                                              "Parajulis",
                                              "Halichoeres",
                                              "Chaenogobius",
                                              "Pterogobius",
                                              "Tridentiger",
                                              "Siganus",
                                              "Sphyraena",
                                              "Rudarius"),
                                    species = c(NA,
                                                "japonicus",
                                                "japonicus",
                                                "inermis",
                                                "japonicus",
                                                "punctata",
                                                "sieboldi",
                                                "poecilepterus",
                                                "tenuispinis",
                                                "gulosus",
                                                "zonoleucus",
                                                "trigonocephalus",
                                                "fuscescens",
                                                "pinguis",
                                                "ercodes"))

    # columns that are covariates; every other column is treated as abundance
    covars <- c("date_tag", "surf_t", "bot_t", "y", "m", "d", "date")
    out <- list(abundance = raw_data %>%
                    dplyr::select(-tidyselect::any_of(covars)) %>%
                    # NOTE(review): the 1e-10 offset presumably nudges exact
                    # .5 values upward before round() -- confirm intent
                    dplyr::mutate_all(~round(. + 1e-10)),
                covariates = raw_data %>%
                    dplyr::select_at(covars),
                metadata = list(timename = "date", effort = NULL,
                                species_table = species_table,
                                location = c("latitude" = 35 + 28/60,
                                             "longitude" = 135 + 22/60),
                                is_community = TRUE)) %>%
        append_retriever_citation(path)
    attr(out, "class") <- "matssdata"

    return(out)
}
-------------------------------------------------------------------------------- /R/get_data_mtquad.R: --------------------------------------------------------------------------------
#' @title get Montana plant quad time-series data
#'
#' @param path where to load the raw data files from
#' @return list of abundance, covariates, and metadata
#'
#' @export

get_mtquad_data <- function(path = file.path(get_default_data_path(),
                                             "mapped-plant-quads-mt"))
{
    mtquad_data_tables <- import_retriever_data(path = path)

    # sum stems per year x species x quad, then per year x species, and
    # spread to one column per species (missing combinations -> 0)
    mtquad_data <- mtquad_data_tables$mapped_plant_quads_mt_allrecords_density %>%
        dplyr::select_at(dplyr::vars(-c("objectid", "seedling", "x", "y"))) %>%
        dplyr::group_by(.data$year, .data$species, .data$quad) %>%
        dplyr::summarize(abundance = sum(.data$stems)) %>%
        dplyr::group_by(.data$year, .data$species) %>%
        dplyr::summarize(abundance = sum(.data$abundance)) %>%
        tidyr::spread(key = .data$species, value = .data$abundance, fill = 0) %>%
        dplyr::ungroup()

    abundance <- dplyr::select(mtquad_data, -.data$year)
    covariates <- dplyr::select(mtquad_data, .data$year)
    # NOTE(review): the columns of the species list are shifted one position
    # here (density <- cover, cover <- annual, annual <- growthform), and
    # genus/species are taken from the `species`/`density` columns --
    # presumably the raw table's headers are offset; confirm against the data.
    # sub(pattern, NA, x) yields NA for the elements that match `pattern`.
    species_table <- mtquad_data_tables$mapped_plant_quads_mt_species_list %>%
        dplyr::mutate(id = paste(.data$species, .data$density),
                      genus = sub("^unknown", NA, .data$species),
                      species = ifelse(is.na(.data$genus), NA,
                                       sub("^sp.|unknown]$", NA, .data$density)),
                      density = .data$cover,
                      cover = .data$annual,
                      annual = .data$growthform,
                      growthform = NULL) %>%
        dplyr::select(.data$id, .data$genus, .data$species, dplyr::everything()) %>%
        tibble::add_row(id = "unknown")
    metadata <- list(timename = "year", effort = NULL,
                     species_table = species_table,
                     is_community = TRUE,
                     location = c("latitude" = 46 + 22/60,
                                  "longitude" = -(105+5/60)))

    out <- list("abundance" = abundance,
                "covariates" = covariates,
                "metadata" = metadata) %>%
        append_retriever_citation(path)
    attr(out, "class") <- "matssdata"

    return(out)
}
-------------------------------------------------------------------------------- /R/get_data_portal.R: --------------------------------------------------------------------------------
#' @title get portal rodent data
#'
#' Import Portal rodent data using portalr functions.
#' Currently returns rodent data formatted appropriately for
#' LDA analysis.
#'
#' @param time_or_plots select whether to: (1) "time" == get the data for the
#'   entire timespan of the experiment, or (2) "plots" == just the time period
#'   with consistent treatments
#' @param treatment "control" or "exclosure" treatments
#' @param type type of animals to get: "Rodents" or restrict to "Granivores"
#'
#' @return list of two dataframes (one with abundance data, the other with covariate data)
#'   and one list of metadata.
#'
#' @export
get_portal_rodents <- function(time_or_plots = "plots",
                               treatment = "control",
                               type = "Rodents")
{
    # set params (case-insensitive); anything other than "plots" / "time" is
    # rejected here, instead of failing later with "object 'plots' not found"
    time_or_plots <- tolower(time_or_plots)
    if (time_or_plots == "plots") {
        plots <- "all"
        start_period <- 118
        standard_effort <- 8
    } else if (time_or_plots == "time") {
        plots <- "longterm"
        start_period <- 1
        standard_effort <- 4
    } else {
        stop("`time_or_plots` must be either \"plots\" or \"time\"")
    }

    # get raw data by plot
    dat <- portalr::abundance(path = "repo", clean = FALSE,
                              level = "plot", type = type,
                              plots = plots,
                              unknowns = FALSE, shape = 'flat',
                              time = "all", effort = TRUE,
                              min_plots = 0)

    # filter according to treatment
    # (any other value of `treatment` leaves the data unfiltered)
    if (treatment == 'exclosure')
    {
        dat <- dplyr::filter(dat, .data$treatment == "exclosure")
    } else if (treatment == "control") {
        dat <- dplyr::filter(dat, .data$plot %in% c(2, 4, 8, 11, 12, 14, 17, 22))
    }

    # summarize by period, computing weighted abundance by effort:
    # mean abundance per plot, scaled up to `standard_effort` plots
    dat2 <- dat %>%
        dplyr::filter(.data$period %in% start_period:436,
                      .data$ntraps >= 1) %>%
        dplyr::select(-.data$period, -.data$ntraps) %>%
        dplyr::group_by(.data$censusdate, .data$species, .data$newmoonnumber) %>%
        dplyr::summarize(abundance = round(standard_effort * mean(.data$abundance, na.rm = TRUE) + 1e-10)) %>%
        dplyr::ungroup() %>%
        tidyr::spread(.data$species, .data$abundance)

    # species table restricted to the species columns present in the data
    species_codes <- setdiff(names(dat2), c("censusdate", "newmoonnumber"))
    species_table <- portalr::load_datafile(file.path("Rodents", "Portal_rodent_species.csv"),
                                            na.strings = "", "repo") %>%
        dplyr::rename(id = .data$speciescode) %>%
        dplyr::filter(.data$id %in% species_codes) %>%
        tidyr::separate(.data$scientificname, c("genus", "species"), sep = " ")

    abundance <- dplyr::select(dat2, -.data$newmoonnumber, -.data$censusdate)
    covariates <- dplyr::select(dat2, .data$newmoonnumber, .data$censusdate)
    metadata <- list(timename = "newmoonnumber", effort = NULL,
                     species_table = species_table,
                     is_community = TRUE,
                     location = c("latitude" = 31.938,
                                  "longitude" = -109.08),
                     citation = portalr::get_dataset_citation()$textVersion)

    out <- list(abundance = abundance,
                covariates = covariates,
                metadata = metadata)
    attr(out, "class") <- "matssdata"

    return(out)
}
-------------------------------------------------------------------------------- /R/get_data_shortgrass_steppe.R: --------------------------------------------------------------------------------
#' @title get Shortgrass Steppe rodent data
#'
#' @inheritParams get_mtquad_data
#' @return list of abundance, covariates, and metadata
#'
#' @export
get_sgs_data <- function(path = file.path(get_default_data_path(),
                                          "shortgrass-steppe-lter"))
{
    data_tables <- import_retriever_data(path = path)
    sgs_data <- data_tables$shortgrass_steppe_lter_shortgrass_data

    # select key columns
    # filter out records with a missing species code ("NA" or empty string)
    sgs_data <- sgs_data %>%
        dplyr::select_at(c("session", "year", "veg", "web", "spp")) %>%
        dplyr::filter(.data$spp != "NA",
                      .data$spp != "")

    # get data into wide format
    # summarize counts for each species in each period
    sgs_abundance <- sgs_data %>%
        dplyr::count(.data$year, .data$session, .data$spp) %>%
        tidyr::spread(key = .data$spp, value = .data$n, fill = 0)

    # numeric time index: year, plus 0.5 for sessions whose label contains "Sep"
    season <- rep(0, nrow(sgs_abundance))
    season[grepl("Sep", sgs_abundance$session)] <- 0.5
    sgs_abundance$samples <- sgs_abundance$year + season

    # split into two dataframes and save
    abundance <- dplyr::select_at(sgs_abundance, dplyr::vars(-c("year", "session", "samples")))
    covariates <- dplyr::select_at(sgs_abundance, c("year", "session", "samples"))
    # hard-coded lookup of species code -> genus / species
    species_table <- tibble::tibble(id = c("CHHI",
                                           "DIOR",
                                           "MIOC",
                                           "MUMU",
                                           "ONLE",
                                           "PEFL",
                                           "PEMA",
                                           "REME",
                                           "REMO",
                                           "SPTR",
                                           "SYAU",
                                           "THTA"),
                                    genus = c("Chaetodipus",
                                              "Dipodomys",
                                              "Microtus",
                                              "Mus",
                                              "Onychomys",
                                              "Perognathus",
                                              "Peromyscus",
                                              "Reithrodontomys",
                                              "Reithrodontomys",
                                              "Spermophilus",
                                              "Sylvilagus",
                                              "Thomomys"),
                                    species = c("hispidus",
                                                "ordii",
                                                "ochrogaster",
                                                "musculus",
                                                "leucogaster",
                                                "flavus",
                                                "maniculatus",
                                                "megalotis",
                                                "montanus",
                                                "tridecemlineatus",
                                                "auduboni",
                                                "talpoides"))

    metadata <- list(timename = "samples", effort = NULL, period = 0.5,
                     species_table = species_table,
                     is_community = TRUE,
                     location = c("latitude" = 40 + 49/60,
                                  "longitude" = -(104 + 46/60)))

    out <- list("abundance" = abundance,
                "covariates" = covariates,
                "metadata" = metadata) %>%
        append_retriever_citation(path)
    attr(out, "class") <- "matssdata"

    return(out)
}
-------------------------------------------------------------------------------- /R/get_data_vegplots-sdl.R: --------------------------------------------------------------------------------
#' @title Create Sonoran desert lab time-series data
#'
#' @description Original data found here http://www.eebweb.arizona.edu/faculty/venable/LTREB/LTREB%20data.htm
#'
#' @param plots vector of plots to keep
#' @inheritParams get_mtquad_data
#' @return list of abundance, covariates, and metadata
#'
#' @export
get_sdl_data <- function(plots = c(4, 7, 8, 9, 10, 11, 12, 14, 15, 16, 17),
                         path = file.path(get_default_data_path(), "veg-plots-sdl"))
{
    sdl_data_tables <- import_retriever_data(path = path)

    # keep the requested plots, label blank species codes as "UNKN", and
    # total the counts per year x code (one column per code, missing -> 0)
    sdl_data <- sdl_data_tables$veg_plots_sdl_SMDensity %>%
        dplyr::select(-.data$countns) %>%
        dplyr::filter(.data$plot %in% plots) %>%
        dplyr::mutate(code = ifelse(.data$code == "", "UNKN", .data$code)) %>%
        dplyr::group_by(.data$year, .data$code) %>%
        dplyr::summarize(count = sum(.data$count)) %>%
        tidyr::spread(key = .data$code, value = .data$count, fill = 0) %>%
        dplyr::ungroup()

    abundance <- dplyr::select(sdl_data, -.data$year)
    covariates <- dplyr::select(sdl_data, .data$year)

    # clean the species table text: replace non-printable / non-ASCII bytes
    # with a space, then squeeze each run of spaces down to a single space.
    # The squeeze pattern must require at least one space (" +"): a pattern
    # that can match the empty string would insert a space between every
    # character and break the genus/species split below.
    species_table <- sdl_data_tables$veg_plots_sdl_Species %>%
        dplyr::rename(id = .data$code,
                      species_name = .data$acceptedname) %>%
        dplyr::mutate_all(~gsub("[^\\x20-\\x7E]", " ", ., perl = TRUE)) %>%
        dplyr::mutate_all(~gsub(" +", " ", .))
    # split off any " subsp. " / " var. " suffix, then split genus from species
    species_table[, c("species_name", "var_subsp")] <-
        stringr::str_split(species_table$species_name, "\\ssubsp\\.\\s|\\svar\\.\\s", simplify = TRUE)
    species_table[, c("genus", "species")] <-
        stringr::str_split(species_table$species_name, "\\s", simplify = TRUE)
    species_table <- species_table %>%
        # empty strings left by the splits become NA
        dplyr::mutate(species = ifelse(nchar(.data$species) == 0, NA, .data$species),
                      var_subsp = ifelse(nchar(.data$var_subsp) == 0, NA, .data$var_subsp)) %>%
        dplyr::select(.data$id, .data$species_name,
                      .data$family, .data$genus, .data$species, .data$var_subsp,
                      dplyr::everything()) %>%
        # add a row for the "UNKN" code introduced above
        tibble::add_row(id = "UNKN") %>%
        as.data.frame()
    # reset the declared encoding of every (character) column
    for (j in seq_len(NCOL(species_table)))
    {
        Encoding(species_table[, j]) <- "unknown"
    }

    metadata <- list(timename = "year", effort = NULL,
                     species_table = species_table,
                     location = c("latitude" = 32.21,
                                  "longitude" = -111.01),
                     is_community = TRUE)

    out <- list("abundance" = abundance,
                "covariates" = covariates,
                "metadata" = metadata) %>%
        append_retriever_citation(path)
    attr(out, "class") <- "matssdata"

    return(out)
}
-------------------------------------------------------------------------------- /R/plan_analyses.R: --------------------------------------------------------------------------------
#' @title Collect Analyses Together into a Tibble
#'
#' @description
#' This is a helper function to accompany [`plan_analyses`]: it is
#'   necessary to collect all of the results that are produced by the drake
#'   plan.
#'
#'   This function strives to be intelligent about the format of the individual
#'   results. For output from [`analysis_wrapper`] that already has information
#'   about the method and dataset, we can just combine them. Otherwise, we
#'   parse the name of the object for the method and the dataset, to format
#'   into a structure similar to the output from [`analysis_wrapper`].
#'
#' @param list_of_results the list of objects
#'
#' @return the elements of `list_of_results` combined by row with
#'   `dplyr::bind_rows()`
#'
#' @export
#'
collect_analyses <- function(list_of_results)
{
    if (!is_structured_results_list(list_of_results))
    {
        # NOTE(review): this branch looks unfinished. `results_names` is
        # computed but never used, and the bare `list_of_results` expression
        # below has no effect, so unstructured input is currently passed to
        # bind_rows() unchanged. The name-parsing described in the roxygen
        # header is not implemented here -- confirm intended behavior.
        results_names <- all.vars(match.call()$list_of_results)




        list_of_results

    }
    dplyr::bind_rows(list_of_results)
}

#' @title Generate a Drake Plan for Analyses
#'
#' @description Given M methods to be applied to N datasets, make a drake plan
#'   that contains an `analysis` target corresponding to each M x N
#'   combination, as well as M `results` targets corresponding to a list of
#'   the `analysis` outputs for each of the M methods.
#'
#' @param methods a drake plan listing the methods to be applied (it is
#'   expected that each method is a function that takes in a dataset object)
#' @param datasets a drake plan listing the datasets to be analyzed
#' @param ... arguments to be passed to \code{drake::\link[drake]{drake_plan}}
#'
#' @return a drake plan (i.e.
#' a tibble) specifying the targets and commands
#'   for all the analyses and the collected results (grouping the outputs from
#'   each method into a single list)
#'
#' @export
#'
build_analyses_plan <- function(methods, datasets, ...)
{
    ## The combination of each method x dataset
    drake::drake_plan(
        # expand out each `fun(data)`, where
        #   `fun` is each of the values in methods$target
        #   `data` is each of the values in datasets$target
        # note: tidyeval syntax is to get all the values from the previous plans,
        #       but keep them as unevaluated symbols, so that drake_plan handles
        #       them appropriately
        analysis = drake::target(invoke(fun, data),
                                 transform = cross(fun = !!rlang::syms(methods$target),
                                                   data = !!rlang::syms(datasets$target))
        ),
        # create a list of the created `analysis` objects, grouping by the `fun`
        # that made them - this keeps the results from the different methods
        # separated, so that the reports/syntheses can handle the right outputs
        results = drake::target(dplyr::bind_rows(analysis),
                                transform = combine(analysis, .by = fun)),
        # any further arguments are handed on to drake_plan()
        ...
    )
}
-------------------------------------------------------------------------------- /R/plan_references.R: --------------------------------------------------------------------------------
#' @title Generate a Drake Plan for dataset references
#'
#' @description Given N datasets, extract the citation from each one, and then
#'   combine them into a single vector and remove duplicates.
#'
#' @inheritParams build_analyses_plan
#'
#' @return a drake plan (i.e. a tibble) specifying the targets and commands
#'   for all the references and the combined vector
#'
#' @export
#'
build_references_plan <- function(datasets, ...)
{
    drake::drake_plan(
        # one `citation_text` target per dataset: its metadata$citation
        citation_text = drake::target(dataset$metadata$citation,
                                      transform = map(dataset = !!rlang::syms(datasets$target))),
        # a single `citations` target: all citation texts, de-duplicated
        citations = drake::target(unique(c(citation_text)),
                                  transform = combine(citation_text)),
        # NOTE(review): this is written `... =` (an argument *named* `...`
        # with an empty value), whereas build_analyses_plan() forwards a bare
        # `...`. Presumably a bare `...` was intended -- confirm how
        # drake_plan() treats this before changing it.
        ... =
    )
}
-------------------------------------------------------------------------------- /R/utils-data-summarize.R: --------------------------------------------------------------------------------
#' @title Normalize observations, effort, or times
#' @aliases normalize_effort, normalize_obs
#'
#' @description \code{normalize_obs} will normalize the observations to the
#'   provided effort vector, if `obs_per_effort` is TRUE.
#'
#'   \code{normalize_effort} will generate a default effort vector if it is
#'   not provided, (effort = 1 over the provided `obs` time series)
#'
#'   \code{normalize_times} will generate a default times vector if it is
#'   not provided, (times = `seq(length(obs))`)
#'
#' @param obs the time series of \code{numeric} observations
#' @inheritParams ts_summary
#'
#' @export
normalize_obs <- function(obs, effort,
                          obs_per_effort = !is.null(effort))
{
    # by default, observations are divided by effort whenever effort is given
    if (!("logical" %in% class(obs_per_effort))) {
        stop("`obs_per_effort` must be logical")
    }
    check_obs(obs)
    if (obs_per_effort) {
        # NROW() is used so vectors and single-column tables both work
        if (NROW(obs) != NROW(effort)) {
            stop("`obs` and `effort` are not of same length")
        }
        obs <- obs / effort
    }
    return(obs)
}

#' @rdname normalize_obs
#' @export
normalize_times <- function(obs, times = NULL)
{
    # fall back to an index 1..NROW(obs) when `times` is missing or non-numeric
    if (is.null(times)) {
        message("`time` is `NULL`, assuming evenly spaced data")
        times <- seq_len(NROW(obs))
    } else if (!is.numeric(times)) {
        message("`time` is not numeric, assuming evenly spaced data")
        times <- seq_len(NROW(obs))
    }
    check_obs_and_times(obs, times)
    return(times)
}

#'
#' @rdname normalize_obs
#' @export
normalize_effort <- function(obs, effort = NULL)
{
    # no effort supplied: use one unit of effort per observation
    effort_missing <- is.null(effort)
    if (effort_missing) {
        message("`effort` is `NULL`, assuming all effort = 1")
        effort <- rep(1, times = NROW(obs))
    }

    # check_effort() is defined elsewhere in the package
    check_effort(effort)
    effort
}

#' @title Extract a numeric vector
#' @description Reduce a data.frame or a matrix to its first column and
#'   coerce the result to numeric; any other input is coerced as-is.
#' @param x the input data
#' @return a \code{numeric} vector
#' @export
to_numeric_vector <- function(x)
{
    values <- if (is.data.frame(x)) {
        x[[1]]
    } else if (is.matrix(x)) {
        x[, 1]
    } else {
        x
    }
    as.numeric(values)
}

#' @title Summarize a time series dataset
#'
#' @param x Class `matssdata` object to be summarized.
#' @param ... additional arguments to `ts_summary()`
#'
#' @return \code{list} of number of species, number of
#'   observations, summaries of the variables, the times, the effort, the
#'   species richness, total observation, and the among-species correlation.
#'
#' @export
summary.matssdata <- function(x, ...)
{
    # stop early unless the object passes the package's format check
    stopifnot(check_data_format(x))

    # among-species correlations are always switched off for this method
    ts_summary(x, ..., include_spp_correlations = FALSE)
}
#' @title Print a time series summary
#'
#' @param x Class `matsssummary` object to be printed
#' @param ... additional arguments (unused)
#' @param n number of rows of the summary table to show; passed on to
#'   `tibble::trunc_mat()`
#'
#' @return `x`, invisibly
#'
#' @export
print.matsssummary <- function(x, ..., n = NULL)
{
    cat(pillar::style_subtle(paste0("# Abundance matrix: ", x$num_obs, " obs x ", x$num_spp, " spp")),
        "\n")

    details <- tibble::trunc_mat(x$stats[[1]], n = n)
    # the first element of the formatted output is dropped
    #   NOTE(review): presumably the tibble's own header line -- confirm
    cat(format(details)[-1], sep = "\n")

    # print methods conventionally return their argument invisibly
    invisible(x)
}

#' @title Print a time series dataset
#'
#' @param x Class `matssdata` object to be printed
#' @param ... additional arguments (unused)
#'
#' @return `x`, invisibly
#'
#' @export
print.matssdata <- function(x, ...)
{
    print_hrule()
    print_info(x$abundance, "$abundance")

    print_hrule()
    print_info(x$covariates, "$covariates")

    print_hrule()
    print_info(x$metadata, "$metadata")

    invisible(x)
}


# print the info about a tibble or list: a one-line header (the name padded to
#   `width`, followed by a size description), then optionally the details
print_info <- function(x, name = "$abundance", details = TRUE, width = 13)
{
    header <- format_info_header(x)
    cat(format(name, width = width), header, "\n")
    print_details(x, details)
}

# format header info for an object
#   tibbles are tested before data.frames, so they get the more specific label;
#   objects that are none of tibble / data.frame / list yield NULL
format_info_header <- function(x)
{
    if (inherits(x, "tbl"))
    {
        pillar::style_subtle(paste0("# A tibble: ", NROW(x), " x ", NCOL(x)))
    } else if (inherits(x, "data.frame")) {
        pillar::style_subtle(paste0("# A data.frame: ", NROW(x), " x ", NCOL(x)))
    } else if (inherits(x, "list")) {
        pillar::style_subtle(paste0("# List of ", length(x)))
    }
}

# print the details of an object
#   data.frames: the column names; lists: a one-level `str()` listing
print_details <- function(x, details = TRUE)
{
    if (!details) return(invisible(NULL))

    if (inherits(x, "data.frame"))
    {
        cat(pillar::style_subtle("# variables: "), "\n")
        print(names(x))
    } else if (inherits(x, "list")) {
        # qualified call: `str` lives in utils, which a package namespace does
        #   not import automatically
        utils::str(x, no.list = TRUE, max.level = 1, indent.str = " ..")
    }
}

# print a horizontal line of `width` dashes
print_hrule <- function(width = getOption("width"))
{
    line_text <- paste0(c(rep.int("-", width), "\n"), collapse = "")
    cat(pillar::style_subtle(line_text))
}

# ---- R/utils-pipe.R ----

#' Pipe operator
#'
#' See \code{magrittr::\link[magrittr]{\%>\%}} for details.
#'
#' @name %>%
#' @rdname pipe
#' @keywords internal
#' @export
#' @importFrom magrittr %>%
#' @usage lhs \%>\% rhs
NULL

# ---- R/utils-testing.R ----

# check if `retriever` is installed
#   skips the calling test when the Python module `retriever` is not available
#' @noRd
skip_if_no_retriever <- function()
{
    have_retriever <- reticulate::py_module_available("retriever")
    if (!have_retriever)
        testthat::skip("retriever not available for testing")
}

# check that `plan` looks like a drake plan: a tibble of class "drake_plan"
#   with a character `target` column and a list `command` column
#   bquote() splices the value of `plan` into each expectation before it is
#   evaluated
#' @noRd
expect_plan <- function(plan)
{
    eval(bquote(testthat::expect_true(tibble::is_tibble(.(plan)))))
    eval(bquote(testthat::expect_true("drake_plan" %in% class(.(plan)))))
    eval(bquote(testthat::expect_true(all(c("target", "command") %in% names(.(plan))))))
    eval(bquote(testthat::expect_equal(class(.(plan)$target), "character")))
    eval(bquote(testthat::expect_equal(class(.(plan)$command), "list")))
}

# check a dataset against the package format and against known hashes of its
#   abundance, covariates and metadata components
#   the species table is hashed separately, and only when both the table and
#   `species_table_hash` are non-NULL
#' @noRd
expect_dataset <- function(dat,
                           abundance_hash = "",
                           covariates_hash = "",
                           metadata_hash = "",
                           species_table_hash = NULL)
{
    eval(bquote(testthat::expect_true(check_data_format(.(dat)))))
    eval(bquote(testthat::expect_known_hash(.(dat)$abundance, .(abundance_hash))))
    eval(bquote(testthat::expect_known_hash(.(dat)$covariates, .(covariates_hash))))
    species_table <- unlist(dat$metadata$species_table)
    if (!is.null(species_table) && !is.null(species_table_hash))
    {
        eval(bquote(testthat::expect_known_hash(species_table, .(species_table_hash))))
    }
    # citation and species table are removed from the local copy, so they do
    #   not contribute to the metadata hash
    dat$metadata$citation <- NULL
    dat$metadata$species_table <- NULL
    eval(bquote(testthat::expect_known_hash(.(dat)$metadata, .(metadata_hash))))
}
22 | 23 | We provide functionality to: 24 | 25 | - obtain time series data from ecological communities, processed into 26 | a common [data 27 | format](https://weecology.github.io/MATSS/articles/data-formats.html) 28 | - perform basic processing and summaries of those datasets; see [data 29 | processing](https://weecology.github.io/MATSS/articles/data-processing.html) 30 | - build an analysis pipeline for macroecological analyses, using the 31 | workflow framework of the `drake` package 32 | - package the above data analytical work in a reproducible way in a 33 | [research 34 | compendium](https://weecology.github.io/MATSS/articles/MATSS.html#template-research-compendium) 35 | 36 | ## Contributing 37 | 38 | For more information about contributing code, datasets, or analyses, 39 | please check out the [Contributing Guide](CONTRIBUTING.md). 40 | 41 | ## Installation 42 | 43 | You can install **`MATSS`** from GitHub with: 44 | 45 | ``` r 46 | # install.packages("remotes") 47 | remotes::install_github("weecology/MATSS", build_opts = c("--no-resave-data", "--no-manual")) 48 | ``` 49 | 50 | **`MATSS`** also uses the **`rdataretriever`** package to download 51 | additional datasets. To get this package and its dependencies working, 52 | we recommend following the online [installation 53 | instructions](https://docs.ropensci.org/rdataretriever/). 54 | 55 | ## Datasets 56 | 57 | **`MATSS`** pulls data from a variety of sources, including: 58 | 59 | - 10 individual datasets that we’ve added, 60 | - the North American Breeding Bird Survey database (spanning 3903 61 | separate datasets), 62 | - the Global Population Dynamics Database (spanning 120 separate 63 | datasets), 64 | - and the BioTime database (spanning 361 separate datasets). 65 | 66 | Combined, there are **320483** individual time series across all of 67 | these data sources. 
68 | 69 | ## Getting Started 70 | 71 | We recommend you take a look at our vignette on [Getting 72 | Started](https://weecology.github.io/MATSS/articles/MATSS.html) for more 73 | details about how to begin using **`MATSS`**. 74 | 75 | If you have the package installed, you can also view the vignette from 76 | within R: 77 | 78 | ``` r 79 | vignette("MATSS") 80 | ``` 81 | 82 | ## Example Use Cases 83 | 84 | Here are some examples of analyses built on **`MATSS`**: 85 | 86 | - [MATSS-LDATS](https://github.com/weecology/MATSS-LDATS) applies the 87 | [**`LDATS`**](https://github.com/weecology/LDATS) package to 88 | investigate changepoints in community dynamics across the datasets 89 | in **`MATSS`** 90 | - [MATSS-Forecasting](https://github.com/weecology/MATSS-forecasting) 91 | investigates which properties are associated with the predictability 92 | of population time series across the datasets in **`MATSS`** 93 | 94 | ## Acknowledgments 95 | 96 | We thank Erica Christensen and Joan Meiners for their contributions and 97 | input on early prototypes of this project. This project would not be 98 | possible without the support of Henry Senyondo and the 99 | [retriever](https://www.data-retriever.org/) team. Finally, we thank 100 | Will Landau and the [drake](https://ropensci.github.io/drake/) team for 101 | their input and responsiveness to feedback. 102 | 103 | Package development is supported through various funding sources, 104 | including the [Gordon and Betty Moore Foundation’s Data-Driven Discovery 105 | Initiative](http://www.moore.org/programs/science/data-driven-discovery), 106 | [Grant GBMF4563](http://www.moore.org/grants/list/GBMF4563) to E. P. 107 | White (supporting also the time of J. Simonis and H. Ye), the [National 108 | Science Foundation](http://nsf.gov/), [Grant 109 | DEB-1622425](https://www.nsf.gov/awardsearch/showAward?AWD_ID=1622425) 110 | to S. K. M. 
Ernest, and a [National Science Foundation Graduate Research 111 | Fellowship](https://www.nsfgrfp.org/) (No. 112 | [DGE-1315138](https://www.nsf.gov/awardsearch/showAward?AWD_ID=1315138) 113 | and 114 | [DGE-1842473](https://www.nsf.gov/awardsearch/showAward?AWD_ID=1842473)) 115 | to R. Diaz. 116 | -------------------------------------------------------------------------------- /_pkgdown.yml: -------------------------------------------------------------------------------- 1 | navbar: 2 | components: 3 | articles: 4 | text: Articles 5 | menu: 6 | - text: Data Formats 7 | href: articles/data-formats.html 8 | - text: Dataset Summaries 9 | href: articles/dataset-summary.html 10 | - text: Installing MATSS on the HiPerGator 11 | href: articles/hipergator-install.html 12 | reference: 13 | - title: "Creating a Research Compendium" 14 | contents: 15 | - create_MATSS_compendium 16 | - title: "Drake Plans" 17 | desc: "Functions to help build Drake plans" 18 | contents: 19 | - build_analyses_plan 20 | - build_datasets_plan 21 | - build_bbs_datasets_plan 22 | - build_biotime_datasets_plan 23 | - build_gpdd_datasets_plan 24 | - build_retriever_datasets_plan 25 | - analysis_wrapper 26 | - collect_analyses 27 | - title: "Configure Data Path" 28 | desc: "Functions to configure the data path for storing downloaded datasets" 29 | contents: 30 | - get_default_data_path 31 | - check_default_data_path 32 | - use_default_data_path 33 | - title: "Rdataretriever" 34 | desc: "Functions that use Rdataretriever to download and import data" 35 | contents: 36 | - download_datasets 37 | - import_retriever_data 38 | - install_retriever_data 39 | - title: "Get Data" 40 | desc: "Functions to load datasets" 41 | contents: 42 | - get_biotime_data 43 | - get_biotime_dataset_ids 44 | - get_bbs_route_region_data 45 | - get_cowley_lizards 46 | - get_cowley_snakes 47 | - get_gpdd_data 48 | - get_jornada_data 49 | - get_karoo_data 50 | - get_kruger_data 51 | - get_maizuru_data 52 | - get_mtquad_data 53 | 
- get_portal_rodents 54 | - get_sdl_data 55 | - get_sgs_data 56 | - process_biotime_dataset 57 | - correct_biotime_dataset 58 | - title: "Analysis Methods" 59 | desc: "Functions to compute analyses" 60 | contents: 61 | - ts_summary 62 | - summarize_df 63 | - summarize_vec 64 | - richness 65 | - temp_autocor 66 | - interpolate_obs 67 | - title: "Data Checking, Filtering, and Processing" 68 | desc: "Functions to check datasets, extract components, and preprocess them" 69 | contents: 70 | - check_data_format 71 | - get_times_from_data 72 | - get_effort_from_data 73 | - normalize_obs 74 | - normalize_times 75 | - normalize_effort 76 | - to_numeric_vector 77 | - prepare_bbs_ts_data 78 | - process_bbs_route_region_data 79 | - filter_bbs_ts 80 | - filter_bbs_species 81 | - combine_bbs_subspecies 82 | - title: "Datasets" 83 | desc: "Description of the built-in datasets" 84 | contents: 85 | - maizuru_data 86 | - dragons 87 | -------------------------------------------------------------------------------- /data/dragons.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/weecology/MATSS/75940bdc39883081b6253c7a1e9a7052853f92de/data/dragons.rda -------------------------------------------------------------------------------- /deploy_compendium.sh: -------------------------------------------------------------------------------- 1 | # exit if not running on branch 'master' 2 | this_branch=$(git branch --show-current) 3 | 4 | if !([ "$this_branch" == "master" ] || [ "$TRAVIS_BRANCH" == "master" ] && [ "$TRAVIS_PULL_REQUEST" == "false" ]); then 5 | echo "Not on branch 'master'; exiting early" 6 | exit 0 7 | fi 8 | 9 | # save working directory to restore lat1er 10 | orig_dir=$(pwd) 11 | 12 | # setup vars 13 | GIT_USER="Weecology Deploy Bot" 14 | GIT_EMAIL="weecologydeploy@weecology.org" 15 | REPO="MATSSdemo" 16 | GH_REPO="github.com/weecology/$REPO.git" 17 | LAST_COMMIT_MESSAGE=$(git log --format=%B -n 1) 18 | 19 | 
# build compendium 20 | Rscript -e 'MATSS:::write_Rprofile()' 21 | Rscript -e 'MATSS::create_MATSS_compendium("../MATSSdemo", DEPLOY = TRUE)' 22 | rm .Rprofile 23 | cd ../MATSSdemo 24 | Rscript -e 'devtools::document(".")' 25 | Rscript -e 'devtools::install(".", upgrade = "never")' 26 | Rscript -e 'source("analysis/pipeline.R")' 27 | echo 'Successfully built compendium!' 28 | 29 | # add newest compendium to the compendium repo 30 | mkdir ../scratch 31 | cd ../scratch 32 | git clone git://${GH_REPO} 33 | cp -r ../MATSSdemo/* ${REPO} 34 | cd ${REPO} 35 | git remote 36 | git config user.email ${GIT_EMAIL} 37 | git config user.name ${GIT_USER} 38 | git add . 39 | git commit -m "Update Compendium: Travis Build $TRAVIS_BUILD_NUMBER" -m "$LAST_COMMIT_MESSAGE" 40 | git push "https://${COMPENDIUM_DEPLOY_TOKEN}@${GH_REPO}" master > /dev/null 2>&1 41 | 42 | # restore previous working directory 43 | cd ${orig_dir} 44 | -------------------------------------------------------------------------------- /inst/CITATION: -------------------------------------------------------------------------------- 1 | citHeader("To cite MATSS in publications, use:") 2 | 3 | citEntry(key = "ye2020MATSS", 4 | entry = "Misc", 5 | title = "Macroecological Analyses of Time Series Structure", 6 | author = personList(as.person("Hao Ye"), 7 | as.person("Ellen K. Bledsoe"), 8 | as.person("Renata Diaz"), 9 | as.person("S. K. Morgan Ernest"), 10 | as.person("Juniper L. Simonis"), 11 | as.person("Ethan P. White"), 12 | as.person("Glenda M. Yenni")), 13 | year = "2020", 14 | version = "v0.3.2", 15 | publisher = "Zenodo", 16 | url = "https://doi.org/10.5281/zenodo.3823768", 17 | doi = "10.5281/zenodo.3823768", 18 | textVersion = 19 | paste("Hao Ye, Ellen K. Bledsoe, Renata Diaz, S. K. Morgan Ernest,", 20 | "Juniper L. Simonis, Ethan P. White, & Glenda M. Yenni.", 21 | "(2020, May 13). Macroecological Analyses of Time Series", 22 | "Structure (Version v0.3.2). 
Zenodo.", 23 | "http://doi.org/10.5281/zenodo.3823768") 24 | ) 25 | -------------------------------------------------------------------------------- /inst/biotime_dataset_info.RDS: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/weecology/MATSS/75940bdc39883081b6253c7a1e9a7052853f92de/inst/biotime_dataset_info.RDS -------------------------------------------------------------------------------- /inst/extdata/cowleylizards.txt: -------------------------------------------------------------------------------- 1 | Year Site Total Cnemnidophorous_sexlineatus Crotaphytus_collaris Eumeces_obsoletus Ophisaurus_attenuatus Phrynosoma_cornutum Scincella_lateralis 2 | 1989 CowleyCounty 10 0 6 3 0 1 0 3 | 1990 CowleyCounty 15 0 7 2 4 0 2 4 | 1991 CowleyCounty -99 -99 -99 -99 -99 -99 -99 5 | 1992 CowleyCounty 7 0 3 2 0 0 2 6 | 1993 CowleyCounty 18 1 3 11 2 0 1 7 | 1994 CowleyCounty 10 0 0 9 1 0 0 8 | 1995 CowleyCounty 17 0 3 12 2 0 0 9 | 1996 CowleyCounty 9 0 5 3 1 0 0 10 | 1997 CowleyCounty 16 0 4 5 7 0 0 11 | 1998 CowleyCounty 19 0 5 6 6 1 1 12 | 1999 CowleyCounty 19 0 4 13 0 0 2 13 | 2000 CowleyCounty 48 1 3 36 3 0 5 14 | 2001 CowleyCounty 30 0 4 22 1 0 3 15 | 2002 CowleyCounty 21 0 5 12 2 0 2 16 | 2003 CowleyCounty 20 1 6 12 1 0 0 17 | -------------------------------------------------------------------------------- /inst/extdata/cowleysnakes.txt: -------------------------------------------------------------------------------- 1 | Year Site Total Agkistrodon_contortrix Diadophis_punctatus Elaphe_emoryi Elaphe_obsoleta Lampropeltis_calligaster Lampropeltis_getula Lampropeltis_triangulum Masticophis_flagellum Nerodia_sipedon Pituophis_catenifer Storeria_dekayi Tantilla_gracilis Thamnophis_proximus Thamnophis_radix Thamnophis_sirtalis Tropidoclonion_lineatum 2 | 1989 CowleyCounty 19 0 7 1 1 0 0 2 4 1 0 0 3 0 0 0 0 3 | 1990 CowleyCounty 59 2 33 3 0 1 2 4 1 2 0 1 6 1 0 3 0 4 | 1991 CowleyCounty -99 -99 -99 -99 -99 
-99 -99 -99 -99 -99 -99 -99 -99 -99 -99 -99 -99 5 | 1992 CowleyCounty 29 0 7 2 0 0 0 1 0 0 0 0 18 0 0 1 0 6 | 1993 CowleyCounty 64 0 29 1 0 0 1 0 0 0 1 0 30 0 0 0 2 7 | 1994 CowleyCounty 45 0 31 1 0 0 0 0 0 0 0 0 11 0 0 1 1 8 | 1995 CowleyCounty 75 1 47 3 0 1 2 1 2 0 0 0 14 0 0 4 0 9 | 1996 CowleyCounty 28 0 10 2 0 0 2 1 0 0 0 0 9 0 0 4 0 10 | 1997 CowleyCounty 61 0 41 5 0 0 1 1 0 0 1 0 9 1 0 2 0 11 | 1998 CowleyCounty 76 0 44 0 0 1 1 0 0 2 0 0 18 1 2 5 2 12 | 1999 CowleyCounty 56 0 22 5 0 0 2 0 0 1 0 0 18 0 2 4 2 13 | 2000 CowleyCounty 59 0 12 1 0 1 4 0 1 0 0 0 28 7 0 3 2 14 | 2001 CowleyCounty 43 0 14 1 0 0 2 0 0 1 0 0 18 2 0 5 0 15 | 2002 CowleyCounty 52 0 12 0 0 1 3 1 0 0 0 0 32 0 0 3 0 16 | 2003 CowleyCounty 56 0 29 0 0 0 0 2 1 0 0 0 17 1 0 5 1 17 | -------------------------------------------------------------------------------- /inst/extdata/gpdd_locations.csv: -------------------------------------------------------------------------------- 1 | "LocationID","TimePeriodID" 2 | 19,"408" 3 | 20,"408" 4 | 21,"408" 5 | 22,"408" 6 | 23,"408" 7 | 24,"408" 8 | 25,"408" 9 | 26,"408" 10 | 27,"408" 11 | 28,"408" 12 | 29,"408" 13 | 30,"408" 14 | 31,"408" 15 | 32,"408" 16 | 33,"408" 17 | 34,"408" 18 | 35,"408" 19 | 36,"408" 20 | 37,"408" 21 | 38,"408" 22 | 39,"408" 23 | 40,"408" 24 | 41,"408" 25 | 42,"408" 26 | 43,"408" 27 | 44,"408" 28 | 45,"408" 29 | 46,"408" 30 | 47,"408" 31 | 48,"408" 32 | 49,"408" 33 | 50,"408" 34 | 51,"408" 35 | 52,"408" 36 | 53,"408" 37 | 54,"408" 38 | 55,"408" 39 | 56,"408" 40 | 57,"408" 41 | 58,"408" 42 | 59,"408" 43 | 60,"408" 44 | 61,"408" 45 | 62,"408" 46 | 63,"408" 47 | 64,"408" 48 | 65,"408" 49 | 66,"408" 50 | 67,"408" 51 | 68,"408" 52 | 69,"408" 53 | 70,"408" 54 | 71,"408" 55 | 72,"408" 56 | 73,"408" 57 | 74,"408" 58 | 75,"408" 59 | 76,"408" 60 | 77,"408" 61 | 78,"408" 62 | 79,"408" 63 | 80,"408" 64 | 81,"408" 65 | 82,"408" 66 | 83,"408" 67 | 377,"408" 68 | 378,"408" 69 | 379,"408" 70 | 380,"408" 71 | 381,"408" 72 | 382,"408" 73 | 383,"408" 
74 | 384,"408" 75 | 385,"408" 76 | 386,"408" 77 | 387,"408" 78 | 388,"408" 79 | 389,"408" 80 | 390,"408" 81 | 391,"408" 82 | 392,"408" 83 | 393,"408" 84 | 394,"408" 85 | 395,"408" 86 | 396,"408" 87 | 397,"408" 88 | 398,"408" 89 | 399,"408" 90 | 400,"408" 91 | 401,"408" 92 | 402,"408" 93 | 403,"408" 94 | 404,"408" 95 | 405,"408" 96 | 406,"408" 97 | 407,"408" 98 | 408,"408" 99 | 409,"408" 100 | 410,"408" 101 | 411,"408" 102 | 412,"408" 103 | 413,"408" 104 | 1024,"1:12" 105 | 10100,"408" 106 | 10561,"408" 107 | 10562,"408" 108 | 10563,"408" 109 | 10564,"408" 110 | 10792,"408" 111 | 10793,"408" 112 | 10794,"408" 113 | 10795,"408" 114 | 10796,"408" 115 | 10797,"408" 116 | 10798,"408" 117 | 10799,"408" 118 | 10800,"408" 119 | 10801,"408" 120 | 10802,"408" 121 | 10803,"408" 122 | -------------------------------------------------------------------------------- /inst/extdata/sanparks/peggym.111.1-GGHNPSpecieCodes.txt: -------------------------------------------------------------------------------- 1 | ID Common name Species ID 2 | 1 Blesbok bl 3 | 2 Black wildebeest bw 4 | 3 Eland el 5 | 4 Grey rhebok gr 6 | 5 Mountain reedbuck mr 7 | 6 Oribi or 8 | 7 Ostrich os 9 | 8 Red hartebeest rh 10 | 9 Springbok sp 11 | 10 Burchell's zebra z 12 | 11 13 | 12 Cattle Cattle 14 | 13 Donkeys Donkeys 15 | 14 Goats Goats 16 | 15 Horses Horses 17 | 16 Sheep Sheep 18 | -------------------------------------------------------------------------------- /inst/extdata/sanparks/peggym.112.1-GGHNPTotals.txt: -------------------------------------------------------------------------------- 1 | 1994/5/2 00:00:00 Black wildebeest 161.00 2 | 1994/5/2 00:00:00 Burchell's zebra 131.00 3 | 1994/5/2 00:00:00 Eland 76.00 4 | 1994/5/2 00:00:00 Oribi 10.00 5 | 1994/5/2 00:00:00 Springbok 73.00 6 | 1997/2/13 00:00:00 Black wildebeest 95.00 7 | 1997/2/13 00:00:00 Burchell's zebra 106.00 8 | 1997/2/13 00:00:00 Cattle 46.00 9 | 1997/2/13 00:00:00 Eland 125.00 10 | 1997/2/13 00:00:00 Grey rhebok 187.00 11 | 1997/2/13 
00:00:00 Mountain reedbuck 257.00 12 | 1997/2/13 00:00:00 Oribi 13.00 13 | 1997/2/13 00:00:00 Springbok 54.00 14 | 1998/2/10 00:00:00 Black wildebeest 103.00 15 | 1998/2/10 00:00:00 Burchell's zebra 99.00 16 | 1998/2/10 00:00:00 Cattle 16.00 17 | 1998/2/10 00:00:00 Eland 126.00 18 | 1998/2/10 00:00:00 Grey rhebok 111.00 19 | 1998/2/10 00:00:00 Horses 3.00 20 | 1998/2/10 00:00:00 Mountain reedbuck 151.00 21 | 1998/2/10 00:00:00 Oribi 14.00 22 | 1998/2/10 00:00:00 Red hartebeest 41.00 23 | 1998/2/10 00:00:00 Springbok 67.00 24 | 1999/2/8 00:00:00 Black wildebeest 103.00 25 | 1999/2/8 00:00:00 Burchell's zebra 88.00 26 | 1999/2/8 00:00:00 Eland 148.00 27 | 1999/2/8 00:00:00 Grey rhebok 67.00 28 | 1999/2/8 00:00:00 Mountain reedbuck 232.00 29 | 1999/2/8 00:00:00 Oribi 5.00 30 | 1999/2/8 00:00:00 Red hartebeest 41.00 31 | 1999/2/8 00:00:00 Springbok 53.00 32 | 2000/2/18 00:00:00 Black wildebeest 115.00 33 | 2000/2/18 00:00:00 Burchell's zebra 92.00 34 | 2000/2/18 00:00:00 Eland 165.00 35 | 2000/2/18 00:00:00 Grey rhebok 152.00 36 | 2000/2/18 00:00:00 Mountain reedbuck 96.00 37 | 2000/2/18 00:00:00 Oribi 16.00 38 | 2000/2/18 00:00:00 Red hartebeest 15.00 39 | 2000/2/18 00:00:00 Springbok 59.00 40 | 2002/2/17 00:00:00 Black wildebeest 406.00 41 | 2002/2/17 00:00:00 Blesbok 853.00 42 | 2002/2/17 00:00:00 Burchell's zebra 281.00 43 | 2002/2/17 00:00:00 Cattle 108.00 44 | 2002/2/17 00:00:00 Eland 276.00 45 | 2002/2/17 00:00:00 Goats 71.00 46 | 2002/2/17 00:00:00 Grey rhebok 140.00 47 | 2002/2/17 00:00:00 Mountain reedbuck 157.00 48 | 2002/2/17 00:00:00 Oribi 17.00 49 | 2002/2/17 00:00:00 Red hartebeest 346.00 50 | 2002/2/17 00:00:00 Sheep 108.00 51 | 2002/2/17 00:00:00 Springbok 172.00 52 | 2003/2/4 00:00:00 Black wildebeest 407.00 53 | 2003/2/4 00:00:00 Blesbok 528.00 54 | 2003/2/4 00:00:00 Burchell's zebra 334.00 55 | 2003/2/4 00:00:00 Eland 417.00 56 | 2003/2/4 00:00:00 Goats 70.00 57 | 2003/2/4 00:00:00 Grey rhebok 154.00 58 | 2003/2/4 00:00:00 Mountain reedbuck 142.00 
59 | 2003/2/4 00:00:00 Oribi 9.00 60 | 2003/2/4 00:00:00 Red hartebeest 217.00 61 | 2003/2/4 00:00:00 Sheep 70.00 62 | 2003/2/4 00:00:00 Springbok 201.00 63 | 2004/2/19 00:00:00 Black wildebeest 735.00 64 | 2004/2/19 00:00:00 Blesbok 1923.00 65 | 2004/2/19 00:00:00 Burchell's zebra 775.00 66 | 2004/2/19 00:00:00 Eland 668.00 67 | 2004/2/19 00:00:00 Grey rhebok 160.00 68 | 2004/2/19 00:00:00 Mountain reedbuck 150.00 69 | 2004/2/19 00:00:00 Oribi 7.00 70 | 2004/2/19 00:00:00 Red hartebeest 915.00 71 | 2004/2/19 00:00:00 Springbok 412.00 72 | -------------------------------------------------------------------------------- /inst/extdata/sanparks/peggym.114.1-KarooNationalParkCensuscodes.txt: -------------------------------------------------------------------------------- 1 | Common name Species 2 | Aardwolf aard 3 | Buffalo b 4 | Baboon bab 5 | Black breasted snake eagle bbse 6 | Black eagle be 7 | Bat eared fox bef 8 | Ben ben 9 | Booted eagle bte 10 | Cow cow 11 | Duiker d 12 | Eland el 13 | Fd fd 14 | Gemsbok g 15 | Goat goat 16 | Grey rhebok gr 17 | Jackal j 18 | Jackal buzzard jb 19 | Kudu k 20 | Kori burstard kb 21 | Klipspringer kp 22 | Martial eagle me 23 | Mountain reedbuck mr 24 | Mountain zebra mz 25 | Ostrich o 26 | Porcupine porc 27 | Red hartebeest rh 28 | Springbok sp 29 | Steenbok st 30 | "Plain zebra, Quagga" z 31 | -------------------------------------------------------------------------------- /inst/extdata/subsampled/biotimesql/CITATION: -------------------------------------------------------------------------------- 1 | Dornelas M, Antão LH, Moyes F, et al. BioTIME: A database of biodiversity time series for the Anthropocene. Global Ecology & Biogeography. 2018; 00:1 - 26. https://doi.org/10.1111/geb.12729. 
-------------------------------------------------------------------------------- /inst/extdata/subsampled/biotimesql/biotimesql_ID_ABUNDANCE.csv: -------------------------------------------------------------------------------- 1 | id_abundance,abundance_type 2 | 1,Count 3 | 5,Presence/Absence 4 | 6,MeanCount 5 | 7,Density 6 | 8,NA 7 | -------------------------------------------------------------------------------- /inst/extdata/subsampled/biotimesql/biotimesql_biomass.csv: -------------------------------------------------------------------------------- 1 | id_biomass,biomass_type 2 | 1,NA 3 | 2,Size 4 | 3,Weight 5 | 9,Relative biomass 6 | 10,Volume 7 | 12,Cover 8 | -------------------------------------------------------------------------------- /inst/extdata/subsampled/biotimesql/biotimesql_contacts.csv: -------------------------------------------------------------------------------- 1 | "id_contacts","study_id","contact_1","contact_2","cont_1_mail","cont_2_mail","license","web_link","data_source" 2 | 1,10,"Sara Webb","Sara Scanga","swebb@drew.edu","sescanga@utica.edu","ODbL","http://esapubs.org/archive/ecol/E082/011/default.htm","Ecology" 3 | 16,57,"Emily Stanley","Corinna Gries","ehstanley@wisc.edu","cgries@wisc.edu","CC-by","http://lter.limnology.wisc.edu/","North Temperate Lakes Long Term Ecological Research" 4 | 71,162,"Stephen Hale","","hale.stephen@epa.gov","","PDDL","http://www.iobis.org/mapper/?dataset=25","OBIS" 5 | 202,321,"Robert Schooley","","schooley@illinois.edu","","CC-by","http://jornada.nmsu.edu/lter/dataset/49798/view","Jornada LTER" 6 | -------------------------------------------------------------------------------- /inst/extdata/subsampled/biotimesql/biotimesql_datasets.csv: -------------------------------------------------------------------------------- 1 | 
"id_datasets","study_id","taxa","organisms","title","ab_bio","has_plot","data_points","start_year","end_year","cent_lat","cent_long","number_of_species","number_of_samples","number_lat_long","total","grain_size_text","grain_sq_km","area_sq_km","ab_type","bio_type","sample_type" 2 | 1,10,"Terrestrial plants","woody plants","Windstorm disturbance without patch dynamics twelve years of change in a Minnesota forest","A","Y",3,1984,1996,47.4,-95.12,25,191,1,1406,"2 x 2 m plots (16 ha)",4e-06,5.91e-06,1,1,11 3 | 16,57,"Fish","fish","North Temperate Lakes LTER Fish Abundance","A","S",32,1981,2012,43.9928,-89.4946,76,258,1,10892,"11 lakes/6 littoral zones",40,5.56e-06,1,1,36 4 | 71,162,"Benthos","Benthos","EPA'S EMAP Database","A","S",13,1990,2004,36.76236,-116.837677,5624,6588,6073,173040,"0.04 m sq or 0.1 m sq grabs",4.4e-08,4871568,5,1,16 5 | 202,321,"Mammals","small mammals","Small Mammal Exclosure Study. Jornada LTER. SMES rodent trapping data","AB","Y",13,1995,2007,32.550335,-106.811564,19,11757,766,12787,"0.5",0.5,9.9991855,1,3,112 6 | -------------------------------------------------------------------------------- /inst/extdata/subsampled/biotimesql/biotimesql_site.csv: -------------------------------------------------------------------------------- 1 | "id_site","study_id","realm","climate","general_treat","treatment","treat_comments","treat_date","cen_latitude","cen_longitude","habitat","protected_area","area","biome_map" 2 | 1,10,"Terrestrial","Temperate","","","","",47.4,-95.12,"Woodland",FALSE,5.91e-06,"Temperate broadleaf and mixed forests" 3 | 16,57,"Freshwater","Temperate","","","","",43.9928,-89.4946,"Lakes",FALSE,5.56e-06,"Small lake ecosystems" 4 | 71,162,"Marine","Temperate/Tropical","","","","",36.76236,-116.837677,"Coastal habitats",FALSE,54966400,"Multiple ecoregions" 5 | 202,321,"Terrestrial","Temperate","vegetation responses to the exclusion of small mammals","one unfenced control plot. one fenced plot to exclude rodents and rabbits. 
and one fenced plot to exclude rabbits only","Each of the three or four plots in a replicate block are separated by 20 meters","",32.550335,-106.811564,"Shrubland",FALSE,9.99918,"Deserts and xeric shrublands" 6 | -------------------------------------------------------------------------------- /inst/extdata/subsampled/biotimesql/biotimesql_species.csv: -------------------------------------------------------------------------------- 1 | "id_species","genus","species","genus_species" 2 | 2709,"Chaetodipus","penicillatus","Chaetodipus penicillatus" 3 | 2879,"Dipodomys","merriami","Dipodomys merriami" 4 | 2880,"Dipodomys","ordii","Dipodomys ordii" 5 | 2882,"Dipodomys","spectabilis","Dipodomys spectabilis" 6 | 3203,"Neotoma","albigula","Neotoma albigula" 7 | 3204,"Neotoma","micropus","Neotoma micropus" 8 | 3232,"Onychomys","arenicola","Onychomys arenicola" 9 | 3233,"Onychomys","leucogaster","Onychomys leucogaster" 10 | 3289,"Perognathus","flavus","Perognathus flavus" 11 | 3290,"Peromyscus","boylii","Peromyscus boylii" 12 | 3292,"Peromyscus","eremicus","Peromyscus eremicus" 13 | 3293,"Peromyscus","leucopus","Peromyscus leucopus" 14 | 3294,"Peromyscus","maniculatus","Peromyscus maniculatus" 15 | 3295,"Peromyscus","sp","Peromyscus sp" 16 | 3434,"Reithrodontomys","megalotis","Reithrodontomys megalotis" 17 | 3481,"Sigmodon","hispidus","Sigmodon hispidus" 18 | 3486,"Spermophilus","spilosoma","Spermophilus spilosoma" 19 | 42829,"Mus","musculus","Mus musculus" 20 | 43129,"Dipodomys","sp","Dipodomys sp" 21 | -------------------------------------------------------------------------------- /inst/extdata/subsampled/breed-bird-survey/CITATION: -------------------------------------------------------------------------------- 1 | Pardieck, K.L., D.J. Ziolkowski Jr., M.-A.R. Hudson. 2015. North American Breeding Bird Survey Dataset 1966 - 2014, version 2014.0. U.S. 
Geological Survey, Patuxent Wildlife Research Center -------------------------------------------------------------------------------- /inst/extdata/subsampled/breed-bird-survey/breed_bird_survey_counts.csv: -------------------------------------------------------------------------------- 1 | record_id,routedataid,countrynum,statenum,route,rpid,year,aou,count10,count20,count30,count40,count50,stoptotal,speciestotal 2 | 5445225,6174482,124,4,1,101,1972,30,0,0,0,2,0,1,2 3 | 5445226,6174482,124,4,1,101,1972,40,0,0,0,0,1,1,1 4 | 5445227,6174482,124,4,1,101,1972,540,5,16,1,5,3,14,30 5 | 5445228,6174482,124,4,1,101,1972,700,0,2,0,0,0,1,2 6 | 5445229,6174482,124,4,1,101,1972,1320,8,3,0,32,5,11,48 7 | 5445230,6174482,124,4,1,101,1972,1370,3,0,0,0,2,3,5 8 | 5445231,6174482,124,4,1,101,1972,1400,1,2,0,2,2,4,7 9 | 5445232,6174482,124,4,1,101,1972,1420,2,1,0,8,3,7,14 10 | 5445233,6174482,124,4,1,101,1972,1430,1,0,0,15,0,3,16 11 | 5445234,6174482,124,4,1,101,1972,1470,0,0,0,1,0,1,1 12 | 5445235,6174482,124,4,1,101,1972,1490,0,0,0,0,1,1,1 13 | 5445236,6174482,124,4,1,101,1972,1940,0,1,0,0,0,1,1 14 | 5445237,6174482,124,4,1,101,1972,2210,0,0,0,1,0,1,1 15 | 5445238,6174482,124,4,1,101,1972,2240,0,0,0,5,0,1,5 16 | 5445239,6174482,124,4,1,101,1972,2490,0,0,1,1,0,2,2 17 | 5445240,6174482,124,4,1,101,1972,2580,2,1,1,1,1,6,6 18 | 5445241,6174482,124,4,1,101,1972,2630,1,0,0,0,0,1,1 19 | 5445242,6174482,124,4,1,101,1972,2640,0,2,5,0,0,6,7 20 | 5445243,6174482,124,4,1,101,1972,2730,1,0,0,2,0,2,3 21 | 5445244,6174482,124,4,1,101,1972,3091,9,10,1,0,0,15,20 22 | 5445245,6174482,124,4,1,101,1972,3160,0,1,3,2,0,4,6 23 | 5445246,6174482,124,4,1,101,1972,3310,1,0,0,2,0,3,3 24 | 5445247,6174482,124,4,1,101,1972,3370,1,0,0,0,0,1,1 25 | 5445248,6174482,124,4,1,101,1972,3420,0,1,0,1,0,2,2 26 | 5445249,6174482,124,4,1,101,1972,4440,2,5,1,0,0,5,8 27 | 5445250,6174482,124,4,1,101,1972,4470,0,3,1,0,2,4,6 28 | 5445251,6174482,124,4,1,101,1972,4740,1,3,18,15,12,25,49 29 | 
5445252,6174482,124,4,1,101,1972,4750,0,4,0,0,0,1,4 30 | 5445253,6174482,124,4,1,101,1972,4880,0,0,0,2,0,2,2 31 | 5445254,6174482,124,4,1,101,1972,4930,1,11,2,7,3,7,24 32 | 5445255,6174482,124,4,1,101,1972,4950,1,8,6,7,3,17,25 33 | 5445256,6174482,124,4,1,101,1972,4980,18,20,8,7,7,27,60 34 | 5445257,6174482,124,4,1,101,1972,5011,21,21,24,20,12,46,98 35 | 5445258,6174482,124,4,1,101,1972,5070,1,0,0,0,0,1,1 36 | 5445259,6174482,124,4,1,101,1972,5100,0,0,9,2,0,4,11 37 | 5445260,6174482,124,4,1,101,1972,5110,0,3,0,0,0,1,3 38 | 5445261,6174482,124,4,1,101,1972,5380,6,1,9,11,4,21,31 39 | 5445262,6174482,124,4,1,101,1972,5400,4,9,9,5,0,20,27 40 | 5445263,6174482,124,4,1,101,1972,5420,16,8,8,2,4,20,38 41 | 5445264,6174482,124,4,1,101,1972,5610,4,3,2,0,1,9,10 42 | 5445265,6174482,124,4,1,101,1972,6050,7,20,3,16,9,20,55 43 | 5445266,6174482,124,4,1,101,1972,6120,0,203,0,0,0,3,203 44 | 5445267,6174482,124,4,1,101,1972,6160,0,0,1,0,0,1,1 45 | 5445268,6174482,124,4,1,101,1972,6520,1,5,0,0,0,3,6 46 | 5445269,6174482,124,4,1,101,1972,6882,12,15,3,0,0,8,30 47 | 5445270,6174482,124,4,1,101,1972,7000,0,0,1,0,0,1,1 48 | 5445271,6174482,124,4,1,101,1972,7210,0,2,0,0,0,1,2 49 | 5445272,6178138,124,4,1,101,1973,30,0,0,0,2,0,1,2 50 | 5445273,6178138,124,4,1,101,1973,540,14,40,7,3,5,22,69 51 | 5445274,6178138,124,4,1,101,1973,700,2,2,0,0,0,2,4 52 | 5445275,6178138,124,4,1,101,1973,770,0,0,0,1,0,1,1 53 | 5445276,6178138,124,4,1,101,1973,1320,5,9,9,10,6,13,39 54 | 5445277,6178138,124,4,1,101,1973,1350,0,5,2,0,0,4,7 55 | 5445278,6178138,124,4,1,101,1973,1370,2,7,0,2,0,4,11 56 | 5445279,6178138,124,4,1,101,1973,1420,0,0,1,2,0,2,3 57 | 5445280,6178138,124,4,1,101,1973,1430,0,4,1,0,0,2,5 58 | 5445281,6178138,124,4,1,101,1973,1490,0,0,1,5,0,3,6 59 | 5445282,6178138,124,4,1,101,1973,1720,0,0,20,0,0,1,20 60 | 5445283,6178138,124,4,1,101,1973,2020,0,8,0,0,0,1,8 61 | 5445284,6178138,124,4,1,101,1973,2490,0,3,0,0,0,2,3 62 | 5445285,6178138,124,4,1,101,1973,2580,0,0,0,1,0,1,1 63 | 
5445286,6178138,124,4,1,101,1973,2640,0,0,3,0,0,2,3 64 | 5445287,6178138,124,4,1,101,1973,2730,1,0,1,0,0,2,2 65 | 5445288,6178138,124,4,1,101,1973,3090,0,0,1,0,1,2,2 66 | 5445289,6178138,124,4,1,101,1973,3091,1,1,0,0,0,2,2 67 | 5445290,6178138,124,4,1,101,1973,3160,0,3,0,1,0,3,4 68 | 5445291,6178138,124,4,1,101,1973,3420,0,0,1,0,0,1,1 69 | 5445292,6178138,124,4,1,101,1973,4440,1,0,1,4,0,5,6 70 | 5445293,6178138,124,4,1,101,1973,4740,5,5,17,16,7,28,50 71 | 5445294,6178138,124,4,1,101,1973,4750,0,3,2,0,0,4,5 72 | 5445295,6178138,124,4,1,101,1973,4880,0,0,1,1,1,3,3 73 | 5445296,6178138,124,4,1,101,1973,4930,0,0,0,10,0,1,10 74 | 5445297,6178138,124,4,1,101,1973,4950,5,1,3,0,0,6,9 75 | 5445298,6178138,124,4,1,101,1973,4980,18,18,3,6,9,20,54 76 | 5445299,6178138,124,4,1,101,1973,5011,12,13,13,9,5,34,52 77 | 5445300,6178138,124,4,1,101,1973,5100,2,1,9,0,1,5,13 78 | 5445301,6178138,124,4,1,101,1973,5380,4,3,10,14,0,17,31 79 | 5445302,6178138,124,4,1,101,1973,5400,2,6,2,3,3,14,16 80 | 5445303,6178138,124,4,1,101,1973,5420,7,6,0,4,0,16,17 81 | 5445304,6178138,124,4,1,101,1973,5610,3,3,0,1,0,7,7 82 | 5445305,6178138,124,4,1,101,1973,6050,2,2,1,5,2,11,12 83 | 5445306,6178138,124,4,1,101,1973,6120,0,50,0,0,0,1,50 84 | 5445307,6178138,124,4,1,101,1973,6130,1,2,1,1,4,8,9 85 | 5445308,6178138,124,4,1,101,1973,6160,0,0,2,0,0,1,2 86 | 5445309,6178138,124,4,1,101,1973,6520,1,2,0,0,0,2,3 87 | 5445310,6178138,124,4,1,101,1973,6810,0,2,0,0,0,1,2 88 | 5445311,6178138,124,4,1,101,1973,6882,4,20,1,2,2,6,29 89 | 5445312,6178138,124,4,1,101,1973,7610,0,1,0,1,0,2,2 90 | 5445313,6179521,124,4,1,101,1974,540,18,4,3,2,2,14,29 91 | 5445314,6179521,124,4,1,101,1974,1320,0,3,0,4,13,5,20 92 | 5445315,6179521,124,4,1,101,1974,1370,0,0,0,1,8,2,9 93 | 5445316,6179521,124,4,1,101,1974,1400,0,0,0,3,6,2,9 94 | 5445317,6179521,124,4,1,101,1974,1420,0,0,0,1,5,2,6 95 | 5445318,6179521,124,4,1,101,1974,1430,1,3,0,1,6,7,11 96 | 5445319,6179521,124,4,1,101,1974,1490,0,2,0,0,9,2,11 97 | 
5445320,6179521,124,4,1,101,1974,1720,0,2,1,0,0,3,3 98 | 5445321,6179521,124,4,1,101,1974,2240,0,0,0,0,9,1,9 99 | 5445322,6179521,124,4,1,101,1974,2250,0,0,0,0,1,1,1 100 | 5445323,6179521,124,4,1,101,1974,2490,0,1,3,2,1,7,7 101 | 5445324,6179521,124,4,1,101,1974,2580,1,0,1,2,2,5,6 102 | -------------------------------------------------------------------------------- /inst/extdata/subsampled/breed-bird-survey/breed_bird_survey_region_codes.csv: -------------------------------------------------------------------------------- 1 | countrynum,regioncode,regionname 2 | 124,4,Alberta 3 | -------------------------------------------------------------------------------- /inst/extdata/subsampled/breed-bird-survey/breed_bird_survey_routes.csv: -------------------------------------------------------------------------------- 1 | countrynum,statenum,route,routename,active,latitude,longitude,stratum,bcr,routetypeid,routetypedetailid 2 | 124,4,1,SEVEN PERSON,0,49.93222,-110.84452,38,11,1,1 3 | 124,4,2,BOW ISLAND,1,49.87324,-111.4102,38,11,1,1 4 | 124,4,3,MILK RIVER,1,49.08995,-112.39699,65,11,1,1 5 | 124,4,4,FORT MACLEOD,1,49.81509,-113.56982000000001,38,11,1,1 6 | 124,4,5,SENTINEL,0,49.6325,-114.58692,64,10,1,1 7 | 124,4,6,BINDLOSS,1,50.74668,-110.25869,38,11,1,1 8 | 124,4,7,BROOKS,0,50.56388,-111.84916,38,11,1,1 9 | 124,4,8,BOW CITY,0,50.30279,-112.02041,38,11,1,1 10 | 124,4,9,BUFFALO HILL,1,50.60147,-113.02852,38,11,1,1 11 | 124,4,10,HIGHWOOD RIVER,1,50.48053,-114.42456,30,10,1,1 12 | 124,4,11,CEREAL,1,51.00887,-110.7956,37,11,1,1 13 | 124,4,12,BIG STONE,0,51.32875,-111.07823,37,11,1,1 14 | 124,4,13,ROWLEY,1,51.77818,-112.64694,37,11,1,1 15 | 124,4,14,KEOMA,0,51.21261,-113.77234,30,11,1,1 16 | 124,4,15,EAGLE HILL,1,51.83143,-114.45039,30,6,1,1 17 | 124,4,16,EXSHAW,0,51.09521,-115.09111,64,10,1,1 18 | 124,4,17,BOW PASS,1,51.46021,-116.22023,64,10,1,1 19 | 124,4,18,CZAR,1,52.46309,-110.65127,30,11,1,1 20 | 124,4,19,TALBOT,1,52.20111,-111.07638,37,11,1,1 21 | 
124,4,20,HALKIRK,1,52.17208,-112.33861,37,11,1,1 22 | 124,4,21,DELBURNE,0,52.20126,-113.22753,30,11,1,1 23 | 124,4,22,RAVEN,1,52.02799,-114.47782,30,6,1,1 24 | 124,4,23,HARLECH,0,52.49814,-116.00016,29,6,1,1 25 | 124,4,24,WINDY POINT,1,52.25872,-116.39168,29,10,1,1 26 | 124,4,25,MALIGNE LAKE,1,52.73141,-117.63709,64,10,1,1 27 | 124,4,26,DERWENT,1,53.49666,-110.86644,30,11,1,1 28 | 124,4,27,TWO HILLS,1,53.71555,-111.52556,30,11,1,1 29 | 124,4,28,LINDBROOK,1,53.33769,-112.78208,30,11,1,1 30 | 124,4,29,CALAHOO,1,53.68701,-113.88548,30,11,1,1 31 | 124,4,30,WARBURG,0,53.16381,-114.22008,30,6,1,1 32 | 124,4,31,RONAN,1,53.80272,-115.26551,29,6,1,1 33 | 124,4,32,EDSON,1,53.59917,-116.40425,29,6,1,1 34 | 124,4,33,ENTRANCE,1,53.3741,-117.71063,29,6,1,1 35 | 124,4,34,HOFF,1,53.81254,-118.48345,29,10,1,1 36 | 124,4,36,KEHIWIN LAKE,1,54.0577,-110.8863,30,6,1,1 37 | 124,4,37,THERIEN,1,54.26763,-111.2782,30,6,1,1 38 | 124,4,38,KINIKINIK,1,54.49999,-112.97986,29,6,1,1 39 | 124,4,39,ATHABASCA,1,54.83513,-113.15816,29,6,1,1 40 | 124,4,40,THUNDER LAKE,1,54.12038,-114.71491,29,6,1,1 41 | 124,4,41,SWAN HILLS,0,54.76026,-115.53764,29,6,1,1 42 | 124,4,42,WINDFALL,1,54.13311,-116.21592,30,6,1,1 43 | 124,4,43,LITTLE SMOKY,1,54.76609,-117.24782,30,6,1,1 44 | 124,4,44,ECONOMY CRK,1,54.75011,-118.22621,30,6,1,1 45 | 124,4,45,WAPITI,0,54.93788,-119.19583,30,6,1,1 46 | 124,4,46,WINEFRED LAKE,1,55.55421,-110.90708,29,6,1,1 47 | 124,4,47,HEART LAKE,1,55.00613,-111.73195,29,6,1,1 48 | 124,4,48,WANDERING R,0,55.38705,-112.45371,29,6,1,1 49 | 124,4,49,CALLING LAKE,1,55.486,-113.44,29,6,1,1 50 | 124,4,50,MARTEN RIVER,1,55.49903,-114.91416,29,6,1,1 51 | 124,4,51,KINUSO,0,55.25797,-115.38534,29,6,1,1 52 | 124,4,52,HIGH PRAIRIE,0,55.5528,-116.78184,29,6,1,1 53 | 124,4,53,WATINO,1,55.78143,-117.82555,29,6,1,1 54 | 124,4,54,RYCROFT,1,55.84027,-118.45722,29,6,1,1 55 | 124,4,55,WEMBLEY,1,55.1415,-119.21774,30,6,1,1 56 | 124,4,57,HANGINGSTONE,0,56.23349,-111.67212,29,6,1,1 57 | 
124,4,60,SUNNYBROOK,1,53.16381,-114.22008,30,6,1,1 58 | 124,4,61,LOON RIVER,1,56.28424,-115.36416,29,6,1,1 59 | 124,4,62,THREE CREEKS,1,56.3628,-117.00335,29,6,1,1 60 | 124,4,63,CARDINAL LK,1,56.27611,-117.82119,29,6,1,1 61 | 124,4,64,HINES CREEK,1,56.50842,-118.73912,29,6,1,1 62 | 124,4,65,BEAR CANYON,1,56.19496,-119.8199,29,6,1,1 63 | 124,4,73,HOTCHKISS,1,57.06812,-117.61089,29,6,1,1 64 | 124,4,77,FORT SMITH,1,59.98857,-111.83849,25,6,1,1 65 | 124,4,78,PEACE POINT,1,59.31102,-112.48716,29,6,1,1 66 | 124,4,79,HAY CAMP,1,59.86175,-111.61484,25,6,1,1 67 | 124,4,81,RED ROCK COULEE,1,49.8778677,-110.889404,38,11,1,1 68 | 124,4,96,KANANASKIS,1,50.98545,-115.07526,64,10,1,1 69 | 124,4,101,MANYBERRIES,1,49.26127,-110.69079,38,11,1,1 70 | 124,4,102,SHERBOURNE L,1,49.58229,-111.69789,38,11,1,1 71 | 124,4,103,HORSEFLY LK.,1,49.61134,-112.1495,38,11,1,1 72 | 124,4,104,GRANUM,1,49.90214,-113.09867,38,11,1,1 73 | 124,4,105,WATERTON PRK,1,49.0201,-114.04528,65,10,1,1 74 | 124,4,106,SCHULER,1,50.00432,-110.07319,38,11,1,1 75 | 124,4,107,WARDLOW,1,50.92139,-111.11275,38,11,1,1 76 | 124,4,108,RANIER,0,50.30279,-112.02041,38,11,1,1 77 | 124,4,109,MAZEPPA,1,50.42635,-113.82728,37,11,1,1 78 | 124,4,110,OKOTOKS,0,50.54263,-114.09345,37,11,1,1 79 | 124,4,111,ESTHER,1,51.44533,-110.38028,37,11,1,1 80 | 124,4,112,HEMARUKA,1,51.57551,-111.20079,37,11,1,1 81 | 124,4,113,DOWLING LAKE,1,51.95441,-112.13115,37,11,1,1 82 | 124,4,114,SHEEP COULEE,1,51.56148,-113.89621,30,11,1,1 83 | 124,4,115,DIDSBURY,1,51.54794,-114.21268,30,11,1,1 84 | 124,4,116,WIGWAM CREEK,1,51.66166,-115.40694,29,6,1,1 85 | 124,4,118,MILLARD LAKE,1,52.93074,-110.58437,30,11,1,1 86 | 124,4,119,KILLAM,1,52.90149,-111.89042,30,11,1,1 87 | 124,4,120,HEISLER,1,52.63801,-111.99628,30,11,1,1 88 | 124,4,121,BATTLE RIVER,1,52.61069,-113.98702,30,11,1,1 89 | 124,4,122,BENALTO,1,52.5814,-114.40959,30,6,1,1 90 | 124,4,123,CRIPPLE CRK,1,52.20935,-115.96781,64,6,1,1 91 | 124,4,125,SUNWAPTA FLS,1,52.48771,-117.51832,64,10,1,1 92 | 
124,4,126,VANESTI,1,53.29003,-110.54172,30,11,1,1 93 | 124,4,127,VIKING,1,53.10329,-111.76012,30,11,1,1 94 | 124,4,128,BRUCE,1,53.10505,-112.14825,30,11,1,1 95 | 124,4,129,HAY LAKES,0,53.11978,-113.1229,30,11,1,1 96 | 124,4,130,TOMAHAWK,0,53.54122,-114.78685,30,6,1,1 97 | 124,4,131,MACKAY,1,53.77362,-115.61189,29,6,1,1 98 | 124,4,136,BEAVERDAM,1,54.18052,-110.30494,29,6,1,1 99 | 124,4,137,LAC LA BICHE,1,54.75735,-111.97184,29,6,1,1 100 | 124,4,138,LONG LAKE,1,54.43414,-112.8288,30,6,1,1 101 | 124,4,139,PICKARDVILLE,1,54.05083,-113.91555,30,11,1,1 102 | -------------------------------------------------------------------------------- /inst/extdata/subsampled/breed-bird-survey/breed_bird_survey_species.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/weecology/MATSS/75940bdc39883081b6253c7a1e9a7052853f92de/inst/extdata/subsampled/breed-bird-survey/breed_bird_survey_species.csv -------------------------------------------------------------------------------- /inst/extdata/subsampled/mapped-plant-quads-mt/CITATION: -------------------------------------------------------------------------------- 1 | Jed Anderson, Lance Vermeire, and Peter B. Adler. 2011. Fourteen years of mapped, permanent quadrats in a northern mixed prairie, USA. Ecology 92:1703. 
-------------------------------------------------------------------------------- /inst/extdata/subsampled/mapped-plant-quads-mt/mapped_plant_quads_mt_species_list.csv: -------------------------------------------------------------------------------- 1 | species,density,cover,annual,growthform 2 | Achillea,millefolium,230,NA,NA 3 | Achnatherum,nelsonii,NA,52,NA 4 | Agoseris,glauca,120,NA,NA 5 | Allium,textile,1361,NA,1 6 | Amaranthus,sp.,NA,NA,1 7 | Anagallis,minima,NA,NA,37 8 | Andropogon,sp.,NA,31,NA 9 | Androsace,occidentalis,NA,NA,1346 10 | Androsace,sp.,NA,NA,16 11 | ant,hill,NA,1,NA 12 | Antennaria,sp.,NA,2,NA 13 | Aristida,purpurea,NA,27,NA 14 | Artemisia,cana,1825,NA,NA 15 | Artemisia,frigida,NA,926,NA 16 | Artemisia,tridentata,NA,535,NA 17 | Astragalus,sp.,21,NA,7 18 | Atriplex,argentea,NA,NA,304 19 | Atriplex,powellii,NA,NA,1 20 | Atriplex,sp.,NA,NA,154 21 | Borago,officinalis,NA,NA,112 22 | Bouteloua,dactyloides,NA,20684,NA 23 | Bouteloua,gracilis,NA,29465,NA 24 | Brassica,sp.,NA,NA,31 25 | Bromus,tectorum,NA,NA,2060 26 | Calochortus,nuttallii,2,NA,NA 27 | Camelina,microcarpa,NA,NA,540 28 | Campanula,rotundifolia,NA,NA,1 29 | Carduus,sp.,18,NA,NA 30 | Carex,duriuscula,693,NA,NA 31 | Carex,filifolia,NA,3982,NA 32 | Carex,sp.,NA,23,NA 33 | Chaenactis,douglasii,3,NA,NA 34 | Chamaesyce,geyeri,NA,NA,335 35 | Chamaesyce,serpens,NA,NA,2119 36 | Chamaesyce,sp.,NA,NA,1 37 | Chenopodium,album,NA,NA,2487 38 | Chenopodium,leptophyllum,NA,NA,14 39 | Chenopodium,sp.,NA,NA,116 40 | Chorispora,tenella,NA,NA,2 41 | Cirsium,sp.,1,NA,NA 42 | Cleome,serrulata,NA,NA,2 43 | Collinsia,parviflora,NA,NA,28 44 | Collinsia,sp.,NA,NA,40 45 | Collomia,linearis,NA,NA,675 46 | Collomia,sp.,NA,NA,120 47 | Convolvulus,sp.,NA,NA,1 48 | Cryptantha,crassisepala,NA,NA,223 49 | Cryptantha,minima,NA,NA,569 50 | Cryptantha,sp.,NA,NA,214 51 | Cymopterus,sp.,489,NA,NA 52 | depression,NA,25,NA,nonplant 53 | Descurainia,incana,NA,NA,374 54 | Dichanthelium,sp.,NA,10,NA 55 | Draba,nemorosa,NA,NA,1608 
56 | Draba,sp.,NA,NA,71 57 | dung,NA,60,NA,nonplant 58 | Ellisia,nyctelea,NA,NA,25 59 | Elymus,elymoides,NA,48,NA 60 | Erigeron,sp.,5,NA,NA 61 | Escobaria,missouriensis,NA,303,NA 62 | Euphorbia,marginata,NA,NA,5 63 | Euphorbia,sp.,NA,NA,126 64 | Euphorbia,spathulata,NA,NA,87 65 | Evax,prolifera,NA,NA,2628 66 | Gaura,coccinea,4,NA,NA 67 | Gutierrezia,sarothrae,NA,4,NA 68 | Hackelia,floribunda,NA,NA,1634 69 | Hedeoma,hispida,NA,NA,2988 70 | Hedeoma,sp.,NA,NA,88 71 | Helianthus,annuus,NA,NA,12 72 | Helianthus,sp.,NA,NA,6 73 | Hesperostipa,comata,NA,5557,NA 74 | Hordeum,pusillum,NA,NA,2245 75 | Keoleria,macrantha,NA,8,NA 76 | Lactuca,serriola,NA,NA,217 77 | Lactuca,sp.,NA,NA,7 78 | Lappula,occidentalis,NA,NA,8663 79 | Lappula,sp.,NA,NA,2807 80 | Lepidium,densiflorum,NA,NA,8362 81 | Lepidium,sp.,NA,NA,1955 82 | Leptosiphon,harknessii,NA,NA,16 83 | Leucocrinum,montanum,28,NA,NA 84 | Linum,rigidum,15,NA,10 85 | Linum,sulcatum,NA,NA,2 86 | Lithospermum,incisum,6,NA,1 87 | Lomatium,foeniculaceum,229,NA,NA 88 | Lomatium,macrocarpum,8,NA,NA 89 | Lomatium,orientale,314,NA,6 90 | Lomatium,sp.,59,NA,NA 91 | Lupinus,pusillus,NA,NA,69 92 | Lygodesmia,juncea,26,NA,NA 93 | Lygodesmia,sp.,NA,NA,3 94 | Machaeranthera,canescens,5,NA,58 95 | Machaeranthera,tanacetifolia,NA,NA,17 96 | Malva,sp.,74,NA,1 97 | Medicago,lupulina,149,NA,1511 98 | Melilotus,officinalis,1,NA,NA 99 | Mentha,sp.,NA,NA,2 100 | Microsteris,gracilis,NA,NA,852 101 | Microsteris,sp.,NA,NA,24 102 | Monolepis,nuttalliana,NA,NA,1788 103 | Monolepis,sp.,NA,NA,210 104 | Munroa,sp.,NA,NA,3 105 | Munroa,squarrosa,NA,NA,160 106 | Musineon,divaricatum,91,NA,NA 107 | Myosurus,apetalus,NA,NA,1399 108 | Myosurus,sp.,NA,NA,270 109 | Nassella,viridula,NA,688,NA 110 | Oenothera,albicaulis,NA,NA,5 111 | Oenothera,sp.,NA,NA,20 112 | Opuntia,fragilis,NA,2,NA 113 | Opuntia,polyacantha,NA,418,NA 114 | Opuntia,sp.,NA,9,NA 115 | Pascopyrum,smithii,116047,NA,NA 116 | Pediomelum,argophyllum,60,NA,13 117 | Pediomelum,sp.,1,NA,4 118 | 
Penstemon,albidus,2,NA,NA 119 | Penstemon,sp.,8,NA,NA 120 | Phlox,hoodii,NA,240,7 121 | Plantago,aristata,NA,NA,2178 122 | Plantago,elongata,NA,NA,10254 123 | Plantago,patagonica,NA,NA,9225 124 | Plantago,sp.,NA,NA,637 125 | Poa,secunda,NA,16510,NA 126 | Polemonium,sp.,NA,NA,382 127 | Polygonum,buxiforme,NA,NA,1794 128 | Polygonum,douglasii,NA,NA,703 129 | Polygonum,sp.,NA,NA,2808 130 | Ranunculus,sp.,NA,NA,1 131 | Rumex,acetosella,NA,NA,7 132 | Salsola,tragus,NA,NA,49 133 | Schedonnardus,paniculatus,NA,1180,NA 134 | Selenia,aurea,NA,NA,1 135 | Solanum,rostratum,NA,NA,2 136 | Solidago,sp.,14,NA,NA 137 | Sphaeralcea,coccinea,2375,NA,14 138 | Sporobolus,cryptandrus,NA,171,NA 139 | Sporobolus,neglectus,NA,NA,2 140 | Stipa,sp.,NA,385,NA 141 | Taraxacum,officinale,314,NA,NA 142 | Thlaspi,arvense,NA,NA,64 143 | Tragopogon,lamottei,50,NA,NA 144 | Tragopogon,porrifolius,1,NA,NA 145 | Trifolium,sp.,NA,NA,10 146 | unknown,,33,NA,163 147 | unknown,annual,NA,NA,1261 148 | unknown,forb,25,NA,553 149 | unknown,grass,4,NA,NA 150 | unknown,moss,NA,19,NA 151 | unknown,perennial,15,NA,NA 152 | Vicia,americana,391,NA,2 153 | Viola,sp.,84,NA,NA 154 | Vulpia,octoflora,NA,NA,50865 155 | Vulpia,octoflora hirtella,NA,NA,1870 156 | Vulpia,sp.,NA,NA,86 157 | -------------------------------------------------------------------------------- /inst/templates/template-Rprofile: -------------------------------------------------------------------------------- 1 | options( 2 | usethis.full_name = "{{{name}}}", 3 | usethis.description = list( 4 | `Authors@R` = 'person("{{{name}}}", 5 | email = "{{{email}}}", 6 | role = c("aut", "cre"))', 7 | Version = "{{{version}}}" 8 | ) 9 | ) -------------------------------------------------------------------------------- /inst/templates/template-functions.R: -------------------------------------------------------------------------------- 1 | #' @title Calculate Simpson's diversity index 2 | #' 3 | #' @details There are multiple forms of Simpson's index. 
Here, we compute 4 | #' \eqn{1 - D_t}, where \eqn{D_t = \sum_i (p_{t,i})^2}{D_t = sum_i (p_{t,i})^2}, with \eqn{p_{t,i}} being the 5 | #' proportional abundance of species \eqn{i} in time index \eqn{t}. 6 | #' 7 | #' This calculation produces a vector of values, one for each time point. 8 | #' 9 | #' @param dat the dataset: a list with an `abundance` table (one row per time point) and a `metadata` list containing the logical `is_community` 10 | #' 11 | #' @return A vector of the diversity index calculations, one per time point, or `NA` (after a message) when `dat$metadata$is_community` is `FALSE`. 12 | #' 13 | #' @examples 14 | #' \dontrun{ 15 | #' sgs_data <- MATSS::get_sgs_data() 16 | #' compute_simpson_index(sgs_data) 17 | #' } 18 | #' 19 | #' @export 20 | #' 21 | compute_simpson_index <- function(dat) 22 | { 23 | if (!dat$metadata$is_community) 24 | { 25 | message("Skipping...the provided data is not from a community.") 26 | return(NA) 27 | } 28 | 29 | total <- rowSums(dat$abundance) 30 | proportions <- sweep(dat$abundance, 1, total, "/") 31 | proportions_squared <- proportions * proportions 32 | lambda <- apply(proportions_squared, 1, sum, na.rm = TRUE) 33 | return(1 - lambda) 34 | } 35 | 36 | #' @rdname compute_linear_trend 37 | #' @description `compute_linear_trend_ts()` fits a simple linear model to its 38 | #' only input, a time series (represented by a numeric vector of values), and 39 | #' returns the fitted coefficients (the intercept and the slope against the time index `t`) 40 | compute_linear_trend_ts <- function(ts) 41 | { 42 | lm_fit <- stats::lm(y ~ t, data = data.frame(y = ts, t = seq_along(ts))) 43 | stats::coefficients(lm_fit) 44 | } 45 | 46 | #' @title Calculate linear trend coefficients 47 | #' @aliases compute_linear_trend_ts 48 | #' 49 | #' @description `compute_linear_trend()` applies `compute_linear_trend_ts()` to 50 | #' each abundance time series in its input data. 51 | #' 52 | #' @details In the implementation of `compute_linear_trend()`, we use the `MATSS` 53 | #' function \code{\link[MATSS]{analysis_wrapper}} to construct the version of 54 | #' `compute_linear_trend_ts()` that can be applied to a whole dataset.
55 | #' 56 | #' @param dataset the dataset 57 | #' 58 | #' @return A tibble, containing the `results`, dataset `metadata`, name of the 59 | #' input dataset in `dataset`, name of the `method` applied to each time 60 | #' series (in this case, "compute_linear_trend_ts"), and additional `args`. 61 | #' 62 | #' @examples 63 | #' \dontrun{ 64 | #' sgs_data <- MATSS::get_sgs_data() 65 | #' compute_linear_trend(sgs_data) 66 | #' } 67 | #' 68 | #' @export 69 | #' 70 | compute_linear_trend <- MATSS::analysis_wrapper(compute_linear_trend_ts) 71 | -------------------------------------------------------------------------------- /inst/templates/template-pipeline.R: -------------------------------------------------------------------------------- 1 | library(MATSS) 2 | library(drake) 3 | library({{{package}}}) 4 | 5 | ## set working directory to project folder 6 | setwd(here::here()) 7 | 8 | ## include the functions in packages as dependencies 9 | # - this is to help Drake recognize that targets need to be rebuilt if the 10 | # functions have changed 11 | expose_imports(MATSS) 12 | expose_imports({{{package}}}) 13 | 14 | ## download the datasets 15 | # - toggle the following variable to use downloaded datasets 16 | use_downloaded_datasets <- FALSE 17 | if (use_downloaded_datasets) 18 | { 19 | download_datasets() 20 | } 21 | 22 | ## a Drake plan for creating the datasets 23 | # - these are the default options, which don't include downloaded datasets 24 | datasets <- build_datasets_plan(include_retriever_data = use_downloaded_datasets, 25 | include_bbs_data = use_downloaded_datasets, 26 | include_gpdd_data = use_downloaded_datasets, 27 | include_biotime_data = use_downloaded_datasets) 28 | 29 | ## a Drake plan that defines the methods 30 | methods <- drake_plan( 31 | simpson_index = {{{package}}}::compute_simpson_index, 32 | linear_trend = {{{package}}}::compute_linear_trend 33 | ) 34 | 35 | ## a Drake plan for the analyses (each combination of method x dataset) 36 | analyses <-
build_analyses_plan(methods, datasets) 37 | 38 | ## a Drake plan to collect citation info from datasets 39 | references <- build_references_plan(datasets) 40 | 41 | ## a Drake plan for the Rmarkdown report 42 | # - we use `knitr_in()` and `file_out()` to mark the report source and its rendered output as files for drake to track 43 | reports <- drake_plan( 44 | report = rmarkdown::render( 45 | knitr_in("analysis/report.Rmd"), 46 | output_file = file_out("analysis/report.md") 47 | ) 48 | ) 49 | 50 | ## The full workflow 51 | workflow <- bind_plans( 52 | datasets, 53 | methods, 54 | analyses, 55 | references, 56 | reports 57 | ) 58 | 59 | ## Visualize how the targets depend on one another 60 | if (interactive()) 61 | { 62 | if (require(networkD3, quietly = TRUE)) 63 | sankey_drake_graph(workflow, build_times = "none", targets_only = TRUE) 64 | if (require(visNetwork, quietly = TRUE)) 65 | vis_drake_graph(workflow, build_times = "none", targets_only = TRUE) 66 | } 67 | 68 | ## Run the workflow 69 | make(workflow) 70 | -------------------------------------------------------------------------------- /inst/templates/template-references.bib: -------------------------------------------------------------------------------- 1 | %% This BibTeX bibliography file was created by MATSS. 2 | %% https://weecology.github.io/MATSS/ 3 | 4 | {{{bibentries}}} 5 | -------------------------------------------------------------------------------- /inst/templates/template-report.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Summary Report" 3 | author: "{{{author}}}" 4 | date: "`r Sys.Date()`" 5 | output: github_document 6 | bibliography: references.bib 7 | --- 8 | 9 | ```{r setup, include = FALSE} 10 | knitr::opts_chunk$set(echo = TRUE) 11 | 12 | library(MATSS) 13 | library(dplyr) 14 | library(tidyr) 15 | library(ggplot2) 16 | library(drake) 17 | library({{{package}}}) 18 | ``` 19 | 20 | This report was compiled and generated by the `MATSS` R package [@{{{matss_ref_key}}}].
21 | 22 | ## Read in the results 23 | 24 | As a result of running the Drake pipeline, we have objects for each of the "targets" in the Drake cache. First, let's examine what the names of those targets are: 25 | 26 | ```{r} 27 | cached() 28 | ``` 29 | 30 | Note that we have two sets of results, one for running the `compute_simpson_index()` function on each dataset, and one for running the `compute_linear_trend()` function on each dataset (producing results for each time series in each dataset). 31 | 32 | We can use `loadd()` to load specific targets (or all of them) into the R environment; similar to the base `load()` function for loading in .Rdata files. 33 | 34 | Alternatively, we can use `readd()` to return a target directly; similar to the base `readRDS()` function for reading in .RDS files. 35 | 36 | ### Simpson's Index 37 | 38 | First, let's look at the `compute_simpson_index()` results: 39 | 40 | ```{r} 41 | results_simpson_index <- readd("results_simpson_index") 42 | results_simpson_index 43 | ``` 44 | 45 | The object is a tibble with the output of the calculations stored in the `results` column: 46 | * Because the output of `compute_simpson_index()` is a numeric vector corresponding to Simpson's index, computed at each time step, these vectors are the elements of the `results` list-column. 47 | * Related information about the dataset and additional args are stored in the other columns of this tibble. 48 | 49 | ### Linear Trends 50 | 51 | Now, let's look at the `compute_linear_trend()` results: 52 | 53 | ```{r} 54 | results_linear_trend <- readd("results_linear_trend") 55 | results_linear_trend 56 | ``` 57 | 58 | Again, the object is a tibble with a similar structure as previously, with the `results` list-column containing the tibble outputs from `compute_linear_trend`. 59 | 60 | ## Processing Results 61 | 62 | We encourage the use of [`Tidyverse`](https://www.tidyverse.org/) for extracting and handling the output. 
In particular, there are some useful examples of dealing with complex output structures in list-columns described in https://github.com/jennybc/row-oriented-workflows. 63 | 64 | ### Plot time series of Simpson's Index 65 | 66 | Our goal is to plot a time series of Simpson's Index for each separate dataset. 67 | 68 | This suggests the following processing procedure: 69 | * extract the values for each dataset, to make a single long-format data.frame 70 | * construct a "time" variable, which will serve as the x-axis in plotting 71 | 72 | ```{r} 73 | to_plot <- results_simpson_index %>% 74 | select(dataset, results) %>% 75 | unnest(cols = results) %>% 76 | rename(value = results) %>% 77 | group_by(dataset) %>% 78 | mutate(t = row_number()) %>% 79 | ungroup() 80 | ``` 81 | 82 | Plotting is mostly straightforward. Note that we allow the x-axis scale to vary for each dataset separately, because the number of time points varies across datasets, and having them all be aligned across facets would give the (false) impression of synchronicity. 83 | 84 | ```{r} 85 | ggplot(to_plot, 86 | aes(x = t, y = value)) + 87 | geom_line() + 88 | facet_wrap(~dataset, scales = "free_x") + 89 | theme_bw() + 90 | labs(x = "Time", y = "Simpson's Index (1-D)") 91 | ``` 92 | 93 | ### Plot distribution of species trends 94 | 95 | Our goal is to plot the distribution of species trends over time in each dataset. 96 | 97 | Processing is a bit simpler, since we really only need to subset the results that we want: 98 | 99 | ```{r} 100 | to_plot <- results_linear_trend %>% 101 | select(dataset, results) %>% 102 | unnest(cols = results) %>% 103 | select(dataset, id, t) 104 | ``` 105 | 106 | When plotting, we note that the range in slopes varies a lot, and depends on abundance scaling, so we allow the facets to have different y-axis scales, making sure to add in the 0 line, and jittering the points for the raw data, too. 
107 | 108 | ```{r} 109 | ggplot(to_plot, 110 | aes(x = 0, y = t)) + 111 | geom_violin() + 112 | geom_point(position = position_jitter(width = 0.1, height = 0), shape = 5) + 113 | geom_hline(mapping = aes(yintercept = 0), size = 1, lty = 2, color = "red") + 114 | facet_wrap(~dataset, scales = "free_y") + 115 | theme_bw() + 116 | labs(x = "Density", y = "Slope of Population Trendline") 117 | ``` 118 | 119 | ## Dataset Citations 120 | 121 | ```{r, echo = FALSE, results = "asis"} 122 | cat(paste("1.", readd(citations)), sep = "\n") 123 | ``` 124 | 125 | ## References 126 | -------------------------------------------------------------------------------- /man/MATSS.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/MATSS-package.R 3 | \docType{package} 4 | \name{MATSS} 5 | \alias{MATSS} 6 | \title{Macroecological Analyses of Time Series Structure} 7 | \description{ 8 | Support for macroecological analyses of time series. The intent 9 | of the package is to enable end users to run analyses on a collection of 10 | population and community time series data. Functions are provided to 11 | download and import datasets, produce reproducible workflows using the 12 | \code{drake} package, and generate research compendia with code and 13 | reports.
14 | } 15 | \keyword{package} 16 | -------------------------------------------------------------------------------- /man/analysis_wrapper.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/analysis_wrapper.R 3 | \name{analysis_wrapper} 4 | \alias{analysis_wrapper} 5 | \title{Create a function that replicates an analysis for all time series in a 6 | dataset} 7 | \usage{ 8 | analysis_wrapper(fun) 9 | } 10 | \arguments{ 11 | \item{fun}{the analysis function} 12 | } 13 | \value{ 14 | a function that takes in a \code{dataset} and optional arguments, and 15 | returns a data.frame or tibble with the combined results, and an "id" 16 | column with the name of the species 17 | } 18 | \description{ 19 | This wrapper takes as input, some analysis function, \code{fun}, and 20 | returns a function that will run that analysis function on all the time 21 | series in a dataset. Some post-processing attempts to handle merging of 22 | the output in a sensible way. 
23 | } 24 | \examples{ 25 | \dontrun{ 26 | sgs_data <- MATSS::get_sgs_data() 27 | summarize_dataset <- analysis_wrapper(ts_summary) 28 | summarize_dataset(sgs_data) 29 | } 30 | 31 | } 32 | -------------------------------------------------------------------------------- /man/append_data_citations.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils-retriever-data.R 3 | \name{append_data_citations} 4 | \alias{append_data_citations} 5 | \title{Generate a vector of citations.} 6 | \usage{ 7 | append_data_citations(citations = NULL, citation_files) 8 | } 9 | \arguments{ 10 | \item{citations}{a vector of strings containing existing citations to append} 11 | 12 | \item{citation_files}{a vector of filepaths to the citation files} 13 | } 14 | \value{ 15 | a vector of strings containing the citations 16 | } 17 | \description{ 18 | Given an existing vector of citations (or the NULL default), 19 | add the citations that are specified in the paths of \code{citation_files} 20 | } 21 | -------------------------------------------------------------------------------- /man/append_retriever_citation.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils-retriever-data.R 3 | \name{append_retriever_citation} 4 | \alias{append_retriever_citation} 5 | \title{Append citation info to a formatted dataset} 6 | \usage{ 7 | append_retriever_citation(formatted_data, path) 8 | } 9 | \arguments{ 10 | \item{formatted_data}{a dataset that already follows the \code{MATSS} standard} 11 | 12 | \item{path}{where to load the raw data files from} 13 | } 14 | \value{ 15 | the same dataset, with the citation appended to \code{metadata} 16 | } 17 | \description{ 18 | Given an existing formatted dataset, and the path to the 19 | downloaded dataset, from retriever, and
via \code{import_retriever_data()}, 20 | read in the citation info and add it to the metadata for the dataset 21 | } 22 | -------------------------------------------------------------------------------- /man/build_analyses_plan.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plan_analyses.R 3 | \name{build_analyses_plan} 4 | \alias{build_analyses_plan} 5 | \title{Generate a Drake Plan for Analyses} 6 | \usage{ 7 | build_analyses_plan(methods, datasets, ...) 8 | } 9 | \arguments{ 10 | \item{methods}{a drake plan listing the methods to be applied (it is 11 | expected that each method is a function that takes in a dataset object)} 12 | 13 | \item{datasets}{a drake plan listing the datasets to be analyzed} 14 | 15 | \item{...}{arguments to be passed to \code{drake::\link[drake]{drake_plan}}} 16 | } 17 | \value{ 18 | a drake plan (i.e. a tibble) specifying the targets and commands 19 | for all the analyses and the collected results (grouping the outputs from 20 | each method into a single list) 21 | } 22 | \description{ 23 | Given M methods to be applied to N datasets, make a drake plan 24 | that contains an \code{analysis} target corresponding to each M x N 25 | combination, as well as M \code{results} targets corresponding to a list of 26 | the \code{analysis} outputs for each of the M methods.
27 | } 28 | -------------------------------------------------------------------------------- /man/build_bbs_datasets_plan.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plan_data.R 3 | \name{build_bbs_datasets_plan} 4 | \alias{build_bbs_datasets_plan} 5 | \title{Generate a Drake Plan for BBS Datasets} 6 | \usage{ 7 | build_bbs_datasets_plan(path = get_default_data_path(), data_subset = NULL) 8 | } 9 | \arguments{ 10 | \item{path}{where to get the downloaded retriever datasets} 11 | 12 | \item{data_subset}{optional, a subset of the BBS communities to use 13 | (to speed up development). As c(1:X)} 14 | } 15 | \value{ 16 | a drake plan (i.e. a tibble) specifying the targets and commands 17 | for gathering BBS datasets 18 | } 19 | \description{ 20 | Generate a Drake Plan for BBS Datasets 21 | } 22 | -------------------------------------------------------------------------------- /man/build_biotime_datasets_plan.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plan_data.R 3 | \name{build_biotime_datasets_plan} 4 | \alias{build_biotime_datasets_plan} 5 | \title{Generate a Drake Plan for Biotime Datasets} 6 | \usage{ 7 | build_biotime_datasets_plan( 8 | path = get_default_data_path(), 9 | data_subset = NULL, 10 | do_processing = TRUE, 11 | force_reprocessing = FALSE 12 | ) 13 | } 14 | \arguments{ 15 | \item{path}{where to get the downloaded retriever datasets} 16 | 17 | \item{data_subset}{optional, a subset of the Biotime study_ids to use 18 | (to speed up development). As c(1:X)} 19 | 20 | \item{do_processing}{whether to process the datasets if necessary} 21 | 22 | \item{force_reprocessing}{whether to force re-processing of datasets} 23 | } 24 | \value{ 25 | a drake plan (i.e. 
a tibble) specifying the targets and commands 26 | for gathering Biotime datasets 27 | } 28 | \description{ 29 | Generate a Drake Plan for Biotime Datasets 30 | } 31 | -------------------------------------------------------------------------------- /man/build_datasets_plan.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plan_data.R 3 | \name{build_datasets_plan} 4 | \alias{build_datasets_plan} 5 | \title{Generate a Drake Plan for Datasets} 6 | \usage{ 7 | build_datasets_plan( 8 | path = get_default_data_path(), 9 | include_retriever_data = FALSE, 10 | include_bbs_data = FALSE, 11 | bbs_subset = NULL, 12 | include_gpdd_data = FALSE, 13 | include_biotime_data = FALSE, 14 | biotime_subset = NULL, 15 | biotime_process = TRUE 16 | ) 17 | } 18 | \arguments{ 19 | \item{path}{where to get the downloaded retriever datasets} 20 | 21 | \item{include_retriever_data}{whether to include retriever-downloaded data} 22 | 23 | \item{include_bbs_data}{whether to include BBS data} 24 | 25 | \item{bbs_subset}{optional, a subset of the BBS communities to use 26 | (to speed up development). As c(1:X)} 27 | 28 | \item{include_gpdd_data}{whether to include gpdd data} 29 | 30 | \item{include_biotime_data}{whether to include biotime data} 31 | 32 | \item{biotime_subset}{optional, a subset of the biotime study_ids to use 33 | (to speed up development). As c(1:X)} 34 | 35 | \item{biotime_process}{whether to process the biotime datasets when building 36 | the plan} 37 | } 38 | \value{ 39 | a drake plan (i.e. 
a tibble) specifying the targets and commands 40 | for gathering datasets 41 | } 42 | \description{ 43 | Generate a Drake Plan for Datasets 44 | } 45 | -------------------------------------------------------------------------------- /man/build_gpdd_datasets_plan.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plan_data.R 3 | \name{build_gpdd_datasets_plan} 4 | \alias{build_gpdd_datasets_plan} 5 | \title{Generate a Drake Plan for GPDD Datasets} 6 | \usage{ 7 | build_gpdd_datasets_plan() 8 | } 9 | \value{ 10 | a drake plan (i.e. a tibble) specifying the targets and commands 11 | for gathering GPDD datasets 12 | } 13 | \description{ 14 | Generate a Drake Plan for GPDD Datasets 15 | } 16 | -------------------------------------------------------------------------------- /man/build_references_plan.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plan_references.R 3 | \name{build_references_plan} 4 | \alias{build_references_plan} 5 | \title{Generate a Drake Plan for dataset references} 6 | \usage{ 7 | build_references_plan(datasets, ...) 8 | } 9 | \arguments{ 10 | \item{datasets}{a drake plan listing the datasets to be analyzed} 11 | 12 | \item{...}{arguments to be passed to \code{drake::\link[drake]{drake_plan}}} 13 | } 14 | \value{ 15 | a drake plan (i.e. a tibble) specifying the targets and commands 16 | for all the references and the combined vector 17 | } 18 | \description{ 19 | Given N datasets, extract the citation from each one, and then 20 | combine them into a single vector and remove duplicates. 
21 | } 22 | -------------------------------------------------------------------------------- /man/build_retriever_datasets_plan.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plan_data.R 3 | \name{build_retriever_datasets_plan} 4 | \alias{build_retriever_datasets_plan} 5 | \title{Generate a Drake Plan for retriever datasets} 6 | \usage{ 7 | build_retriever_datasets_plan(path = get_default_data_path()) 8 | } 9 | \arguments{ 10 | \item{path}{where to get the downloaded retriever datasets} 11 | } 12 | \value{ 13 | a drake plan (i.e. a tibble) specifying the targets and commands 14 | for retriever downloaded datasets 15 | } 16 | \description{ 17 | Generate a Drake Plan for retriever datasets 18 | } 19 | -------------------------------------------------------------------------------- /man/check_data_format.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils-data-checks.R 3 | \name{check_data_format} 4 | \alias{check_data_format} 5 | \title{Check data format} 6 | \usage{ 7 | check_data_format(data) 8 | } 9 | \arguments{ 10 | \item{data}{dataset to check} 11 | } 12 | \value{ 13 | TRUE or FALSE 14 | } 15 | \description{ 16 | Check whether its input matches the specified data format in 17 | the \code{data-formats.Rmd} vignette 18 | } 19 | -------------------------------------------------------------------------------- /man/check_default_data_path.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils-retriever-data.R 3 | \name{check_default_data_path} 4 | \alias{check_default_data_path} 5 | \title{Check if a default data path is set} 6 | \usage{ 7 | check_default_data_path() 8 | } 9 | \description{ 10 | See 
\code{portalr::\link[portalr]{check_default_data_path}} for details. 11 | } 12 | -------------------------------------------------------------------------------- /man/collect_analyses.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plan_analyses.R 3 | \name{collect_analyses} 4 | \alias{collect_analyses} 5 | \title{Collect Analyses Together into a Tibble} 6 | \usage{ 7 | collect_analyses(list_of_results) 8 | } 9 | \arguments{ 10 | \item{list_of_results}{the list of objects} 11 | } 12 | \value{ 13 | a drake plan (i.e. a tibble) specifying the targets and commands 14 | for all the analyses and the collected results (grouping the outputs from 15 | each method into a single list) 16 | } 17 | \description{ 18 | This is a helper function to accompany \code{\link{build_analyses_plan}}: it is 19 | necessary to collect all of the results that are produced by the drake 20 | plan. 21 | 22 | This function strives to be intelligent about the format of the individual 23 | results. For output from \code{\link{analysis_wrapper}} that already has information 24 | about the method and dataset, we can just combine them. Otherwise, we 25 | parse the name of the object for the method and the dataset, to format 26 | into a structure similar to the output from \code{\link{analysis_wrapper}}.
27 | } 28 | -------------------------------------------------------------------------------- /man/combine_bbs_subspecies.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/get_data_bbs.R 3 | \name{combine_bbs_subspecies} 4 | \alias{combine_bbs_subspecies} 5 | \title{Combine subspecies into their common species} 6 | \usage{ 7 | combine_bbs_subspecies(bbs_data_table, species_table) 8 | } 9 | \arguments{ 10 | \item{bbs_data_table}{main bbs data table} 11 | 12 | \item{species_table}{table of species for BBS} 13 | } 14 | \description{ 15 | Modified from \url{https://github.com/weecology/bbs-forecasting} 16 | } 17 | -------------------------------------------------------------------------------- /man/correct_biotime_dataset.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/get_data_biotime.R 3 | \name{correct_biotime_dataset} 4 | \alias{correct_biotime_dataset} 5 | \title{Correct and clean specific datasets} 6 | \usage{ 7 | correct_biotime_dataset(raw_data, dataset_id = 10) 8 | } 9 | \arguments{ 10 | \item{raw_data}{The raw data for a specific dataset_id} 11 | 12 | \item{dataset_id}{the study_id} 13 | } 14 | \value{ 15 | a corrected version of \code{raw_data} 16 | } 17 | \description{ 18 | Correct and clean specific datasets 19 | } 20 | \details{ 21 | For \code{dataset_id = 54}, it appears that day and month were sometimes 22 | interchanged. Since there did not seem to be measurements after August in 23 | any given year otherwise, we use that to filter and swap \code{day} and \code{month}. 
24 | } 25 | -------------------------------------------------------------------------------- /man/create_MATSS_compendium.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/create_MATSS_compendium.R 3 | \name{create_MATSS_compendium} 4 | \alias{create_MATSS_compendium} 5 | \title{Create a research compendium for MATSS} 6 | \usage{ 7 | create_MATSS_compendium( 8 | path, 9 | name = usethis:::find_name(), 10 | fields = NULL, 11 | rstudio = rstudioapi::isAvailable(), 12 | open = interactive(), 13 | DEPLOY = FALSE 14 | ) 15 | } 16 | \arguments{ 17 | \item{path}{A path. If it exists, it is used. If it does not exist, it is 18 | created, provided that the parent path exists.} 19 | 20 | \item{name}{author name for the package} 21 | 22 | \item{fields}{A named list of fields to add to \code{DESCRIPTION}, potentially 23 | overriding default values. See \code{\link[usethis:use_description]{use_description()}} for how you can set 24 | personalized defaults using package options} 25 | 26 | \item{rstudio}{If \code{TRUE}, calls \code{\link[usethis:use_rstudio]{use_rstudio()}} to make the new package or 27 | project into an \href{https://support.rstudio.com/hc/en-us/articles/200526207-Using-Projects}{RStudio Project}. 28 | If \code{FALSE} and a non-package project, a sentinel \code{.here} file is placed so 29 | that the directory can be recognized as a project by the 30 | \href{https://here.r-lib.org}{here} or 31 | \href{https://rprojroot.r-lib.org}{rprojroot} packages.} 32 | 33 | \item{open}{If \code{TRUE}, \link[usethis:proj_activate]{activates} the new project: 34 | \itemize{ 35 | \item If RStudio desktop, the package is opened in a new session. 36 | \item If on RStudio server, the current RStudio project is activated. 37 | \item Otherwise, the working directory and active project is changed. 
38 | }} 39 | 40 | \item{DEPLOY}{if \code{TRUE}, add preamble text to the readme} 41 | } 42 | \value{ 43 | Path to the newly created package, invisibly. 44 | } 45 | \description{ 46 | Create an R package, using \code{\link[usethis]{use_package}}, 47 | then perform the following additional actions: 48 | \itemize{ 49 | \item add various package dependencies, including "MATSS" 50 | \item add template analysis and pipeline files 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /man/dragons.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/MATSS-package.R 3 | \docType{data} 4 | \name{dragons} 5 | \alias{dragons} 6 | \title{dragons dataset} 7 | \format{ 8 | A list with 3 elements: 9 | \describe{ 10 | \item{abundance}{a data.frame with abundances for 3 dragons} 11 | \item{covariates}{a data.frame with times of observations and effort and precip data} 12 | \item{metadata}{a list with: 13 | \code{timename} - the name of the time column in covariates, 14 | \code{period} - the gap between successive observations, 15 | \code{authors} - the authors of the dataset, 16 | \code{species_table} - information about the species observed} 17 | } 18 | } 19 | \usage{ 20 | dragons 21 | } 22 | \description{ 23 | A dataset containing example timeseries for some dragons. 
24 | } 25 | \keyword{datasets} 26 | -------------------------------------------------------------------------------- /man/filter_bbs_species.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/get_data_bbs.R 3 | \name{filter_bbs_species} 4 | \alias{filter_bbs_species} 5 | \title{Filter poorly sampled BBS species} 6 | \usage{ 7 | filter_bbs_species(bbs_data_table, species_table) 8 | } 9 | \arguments{ 10 | \item{bbs_data_table}{main bbs data table} 11 | 12 | \item{species_table}{table of species for BBS} 13 | } 14 | \value{ 15 | dataframe, filtered version of initial dataframe 16 | } 17 | \description{ 18 | Modified from \url{https://github.com/weecology/bbs-forecasting} 19 | 20 | Removes waterbirds, shorebirds, owls, kingfishers, nightjars, 21 | dippers. These species are poorly sampled due to their aquatic or 22 | nocturnal nature. Also removes taxa that were either partially unidentified 23 | (e.g. "sp.") or were considered hybrids (e.g. "A x B") or were listed as more 24 | than one species (e.g.
"A / B") 25 | } 26 | -------------------------------------------------------------------------------- /man/filter_bbs_ts.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/get_data_bbs.R 3 | \name{filter_bbs_ts} 4 | \alias{filter_bbs_ts} 5 | \title{Filter BBS to specified time series period and number of samples} 6 | \usage{ 7 | filter_bbs_ts(bbs_data, start_yr, end_yr, min_num_yrs) 8 | } 9 | \arguments{ 10 | \item{bbs_data}{dataframe that contains BBS site_id and year columns} 11 | 12 | \item{start_yr}{num first year of time-series} 13 | 14 | \item{end_yr}{num last year of time-series} 15 | 16 | \item{min_num_yrs}{num minimum number of years of data between start_yr & end_yr} 17 | } 18 | \value{ 19 | dataframe with original data and associated environmental data 20 | } 21 | \description{ 22 | Modified from \url{https://github.com/weecology/bbs-forecasting} 23 | and \url{https://github.com/weecology/MATSS-community-change} 24 | } 25 | -------------------------------------------------------------------------------- /man/get_bbs_route_region_data.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/get_data_bbs.R 3 | \name{get_bbs_route_region_data} 4 | \alias{get_bbs_route_region_data} 5 | \title{Get cleaned BBS data} 6 | \usage{ 7 | get_bbs_route_region_data( 8 | path = file.path(get_default_data_path(), "breed-bird-survey-prepped", 9 | paste0("route", route, "region", region, ".RDS")), 10 | route = 1, 11 | region = 11 12 | ) 13 | } 14 | \arguments{ 15 | \item{path}{where to load the raw data files from} 16 | 17 | \item{route}{Route number} 18 | 19 | \item{region}{Region number} 20 | } 21 | \value{ 22 | list of abundance, covariates, and metadata 23 | } 24 | \description{ 25 | Gets prepped BBS data (as a list of abundance, 
covariates, and 26 | metadata) for a specified route and region. First run \code{prepare_bbs_data} 27 | to create these files from the raw BBS data tables. If the files are not 28 | found, then \code{NULL} is returned. 29 | } 30 | -------------------------------------------------------------------------------- /man/get_biotime_data.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/get_data_biotime.R 3 | \name{get_biotime_data} 4 | \alias{get_biotime_data} 5 | \title{Get a cleaned BioTime dataset} 6 | \usage{ 7 | get_biotime_data( 8 | path = get_default_data_path(), 9 | dataset_id = 321, 10 | raw_path = file.path(path, "biotime-prepped", paste0("dataset", dataset_id, ".RDS")) 11 | ) 12 | } 13 | \arguments{ 14 | \item{path}{where to load the raw data files from} 15 | 16 | \item{dataset_id}{the dataset index} 17 | 18 | \item{raw_path}{full path to the raw dataset file} 19 | } 20 | \value{ 21 | list of abundance, covariates, and metadata 22 | } 23 | \description{ 24 | Gets a prepped BioTime dataset (as a list of abundance, 25 | covariates, and metadata) for a specified dataset_id. First run 26 | \code{\link{prepare_biotime_data}} to create these files from the raw 27 | BioTime database. If the files are not found, then \code{NULL} is returned. 
28 | Original data found here: \url{http://biotime.st-andrews.ac.uk/home.php} 29 | } 30 | \examples{ 31 | \dontrun{ 32 | get_biotime_data(dataset_id = 321) 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /man/get_biotime_dataset_ids.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/get_data_biotime.R 3 | \name{get_biotime_dataset_ids} 4 | \alias{get_biotime_dataset_ids} 5 | \alias{prepare_biotime_data} 6 | \title{Return BioTime dataset ids for individual loading} 7 | \usage{ 8 | get_biotime_dataset_ids( 9 | path = get_default_data_path(), 10 | data_subset = NULL, 11 | do_processing = FALSE, 12 | force_reprocessing = FALSE 13 | ) 14 | 15 | prepare_biotime_data(path = get_default_data_path(), data_subset = NULL) 16 | } 17 | \arguments{ 18 | \item{path}{where to get the downloaded retriever datasets} 19 | 20 | \item{data_subset}{optional, a subset of the Biotime study_ids to use 21 | (to speed up development). As c(1:X)} 22 | 23 | \item{do_processing}{whether to process the datasets if necessary} 24 | 25 | \item{force_reprocessing}{whether to force re-processing of datasets} 26 | } 27 | \value{ 28 | vector of dataset ids in the processed set of files 29 | 30 | vector of dataset ids in the processed set of files 31 | } 32 | \description{ 33 | Retrieve the dataset ids from processed BioTime files. If the 34 | processed files do not exist, and \code{do_processing == TRUE}, then we also 35 | load the raw BioTime database and process the necessary datasets, too.
36 | 37 | \code{prepare_biotime_data} is a thin wrapper around 38 | \code{get_biotime_dataset_ids()} for processing BioTime datasets. 39 | } 40 | \examples{ 41 | \dontrun{ 42 | get_biotime_dataset_ids() 43 | } 44 | \dontrun{ 45 | prepare_biotime_data() 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /man/get_cowley_lizards.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/get_data_cowley.R 3 | \name{get_cowley_lizards} 4 | \alias{get_cowley_lizards} 5 | \title{Read in the cowley lizard community data from a txt file 6 | 7 | Import cowley lizard data from data files} 8 | \usage{ 9 | get_cowley_lizards() 10 | } 11 | \value{ 12 | list of two dataframes (one with abundance data, the other with 13 | covariate data) and one list of metadata. 14 | } 15 | \description{ 16 | Read in the cowley lizard community data from a txt file 17 | 18 | Import cowley lizard data from data files 19 | } 20 | -------------------------------------------------------------------------------- /man/get_cowley_snakes.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/get_data_cowley.R 3 | \name{get_cowley_snakes} 4 | \alias{get_cowley_snakes} 5 | \title{Read in the cowley snake community data from a txt file 6 | 7 | Import cowley snake data from data files} 8 | \usage{ 9 | get_cowley_snakes() 10 | } 11 | \value{ 12 | list of two dataframes (one with abundance data, the other with 13 | covariate data) and one list of metadata.
14 | } 15 | \description{ 16 | Read in the cowley snake community data from a txt file 17 | 18 | Import cowley snake data from data files 19 | } 20 | -------------------------------------------------------------------------------- /man/get_default_data_path.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils-retriever-data.R 3 | \name{get_default_data_path} 4 | \alias{get_default_data_path} 5 | \title{What is the default data path?} 6 | \usage{ 7 | get_default_data_path(fallback = "~") 8 | } 9 | \arguments{ 10 | \item{fallback}{the default value to use if the setting is missing} 11 | } 12 | \description{ 13 | See \code{portalr::\link[portalr]{get_default_data_path}} for details. 14 | } 15 | -------------------------------------------------------------------------------- /man/get_effort_from_data.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils-data-checks.R 3 | \name{get_effort_from_data} 4 | \alias{get_effort_from_data} 5 | \title{extract the effort from a formatted data structure} 6 | \usage{ 7 | get_effort_from_data(data) 8 | } 9 | \arguments{ 10 | \item{data}{a formatted data structure} 11 | } 12 | \value{ 13 | a \code{numeric} vector containing the effort 14 | } 15 | \description{ 16 | extract the effort from a formatted data structure 17 | } 18 | -------------------------------------------------------------------------------- /man/get_gpdd_data.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/get_data_gpdd.R 3 | \name{get_gpdd_data} 4 | \alias{get_gpdd_data} 5 | \title{Create GPDD population time-series data} 6 | \usage{ 7 | get_gpdd_data( 8 | path = get_default_data_path(), 9 | location_id 
= 83, 10 | timeperiod_id = 408, 11 | min_num_yrs = 10 12 | ) 13 | } 14 | \arguments{ 15 | \item{path}{where to load the raw data files from} 16 | 17 | \item{location_id}{Location code of data to return} 18 | 19 | \item{timeperiod_id}{Sampling timescale code of data to return 20 | (some datasets provide at more than one scale)} 21 | 22 | \item{min_num_yrs}{minimum number of years of data} 23 | } 24 | \value{ 25 | list of abundance, covariates, and metadata 26 | } 27 | \description{ 28 | Selects sites containing at least \code{min_num_yrs} of data 29 | samples during that period. 30 | } 31 | \examples{ 32 | \dontrun{ 33 | get_gpdd_data(location_id=83, timeperiod_id=408) 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /man/get_jornada_data.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/get_data_jornada.R 3 | \name{get_jornada_data} 4 | \alias{get_jornada_data} 5 | \title{get Jornada rodent data} 6 | \usage{ 7 | get_jornada_data( 8 | path = file.path(get_default_data_path(), "jornada-lter-rodent") 9 | ) 10 | } 11 | \arguments{ 12 | \item{path}{where to load the raw data files from} 13 | } 14 | \value{ 15 | list of abundance, covariates, and metadata 16 | } 17 | \description{ 18 | get Jornada rodent data 19 | } 20 | -------------------------------------------------------------------------------- /man/get_karoo_data.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/get_sanparks_data.R 3 | \name{get_karoo_data} 4 | \alias{get_karoo_data} 5 | \title{get Karoo ungulate data 6 | 7 | Import and clean Karoo abundance from data files} 8 | \usage{ 9 | get_karoo_data() 10 | } 11 | \value{ 12 | list of two dataframes (one with abundance data, the other with 13 | covariate data), and one list
of metadata. 14 | } 15 | \description{ 16 | get Karoo ungulate data 17 | 18 | Import and clean Karoo abundance from data files 19 | } 20 | -------------------------------------------------------------------------------- /man/get_kruger_data.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/get_sanparks_data.R 3 | \name{get_kruger_data} 4 | \alias{get_kruger_data} 5 | \title{get Kruger National Park ungulate data 6 | 7 | Import and clean Kruger National Park abundance from data files} 8 | \usage{ 9 | get_kruger_data() 10 | } 11 | \value{ 12 | list of two dataframes (one with abundance data, the other with 13 | covariate data), and one list of metadata. 14 | } 15 | \description{ 16 | get Kruger National Park ungulate data 17 | 18 | Import and clean Kruger National Park abundance from data files 19 | } 20 | -------------------------------------------------------------------------------- /man/get_maizuru_data.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/get_data_maizuru.R 3 | \name{get_maizuru_data} 4 | \alias{get_maizuru_data} 5 | \title{get the maizuru community data} 6 | \usage{ 7 | get_maizuru_data( 8 | path = file.path(get_default_data_path(), "ushio-maizuru-fish-community") 9 | ) 10 | } 11 | \arguments{ 12 | \item{path}{where to load the raw data files from} 13 | } 14 | \value{ 15 | list of abundance, covariates, and metadata 16 | } 17 | \description{ 18 | get the maizuru community data 19 | } 20 | -------------------------------------------------------------------------------- /man/get_mtquad_data.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/get_data_mtquad.R 3 | \name{get_mtquad_data} 4 | 
\alias{get_mtquad_data} 5 | \title{get Montana plant quad time-series data} 6 | \usage{ 7 | get_mtquad_data( 8 | path = file.path(get_default_data_path(), "mapped-plant-quads-mt") 9 | ) 10 | } 11 | \arguments{ 12 | \item{path}{where to load the raw data files from} 13 | } 14 | \value{ 15 | list of abundance, covariates, and metadata 16 | } 17 | \description{ 18 | get Montana plant quad time-series data 19 | } 20 | -------------------------------------------------------------------------------- /man/get_portal_rodents.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/get_data_portal.R 3 | \name{get_portal_rodents} 4 | \alias{get_portal_rodents} 5 | \title{get portal rodent data 6 | 7 | Import Portal rodent data using portalr functions. 8 | Currently returns rodent data formatted appropriately for 9 | LDA analysis.} 10 | \usage{ 11 | get_portal_rodents( 12 | time_or_plots = "plots", 13 | treatment = "control", 14 | type = "Rodents" 15 | ) 16 | } 17 | \arguments{ 18 | \item{time_or_plots}{select whether to: (1) "time" == get the data for the 19 | entire timespan of the experiment, or (2) "plots" == just the time period 20 | with consistent treatments} 21 | 22 | \item{treatment}{"control" or "exclosure" treatments} 23 | 24 | \item{type}{type of animals to get: "Rodents" or restrict to "Granivores"} 25 | } 26 | \value{ 27 | list of two dataframes (one with abundance data, the other with covariate data) 28 | and one list of metadata. 29 | } 30 | \description{ 31 | get portal rodent data 32 | 33 | Import Portal rodent data using portalr functions. 34 | Currently returns rodent data formatted appropriately for 35 | LDA analysis. 
36 | } 37 | -------------------------------------------------------------------------------- /man/get_sdl_data.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/get_data_vegplots-sdl.R 3 | \name{get_sdl_data} 4 | \alias{get_sdl_data} 5 | \title{Create Sonoran desert lab time-series data} 6 | \usage{ 7 | get_sdl_data( 8 | plots = c(4, 7, 8, 9, 10, 11, 12, 14, 15, 16, 17), 9 | path = file.path(get_default_data_path(), "veg-plots-sdl") 10 | ) 11 | } 12 | \arguments{ 13 | \item{plots}{vector of plots to keep} 14 | 15 | \item{path}{where to load the raw data files from} 16 | } 17 | \value{ 18 | list of abundance, covariates, and metadata 19 | } 20 | \description{ 21 | Original data found here: \url{http://www.eebweb.arizona.edu/faculty/venable/LTREB/LTREB\%20data.htm} 22 | } 23 | -------------------------------------------------------------------------------- /man/get_sgs_data.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/get_data_shortgrass_steppe.R 3 | \name{get_sgs_data} 4 | \alias{get_sgs_data} 5 | \title{get Shortgrass Steppe rodent data} 6 | \usage{ 7 | get_sgs_data( 8 | path = file.path(get_default_data_path(), "shortgrass-steppe-lter") 9 | ) 10 | } 11 | \arguments{ 12 | \item{path}{where to load the raw data files from} 13 | } 14 | \value{ 15 | list of abundance, covariates, and metadata 16 | } 17 | \description{ 18 | get Shortgrass Steppe rodent data 19 | } 20 | -------------------------------------------------------------------------------- /man/get_times_from_data.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils-data-checks.R 3 | \name{get_times_from_data} 4 | \alias{get_times_from_data} 5 |
\title{extract the times from a formatted data structure} 6 | \usage{ 7 | get_times_from_data(data) 8 | } 9 | \arguments{ 10 | \item{data}{a formatted data structure} 11 | } 12 | \value{ 13 | a \code{numeric} or \code{Date} vector containing the times 14 | } 15 | \description{ 16 | extract the times from a formatted data structure 17 | } 18 | -------------------------------------------------------------------------------- /man/has_integer_times.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils-data-processing.R 3 | \name{has_integer_times} 4 | \alias{has_integer_times} 5 | \title{Check if a dataset has integer times} 6 | \usage{ 7 | has_integer_times(data) 8 | } 9 | \arguments{ 10 | \item{data}{dataset to check} 11 | } 12 | \value{ 13 | \code{TRUE} or \code{FALSE} 14 | } 15 | \description{ 16 | Check if a dataset has integer times 17 | } 18 | \details{ 19 | If the times are already integer or Date, \code{TRUE}. Otherwise \code{FALSE}, 20 | with a message if times are missing, or if times could potentially be 21 | rounded.
22 | } 23 | -------------------------------------------------------------------------------- /man/has_missing_samples.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils-data-processing.R 3 | \name{has_missing_samples} 4 | \alias{has_missing_samples} 5 | \alias{is_fully_sampled} 6 | \title{Check for missing samples} 7 | \usage{ 8 | has_missing_samples(data, period = NULL, tol = 1e-06, check_covariates = FALSE) 9 | } 10 | \arguments{ 11 | \item{data}{dataset to check} 12 | 13 | \item{period}{period to check the times against (if \code{NULL}, first check to 14 | see if there is a known \code{period} set in the metadata, otherwise assumes 1)} 15 | 16 | \item{tol}{tolerance for the period} 17 | 18 | \item{check_covariates}{\code{TRUE} or \code{FALSE} (whether to check covariates, too)} 19 | } 20 | \value{ 21 | \code{TRUE} or \code{FALSE} 22 | } 23 | \description{ 24 | Some analyses may require evenly sampled data without missing 25 | values. \code{has_missing_samples} checks that the dataset is equitimed, and 26 | then for missing values within \code{abundance} (and optionally, \code{covariates}). 27 | 28 | \code{is_fully_sampled()} does the same check, but returns \code{TRUE} if there are 29 | NO missing samples. 
30 | } 31 | -------------------------------------------------------------------------------- /man/install_retriever_data.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils-retriever-data.R 3 | \name{install_retriever_data} 4 | \alias{install_retriever_data} 5 | \alias{import_retriever_data} 6 | \alias{download_datasets} 7 | \title{Download data from the data retriever} 8 | \usage{ 9 | install_retriever_data( 10 | dataset, 11 | path = get_default_data_path(), 12 | force_install = FALSE 13 | ) 14 | 15 | import_retriever_data(dataset = NULL, path = get_default_data_path()) 16 | 17 | download_datasets( 18 | dataset = c("jornada-lter-rodent", "shortgrass-steppe-lter", "veg-plots-sdl", 19 | "mapped-plant-quads-mt", "ushio-maizuru-fish-community", 20 | "global-population-dynamics", "breed-bird-survey", "biotimesql"), 21 | path = get_default_data_path(), 22 | force_install = FALSE 23 | ) 24 | } 25 | \arguments{ 26 | \item{dataset}{the name of the dataset that you wish to download} 27 | 28 | \item{path}{the overarching folder in which to download datasets; OR the 29 | full path to the folder containing the data (when \code{dataset == NULL})} 30 | 31 | \item{force_install}{whether to install the dataset if the correctly named 32 | folder already exists} 33 | } 34 | \description{ 35 | \code{install_retriever_data} downloads retriever datasets and 36 | is a wrapper around \code{rdataretriever::\link[rdataretriever]{install}} 37 | 38 | \code{import_retriever_data} loads a previously downloaded 39 | retriever dataset 40 | 41 | \code{download_datasets} is a wrapper around 42 | \code{\link{install_retriever_data}} to download multiple datasets, with 43 | the default to download all of the datasets that are supported. 
44 | } 45 | \examples{ 46 | \dontrun{ 47 | install_retriever_data("veg-plots-sdl") 48 | } 49 | \dontrun{ 50 | import_retriever_data("veg-plots-sdl") 51 | } 52 | \dontrun{ 53 | download_datasets() 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /man/interpolate_missing_samples.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils-data-processing.R 3 | \name{interpolate_missing_samples} 4 | \alias{interpolate_missing_samples} 5 | \title{Impute missing samples using linear interpolation} 6 | \usage{ 7 | interpolate_missing_samples( 8 | data, 9 | period = NULL, 10 | tol = 1e-06, 11 | interpolate_covariates = FALSE 12 | ) 13 | } 14 | \arguments{ 15 | \item{data}{dataset to modify} 16 | 17 | \item{period}{period to check the times against (if \code{NULL}, first check to 18 | see if there is a known \code{period} set in the metadata, otherwise assumes 1)} 19 | 20 | \item{tol}{tolerance for the period} 21 | 22 | \item{interpolate_covariates}{\code{TRUE} or \code{FALSE} (whether to do covariates, too)} 23 | } 24 | \value{ 25 | the dataset, with interpolated samples 26 | } 27 | \description{ 28 | Impute missing samples using linear interpolation 29 | } 30 | \details{ 31 | First, check if the data are evenly sampled in time. If not, we 32 | exit early. Next, apply \code{forecast::na.interp()} to each variable that has 33 | non-finite values. 
34 | } 35 | -------------------------------------------------------------------------------- /man/interpolate_obs.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/summary_stats.R 3 | \name{interpolate_obs} 4 | \alias{interpolate_obs} 5 | \title{Interpolate observations} 6 | \usage{ 7 | interpolate_obs(obs, times, interp_method = forecast::na.interp, ...) 8 | } 9 | \arguments{ 10 | \item{obs}{the time series of \code{numeric} observations} 11 | 12 | \item{times}{\code{numeric} or \code{Date} vector of timestamps of the 13 | observations.} 14 | 15 | \item{interp_method}{\code{character} a function used to interpolate 16 | \code{obs}. Defaults to \code{\link[forecast]{na.interp}}.} 17 | 18 | \item{...}{further arguments to be passed to the interpolation method} 19 | } 20 | \value{ 21 | Interpolated observation vector. 22 | } 23 | \description{ 24 | Interpolate observations based on their timestamps and a 25 | method. 26 | } 27 | -------------------------------------------------------------------------------- /man/invoke.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/analysis_wrapper.R 3 | \name{invoke} 4 | \alias{invoke} 5 | \title{Invoke an analysis on a dataset} 6 | \usage{ 7 | invoke(fun, data, ...) 
8 | } 9 | \arguments{ 10 | \item{fun}{the analysis function} 11 | 12 | \item{data}{the dataset} 13 | 14 | \item{...}{additional arguments to pass to \code{fun}} 15 | } 16 | \value{ 17 | a tibble with these columns: 18 | \tabular{ll}{ 19 | \code{results} \tab the output of \code{fun(data)}\cr 20 | \code{metadata} \tab the metadata component of the original dataset\cr 21 | \code{dataset} \tab the name of the dataset\cr 22 | \code{method} \tab the name of the analysis function\cr 23 | \code{args} \tab a list of optional args to \code{method}\cr 24 | } 25 | } 26 | \description{ 27 | This function is a helper that, at its core, simply applies the 28 | function to the dataset. The key added functionality is to preserve the 29 | names of the function and the dataset, as well as metadata and any 30 | additional arguments; returning the result in a tibble that is consistent 31 | in format, regardless of what function is actually being invoked. 32 | } 33 | \examples{ 34 | \dontrun{ 35 | sgs_data <- MATSS::get_sgs_data() 36 | invoke(ts_summary, sgs_data) 37 | } 38 | 39 | } 40 | -------------------------------------------------------------------------------- /man/is_equitimed.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils-data-processing.R 3 | \name{is_equitimed} 4 | \alias{is_equitimed} 5 | \alias{is_evenly_sampled} 6 | \title{Check that the times of a dataset are evenly sampled} 7 | \usage{ 8 | is_equitimed(data, period = NULL, tol = 1e-06) 9 | } 10 | \arguments{ 11 | \item{data}{dataset to check} 12 | 13 | \item{period}{period to check the times against (if \code{NULL}, first check to 14 | see if there is a known \code{period} set in the metadata, otherwise assumes 1)} 15 | 16 | \item{tol}{tolerance for the period} 17 | } 18 | \value{ 19 | \code{TRUE} or \code{FALSE} 20 | } 21 | \description{ 22 | Check that the times of a dataset are evenly 
sampled 23 | } 24 | -------------------------------------------------------------------------------- /man/make_equitimed.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils-data-processing.R 3 | \name{make_equitimed} 4 | \alias{make_equitimed} 5 | \alias{make_evenly_sampled} 6 | \title{Insert rows if necessary so that time series are evenly sampled} 7 | \usage{ 8 | make_equitimed( 9 | data, 10 | period = NULL, 11 | tol = 1e-06, 12 | method = c("mean", "method", "closest"), 13 | na.rm = TRUE 14 | ) 15 | } 16 | \arguments{ 17 | \item{data}{dataset to modify} 18 | 19 | \item{period}{period to check the times against (if \code{NULL}, first check to 20 | see if there is a known \code{period} set in the metadata, otherwise assumes 1)} 21 | 22 | \item{tol}{tolerance for the period} 23 | 24 | \item{method}{one of \code{c("mean", "method", "closest")} that determines how 25 | the rows of the original data will get coerced into the output here.} 26 | 27 | \item{na.rm}{a logical value indicating whether \code{NA} 28 | values should be stripped before the computation proceeds.} 29 | } 30 | \value{ 31 | the dataset, with rows coerced according to the equitimed time 32 | indices, and additional empty rows inserted if needed 33 | } 34 | \description{ 35 | Insert rows if necessary so that time series are evenly sampled 36 | } 37 | \details{ 38 | First, \code{get_full_times()} computes the sequence of time index values 39 | at a regular sampling interval of period. These will be the final time 40 | index values for the output. \emph{Some} set of rows of the original dataset 41 | will map to each of these time indices. 
42 | 43 | The \code{method} argument determines how these rows get coerced: 44 | \describe{ 45 | \item{mean}{the values in the rows are averaged together using \code{mean}} 46 | \item{median}{the values in the rows are averaged together using \code{median}} 47 | \item{closest}{the values in the row that is closest in time to the 48 | desired time index are used.} 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /man/make_integer_times.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils-data-processing.R 3 | \name{make_integer_times} 4 | \alias{make_integer_times} 5 | \title{Add a time variable with integer values for evenly sampled data} 6 | \usage{ 7 | make_integer_times(data, period = NULL, tol = 1e-06) 8 | } 9 | \arguments{ 10 | \item{data}{dataset to modify} 11 | 12 | \item{period}{period to check the times against (if \code{NULL}, first check to 13 | see if there is a known \code{period} set in the metadata, otherwise assumes 1)} 14 | 15 | \item{tol}{tolerance for the period} 16 | } 17 | \value{ 18 | the dataset, with integer times 19 | } 20 | \description{ 21 | Add a time variable with integer values for evenly sampled data 22 | } 23 | \details{ 24 | First, check if the data are evenly sampled in time. If not, we 25 | exit early. Next, if the times are already integer or Date, we don't do 26 | anything. If the times are numeric, but roundable to integer, we round. 27 | Otherwise, we add a new variable to \code{covariates} from 1:n and designate 28 | this variable as the \code{timename}. 
29 | } 30 | -------------------------------------------------------------------------------- /man/normalize_obs.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils-data-summarize.R 3 | \name{normalize_obs} 4 | \alias{normalize_obs} 5 | \alias{normalize_effort,} 6 | \alias{normalize_times} 7 | \alias{normalize_effort} 8 | \title{Normalize observations, effort, or times} 9 | \usage{ 10 | normalize_obs(obs, effort, obs_per_effort = !is.null(effort)) 11 | 12 | normalize_times(obs, times = NULL) 13 | 14 | normalize_effort(obs, effort = NULL) 15 | } 16 | \arguments{ 17 | \item{obs}{the time series of \code{numeric} observations} 18 | 19 | \item{effort}{\code{numeric} vector of effort associated with the 20 | observations.} 21 | 22 | \item{obs_per_effort}{\code{logical} indicator if \code{obs} should be 23 | corrected for \code{effort} before summaries are done.} 24 | 25 | \item{times}{\code{numeric} or \code{Date} vector of timestamps of the 26 | observations.} 27 | } 28 | \description{ 29 | \code{normalize_obs} will normalize the observations to the 30 | provided effort vector, if \code{obs_per_effort} is TRUE. 31 | 32 | \code{normalize_effort} will generate a default effort vector if it is 33 | not provided, (effort = 1 over the provided \code{obs} time series) 34 | 35 | \code{normalize_times} will generate a default times vector if it is 36 | not provided, (times = \code{seq(length(obs))}) 37 | } 38 | -------------------------------------------------------------------------------- /man/pipe.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils-pipe.R 3 | \name{\%>\%} 4 | \alias{\%>\%} 5 | \title{Pipe operator} 6 | \usage{ 7 | lhs \%>\% rhs 8 | } 9 | \description{ 10 | See \code{magrittr::\link[magrittr]{\%>\%}} for details. 
11 | } 12 | \keyword{internal} 13 | -------------------------------------------------------------------------------- /man/prepare_bbs_ts_data.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/get_data_bbs.R 3 | \name{prepare_bbs_ts_data} 4 | \alias{prepare_bbs_ts_data} 5 | \title{Prepare BBS population time-series data} 6 | \usage{ 7 | prepare_bbs_ts_data( 8 | start_yr = 1965, 9 | end_yr = 2018, 10 | min_num_yrs = 10, 11 | path = get_default_data_path(), 12 | data_subset = NULL 13 | ) 14 | } 15 | \arguments{ 16 | \item{start_yr}{num first year of time-series} 17 | 18 | \item{end_yr}{num last year of time-series} 19 | 20 | \item{min_num_yrs}{num minimum number of years of data between start_yr & end_yr} 21 | 22 | \item{path}{where to load the raw data files from} 23 | 24 | \item{data_subset}{optional, a subset of the BBS communities to use 25 | (to speed up development). As c(1:X)} 26 | } 27 | \description{ 28 | Modified from \url{https://github.com/weecology/bbs-forecasting} 29 | and \url{https://github.com/weecology/MATSS-community-change}. 30 | 31 | Selects sites with data spanning \code{start_yr} through \code{end_yr} containing at 32 | least \code{min_num_yrs} of data samples during that period. Cleans data tables 33 | and stores each individual route as a .RDS file. Saves a data table of the 34 | route + region pairs. 
35 | } 36 | -------------------------------------------------------------------------------- /man/prepare_datasets.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils-retriever-data.R 3 | \name{prepare_datasets} 4 | \alias{prepare_datasets} 5 | \title{Prepare datasets for usage} 6 | \usage{ 7 | prepare_datasets( 8 | dataset = c("breed-bird-survey", "biotimesql"), 9 | bbs_params = list(path = get_default_data_path(), start_yr = 1965, end_yr = 2018, 10 | min_num_yrs = 10, data_subset = NULL), 11 | biotime_params = list(path = get_default_data_path(), data_subset = NULL) 12 | ) 13 | } 14 | \arguments{ 15 | \item{dataset}{what datasets to prepare (must follow the naming convention 16 | of \code{\link{download_datasets}})} 17 | 18 | \item{bbs_params}{list of params to pass to \code{\link{prepare_bbs_ts_data}}} 19 | 20 | \item{biotime_params}{list of params to pass to \code{\link{prepare_biotime_data}}} 21 | } 22 | \description{ 23 | This wraps all the functions that prepare datasets from specific 24 | databases (e.g. \code{\link{prepare_bbs_ts_data}}, \code{\link{prepare_biotime_data}}). 25 | } 26 | -------------------------------------------------------------------------------- /man/print.matssdata.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils-matssdata.R 3 | \name{print.matssdata} 4 | \alias{print.matssdata} 5 | \title{Print a time series dataset} 6 | \usage{ 7 | \method{print}{matssdata}(x, ...) 
8 | } 9 | \arguments{ 10 | \item{x}{Class \code{matssdata} object to be printed} 11 | 12 | \item{...}{additional arguments (unused)} 13 | } 14 | \description{ 15 | Print a time series dataset 16 | } 17 | -------------------------------------------------------------------------------- /man/print.matsssummary.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils-matssdata.R 3 | \name{print.matsssummary} 4 | \alias{print.matsssummary} 5 | \title{Print a time series summary} 6 | \usage{ 7 | \method{print}{matsssummary}(x, ..., n = NULL) 8 | } 9 | \arguments{ 10 | \item{x}{Class \code{matsssummary} object to be printed} 11 | 12 | \item{...}{additional arguments (unused)} 13 | } 14 | \description{ 15 | Print a time series summary 16 | } 17 | -------------------------------------------------------------------------------- /man/process_bbs_route_region_data.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/get_data_bbs.R 3 | \name{process_bbs_route_region_data} 4 | \alias{process_bbs_route_region_data} 5 | \title{Process the BBS data for an individual route and region} 6 | \usage{ 7 | process_bbs_route_region_data( 8 | bbs_data_table, 9 | location_table, 10 | species_table, 11 | save_to_file = FALSE, 12 | storage_path = file.path(get_default_data_path(), "breed-bird-survey-prepped"), 13 | citation_text = NULL 14 | ) 15 | } 16 | \arguments{ 17 | \item{bbs_data_table}{main bbs data table} 18 | 19 | \item{location_table}{information about location of the route} 20 | 21 | \item{species_table}{table of species for BBS} 22 | 23 | \item{save_to_file}{whether to save the processed dataset to a file} 24 | 25 | \item{storage_path}{folder in which to put processed dataset} 26 | 27 | \item{citation_text}{text of citation for the database} 28 | } 
29 | \value{ 30 | the processed BBS data 31 | } 32 | \description{ 33 | Correct and otherwise filter BBS species data (see 34 | \code{\link{combine_bbs_subspecies}} and \code{\link{filter_bbs_species}} 35 | for more info). Generate the abundance, covariate, and metadata tables and 36 | return the combined object. 37 | } 38 | -------------------------------------------------------------------------------- /man/process_biotime_dataset.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/get_data_biotime.R 3 | \name{process_biotime_dataset} 4 | \alias{process_biotime_dataset} 5 | \title{Process an individual BioTime dataset} 6 | \usage{ 7 | process_biotime_dataset( 8 | biotime_data_tables, 9 | dataset_id = 10, 10 | save_to_file = FALSE, 11 | storage_path = file.path(get_default_data_path(), "biotime-prepped"), 12 | citation_text = NULL 13 | ) 14 | } 15 | \arguments{ 16 | \item{biotime_data_tables}{full BioTime data tables} 17 | 18 | \item{dataset_id}{the study_id} 19 | 20 | \item{save_to_file}{whether to save the processed dataset to a file} 21 | 22 | \item{storage_path}{folder in which to put processed dataset} 23 | 24 | \item{citation_text}{text of citation for the database} 25 | } 26 | \value{ 27 | the processed BioTime dataset 28 | } 29 | \description{ 30 | Filter and modify the BioTime data. Generate the abundance, 31 | covariate, and metadata tables and return the combined object. 
32 | } 33 | -------------------------------------------------------------------------------- /man/richness.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/summary_stats.R 3 | \name{richness} 4 | \alias{richness} 5 | \title{Count non-0 entries} 6 | \usage{ 7 | richness(x) 8 | } 9 | \arguments{ 10 | \item{x}{\code{numeric} vector} 11 | } 12 | \value{ 13 | \code{numeric} value of the number of non-0 entries in \code{x}. 14 | } 15 | \description{ 16 | Calculate the richness (number of non-0 entries) of a given 17 | sample. 18 | } 19 | -------------------------------------------------------------------------------- /man/summarize_df.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/summary_stats.R 3 | \name{summarize_df} 4 | \alias{summarize_df} 5 | \alias{summarise_df} 6 | \title{Compute summaries and autocorrelation for each variable} 7 | \usage{ 8 | summarize_df( 9 | df, 10 | times = seq_len(NROW(df)), 11 | interp_method = forecast::na.interp, 12 | ... 13 | ) 14 | 15 | summarise_df( 16 | df, 17 | times = seq_len(NROW(df)), 18 | interp_method = forecast::na.interp, 19 | ... 20 | ) 21 | } 22 | \arguments{ 23 | \item{df}{the data.frame of variables to summarize} 24 | 25 | \item{times}{the time indices associated with the rows of \code{df}} 26 | 27 | \item{interp_method}{\code{character} a function used to interpolate 28 | \code{obs}. 
Defaults to \code{\link[forecast]{na.interp}}.} 29 | 30 | \item{...}{further arguments to be passed to acf} 31 | } 32 | \description{ 33 | Compute summaries and autocorrelation for each variable 34 | } 35 | -------------------------------------------------------------------------------- /man/summarize_vec.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/summary_stats.R 3 | \name{summarize_vec} 4 | \alias{summarize_vec} 5 | \alias{summarise_vec} 6 | \title{Summarize a univariate vector} 7 | \usage{ 8 | summarize_vec(x, round_out = TRUE, digits = NULL) 9 | 10 | summarise_vec(x, round_out = TRUE, digits = NULL) 11 | } 12 | \arguments{ 13 | \item{x}{the vector to be summarized} 14 | 15 | \item{round_out}{\code{logical} indicator if rounding should happen.} 16 | 17 | \item{digits}{\code{NULL} (default) or \code{integer} value of the number 18 | of digits for rounding. If \code{NULL}, \code{digits} is calculated to 19 | be two orders of magnitude lower than the smallest value in the vector 20 | being summarized.} 21 | } 22 | \value{ 23 | \code{vector} with entries corresponding to the minimum, maximum, 24 | median, mean, standard deviation, and count of the observations, times, 25 | or effort, rounded based on \code{round_out} and \code{digits}. 26 | } 27 | \description{ 28 | Summarize a univariate vector 29 | } 30 | -------------------------------------------------------------------------------- /man/summary.matssdata.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils-matssdata.R 3 | \name{summary.matssdata} 4 | \alias{summary.matssdata} 5 | \title{Summarize a time series dataset} 6 | \usage{ 7 | \method{summary}{matssdata}(x, ...) 
8 | } 9 | \arguments{ 10 | \item{x}{Class \code{matssdata} object to be summarized.} 11 | 12 | \item{...}{additional arguments to \code{ts_summary()}} 13 | } 14 | \value{ 15 | \code{list} of number of species, number of 16 | observations, summaries of the variables, the times, the effort, the 17 | species richness, total observation, and the among-species correlation. 18 | } 19 | \description{ 20 | Summarize a time series dataset 21 | } 22 | -------------------------------------------------------------------------------- /man/temp_autocor.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/summary_stats.R 3 | \name{temp_autocor} 4 | \alias{temp_autocor} 5 | \title{Temporal autocorrelation of observations} 6 | \usage{ 7 | temp_autocor(obs, times, interp_method = forecast::na.interp, ...) 8 | } 9 | \arguments{ 10 | \item{obs}{the time series of \code{numeric} observations} 11 | 12 | \item{times}{\code{numeric} or \code{Date} vector of timestamps of the 13 | observations.} 14 | 15 | \item{interp_method}{\code{character} a function used to interpolate 16 | \code{obs}. Defaults to \code{\link[forecast]{na.interp}}.} 17 | 18 | \item{...}{further arguments to be passed to acf} 19 | } 20 | \value{ 21 | Autocorrelation of the observation vector. 22 | } 23 | \description{ 24 | Calculate the autocorrelation of the observations based on 25 | their timestamps, with interpolation (based on a specified method) if 26 | necessary. 
27 | } 28 | -------------------------------------------------------------------------------- /man/to_numeric_vector.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils-data-summarize.R 3 | \name{to_numeric_vector} 4 | \alias{to_numeric_vector} 5 | \title{Extract a numeric vector} 6 | \usage{ 7 | to_numeric_vector(x) 8 | } 9 | \arguments{ 10 | \item{x}{the input data} 11 | } 12 | \description{ 13 | Extract a numeric vector from a data.frame or a matrix (taking 14 | the first column). 15 | } 16 | -------------------------------------------------------------------------------- /man/ts_summary.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/summary_stats.R 3 | \name{ts_summary} 4 | \alias{ts_summary} 5 | \title{Summarize a time series dataset} 6 | \usage{ 7 | ts_summary( 8 | data, 9 | times = NULL, 10 | effort = NULL, 11 | obs_per_effort = !is.null(effort), 12 | interp_method = forecast::na.interp, 13 | include_spp_correlations = TRUE, 14 | ... 15 | ) 16 | } 17 | \arguments{ 18 | \item{data}{a vector, matrix, or data.frame of \code{numeric} observations 19 | (within columns) across times (within rows).} 20 | 21 | \item{times}{\code{numeric} or \code{Date} vector of timestamps of the 22 | observations.} 23 | 24 | \item{effort}{\code{numeric} vector of effort associated with the 25 | observations.} 26 | 27 | \item{obs_per_effort}{\code{logical} indicator if \code{obs} should be 28 | corrected for \code{effort} before summaries are done.} 29 | 30 | \item{interp_method}{\code{character} a function used to interpolate 31 | \code{obs}. 
Defaults to \code{\link[forecast]{na.interp}}.} 32 | 33 | \item{include_spp_correlations}{whether to include the calculations of 34 | between-species correlations} 35 | 36 | \item{...}{additional arguments to be passed to \code{\link{temp_autocor}}} 37 | } 38 | \value{ 39 | \code{ts_summary}: \code{list} of number of species, number of 40 | observations, summaries of the variables, the times, the effort, the 41 | species richness, total observation, and the among-species correlation. 42 | } 43 | \description{ 44 | \code{ts_summary} creates a summary of a community time series 45 | dataset. The summary contains community-level statistics, including total 46 | number of observations, species richness, cross-correlations; as well as 47 | summary statistics on the individual populations that make up the 48 | community. 49 | 50 | Some aspects of the summaries depend on \code{times}, which should be a 51 | vector of the time index associated with the time series; and 52 | \code{effort}, which should be a vector of the sampling effort. 53 | \code{obs_per_effort} is an optional argument for correcting abundance 54 | based on effort; by default, it checks if \code{effort} is NULL. 55 | Interpolation of missing values for autocorrelation calculations (if 56 | needed) is done via \code{interp_method}. 57 | } 58 | -------------------------------------------------------------------------------- /man/use_default_data_path.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils-retriever-data.R 3 | \name{use_default_data_path} 4 | \alias{use_default_data_path} 5 | \title{Manage the default path for downloading MATSS Data into} 6 | \usage{ 7 | use_default_data_path(path = NULL) 8 | } 9 | \arguments{ 10 | \item{path}{Folder into which data will be downloaded} 11 | } 12 | \description{ 13 | See \code{portalr::\link[portalr]{use_default_data_path}} for details. 
14 | } 15 | -------------------------------------------------------------------------------- /tests/testthat.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(MATSS) 3 | 4 | if ("sample.kind" %in% names(formals(RNGkind))) 5 | { 6 | suppressWarnings(RNGkind(sample.kind = "Rounding")) 7 | } 8 | 9 | test_check("MATSS") 10 | -------------------------------------------------------------------------------- /tests/testthat/setup.R: -------------------------------------------------------------------------------- 1 | pre_test_options <- options( 2 | ## make ui_*() output easier to test against 3 | ## just say no to ANSI escape codes 4 | "crayon.enabled" = FALSE 5 | ) 6 | -------------------------------------------------------------------------------- /tests/testthat/teardown.R: -------------------------------------------------------------------------------- 1 | options(pre_test_options) 2 | -------------------------------------------------------------------------------- /tests/testthat/test-01-data-checking.R: -------------------------------------------------------------------------------- 1 | context("Validation Function for Data Format") 2 | 3 | set.seed(42) 4 | 5 | data(dragons) 6 | 7 | test_that("check_data_format works on basic examples", { 8 | 9 | # basic type checking 10 | expect_false(check_data_format(Nile)) 11 | expect_false(check_data_format(mtcars)) 12 | expect_false(check_data_format(list(mtcars))) 13 | 14 | # abundance type checking 15 | expect_false(check_data_format(list(abundance = mtcars))) 16 | expect_true(check_data_format(list(abundance = mtcars, 17 | metadata = list(is_community = FALSE, 18 | citation = "Henderson and Velleman (1981), Building multiple regression models interactively. 
Biometrics, 37, 391–411.")))) 19 | expect_false(check_data_format(list(abundance = iris))) 20 | expect_false(check_data_format(list(abundance = Nile))) 21 | 22 | # covariates format checking 23 | expect_true(check_data_format(list(abundance = mtcars, 24 | covariates = mtcars, 25 | metadata = list(is_community = FALSE, 26 | citation = "Henderson and Velleman (1981), Building multiple regression models interactively. Biometrics, 37, 391–411.")))) 27 | expect_false(check_data_format(list(abundance = mtcars, 28 | covariates = Nile))) 29 | expect_false(check_data_format(list(abundance = mtcars, 30 | covariates = iris))) 31 | 32 | # working example 33 | expect_true(check_data_format(dragons)) 34 | }) 35 | 36 | test_that("get_effort_from_data works on basic examples", { 37 | expect_null(get_effort_from_data(Nile)) 38 | expect_null(get_effort_from_data(mtcars)) 39 | expect_null(get_effort_from_data(list(abundance = mtcars, 40 | covariates = mtcars))) 41 | expect_equal(get_effort_from_data(dragons), 42 | c(3, 3, 2, 4, 1, 9)) 43 | }) 44 | 45 | test_that("get_times_from_data works on basic examples", { 46 | expect_null(get_times_from_data(Nile)) 47 | expect_null(get_times_from_data(mtcars)) 48 | expect_null(get_times_from_data(list(abundance = mtcars, 49 | covariates = mtcars))) 50 | expect_equal(get_times_from_data(dragons), 51 | seq.Date(from = as.Date("2014-06-28"), by = "1 year", length.out = 6)) 52 | }) 53 | -------------------------------------------------------------------------------- /tests/testthat/test-03-retriever-utils.R: -------------------------------------------------------------------------------- 1 | context("Check Retriever utility functions") 2 | 3 | # check if `retriever` is installed 4 | skip_if_no_retriever <- function() 5 | { 6 | have_retriever <- reticulate::py_module_available("retriever") 7 | if (!have_retriever) 8 | skip("retriever not available for testing") 9 | } 10 | 11 | test_path <- tempdir() 12 | 13 | test_that("retriever install checks data 
directory", { 14 | Sys.setenv("MATSS_DATA_PATH" = file.path(test_path, "FUBAR")) 15 | expect_error(install_retriever_data("iris"), 16 | class = "usethis_error", 17 | "User input required, but session is not interactive.") 18 | }) 19 | 20 | test_that("data_path functions work", { 21 | Sys.unsetenv("MATSS_DATA_PATH") 22 | expect_false(check_default_data_path()) 23 | 24 | Sys.setenv(MATSS_DATA_PATH = test_path) 25 | expect_equal(get_default_data_path(), test_path) 26 | expect_true(check_default_data_path()) 27 | }) 28 | 29 | test_that("retriever installing does error checking", { 30 | skip_if_no_retriever() 31 | expect_error(install_retriever_data("iris"), NA) 32 | m <- capture_messages(install_retriever_data("iris")) 33 | expect_match(m, "A folder already exists for \"iris\"") 34 | expect_match(m, "Use `force_install = TRUE` to overwrite it with a fresh install.") 35 | }) 36 | 37 | test_that("retriever importing does error checking", { 38 | unused_filename <- tempfile(tmpdir = "") 39 | unused_filename <- substring(unused_filename, 2, nchar(unused_filename)) 40 | expect_false(file.exists(file.path(test_path, unused_filename))) 41 | expect_error(w <- capture_warnings(dat <- import_retriever_data(unused_filename)), NA) 42 | expect_true(is.null(dat)) 43 | expect_match(w, paste0("Didn't find any downloaded data in ", 44 | file.path(test_path, unused_filename))) 45 | expect_match(w, "Did you run get_retriever_data\\(\\) first?") 46 | }) 47 | 48 | test_that("retriever importing works", { 49 | skip_if_no_retriever() 50 | expect_error(dat <- import_retriever_data("iris"), NA) 51 | expect_known_hash(dat$iris_Iris, "e8f8676f3c") 52 | }) 53 | -------------------------------------------------------------------------------- /tests/testthat/test-04-retriever-data.R: -------------------------------------------------------------------------------- 1 | context("Check Retriever datasets") 2 | 3 | test_that("veg-plots-sdl data retrieval works correctly", { 4 | skip_if_no_retriever() 5 | 
test_path <- tempdir() 6 | Sys.setenv(MATSS_DATA_PATH = test_path) 7 | 8 | expect_error(install_retriever_data("veg-plots-sdl"), NA) 9 | expect_error(dat <- import_retriever_data("veg-plots-sdl"), NA) 10 | expect_false(is.null(dat)) 11 | 12 | expect_error(dat <- get_sdl_data(), NA) 13 | expect_dataset(dat, "ff06f7af67", "de2dc7f655", "d52a190865", "d98472ffcc") 14 | }) 15 | -------------------------------------------------------------------------------- /tests/testthat/test-05-build-plans.R: -------------------------------------------------------------------------------- 1 | context("Building Drake Plans") 2 | 3 | test_that("build_datasets_plan works", { 4 | expect_error(datasets <- build_datasets_plan(), NA) 5 | expect_plan(datasets) 6 | expect_equal(dim(datasets), c(5, 2)) 7 | 8 | expect_error(datasets <- build_datasets_plan(include_retriever_data = TRUE), NA) 9 | expect_plan(datasets) 10 | expect_equal(dim(datasets), c(10, 3)) 11 | }) 12 | 13 | test_that("build_analyses_plan works", { 14 | datasets <- build_datasets_plan() 15 | methods <- drake::drake_plan( 16 | abs = abs, 17 | mean = mean 18 | ) 19 | N <- NROW(datasets) 20 | M <- NROW(methods) 21 | 22 | expect_error(analyses <- build_analyses_plan(methods, datasets), NA) 23 | expect_equal(NROW(analyses), N * M + M) 24 | 25 | expect_error(analyses <- build_analyses_plan(methods, datasets, trace = TRUE), NA) 26 | expect_equal(NROW(analyses), N * M + M) 27 | 28 | subplan_abs <- dplyr::filter(analyses, grepl("^analysis_abs_", target)) 29 | expect_equal(NROW(subplan_abs), N) 30 | expect_true(all(subplan_abs$fun == "abs")) 31 | expect_identical(subplan_abs$data, datasets$target) 32 | 33 | subplan_mean <- dplyr::filter(analyses, grepl("^analysis_mean_", target)) 34 | expect_equal(NROW(subplan_mean), N) 35 | expect_true(all(subplan_mean$fun == "mean")) 36 | expect_identical(subplan_mean$data, datasets$target) 37 | 38 | subplan_results <- dplyr::filter(analyses, grepl("^results_", target)) 39 | 
expect_equal(NROW(subplan_results), M) 40 | expect_identical(subplan_results$fun, methods$target) 41 | 42 | fun_calls <- lapply(subplan_results$command, as.character) 43 | expect_true(all(vapply(fun_calls, dplyr::first, "") == "dplyr::bind_rows")) 44 | }) 45 | -------------------------------------------------------------------------------- /tests/testthat/test-06-build-plans-installed-subsample.R: -------------------------------------------------------------------------------- 1 | context("Building Drake Plans for datasets installed in the subsampled folder") 2 | 3 | path <- system.file("extdata", "subsampled", 4 | package = "MATSS", mustWork = TRUE) 5 | Sys.setenv(MATSS_DATA_PATH = path) 6 | 7 | test_that("build_bbs_datasets_plan works", { 8 | expect_error(datasets <- build_bbs_datasets_plan(), NA) 9 | expect_plan(datasets) 10 | expect_true(all(grepl("bbs_rtrg_[0-9]+_[0-9]+$", datasets$target))) 11 | expect_equal(dim(datasets), c(3, 3)) 12 | 13 | expect_error(datasets <- build_datasets_plan(include_retriever_data = TRUE, 14 | include_bbs_data = TRUE), NA) 15 | expect_plan(datasets) 16 | expect_equal(sum(grepl("bbs_rtrg_[0-9]+_[0-9]+$", datasets$target)), 3) 17 | expect_equal(dim(datasets), c(13, 3)) 18 | }) 19 | 20 | test_that("build_gpdd_datasets_plan works", { 21 | expect_error(datasets <- build_gpdd_datasets_plan(), NA) 22 | expect_plan(datasets) 23 | expect_true(all(grepl("gpdd_rtrg_[0-9]+_[0-9\\.]+$", datasets$target))) 24 | expect_equal(dim(datasets), c(120, 2)) 25 | 26 | expect_error(datasets <- build_datasets_plan(include_gpdd_data = TRUE), NA) 27 | expect_plan(datasets) 28 | expect_equal(sum(grepl("gpdd_rtrg_[0-9]+_[0-9\\.]+$", datasets$target)), 120) 29 | expect_equal(dim(datasets), c(125, 2)) 30 | }) 31 | 32 | test_that("build_biotime_datasets_plan works", { 33 | expect_error(datasets <- build_biotime_datasets_plan(do_processing = FALSE), NA) 34 | expect_plan(datasets) 35 | expect_true(all(grepl("biotime_rtrg_[0-9]+$", datasets$target))) 36 | 
expect_equal(dim(datasets), c(361, 3)) 37 | 38 | expect_error(datasets <- build_datasets_plan(include_biotime_data = TRUE, 39 | biotime_process = FALSE), NA) 40 | expect_plan(datasets) 41 | expect_equal(sum(grepl("biotime_rtrg_[0-9]+$", datasets$target)), 361) 42 | expect_equal(dim(datasets), c(366, 3)) 43 | }) 44 | 45 | test_that("build_bbs_datasets_plan works", { 46 | expect_error(datasets <- build_datasets_plan(include_bbs_data = TRUE), NA) 47 | 48 | datasets <- build_datasets_plan(include_bbs_data = TRUE) 49 | 50 | expect_plan(datasets) 51 | # expect_true(all(grepl("_data$", datasets$target))) 52 | expect_equal(dim(datasets), c(8, 3)) 53 | 54 | methods <- drake::drake_plan( 55 | abs = abs, 56 | mean = mean 57 | ) 58 | N <- NROW(datasets) 59 | M <- NROW(methods) 60 | 61 | expect_error(analyses <- build_analyses_plan(methods, datasets), NA) 62 | expect_equal(NROW(analyses), N * M + M) 63 | 64 | expect_error(analyses <- build_analyses_plan(methods, datasets, trace = TRUE), NA) 65 | expect_equal(NROW(analyses), N * M + M) 66 | 67 | subplan_abs <- dplyr::filter(analyses, grepl("^analysis_abs_", target)) 68 | expect_equal(NROW(subplan_abs), N) 69 | expect_true(all(subplan_abs$fun == "abs")) 70 | expect_identical(subplan_abs$data, datasets$target) 71 | 72 | subplan_mean <- dplyr::filter(analyses, grepl("^analysis_mean_", target)) 73 | expect_equal(NROW(subplan_mean), N) 74 | expect_true(all(subplan_mean$fun == "mean")) 75 | expect_identical(subplan_mean$data, datasets$target) 76 | 77 | subplan_results <- dplyr::filter(analyses, grepl("^results_", target)) 78 | expect_equal(NROW(subplan_results), M) 79 | expect_identical(subplan_results$fun, methods$target) 80 | 81 | fun_calls <- lapply(subplan_results$command, as.character) 82 | expect_true(all(vapply(fun_calls, dplyr::first, "") == "dplyr::bind_rows")) 83 | }) 84 | -------------------------------------------------------------------------------- /tests/testthat/test-07-analysis-wrapper.R: 
-------------------------------------------------------------------------------- 1 | context("Tests of Wrapper Functions") 2 | 3 | data <- dragons 4 | num_vars <- NCOL(data$abundance) 5 | 6 | test_that("analysis_wrapper and invoke work for simple functions", { 7 | # setup sample inputs 8 | f <- function(ts) { 9 | tibble::tibble(n = NROW(ts), 10 | mean = mean(ts), 11 | sd = sd(ts)) 12 | } 13 | 14 | # check if analysis_wrapper works properly and has the right size 15 | expect_error(fun <- analysis_wrapper(f), NA) 16 | expect_error(output <- fun(data), NA) 17 | expect_equal(dim(output), c(num_vars, 4)) 18 | expect_identical(output$id, names(data$abundance)) 19 | 20 | # check if invoke works properly 21 | expect_error(output <- invoke(fun, data), NA) 22 | expect_equal(dim(output), c(1, 5)) 23 | expect_identical(output$dataset, "data") 24 | expect_identical(output$method, "fun") 25 | expect_identical(output$args[[1]], list()) 26 | 27 | # check metadata 28 | expect_identical(output$metadata[[1]], data$metadata) 29 | 30 | # check digest 31 | output$metadata[[1]] <- list() 32 | expect_known_hash(output, "4bd05cfb68") 33 | }) 34 | 35 | test_that("invoke preserves arguments correctly", { 36 | # setup sample inputs 37 | CI_levels <- c(0.05, 0.95) 38 | 39 | # create our different methods 40 | expect_error(compute_quantiles <- analysis_wrapper(quantile), NA) 41 | 42 | # check results 43 | expect_error(output <- invoke(compute_quantiles, data), NA) 44 | expect_equal(dim(output$results[[1]]), c(num_vars, 6)) 45 | expect_known_hash(output$results[[1]], "29da5e7ffc") 46 | expect_identical(output$dataset, "data") 47 | expect_identical(output$method, "compute_quantiles") 48 | expect_identical(output$args[[1]], list()) 49 | 50 | # check results 51 | expect_error(output <- invoke(compute_quantiles, data, probs = c(0.05, 0.95)), NA) 52 | expect_equal(dim(output$results[[1]]), c(num_vars, 3)) 53 | expect_known_hash(output$results[[1]], "5758611722") 54 | expect_identical(output$dataset, 
"data") 55 | expect_identical(output$method, "compute_quantiles") 56 | expect_identical(output$args[[1]], list(probs = c(0.05, 0.95))) 57 | 58 | # check results 59 | expect_error(output_alt <- invoke(compute_quantiles, data, probs = CI_levels), NA) 60 | expect_identical(output_alt, output) 61 | 62 | # check results 63 | expect_error(output <- invoke(compute_quantiles, data, c(0.05, 0.95)), NA) 64 | expect_equal(dim(output$results[[1]]), c(num_vars, 3)) 65 | expect_known_hash(output$results[[1]], "5758611722") 66 | expect_identical(output$dataset, "data") 67 | expect_identical(output$method, "compute_quantiles") 68 | expect_identical(output$args[[1]], list(c(0.05, 0.95))) 69 | }) -------------------------------------------------------------------------------- /tests/testthat/test-09-summary-stats-utils.R: -------------------------------------------------------------------------------- 1 | context("Time Series Summary Statistics Utility Functions") 2 | 3 | test_that("normalize_obs works", { 4 | expect_error(normalize_obs(rnorm(50), obs_per_effort = 1), 5 | "`obs_per_effort` must be logical") 6 | expect_error(normalize_obs(rnorm(50), effort = 1:10), 7 | "`obs` and `effort` are not of same length") 8 | x <- 1 + rpois(20, 4) 9 | expect_error(output <- normalize_obs(x, x), NA) 10 | expect_equal(output, rep(1, 20)) 11 | }) 12 | 13 | test_that("normalize_times works", { 14 | m <- capture_messages(expect_equal(normalize_times(rnorm(50)), 1:50)) 15 | expect_match(m, "`time` is `NULL`, assuming evenly spaced data") 16 | m <- capture_messages(expect_equal(normalize_times(rnorm(50), 17 | seq.Date(from = as.Date("2000-01-01"), by = "2 days", length.out = 50)), 1:50)) 18 | expect_match(m, "`time` is not numeric, assuming evenly spaced data") 19 | expect_equal(normalize_times(rnorm(4), c(1, 1, 2, 3)), c(1, 1, 2, 3)) 20 | }) 21 | 22 | test_that("normalize_effort works", { 23 | m <- capture_messages(expect_equal(normalize_effort(rnorm(50)), rep(1, 50))) 24 | expect_match(m, 
"`effort` is `NULL`, assuming all effort = 1") 25 | expect_equal(normalize_effort(rnorm(4), c(1, 1, 2, 3)), c(1, 1, 2, 3)) 26 | }) 27 | 28 | test_that("to_numeric_vector works", { 29 | expect_equal(to_numeric_vector(1:6), 1:6) 30 | expect_equal(to_numeric_vector(mtcars), mtcars$mpg) 31 | expect_equal(to_numeric_vector(tibble::as_tibble(mtcars)), mtcars$mpg) 32 | expect_equal(to_numeric_vector(matrix(1:12, ncol = 2)), 1:6) 33 | }) 34 | 35 | test_that("check_interp_method works", { 36 | expect_error(check_interp_method(NULL)) 37 | expect_error(check_interp_method(NA)) 38 | expect_error(check_interp_method(mtcars)) 39 | expect_error(check_interp_method(mean), NA) 40 | expect_error(check_interp_method(forecast::na.interp), NA) 41 | }) 42 | 43 | test_that("check_obs works", { 44 | expect_error(check_obs(NULL)) 45 | expect_error(check_obs(ChickWeight)) 46 | expect_error(check_obs(mtcars), NA) 47 | expect_error(check_obs(matrix(rnorm(16), nrow = 4)), NA) 48 | expect_error(check_obs(c(NA, rnorm(16))), NA) 49 | }) 50 | 51 | test_that("check_effort works", { 52 | expect_error(check_effort(NULL)) 53 | expect_error(check_effort(mtcars)) 54 | expect_error(check_effort(matrix(rnorm(16), nrow = 4))) 55 | expect_error(check_effort(c(NA, rnorm(16))), NA) 56 | }) 57 | 58 | test_that("check_times works", { 59 | expect_error(check_times(NULL)) 60 | expect_error(check_times(mtcars)) 61 | expect_error(check_times(matrix(rnorm(100), ncol = 10))) 62 | expect_error(check_times(time(sunspot.year)), NA) 63 | expect_error(check_times(c(NA, time(sunspot.year))), NA) 64 | expect_error(check_times(rnorm(16)), NA) 65 | expect_error(check_times(c(NA, rnorm(16))), NA) 66 | }) 67 | 68 | test_that("check_obs_and_times works", { 69 | expect_error(check_obs_and_times(rnorm(16), rnorm(15))) 70 | expect_error(check_obs_and_times(sunspot.year, time(sunspot.year)), NA) 71 | }) 72 | -------------------------------------------------------------------------------- 
/tests/testthat/test-10-compendium-creation.R: -------------------------------------------------------------------------------- 1 | context("MATSS research compendium") 2 | 3 | test_path <- file.path(tempdir(), "test") 4 | 5 | test_that("creating a compendium works", { 6 | expect_error(create_MATSS_compendium(test_path, name = "test name"), NA) 7 | }) 8 | 9 | test_that("compendium files exist", { 10 | expect_true(file.exists(test_path)) 11 | expect_true(file.exists(file.path(test_path, "DESCRIPTION"))) 12 | expect_true(file.exists(file.path(test_path, "LICENSE"))) 13 | expect_true(file.exists(file.path(test_path, "LICENSE.md"))) 14 | expect_true(file.exists(file.path(test_path, "NAMESPACE"))) 15 | expect_true(file.exists(file.path(test_path, "README.md"))) 16 | expect_true(file.exists(file.path(test_path, ".gitignore"))) 17 | expect_true(file.exists(file.path(test_path, ".Rbuildignore"))) 18 | expect_equal(file.exists(file.path(test_path, "test.Rproj")), 19 | rstudioapi::isAvailable()) 20 | expect_true(file.exists(file.path(test_path, "analysis"))) 21 | expect_true(file.exists(file.path(test_path, "analysis", "pipeline.R"))) 22 | expect_true(file.exists(file.path(test_path, "analysis", "report.Rmd"))) 23 | expect_true(file.exists(file.path(test_path, "analysis", "references.bib"))) 24 | expect_true(file.exists(file.path(test_path, "R"))) 25 | expect_true(file.exists(file.path(test_path, "R", "analysis_functions.R"))) 26 | }) 27 | -------------------------------------------------------------------------------- /tests/testthat/test-99-dataset-regressions.R: -------------------------------------------------------------------------------- 1 | context("Check Datasets") 2 | 3 | test_path <- tempdir() 4 | Sys.setenv(MATSS_DATA_PATH = test_path) 5 | 6 | test_that("get_gpdd_data formats data correctly", { 7 | skip_if_no_retriever() 8 | download_datasets("global-population-dynamics") 9 | 10 | expect_error(dat <- get_gpdd_data(location_id = 83, timeperiod_id = 408), NA) 11 | 
expect_dataset(dat, "701d60bb9e", "303e5d422b", "22f70476be", "9c44940787") 12 | expect_known_hash(dat$metadata$citation, "6c7d2e15e6") 13 | }) 14 | 15 | test_that("Shortgrass Steppe data is retrievable and works", { 16 | skip_if_no_retriever() 17 | download_datasets("shortgrass-steppe-lter") 18 | 19 | expect_error(dat <- get_sgs_data(), NA) 20 | expect_dataset(dat, "30f412e387", "e87060f72a", "f212abd4ab", "67ba6e0b35") 21 | expect_known_hash(dat$metadata$citation, "be3383c603") 22 | }) 23 | 24 | test_that("Jornada data is retrievable and works", { 25 | skip_if_no_retriever() 26 | download_datasets("jornada-lter-rodent") 27 | 28 | expect_error(dat <- get_jornada_data(), NA) 29 | expect_dataset(dat, "b01c0f0361", "b71bd81c62", "e13443e3ca", "cdef5e0eb9") 30 | expect_known_hash(dat$metadata$citation, "ecc667faa2") 31 | }) 32 | 33 | test_that("Portal data is retrievable and works", { 34 | expect_error(dat <- get_portal_rodents(), NA) 35 | expect_dataset(dat, "2136da689d", "ec4befd3dd", "6074e384c2", "0635765f20") 36 | expect_known_hash(dat$metadata$citation, "85f8b00f1d") 37 | }) 38 | 39 | test_that("Karoo data is retrievable and works", { 40 | expect_error(dat <- get_karoo_data(), NA) 41 | expect_dataset(dat, "811613052a", "72deba00b8", "625990a0b8", "7f959c373d") 42 | expect_known_hash(dat$metadata$citation, "b54523a903") 43 | }) 44 | 45 | test_that("Cowley Lizards data is retrievable and works", { 46 | expect_error(dat <- get_cowley_lizards(), NA) 47 | expect_dataset(dat, "baf0ca42d4", "ea94cef99a", "b59b5ae6b4", "c3f66160ae") 48 | expect_known_hash(dat$metadata$citation, "e7bbe85cb4") 49 | }) 50 | 51 | test_that("Cowley Snakes data is retrievable and works", { 52 | expect_error(dat <- get_cowley_snakes(), NA) 53 | expect_dataset(dat, "f9d6848c03", "ea94cef99a", "b59b5ae6b4", "8ee7798b91") 54 | expect_known_hash(dat$metadata$citation, "e7bbe85cb4") 55 | }) 56 | 57 | test_that("Kruger data is retrievable and works", { 58 | expect_error(dat <- get_kruger_data(), NA) 
59 | expect_dataset(dat, "3184bfcfa6", "e00ef454e1", "4b1f4de879", "7f959c373d") 60 | expect_known_hash(dat$metadata$citation, "3595d0fbba") 61 | }) 62 | -------------------------------------------------------------------------------- /tests/testthat/test-999-installed-subsample-dataset-regressions.R: -------------------------------------------------------------------------------- 1 | context("Check Datasets that are installed in the subsampled folder") 2 | 3 | path <- system.file("extdata", "subsampled", 4 | package = "MATSS", mustWork = TRUE) 5 | Sys.setenv(MATSS_DATA_PATH = path) 6 | 7 | test_that("process_bbs_ts_data formats data correctly", { 8 | unlink(file.path(path, "breed-bird-survey-prepped")) 9 | expect_error(prepare_bbs_ts_data(), NA) 10 | expect_error(dat <- get_bbs_route_region_data(route = 1, region = 4), NA) 11 | expect_dataset(dat, "3fe07b68b9", "3854304cf6", "5fd9a7fdc3", "f9d0f4d9a6") 12 | 13 | expect_error(dat <- get_bbs_route_region_data(route = 2, region = 4), NA) 14 | expect_true(check_data_format(dat)) 15 | expect_error(dat <- get_bbs_route_region_data(route = 3, region = 4), NA) 16 | expect_true(check_data_format(dat)) 17 | }) 18 | 19 | test_that("get_mtquad_data formats data correctly", { 20 | expect_error(dat <- get_mtquad_data(), NA) 21 | expect_dataset(dat, "c4a22592f9", "f9debd76c0", "2ab904f618", "7f959c373d") 22 | }) 23 | 24 | test_that("get_biotime_data processes data correctly", { 25 | unlink(file.path(path, "biotime-prepped"), recursive = TRUE) 26 | expect_error(prepare_biotime_data(data_subset = c(1, 14, 67, 172)), NA) 27 | biotime_data_tables <- import_retriever_data("biotimesql", path = path) 28 | expect_error(dat <- process_biotime_dataset(biotime_data_tables, dataset_id = 321), NA) 29 | expect_dataset(dat, "e55c7fdbf0", "31d9dbfb67", "9f825e1939", "c507e2dc4a") 30 | 31 | expect_error(get_biotime_dataset_ids(do_processing = TRUE), NA) 32 | expect_error(dat <- get_biotime_data(dataset_id = 321), NA) 33 | expect_dataset(dat, 
"e55c7fdbf0", "31d9dbfb67", "9f825e1939", "c507e2dc4a") 34 | 35 | expect_true(check_metadata_species_table(dat)) 36 | }) 37 | -------------------------------------------------------------------------------- /vignettes/.gitignore: -------------------------------------------------------------------------------- 1 | *.html 2 | *.R 3 | -------------------------------------------------------------------------------- /vignettes/data-formats.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Data Formats" 3 | author: 4 | - Ellen K. Bledsoe 5 | - Hao Ye 6 | date: "`r Sys.Date()`" 7 | output: rmarkdown::html_vignette 8 | vignette: > 9 | %\VignetteIndexEntry{MATSS: Data Formats} 10 | %\VignetteEngine{knitr::rmarkdown} 11 | %\VignetteEncoding{UTF-8} 12 | --- 13 | 14 | ```{r setup, include = FALSE} 15 | knitr::opts_chunk$set( 16 | collapse = TRUE, 17 | comment = "#>" 18 | ) 19 | ``` 20 | 21 | ## Data Structure 22 | 23 | The universal data structure we're going to use is: 24 | 25 | * a list with the following elements: 26 | - a data.frame or tibble, named `abundance` (required) 27 | - a data.frame or tibble, named `covariates` (optional) 28 | - a list, named `metadata` (required) 29 | 30 | If both `abundance` and `covariates` are present in the list, then the two data.frames must have the same number of rows. 31 | 32 | ### abundance 33 | 34 | In the `abundance` data.frame: 35 | 36 | * each row is an observation (e.g. in time or space) 37 | * each column is a variable 38 | 39 | Here, the common usage is for each column to be a species or taxon, and each row to be an observed sample. In other words, each column is a time series, with the rows sorted such that time advances down (higher row indices correspond to later times). 40 | 41 | ### covariates 42 | 43 | In the `covariates` data.frame: 44 | 45 | * each row is an observation (e.g. 
in time or space) 46 | * each column is a variable 47 | 48 | The number of rows should match that of `abundance`, and rows of `covariates` should line up with `abundance` (either sampled simultaneously or concurrently). Common covariates are date and time, temperature, treatments, etc. 49 | 50 | ### metadata 51 | 52 | In the `metadata` list: 53 | 54 | * in general, any entries are allowed, in any data structure and format 55 | * it must have an `is_community` entry, which indicates whether the time series in `abundance` can be treated as components of a community with interactions and/or shared drivers in some way 56 | * it must have a `citation` entry that is a vector of text values for the reference to the dataset. There can be multiple values (e.g. in the case of a specific dataset pulled from a larger database). 57 | * if there is a `location` entry, it must contain at least a `latitude` and `longitude` value (in decimal form). `location` itself can be a data.frame or vector (that has names) 58 | * if there is a `timename` entry, it refers to a column in the `covariates` data.frame that gives a time index for the data 59 | - this column must be some form of numeric, integer, date, or date/time corresponding to the timing of the samples 60 | - this column must be of a form that applying `tidyr::full_seq`, along with a "period" entry (using 1 if missing) will produce the appropriate equi-timed spacing 61 | * if there is a `period` entry, it must be compatible with `tidyr::full_seq` and the `timename` variable described above. 62 | * if there is a `species_table` entry, it must have an `id` column that includes all the column names in `abundance`. This is intended to provide more information about the different variables in `abundance`. 
63 | 64 | ## Example Data 65 | 66 | Here is an example of a correctly formatted dataset with covariates and metadata: 67 | 68 | ```{r example crosstab data} 69 | library(MATSS) 70 | data(dragons) 71 | 72 | str(dragons) 73 | ``` 74 | 75 | We can view the abundance and covariates tables side by side: 76 | ```{r} 77 | knitr::kable(dragons[c("abundance", "covariates")]) 78 | ``` 79 | 80 | ## Checking Data 81 | 82 | We also provide a function for checking whether the data is formatted correctly: 83 | 84 | ```{r check dat} 85 | check_data_format(dragons) 86 | ``` 87 | 88 | -------------------------------------------------------------------------------- /vignettes/dataset-summary.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "MATSS: Dataset Summary" 3 | author: 4 | - Hao Ye 5 | date: "`r Sys.Date()`" 6 | output: 7 | rmarkdown::html_document: 8 | self_contained: yes 9 | fontsize: 11pt 10 | documentclass: article 11 | vignette: > 12 | %\VignetteIndexEntry{MATSS: Dataset Summary} 13 | %\VignetteEngine{knitr::rmarkdown_notangle} 14 | %\VignetteEncoding{UTF-8} 15 | --- 16 | 17 | ```{r setup, include = FALSE} 18 | IS_LOCAL <- identical(Sys.getenv("LOCAL"), "true") 19 | 20 | knitr::opts_chunk$set( 21 | collapse = TRUE, 22 | comment = "#>", 23 | purl = IS_LOCAL 24 | ) 25 | ``` 26 | 27 | ```{r setup packages} 28 | library(dplyr) 29 | library(tidyr) 30 | library(purrr) 31 | library(MATSS) 32 | ``` 33 | 34 | # Overview 35 | 36 | The basic info we want for each dataset is: 37 | 38 | * taxon data (species-level if appropriate) 39 | * sampling start and end times 40 | * sampling frequency 41 | * geographical location 42 | * life history characteristics of the organisms 43 | * temperature at sampling location 44 | 45 | For the latter two, it's probably easier to rely on external packages to retrieve the info, so the summaries will first focus on the remaining entries. 
46 | 47 | # BioTime summaries 48 | 49 | Define a function for how to generate the summary for BioTime datasets. Note that we use a `tibble` structure, to enable easy assembly across datasets. 50 | 51 | ```{r define summary for Biotime data} 52 | summarize_biotime_data <- function(dataset_id = 10, ...) 53 | { 54 | dat <- get_biotime_data(dataset_id = dataset_id, 55 | ...) 56 | 57 | summ_info <- tibble(dataset_id = dataset_id) 58 | 59 | # determine species info 60 | summ_info$species_table <- I(list(dat$metadata$species_table)) 61 | 62 | # determine start and end time 63 | if (dat$metadata$is_annual_sampling) 64 | { 65 | var <- "year" 66 | } else { 67 | var <- "date" 68 | } 69 | summ_info$start_time <- as.character(min(dat$covariates[[var]], na.rm = TRUE)) 70 | summ_info$end_time <- as.character(max(dat$covariates[[var]], na.rm = TRUE)) 71 | 72 | # determine sampling frequency 73 | if (dat$metadata$is_annual_sampling) # missing month data in raw values 74 | { 75 | summ_info$sampling_frequency <- "annual (or less)" 76 | } else if (length(unique(dat$covariates$year)) >= NROW(dat$covariates)) { # no year appears in more than one row 77 | summ_info$sampling_frequency <- "annual (or less)" 78 | } else { 79 | summ_info$sampling_frequency <- "subannual" 80 | } 81 | 82 | # determine spatial extent 83 | summ_info$number_locations <- dat$metadata$number_lat_long 84 | summ_info$lat <- dat$metadata$cent_lat 85 | summ_info$long <- dat$metadata$cent_long 86 | summ_info$lat_min <- min(dat$covariates$latitude, na.rm = TRUE) 87 | summ_info$lat_max <- max(dat$covariates$latitude, na.rm = TRUE) 88 | summ_info$long_min <- min(dat$covariates$longitude, na.rm = TRUE) 89 | summ_info$long_max <- max(dat$covariates$longitude, na.rm = TRUE) 90 | return(summ_info) 91 | } 92 | ``` 93 | 94 | ## Results 95 | 96 | Now repeat for all the datasets within BioTime 97 | 98 | ```{r biotime summaries, eval = IS_LOCAL} 99 | biotime_dataset_ids <- get_biotime_dataset_ids() 100 | 101 | biotime_summaries <- map_dfr(biotime_dataset_ids, 
summarize_biotime_data) 102 | ``` 103 | 104 | Extract out just the dataset-level info, and save it for the vignette to be built without needing to download and process the BioTime data first. 105 | 106 | ```{r dataset info, eval = IS_LOCAL} 107 | dataset_info <- biotime_summaries %>% 108 | select(-species_table) 109 | 110 | saveRDS(dataset_info, here::here("inst/biotime_dataset_info.RDS")) 111 | ``` 112 | 113 | 114 | ```{r print dataset info} 115 | dataset_info_file <- system.file("biotime_dataset_info.RDS", 116 | package = "MATSS", mustWork = TRUE) 117 | dataset_info <- readRDS(dataset_info_file) 118 | knitr::kable(dataset_info, digits = 2) 119 | ``` 120 | 121 | Extract out just the taxa: 122 | 123 | ```{r species info, eval = IS_LOCAL} 124 | species_info <- biotime_summaries %>% 125 | unnest() %>% 126 | select(dataset_id, genus, species, genus_species) 127 | 128 | # knitr::kable(species_info) 129 | ``` 130 | 131 | ## Search for duplicates 132 | -------------------------------------------------------------------------------- /vignettes/hipergator-install.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Installing MATSS on the HiPerGator" 3 | output: rmarkdown::html_vignette 4 | vignette: > 5 | %\VignetteIndexEntry{hipergator-install} 6 | %\VignetteEngine{knitr::rmarkdown} 7 | %\VignetteEncoding{UTF-8} 8 | --- 9 | 10 | ```{r, include = FALSE} 11 | knitr::opts_chunk$set( 12 | collapse = TRUE, 13 | comment = "#>", 14 | eval = FALSE 15 | ) 16 | ``` 17 | 18 | 0. If you need a HiPerGator account, follow the instructions on the lab-wiki: https://github.com/weecology/lab-wiki/wiki/Programming:-HiPerGator-Intro-Guide 19 | 20 | 1. Log onto HiPerGator (substituting your username for `<username>`). 21 | 22 | ```{bash} 23 | ssh <username>@hpg2.rc.ufl.edu 24 | ``` 25 | 26 | 2. 
Follow the [installation instructions for Python on HiPerGator](https://github.com/weecology/lab-wiki/wiki/Programming:-HiPerGator-Intro-Guide#installing-python-packages). No need to actually install packages yet. 27 | 28 | 3. Create a file named .Rprofile to allow for local installs (substituting your 29 | username for `<username>`): 30 | 31 | ```{bash} 32 | echo '.libPaths(c("/home/<username>/R_libs", .libPaths()))' >> .Rprofile 33 | ``` 34 | 35 | 4. Make sure that you have the specified folder for storing R packages (substituting your 36 | username for `<username>`): 37 | 38 | ```{bash} 39 | mkdir /home/<username>/R_libs 40 | ``` 41 | 42 | 5. Load the R module (which makes R available in your HiPerGator session), and then open R: 43 | 44 | ```{bash} 45 | ml R 46 | R 47 | ``` 48 | 49 | 6. In `R`, install the core Python retriever (yes, this happens in `R`, which is kind of magical). 50 | 51 | ```{r} 52 | install.packages('reticulate') 53 | library(reticulate) 54 | conda_install('r-reticulate', 'retriever') 55 | ``` 56 | 57 | If you get an error that the conda environment `'r-reticulate'` does not exist, you may need to create it first. Run the following lines and then retry the `conda_install('r-reticulate', 'retriever')` command: 58 | 59 | ```{r} 60 | conda_create('r-reticulate') 61 | ``` 62 | 63 | 7. Restart R 64 | 65 | 8. Install the `rdataretriever` 66 | 67 | ```{r} 68 | remotes::install_github('ropensci/rdataretriever') 69 | library(rdataretriever) 70 | ``` 71 | 72 | If you get an error about "HTTP error 404" and "Rate limit remaining", you may need to set up a GITHUB_PAT key on HiPerGator - https://happygitwithr.com/github-pat.html 73 | 74 | 9. Install `MATSS` 75 | 76 | ```{r} 77 | remotes::install_github("weecology/MATSS") 78 | ``` 79 | 80 | 10. Try installing a dataset from retriever: 81 | ```{r} 82 | MATSS::install_retriever_data("veg-plots-sdl", force_install = TRUE) 83 | ``` 84 | 85 | If you receive an error about the number of arguments, e.g. 
86 | ``` 87 | Error in py_call_impl(callable, dots$args, dots$keywords) : 88 | TypeError: install_csv() takes from 1 to 4 positional arguments but 5 were given 89 | ``` 90 | then you will need a newer version of retriever than was previously installed. 91 | 92 | Exit R to return to the command line, and then use the following commands to install retriever from github: 93 | 94 | ```{bash} 95 | conda activate r-reticulate 96 | conda install git pip 97 | pip install git+git://github.com/weecology/retriever@master 98 | ``` 99 | 100 | Then, retry the command at the beginning of this step in R. 101 | 102 | 11. To run a `MATSS` pipeline on HiPerGator you will need to setup `drake` to use 103 | slurm for parallel scheduling. See https://github.com/weecology/MATSS-LDATS/ for 104 | an example of how to do this. -------------------------------------------------------------------------------- /vignettes/references.bib: -------------------------------------------------------------------------------- 1 | %% This BibTeX bibliography file was created using BibDesk. 
2 | %% https://bibdesk.sourceforge.io/ 3 | 4 | %% Created for Hao Ye at 2020-04-23 18:16:29 -0400 5 | 6 | 7 | %% Saved with string encoding Unicode (UTF-8) 8 | 9 | 10 | 11 | @article{Marwick_2018, 12 | Author = {Ben Marwick and Carl Boettiger and Lincoln Mullen}, 13 | Date-Added = {2018-10-25 14:33:23 -0400}, 14 | Date-Modified = {2018-10-25 14:33:55 -0400}, 15 | Journal = {The American Statistician}, 16 | Number = {1}, 17 | Pages = {80-88}, 18 | Title = {Packaging Data Analytical Work Reproducibly Using R (and Friends)}, 19 | Volume = {72}, 20 | Year = {2018}, 21 | Bdsk-File-1 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxAnLi4vcmVmcy9wYXBlcnMvTWFyd2ljay9NYXJ3aWNrXzIwMTgucGRmTxEBVgAAAAABVgACAAAJU2hpcmFzYWdpAAAAAAAAAAAAAAAAAAAAAAAAAAAAAEJEAAH/////EE1hcndpY2tfMjAxOC5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAP////8AAAAAAAAAAAAAAAAAAQAEAAAKIGN1AAAAAAAAAAAAAAAAAAdNYXJ3aWNrAAACADAvOlVzZXJzOmh5ZTpyZWZzOnBhcGVyczpNYXJ3aWNrOk1hcndpY2tfMjAxOC5wZGYADgAiABAATQBhAHIAdwBpAGMAawBfADIAMAAxADgALgBwAGQAZgAPABQACQBTAGgAaQByAGEAcwBhAGcAaQASAC5Vc2Vycy9oeWUvcmVmcy9wYXBlcnMvTWFyd2ljay9NYXJ3aWNrXzIwMTgucGRmABMAAS8AABUAAgAK//8AAAAIAA0AGgAkAE4AAAAAAAACAQAAAAAAAAAFAAAAAAAAAAAAAAAAAAABqA==}} 22 | 23 | @comment{BibDesk Static Groups{ 24 | 25 | 26 | 27 | 28 | 29 | group name 30 | dynamic stability 31 | keys 32 | Ushio_2018,Pimm_1984,Cenci_2019,Rypdal_2019,Deyle_2016,Ye_2018,May_1973a 33 | 34 | 35 | group name 36 | Luo reply 37 | keys 38 | Luo_2015,Tsonis_2015,Nes_2015,Sugihara_1994,Hartmann_2013,Sugihara_2012,Clark_2015,Ebisuzaki_1997 39 | 40 | 41 | group name 42 | rEDM refs 43 | keys 44 | Lorenz_1963,Sugihara_1994,Sugihara_1990,Sugihara_2012,Deyle_2011,Takens_1981,Sauer_1991,Fisher_1915,Casdagli_1991,Ye_2015a,Deyle_2016,Ye_2018,Whitney_1936,Chang_2017,Rulkov_1995,Cheng_1994,Cleveland_1979,Deyle_2013,Deyle_2018,Dixon_1999,Ye_2015,McGowan_2017 45 | 46 | 47 | group name 48 | Regime Shifts 49 | keys 50 | 
Hare_2000,Butchart_2010,Hastings_2010,Rudnick_2003,Scheffer_2009,Guttal_2008a,Scheffer_2001,Guttal_2008,Biggs_2009,Carpenter_2011,Ratajczak_2018 51 | 52 | 53 | group name 54 | research statement 55 | keys 56 | Hastings_2018,Athey_2016,Thomas_2018,Giron-Nava_2017,Ye_2015a,Ye_2015,Sugihara_2012,Sugihara_1990,Ushio_2018,Ratajczak_2018,White_2019,Pennekamp_2019,Ernest_2019,Ye_2018,Christensen_2018,Petchey_2015,Dornelas_2018,Beaulieu-Jones_2017,Ye_2016,Nes_2015,Pimm_1984,Banerjee_2015,Benedetti-Cecchi_2018,Ernest_2018a,Pedersen_2018,McGowan_2017,Wilson_2017,Deyle_2016,Cenci_2019,Yenni_2019 57 | 58 | 59 | group name 60 | salmon 61 | keys 62 | Schnute_2007,Glaser_2014a,CBC-News_2009,Beamish_2005,McKinnell_2008,Grant_2010,Holt_2009,Kaeriyama_2004,Beamish_2012,Peterman_2012,Kell_2007,Mysak_1986,Peterman_2003,Grant_2011,Gilhousen_1992,Ricker_1947,Ricker_1997,Jacobson_1995,Peterson_2010,Beamish_1999,Schnute_2000,Theiler_1992,Ricker_1954,Northcote_1989,Sugihara_1990,Dennis_2001,Preikshot_2012,Sugihara_1992,Cox_1997,Sauer_1991,Mantua_1997,Sugihara_1994,Hutchinson_1957,DFO_2012,Sugihara_2012,Hsieh_2005,Deyle_2011,Levy_1992,Thompson_1945,Henderson_1998,Dorner_2008,DFO_2009,Ebisuzaki_1997,Rand_2006,Cass_2006,Burgner_1991,Glaser_2011,Deyle_2013,MacDonald_2012,Cass_1994,Dixon_2001,Burrow_2012,Hsieh_2008,McClatchie_2010,Beamish_1993,Marmorek_2011,Peterman_2011,Ricker_1950,Eliason_2011,Beamish_1997,Liu_2012,Beamish_2012a,Hodgson_2002,Beacham_2004,Hare_1994,Cazelles_1992,Sugihara_2011,Holt_2009a,Beamish_2004,Myers_1998,Ascioti_1993,Dixon_1999,Beamish_1994,Eggers_2005,Perretti_2013,Thomson_2012,Hodgson_2006,Takens_1981,CBC-News_2010,Packard_1980,Guill_2011,Nelson_1998,Foerster_1968,Withler_2000 63 | 64 | 65 | group name 66 | topic models 67 | keys 68 | Blei_2006 69 | 70 | 71 | 72 | }} 73 | --------------------------------------------------------------------------------