├── .DS_Store ├── .Rbuildignore ├── .github ├── .gitignore └── workflows │ ├── R-CMD-check.yaml │ └── pkgdown.yaml ├── .gitignore ├── CONTRIBUTING.md ├── CRAN-RELEASE ├── CoordinateCleaner.Rproj ├── DESCRIPTION ├── NAMESPACE ├── NEWS.md ├── R ├── CoordinateCleaner-package.R ├── cc_aohi.R ├── cc_cap.R ├── cc_cen.R ├── cc_coun.R ├── cc_dupl.R ├── cc_equ.R ├── cc_gbif.R ├── cc_inst.R ├── cc_iucn.R ├── cc_outl.R ├── cc_sea.R ├── cc_urb.R ├── cc_val.R ├── cc_zero.R ├── cd_ddmm.R ├── cd_round.R ├── cf_age.R ├── cf_equal.R ├── cf_outl.R ├── cf_range.R ├── clean_coordinates.R ├── clean_dataset.R ├── clean_fossils.R ├── internal_clean_coordinate.R ├── internal_clean_dataset.R ├── internal_write_pyrate.R ├── methods.spatialvalid.R ├── sysdata.rda └── write_pyrate.R ├── README.md ├── _pkgdown.yml ├── _site.yml ├── articles ├── CoordinateCleaner.bib ├── Dataset_level_cleaning.Rmd ├── Geographic_outliers.Rmd ├── The_institutions_database.Rmd ├── The_institutions_database.html └── apa.csl ├── codemeta.json ├── cran-comments.md ├── data ├── aohi.rda ├── buffland.rda ├── buffsea.rda ├── countryref.rda ├── institutions.rda └── pbdb_example.rda ├── docs ├── 404.html ├── CONTRIBUTING.html ├── articles │ ├── Cleaning_GBIF_data_with_CoordinateCleaner.html │ ├── Cleaning_GBIF_data_with_CoordinateCleaner_files │ │ ├── accessible-code-block-0.0.1 │ │ │ └── empty-anchor.js │ │ └── figure-html │ │ │ ├── unnamed-chunk-11-1.png │ │ │ ├── unnamed-chunk-16-1.png │ │ │ ├── unnamed-chunk-17-1.png │ │ │ ├── unnamed-chunk-18-1.png │ │ │ ├── unnamed-chunk-19-1.png │ │ │ ├── unnamed-chunk-5-1.png │ │ │ └── unnamed-chunk-6-1.png │ ├── Cleaning_PBDB_fossils_with_CoordinateCleaner.html │ ├── Cleaning_PBDB_fossils_with_CoordinateCleaner_files │ │ ├── accessible-code-block-0.0.1 │ │ │ └── empty-anchor.js │ │ └── figure-html │ │ │ ├── unnamed-chunk-16-1.png │ │ │ ├── unnamed-chunk-17-1.png │ │ │ ├── unnamed-chunk-25-1.png │ │ │ ├── unnamed-chunk-25-2.png │ │ │ ├── unnamed-chunk-27-1.png │ │ │ ├── 
unnamed-chunk-27-2.png │ │ │ └── unnamed-chunk-7-1.png │ ├── Comparison_other_software.html │ ├── Comparison_other_software_files │ │ └── accessible-code-block-0.0.1 │ │ │ └── empty-anchor.js │ ├── Using_custom_gazetteers.html │ ├── Using_custom_gazetteers_files │ │ ├── accessible-code-block-0.0.1 │ │ │ └── empty-anchor.js │ │ └── figure-html │ │ │ ├── unnamed-chunk-1-1.png │ │ │ ├── unnamed-chunk-2-1.png │ │ │ └── unnamed-chunk-2-2.png │ ├── clgbif11-1.png │ ├── clgbif16-1.png │ ├── clgbif17-1.png │ ├── clgbif18-1.png │ ├── clgbif19-1.png │ ├── clgbif5-1.png │ ├── clgbif6-1.png │ ├── cusgaz1-1.png │ ├── cusgaz2-1.png │ ├── cusgaz2-2.png │ └── index.html ├── authors.html ├── bootstrap-toc.css ├── bootstrap-toc.js ├── docsearch.css ├── docsearch.js ├── index.html ├── link.svg ├── news │ └── index.html ├── pkgdown.css ├── pkgdown.js ├── pkgdown.yml └── reference │ ├── CoordinateCleaner-defunct.html │ ├── CoordinateCleaner-package.html │ ├── buffland.html │ ├── cc_cap.html │ ├── cc_cen.html │ ├── cc_coun.html │ ├── cc_dupl.html │ ├── cc_equ.html │ ├── cc_gbif.html │ ├── cc_inst.html │ ├── cc_iucn.html │ ├── cc_outl.html │ ├── cc_sea.html │ ├── cc_urb.html │ ├── cc_val.html │ ├── cc_zero.html │ ├── cd_ddmm.html │ ├── cd_round.html │ ├── cf_age.html │ ├── cf_equal.html │ ├── cf_outl.html │ ├── cf_range.html │ ├── clean_coordinates.html │ ├── clean_dataset.html │ ├── clean_fossils.html │ ├── countryref.html │ ├── index.html │ ├── institutions.html │ ├── pbdb_example.html │ ├── plot.spatialvalid-1.png │ ├── plot.spatialvalid.html │ └── write_pyrate.html ├── index.Rmd ├── inst ├── CITATION └── WORDLIST ├── man ├── CoordinateCleaner-package.Rd ├── aohi.Rd ├── buffland.Rd ├── buffsea.Rd ├── cc_aohi.Rd ├── cc_cap.Rd ├── cc_cen.Rd ├── cc_coun.Rd ├── cc_dupl.Rd ├── cc_equ.Rd ├── cc_gbif.Rd ├── cc_inst.Rd ├── cc_iucn.Rd ├── cc_outl.Rd ├── cc_sea.Rd ├── cc_urb.Rd ├── cc_val.Rd ├── cc_zero.Rd ├── cd_ddmm.Rd ├── cd_round.Rd ├── cf_age.Rd ├── cf_equal.Rd ├── cf_outl.Rd ├── 
cf_range.Rd ├── clean_coordinates.Rd ├── clean_dataset.Rd ├── clean_fossils.Rd ├── countryref.Rd ├── institutions.Rd ├── is.spatialvalid.Rd ├── pbdb_example.Rd ├── plot.spatialvalid.Rd └── write_pyrate.Rd ├── tests ├── testthat.R └── testthat │ ├── Rplots.pdf │ ├── test_coordinatelevel_functions.R │ ├── test_datasetlevel_functions.R │ ├── test_fossillevel_functions.R │ └── test_wrapper_functions.R └── vignettes ├── Cleaning_GBIF_data_with_CoordinateCleaner.R ├── Cleaning_GBIF_data_with_CoordinateCleaner.Rmd ├── Cleaning_GBIF_data_with_CoordinateCleaner.html ├── Cleaning_GBIF_data_with_CoordinateCleaner_files └── header-attrs-2.21 │ └── header-attrs.js ├── Cleaning_PBDB_fossils_with_CoordinateCleaner.R ├── Cleaning_PBDB_fossils_with_CoordinateCleaner.Rmd ├── Cleaning_PBDB_fossils_with_CoordinateCleaner.html ├── Comparison_other_software.Rmd ├── Comparison_other_software.html ├── CoordinateCleaner.bib ├── Using_custom_gazetteers.R ├── Using_custom_gazetteers.Rmd ├── Using_custom_gazetteers.Rmd.orig ├── Using_custom_gazetteers.html ├── apa.csl ├── cusgaz-cusgaz1-1.png ├── cusgaz-cusgaz2-1.png ├── cusgaz-cusgaz2-2.png ├── gbif-clgbif11-1.png ├── gbif-clgbif16-1.png ├── gbif-clgbif17-1.png ├── gbif-clgbif18-1.png ├── gbif-clgbif19-1.png ├── gbif-clgbif5-1.png ├── gbif-clgbif6-1.png ├── paleobioDB_angiosperms_PyRate.py ├── paleobioDB_angiosperms_TaxonList.txt ├── pbdb-unnamed-chunk-15-1.png ├── pbdb-unnamed-chunk-16-1.png ├── pbdb-unnamed-chunk-17-1.png ├── pbdb-unnamed-chunk-24-1.png ├── pbdb-unnamed-chunk-24-2.png ├── pbdb-unnamed-chunk-25-1.png ├── pbdb-unnamed-chunk-25-2.png ├── pbdb-unnamed-chunk-26-1.png ├── pbdb-unnamed-chunk-26-2.png ├── pbdb-unnamed-chunk-27-1.png ├── pbdb-unnamed-chunk-27-2.png ├── pbdb-unnamed-chunk-33-1.png ├── pbdb-unnamed-chunk-33-2.png ├── pbdb-unnamed-chunk-35-1.png ├── pbdb-unnamed-chunk-35-2.png └── pbdb-unnamed-chunk-7-1.png /.DS_Store: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ropensci/CoordinateCleaner/9195fb64154dd20a921a412b5e4d24cc18d3803c/.DS_Store -------------------------------------------------------------------------------- /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^CRAN-RELEASE$ 2 | ^codemeta\.json$ 3 | ^.*\.Rproj$ 4 | ^\.Rproj\.user$ 5 | extra_gazetteers 6 | README.Rmd 7 | CoordinateCleaner.Rcheck 8 | old 9 | .Rhistory 10 | .travis.yml 11 | Code.Rproj 12 | Tutorials 13 | CONTRIBUTING.md 14 | docs/ 15 | ^_pkgdown\.yml$ 16 | ^docs$ 17 | cran-comments.md 18 | pre_submission_tests.R 19 | ^articles$ 20 | _site.yml 21 | index.Rmd 22 | vignettes/Cleaning_GBIF_data_with_CoordinateCleaner.Rmd.orig 23 | Using_custom_gazetteers.Rmd.orig 24 | ^\.github$ 25 | ^pkgdown$ 26 | -------------------------------------------------------------------------------- /.github/.gitignore: -------------------------------------------------------------------------------- 1 | *.html 2 | -------------------------------------------------------------------------------- /.github/workflows/R-CMD-check.yaml: -------------------------------------------------------------------------------- 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples 2 | # Need help debugging build failures? 
Start at https://github.com/r-lib/actions#where-to-find-help 3 | on: 4 | push: 5 | branches: [main, master] 6 | pull_request: 7 | branches: [main, master] 8 | 9 | name: R-CMD-check 10 | 11 | jobs: 12 | R-CMD-check: 13 | runs-on: ${{ matrix.config.os }} 14 | 15 | name: ${{ matrix.config.os }} (${{ matrix.config.r }}) 16 | 17 | strategy: 18 | fail-fast: false 19 | matrix: 20 | config: 21 | - {os: macos-latest, r: 'release'} 22 | - {os: windows-latest, r: 'release'} 23 | - {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'} 24 | - {os: ubuntu-latest, r: 'release'} 25 | - {os: ubuntu-latest, r: 'oldrel-1'} 26 | 27 | env: 28 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 29 | R_KEEP_PKG_SOURCE: yes 30 | 31 | steps: 32 | - uses: actions/checkout@v3 33 | 34 | - uses: r-lib/actions/setup-pandoc@v2 35 | 36 | - uses: r-lib/actions/setup-r@v2 37 | with: 38 | r-version: ${{ matrix.config.r }} 39 | http-user-agent: ${{ matrix.config.http-user-agent }} 40 | use-public-rspm: true 41 | 42 | - uses: r-lib/actions/setup-r-dependencies@v2 43 | with: 44 | extra-packages: any::rcmdcheck 45 | needs: check 46 | 47 | - uses: r-lib/actions/check-r-package@v2 48 | with: 49 | upload-snapshots: true 50 | -------------------------------------------------------------------------------- /.github/workflows/pkgdown.yaml: -------------------------------------------------------------------------------- 1 | # Workflow derived from https://github.com/r-lib/actions/tree/master/examples 2 | # Need help debugging build failures? 
Start at https://github.com/r-lib/actions#where-to-find-help 3 | on: 4 | push: 5 | branches: [main, master] 6 | tags: ['*'] 7 | 8 | name: pkgdown 9 | 10 | jobs: 11 | pkgdown: 12 | runs-on: ubuntu-latest 13 | # Only restrict concurrency for non-PR jobs 14 | concurrency: 15 | group: pkgdown-${{ github.event_name != 'pull_request' || github.run_id }} 16 | env: 17 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 18 | permissions: 19 | contents: write 20 | steps: 21 | - uses: actions/checkout@v3 22 | 23 | - uses: r-lib/actions/setup-pandoc@v2 24 | 25 | - uses: r-lib/actions/setup-r@v2 26 | with: 27 | use-public-rspm: true 28 | 29 | - uses: r-lib/actions/setup-r-dependencies@v2 30 | with: 31 | extra-packages: any::pkgdown, local::. 32 | needs: website 33 | 34 | - name: Build site 35 | run: pkgdown::build_site_github_pages(new_process = FALSE, install = FALSE) 36 | shell: Rscript {0} 37 | 38 | - name: Deploy to GitHub pages 🚀 39 | if: github.event_name != 'pull_request' 40 | uses: JamesIves/github-pages-deploy-action@v4.4.1 41 | with: 42 | clean: false 43 | branch: gh-pages 44 | folder: docs -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .Rproj.user 2 | .Rhistory 3 | .RData 4 | .Ruserdata 5 | .Rhistory 6 | Package.Rproj 7 | articles/inst 8 | pre_submission_tests.R 9 | inst/doc 10 | /docs/articles/inst/ 11 | docs 12 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # CONTRIBUTING # 2 | 3 | ## Bugs, suggestions or feature requests? 4 | 5 | * Submit an issue on the [Issues page](https://github.com/azizka/CoordinateCleaner/issues) - be sure to include R session information and a reproducible example. 6 | 7 | ## Code contribution 8 | 9 | * If you want to contribute to the package - awesome. 
Please get in touch with [zizka.alexander@gmail.com](mailto:zizka.alexander@gmail.com). -------------------------------------------------------------------------------- /CRAN-RELEASE: -------------------------------------------------------------------------------- 1 | This package was submitted to CRAN on 2020-10-13. 2 | Once it is accepted, delete this file and tag the release (commit f876093). 3 | -------------------------------------------------------------------------------- /CoordinateCleaner.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: Default 4 | SaveWorkspace: Default 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | 15 | BuildType: Package 16 | PackageUseDevtools: Yes 17 | PackageInstallArgs: --no-multiarch --with-keep.source 18 | PackageCheckArgs: --as-cran _SP_EVOLUTION_STATUS_=2 R CMD check _R_CHECK_S3_METHODS_SHOW_POSSIBLE_ISSUES_=true 19 | PackageRoxygenize: rd,collate,namespace,vignette 20 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Type: Package 2 | Package: CoordinateCleaner 3 | Title: Automated Cleaning of Occurrence Records from Biological 4 | Collections 5 | Version: 3.0.1 6 | Authors@R: c(person(given = "Alexander", family = "Zizka", email = "zizka.alexander@gmail.com", 7 | role = c("aut", "cre")), 8 | person(given = "Daniele", family = "Silvestro", role = "ctb"), 9 | person(given = "Tobias", family = "Andermann", role = "ctb"), 10 | person(given = "Josue", family = "Azevedo", role = "ctb"), 11 | person(given = "Camila", family = "Duarte Ritter", role = "ctb"), 12 | person(given = "Daniel", family = "Edler", role = "ctb"), 13 | person(given = "Harith", family = "Farooq", role = "ctb"), 14 | 
person(given = "Andrei", family = "Herdean", role = "ctb"), 15 | person(given = "Maria", family = "Ariza", role = "ctb"), 16 | person(given = "Ruud", family = "Scharn", role = "ctb"), 17 | person(given = "Sten", family = "Svanteson", role = "ctb"), 18 | person(given = "Niklas", family = "Wengstrom", role = "ctb"), 19 | person(given = "Vera", family = "Zizka", role = "ctb"), 20 | person(given = "Alexandre", family ="Antonelli", role = "ctb"), 21 | person(given = "Bruno", family = "Vilela", role = "ctb", 22 | comment = "Bruno updated the package to remove dependencies on sp, raster, rgdal, maptools, and rgeos packages"), 23 | person("Irene", "Steves", role = "rev", 24 | comment = "Irene reviewed the package for ropensci, see "), 25 | person("Francisco", "Rodriguez-Sanchez", role = "rev", 26 | comment = "Francisco reviewed the package for ropensci, see ")) 27 | Description: Automated flagging of common spatial and temporal 28 | errors in biological and paleontological collection data, for the use 29 | in conservation, ecology and paleontology. Includes automated tests to 30 | easily flag (and exclude) records assigned to country or province 31 | centroid, the open ocean, the headquarters of the Global Biodiversity 32 | Information Facility, urban areas or the location of biodiversity 33 | institutions (museums, zoos, botanical gardens, universities). 34 | Furthermore identifies per species outlier coordinates, zero 35 | coordinates, identical latitude/longitude and invalid coordinates. 36 | Also implements an algorithm to identify data sets with a significant 37 | proportion of rounded coordinates. Especially suited for large data 38 | sets. The reference for the methodology is: Zizka et al. (2019) 39 | . 
40 | License: GPL-3 41 | URL: https://ropensci.github.io/CoordinateCleaner/ 42 | BugReports: https://github.com/ropensci/CoordinateCleaner/issues 43 | Depends: 44 | R (>= 3.5.0) 45 | Imports: 46 | dplyr, 47 | geosphere, 48 | ggplot2, 49 | graphics, 50 | grDevices, 51 | methods, 52 | rgbif, 53 | rnaturalearth (>= 0.3.2), 54 | stats, 55 | terra, 56 | tidyselect, 57 | utils 58 | Suggests: 59 | countrycode, 60 | covr, 61 | knitr, 62 | magrittr, 63 | maps, 64 | rmarkdown, 65 | rnaturalearthdata, 66 | sf, 67 | testthat, 68 | viridis 69 | Config/Needs/website: tidyverse, viridis, caret, msm, countrycode, cran/speciesgeocodeR 70 | VignetteBuilder: 71 | knitr 72 | Encoding: UTF-8 73 | Language: en-gb 74 | LazyData: true 75 | RoxygenNote: 7.2.3 76 | SystemRequirements: GDAL (>= 2.0.1) 77 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | S3method(plot,spatialvalid) 4 | S3method(summary,spatialvalid) 5 | export(cc_aohi) 6 | export(cc_cap) 7 | export(cc_cen) 8 | export(cc_coun) 9 | export(cc_dupl) 10 | export(cc_equ) 11 | export(cc_gbif) 12 | export(cc_inst) 13 | export(cc_iucn) 14 | export(cc_outl) 15 | export(cc_sea) 16 | export(cc_urb) 17 | export(cc_val) 18 | export(cc_zero) 19 | export(cd_ddmm) 20 | export(cd_round) 21 | export(cf_age) 22 | export(cf_equal) 23 | export(cf_outl) 24 | export(cf_range) 25 | export(clean_coordinates) 26 | export(clean_dataset) 27 | export(clean_fossils) 28 | export(is.spatialvalid) 29 | export(write_pyrate) 30 | importFrom(dplyr,bind_rows) 31 | importFrom(dplyr,inner_join) 32 | importFrom(dplyr,left_join) 33 | importFrom(dplyr,select) 34 | importFrom(geosphere,destPoint) 35 | importFrom(geosphere,distHaversine) 36 | importFrom(geosphere,distm) 37 | importFrom(ggplot2,aes_string) 38 | importFrom(ggplot2,borders) 39 | importFrom(ggplot2,coord_fixed) 40 | 
importFrom(ggplot2,element_blank) 41 | importFrom(ggplot2,element_text) 42 | importFrom(ggplot2,fortify) 43 | importFrom(ggplot2,geom_point) 44 | importFrom(ggplot2,geom_polygon) 45 | importFrom(ggplot2,map_data) 46 | importFrom(ggplot2,scale_colour_manual) 47 | importFrom(ggplot2,scale_shape_manual) 48 | importFrom(ggplot2,theme) 49 | importFrom(ggplot2,theme_bw) 50 | importFrom(grDevices,extendrange) 51 | importFrom(graphics,abline) 52 | importFrom(graphics,hist) 53 | importFrom(graphics,plot) 54 | importFrom(graphics,segments) 55 | importFrom(graphics,title) 56 | importFrom(methods,as) 57 | importFrom(methods,is) 58 | importFrom(rgbif,occ_count) 59 | importFrom(rnaturalearth,ne_download) 60 | importFrom(rnaturalearth,ne_file_name) 61 | importFrom(stats,IQR) 62 | importFrom(stats,aggregate) 63 | importFrom(stats,binom.test) 64 | importFrom(stats,complete.cases) 65 | importFrom(stats,cov) 66 | importFrom(stats,dist) 67 | importFrom(stats,mad) 68 | importFrom(stats,median) 69 | importFrom(stats,na.omit) 70 | importFrom(stats,quantile) 71 | importFrom(stats,runif) 72 | importFrom(terra,buffer) 73 | importFrom(terra,crop) 74 | importFrom(terra,crs) 75 | importFrom(terra,expanse) 76 | importFrom(terra,ext) 77 | importFrom(terra,extract) 78 | importFrom(terra,geom) 79 | importFrom(terra,geomtype) 80 | importFrom(terra,plot) 81 | importFrom(terra,project) 82 | importFrom(terra,rast) 83 | importFrom(terra,subset) 84 | importFrom(terra,union) 85 | importFrom(terra,vect) 86 | importFrom(tidyselect,starts_with) 87 | importFrom(utils,data) 88 | importFrom(utils,write.table) 89 | -------------------------------------------------------------------------------- /R/cc_aohi.R: -------------------------------------------------------------------------------- 1 | #' Identify Coordinates in Artificial Hotspot Occurrence Inventory 2 | #' 3 | #' Removes or flags records within Artificial Hotspot Occurrence Inventory. 
4 | #' Poorly geo-referenced occurrence records in biological databases are often 5 | #' erroneously geo-referenced to highly recurring coordinates that were assessed 6 | #' by Park et al 2022. See the reference for more details. 7 | #' 8 | #' 9 | #' @inheritParams cc_cap 10 | #' @param taxa Artificial Hotspot Occurrence Inventory (AHOI) were created based 11 | #' on four different taxa, birds, insecta, mammalia, and plantae. Users can 12 | #' choose to keep all, or any specific taxa subset to define the AHOI locations. 13 | #' Default is to keep all: c("Aves", "Insecta", "Mammalia", "Plantae"). 14 | #' @inherit cc_cap return 15 | #' 16 | #' @note See \url{https://ropensci.github.io/CoordinateCleaner/} for more 17 | #' details and tutorials. 18 | #' 19 | #' @keywords Coordinate cleaning 20 | #' @family Coordinates 21 | #' 22 | #' @references Park, D. S., Xie, Y., Thammavong, H. T., Tulaiha, R., & Feng, X. 23 | #' (2023). Artificial Hotspot Occurrence Inventory (AHOI). Journal of 24 | #' Biogeography, 50, 441–449. 
\doi{10.1111/jbi.14543} 25 | #' 26 | #' @examples 27 | #' 28 | #' x <- data.frame(species = letters[1:10], 29 | #' decimalLongitude = c(runif(99, -180, 180), -47.92), 30 | #' decimalLatitude = c(runif(99, -90,90), -15.78)) 31 | #' cc_aohi(x) 32 | #' 33 | #' @export 34 | #' @importFrom geosphere destPoint 35 | #' @importFrom terra vect ext crop buffer geom 36 | #' @importFrom utils data 37 | 38 | cc_aohi <- function(x, 39 | lon = "decimalLongitude", 40 | lat = "decimalLatitude", 41 | species = "species", 42 | taxa = c("Aves", "Insecta", "Mammalia", "Plantae"), 43 | buffer = 10000, 44 | geod = TRUE, 45 | value = "clean", 46 | verbose = TRUE) { 47 | 48 | # check value argument 49 | match.arg(value, choices = c("clean", "flagged")) 50 | 51 | if (verbose) { 52 | message("Testing Artificial Hotspot Occurrence Inventory") 53 | } 54 | if (buffer > 10 & !geod) { 55 | warnings("Using large buffer check 'geod'") 56 | } 57 | if (buffer < 100 & geod) { 58 | warnings("Using small buffer check 'geod'") 59 | } 60 | 61 | # set default projection 62 | wgs84 <- "+proj=longlat +datum=WGS84 +no_defs" 63 | 64 | # select relevant columns 65 | dat <- terra::vect(x[, c(lon, lat), drop = FALSE], 66 | geom = c(lon, lat), 67 | crs = wgs84) 68 | 69 | # Load ref 70 | aohi <- get0("aohi", envir = asNamespace("CoordinateCleaner")) 71 | aohi <- aohi[aohi$taxa %in% taxa, ] 72 | lon_lat <- c("decimalLongitude", "decimalLatitude") 73 | ref <- terra::vect(aohi[, lon_lat], 74 | geom = lon_lat, 75 | crs = wgs84) 76 | 77 | # fix buffer 0 78 | buffer <- ifelse(buffer == 0, 0.00000000001, buffer) 79 | 80 | if (geod) { 81 | # credits to https://seethedatablog.wordpress.com 82 | dg <- seq(from = 0, to = 360, by = 5) 83 | 84 | buff_XY <- 85 | geosphere::destPoint( 86 | p = terra::geom(ref)[, c("x", "y")], 87 | b = rep(dg, each = length(ref)), 88 | d = buffer 89 | ) 90 | 91 | id <- rep(seq_along(ref), times = length(dg)) 92 | 93 | 94 | lst <- split(data.frame(buff_XY), f = id) 95 | 96 | # Make SpatialPolygons 
out of the list of coordinates 97 | lst <- lapply(lst, as.matrix) 98 | ref <- 99 | sapply(lst, terra::vect, crs = wgs84, type = "polygons") 100 | ref <- terra::vect(ref) 101 | 102 | #point in polygon test 103 | ext_dat <- terra::extract(ref, dat) 104 | out <- is.na(ext_dat[!duplicated(ext_dat[, 1]), 2]) 105 | } else { 106 | ref_buff <- terra::buffer(ref, buffer) 107 | # There is a weird bug in terra, so I did this work around 108 | ref <- terra::vect(stats::na.omit(terra::geom(ref_buff)), 109 | type = "polygon", crs = ref) 110 | terra::values(ref) <- terra::values(ref_buff) 111 | 112 | ext_dat <- terra::extract(ref, dat) 113 | out <- is.na(ext_dat[!duplicated(ext_dat[, 1]), 2]) 114 | } 115 | 116 | # create output based on value argument 117 | if (verbose) { 118 | if (value == "clean") { 119 | message(sprintf("Removed %s records.", sum(!out))) 120 | } else { 121 | message(sprintf("Flagged %s records.", sum(!out))) 122 | } 123 | } 124 | 125 | switch(value, clean = return(x[out, ]), flagged = return(out)) 126 | } 127 | -------------------------------------------------------------------------------- /R/cc_cen.R: -------------------------------------------------------------------------------- 1 | #' Identify Coordinates in Vicinity of Country and Province Centroids 2 | #' 3 | #' Removes or flags records within a radius around the geographic centroids of political 4 | #' countries and provinces. Poorly geo-referenced occurrence records in 5 | #' biological databases are often erroneously geo-referenced to centroids. 6 | #' 7 | #' @param buffer numerical. The buffer around each province or country 8 | #' centroid, where records should be flagged as problematic. Units depend on geod. 9 | #' Default = 1 kilometre. 10 | #' @param test a character string. Specifying the details of the test. One of 11 | #' c(\dQuote{both}, \dQuote{country}, \dQuote{provinces}). If both tests for 12 | #' country and province centroids. 
13 | #' @inheritParams cc_cap 14 | #' 15 | #' @inherit cc_cap return 16 | #' 17 | #' @note See \url{https://ropensci.github.io/CoordinateCleaner/} for more 18 | #' details and tutorials. 19 | #' 20 | #' @keywords Coordinate cleaning 21 | #' @family Coordinates 22 | #' 23 | #' @examples 24 | #' 25 | #' x <- data.frame(species = letters[1:10], 26 | #' decimalLongitude = c(runif(99, -180, 180), -47.92), 27 | #' decimalLatitude = c(runif(99, -90,90), -15.78)) 28 | #' cc_cen(x, geod = FALSE) 29 | #' 30 | #' \dontrun{ 31 | #' cc_inst(x, value = "flagged", buffer = 50000) #geod = T 32 | #' } 33 | #' 34 | #' @export 35 | #' @importFrom geosphere destPoint 36 | #' @importFrom terra vect ext crop buffer geom 37 | 38 | cc_cen <- function(x, 39 | lon = "decimalLongitude", 40 | lat = "decimalLatitude", 41 | species = "species", 42 | buffer = 1000, 43 | geod = TRUE, 44 | test = "both", 45 | ref = NULL, 46 | verify = FALSE, 47 | value = "clean", 48 | verbose = TRUE) { 49 | 50 | # check value argument 51 | match.arg(value, choices = c("clean", "flagged")) 52 | match.arg(test, choices = c("both", "country", "provinces")) 53 | 54 | if (verbose) { 55 | message("Testing country centroids") 56 | } 57 | if (buffer > 10 & !geod) { 58 | warnings("Using large buffer check 'geod'") 59 | } 60 | if (buffer < 100 & geod) { 61 | warnings("Using small buffer check 'geod'") 62 | } 63 | 64 | # set default projection 65 | wgs84 <- "+proj=longlat +datum=WGS84 +no_defs" 66 | 67 | # select relevant columns 68 | dat <- terra::vect(x[, c(lon, lat), drop = FALSE], 69 | geom = c(lon, lat), 70 | crs = wgs84) 71 | 72 | if (is.null(ref)) { 73 | ref <- CoordinateCleaner::countryref 74 | 75 | switch(test, country = { 76 | ref <- ref[ref$type == "country", ] 77 | }, province = { 78 | ref <- ref[ref$type == "province", ] 79 | }) 80 | } else { 81 | #proj4string(ref) <- wgs84 82 | warning("assuming lat/lon for centroids.ref") 83 | } 84 | buffer <- ifelse(buffer == 0, 0.00000000001, buffer) 85 | limits <- 
terra::ext(terra::buffer(dat, width = buffer)) 86 | 87 | # subset of testdatset according to speed up buffer 88 | lon_lat <- c("centroid.lon", "centroid.lat") 89 | ref <- terra::crop( 90 | terra::vect(ref[, lon_lat], 91 | geom = lon_lat, 92 | crs = wgs84), 93 | limits) 94 | 95 | # run buffering incase no centroids are found in the study area 96 | if (is.null(ref) | nrow(ref) == 0) { 97 | out <- rep(TRUE, nrow(x)) 98 | } else { 99 | if (geod) { 100 | # credits to https://seethedatablog.wordpress.com 101 | dg <- seq(from = 0, to = 360, by = 5) 102 | 103 | buff_XY <- geosphere::destPoint(p = terra::geom(ref)[, c("x", "y")], 104 | b = rep(dg, each = length(ref)), 105 | d = buffer) 106 | 107 | id <- rep(seq_along(ref), times = length(dg)) 108 | 109 | 110 | lst <- split(data.frame(buff_XY), f = id) 111 | 112 | # Make SpatialPolygons out of the list of coordinates 113 | lst <- lapply(lst, as.matrix) 114 | ref <- sapply(lst, terra::vect, crs = wgs84, type = "polygons") 115 | ref <- terra::vect(ref) 116 | 117 | #point in polygon test 118 | ext_dat <- terra::extract(ref, dat) 119 | out <- is.na(ext_dat[!duplicated(ext_dat[, 1]), 2]) 120 | } else { 121 | ref <- terra::buffer(ref, width = buffer) 122 | ext_dat <- terra::extract(ref, dat) 123 | out <- is.na(ext_dat[!duplicated(ext_dat[, 1]), 2]) 124 | } 125 | } 126 | 127 | # implement the verification 128 | if (verify & sum(out) > 0) { 129 | # get flagged coordinates 130 | ver <- x[!out,] 131 | 132 | #count the instances of all flagged records 133 | ver_count <- aggregate(ver[[species]] ~ ver[[lon]] + 134 | ver[[lat]] , FUN = "length") 135 | names(ver_count) <- c(lon, lat, "coord.count") 136 | 137 | ver_spec <- aggregate(ver[[lon]] ~ ver[[species]], FUN = "length") 138 | names(ver_spec) <- c(species, "species.count") 139 | 140 | #test which flagged x occur multiple times 141 | tester <- data.frame(x, ord = seq_len(nrow(x))) 142 | tester <- merge(tester, ver_count, by = c(lon,lat), all = TRUE) 143 | tester <- merge(tester, 
ver_spec, by = species, all = TRUE) 144 | 145 | tester <- tester[order(tester$ord),] 146 | tester[is.na(tester)] <- 0 147 | 148 | #only flag those records that occure with only one coordinate in the buffer 149 | out <- tester$coord.count <= tester$species.count | out 150 | } 151 | # create output based on value argument 152 | if (verbose) { 153 | if (value == "clean") { 154 | message(sprintf("Removed %s records.", sum(!out))) 155 | } else { 156 | message(sprintf("Flagged %s records.", sum(!out))) 157 | } 158 | } 159 | 160 | switch(value, clean = return(x[out, ]), flagged = return(out)) 161 | } 162 | -------------------------------------------------------------------------------- /R/cc_coun.R: -------------------------------------------------------------------------------- 1 | #' Identify Coordinates Outside their Reported Country 2 | #' 3 | #' Removes or flags mismatches between geographic coordinates and additional 4 | #' country information (usually this information is reliably reported with 5 | #' specimens). Such a mismatch can occur for example, if latitude and longitude 6 | #' are switched. 7 | #' 8 | #' 9 | #' @param iso3 a character string. The column with the country assignment of 10 | #' each record in three letter ISO code. Default = \dQuote{countrycode}. 11 | #' @param ref SpatVector (geometry: polygons). Providing the geographic 12 | #' gazetteer. Can be any SpatVector (geometry: polygons), but the structure 13 | #' must be identical to \code{rnaturalearth::ne_countries(scale = "medium", 14 | #' returnclass = "sf")}. 15 | #' Default = \code{rnaturalearth::ne_countries(scale = "medium", returnclass = 16 | #' "sf")} 17 | #' @param ref_col the column name in the reference dataset, containing the 18 | #' relevant ISO codes for matching. Default is to "iso_a3_eh" which refers to 19 | #' the ISO-3 codes in the reference dataset. See notes. 20 | #' @param buffer numeric. Units are in meters. 
If provided, a buffer is 21 | #' created around each country polygon. 22 | #' @inheritParams cc_cen 23 | #' 24 | #' @inherit cc_cap return 25 | #' 26 | #' @note The ref_col argument allows to adapt the function to the structure of 27 | #' alternative reference datasets. For instance, for 28 | #' \code{rnaturalearth::ne_countries(scale = "small")}, the default will fail, 29 | #' but ref_col = "iso_a3" will work. 30 | #' 31 | #' @note With the default reference, records are flagged if they fall outside 32 | #' the terrestrial territory of countries, hence records in territorial waters 33 | #' might be flagged. See \url{https://ropensci.github.io/CoordinateCleaner/} 34 | #' for more details and tutorials. 35 | #' 36 | #' @keywords Coordinate cleaning 37 | #' @family Coordinates 38 | #' 39 | #' @examples 40 | #' 41 | #' \dontrun{ 42 | #' x <- data.frame(species = letters[1:10], 43 | #' decimalLongitude = runif(100, -20, 30), 44 | #' decimalLatitude = runif(100, 35,60), 45 | #' countrycode = "RUS") 46 | #' 47 | #' cc_coun(x, value = "flagged")#non-terrestrial records are flagged as wrong. 48 | #' } 49 | #' 50 | #' @export 51 | #' @importFrom terra vect geomtype extract 52 | #' @importFrom stats na.omit 53 | 54 | cc_coun <- function(x, 55 | lon = "decimalLongitude", 56 | lat = "decimalLatitude", 57 | iso3 = "countrycode", 58 | value = "clean", 59 | ref = NULL, 60 | ref_col = "iso_a3", 61 | verbose = TRUE, 62 | buffer = NULL) { 63 | 64 | # check function arguments for validity 65 | match.arg(value, choices = c("clean", "flagged")) 66 | if (!iso3 %in% names(x)) { 67 | stop("iso3 argument missing, please specify") 68 | } 69 | 70 | if (verbose) { 71 | message("Testing country identity") 72 | } 73 | 74 | # set reference and check for dependency 75 | if (is.null(ref)) { 76 | if (!requireNamespace("rnaturalearth", quietly = TRUE)) { 77 | stop("Install the 'rnaturalearth' package or provide a custom reference", 78 | call. 
= FALSE 79 | ) 80 | } 81 | ref <- terra::vect(rnaturalearth::ne_countries(scale = "medium", 82 | returnclass = "sf")) 83 | } else { 84 | #Enable sf formatted custom references 85 | if (any(is(ref) == "Spatial") | inherits(ref, "sf")) { 86 | ref <- terra::vect(ref) 87 | } 88 | # Check if object is a SpatVector 89 | if (!(inherits(ref, "SpatVector") & 90 | terra::geomtype(ref) == "polygons")) { 91 | stop("ref must be a SpatVector with geomtype 'polygons'") 92 | } 93 | #Check projection of custom reference and reproject if necessary 94 | ref <- reproj(ref) 95 | } 96 | 97 | # prepare data 98 | dat <- terra::vect(x[, c(lon, lat)], 99 | geom = c(lon, lat), 100 | crs = ref) 101 | 102 | # Buffer around countries 103 | if (is.numeric(buffer)) { 104 | buffer <- ifelse(buffer == 0, 0.00000000001, buffer) 105 | ref_buff <- terra::buffer(ref, buffer) 106 | # There is a weird bug in terra, so I did this work around 107 | ref <- terra::vect(stats::na.omit(terra::geom(ref_buff)), 108 | type = "polygon", crs = ref) 109 | terra::values(ref) <- terra::values(ref_buff) 110 | } 111 | 112 | # get country from coordinates and compare with provided country 113 | country <- terra::extract(ref, dat) 114 | count_dat <- as.character(unlist(x[, iso3])) 115 | 116 | if (is.numeric(buffer)) { 117 | out <- logical(length(dat)) 118 | for (i in seq_along(dat)) { 119 | out[i] <- count_dat[i] %in% country[country[, 1] == i, ref_col] 120 | } 121 | } else { 122 | country <- country[, ref_col] 123 | out <- as.character(country) == count_dat 124 | out[is.na(out)] <- FALSE # marine records are set to False 125 | } 126 | # return output 127 | if (verbose) { 128 | if (value == "clean") { 129 | message(sprintf("Removed %s records.", sum(!out))) 130 | } else { 131 | message(sprintf("Flagged %s records.", sum(!out))) 132 | } 133 | } 134 | 135 | switch(value, clean = return(x[out, ]), flagged = return(out)) 136 | } 137 | -------------------------------------------------------------------------------- 
/R/cc_dupl.R: --------------------------------------------------------------------------------
#' Identify Duplicated Records
#'
#' Removes or flags duplicated records based on species name and coordinates, as well as
#' user-defined additional columns. True (specimen) duplicates or duplicates
#' from the same species can make up the bulk of records in a biological
#' collection database, but are undesirable for many analyses. Both can be
#' flagged with this function, the former given enough additional information.
#'
#'
#' @param species a character string. The column with the species name. Default
#' = \dQuote{species}.
#' @param additions a vector of character strings. Additional columns to be
#' included in the test for duplication. For example as below, collector name
#' and collector number.
#' @inheritParams cc_cap
#'
#' @inherit cc_cap return
#'
#' @keywords Coordinate cleaning
#' @family Coordinates
#'
#' @examples
#'
#' x <- data.frame(species = letters[1:10],
#'                 decimalLongitude = sample(x = 0:10, size = 100, replace = TRUE),
#'                 decimalLatitude = sample(x = 0:10, size = 100, replace = TRUE),
#'                 collector = "Bonpl",
#'                 collector.number = c(1001, 354),
#'                 collection = rep(c("K", "WAG","FR", "P", "S"), 20))
#'
#' cc_dupl(x, value = "flagged")
#' cc_dupl(x, additions = c("collector", "collector.number"))
#'
#' @export
cc_dupl <- function(x,
                    lon = "decimalLongitude",
                    lat = "decimalLatitude",
                    species = "species",
                    additions = NULL,
                    value = "clean",
                    verbose = TRUE) {

  # validate the requested output type
  match.arg(value, choices = c("clean", "flagged"))

  if (verbose) {
    message("Testing duplicates")
  }

  # A record passes (TRUE) when its combination of coordinates, species name
  # and any user-supplied extra columns has not occurred earlier in x.
  dup_keys <- x[, c(lon, lat, species, additions)]
  out <- !duplicated(dup_keys)

  # report and return according to 'value'
  if (verbose) {
    n_removed <- sum(!out)
    if (value == "clean") {
      message(sprintf("Removed %s records.", n_removed))
    } else {
      message(sprintf("Flagged %s records.", n_removed))
    }
  }

  switch(value, clean = return(x[out, ]), flagged = return(out))
}
--------------------------------------------------------------------------------
/R/cc_equ.R: --------------------------------------------------------------------------------
#' Identify Records with Identical lat/lon
#'
#' Removes or flags records with equal latitude and longitude coordinates,
#' either exact or absolute. Equal coordinates can often indicate data entry
#' errors.
#'
#'
#' @param test character string. Defines if coordinates are compared exactly
#' (\dQuote{identical}) or on the absolute scale (i.e. -1 = 1,
#' \dQuote{absolute}). Default is to \dQuote{absolute}.
#' @inheritParams cc_cap
#'
#' @inherit cc_cap return
#'
#' @keywords Coordinate cleaning
#' @family Coordinates
#'
#' @examples
#'
#' x <- data.frame(species = letters[1:10],
#'                 decimalLongitude = runif(100, -180, 180),
#'                 decimalLatitude = runif(100, -90,90))
#'
#' cc_equ(x)
#' cc_equ(x, value = "flagged")
#'
#' @export
cc_equ <- function(x,
                   lon = "decimalLongitude",
                   lat = "decimalLatitude",
                   test = "absolute",
                   value = "clean",
                   verbose = TRUE) {

  # check value and test arguments
  match.arg(test, choices = c("absolute", "identical"))
  match.arg(value, choices = c("clean", "flagged"))

  if (verbose) {
    message("Testing equal lat/lon")
  }

  switch(test, absolute = {
    out <- !(abs(x[[lon]]) == abs(x[[lat]]))
  }, identical = {
    out <- !(x[[lon]] == x[[lat]])
  })

  if (verbose) {
    if (value == "clean"){
      message(sprintf("Removed %s records.", sum(!out)))
    } else {
      message(sprintf("Flagged %s records.",
sum(!out)))
    }
  }

  switch(value, clean = return(x[out, ]), flagged = return(out))
}
--------------------------------------------------------------------------------
/R/cc_gbif.R: --------------------------------------------------------------------------------
#' Identify Records Assigned to GBIF Headquarters
#'
#' Removes or flags records within 0.5 degree radius around the GBIF headquarters in
#' Copenhagen, DK.
#'
#' Not recommended if working with records from Denmark or the Copenhagen area.
#'
#' @param buffer numerical. The buffer around the GBIF headquarters,
#' where records should be flagged as problematic. Units depend on geod.
#' Default = 1000 m.
#' @param geod logical. If TRUE the radius is calculated
#' based on a sphere, buffer is in meters. If FALSE
#' the radius is calculated in degrees. Default = T.
#' @param species a character string. The column with the species name.
#' Only used when \code{verify = TRUE}. Default = \dQuote{species}.
#' @param verify logical. If TRUE, records inside the buffer are
#' cross-checked against the other flagged records: a record is only flagged
#' when its species occurs with a single coordinate inside the buffer (a
#' heuristic to retain genuine Copenhagen-area occurrences). Default = FALSE.
#' @inheritParams cc_cap
#'
#' @inherit cc_cap return
#'
#' @keywords Coordinate cleaning
#' @family Coordinates
#'
#' @examples
#'
#' x <- data.frame(species = "A",
#'                 decimalLongitude = c(12.58, 12.58),
#'                 decimalLatitude = c(55.67, 30.00))
#'
#' cc_gbif(x)
#' cc_gbif(x, value = "flagged")
#'
#' @export
#' @importFrom geosphere destPoint
#' @importFrom terra vect buffer extract

cc_gbif <- function(x,
                    lon = "decimalLongitude",
                    lat = "decimalLatitude",
                    species = "species",
                    buffer = 1000,
                    geod = TRUE,
                    verify = FALSE,
                    value = "clean",
                    verbose = TRUE) {

  # check function argument validity
  match.arg(value, choices = c("clean", "flagged"))

  if (verbose) {
    message("Testing GBIF headquarters, flagging records around Copenhagen")
  }

  # Warn on likely buffer/geod unit mismatches: with geod = FALSE the buffer
  # is in degrees (so > 10 is suspicious), with geod = TRUE it is in meters
  # (so < 100 is suspicious).
  # BUG FIX: these previously called warnings(), which only re-prints past
  # warnings and silently ignores its arguments; warning() is the function
  # that actually signals a warning condition.
  if (buffer > 10 & !geod) {
    warning("Using large buffer check 'geod'")
  }
  if (buffer < 100 & geod) {
    warning("Using small buffer check 'geod'")
  }

  # Fix buffer when equals 0 (terra::buffer misbehaves with width 0)
  buffer <- ifelse(buffer == 0, 0.00000000001, buffer)

  # set default projection
  wgs84 <- "+proj=longlat +datum=WGS84 +no_defs"
  dat <- terra::vect(x[, c(lon, lat)],
                     geom = c(lon, lat),
                     crs = wgs84)
  if (geod) {
    # Build an approximately circular polygon of geodesic radius 'buffer'
    # (meters) around the GBIF headquarters at 12.58 E / 55.67 N, from
    # destination points every 5 degrees of bearing.
    # credits to https://seethedatablog.wordpress.com
    dg <- seq(from = 0, to = 360, by = 5)

    buff_XY <- geosphere::destPoint(p = cbind(12.58, 55.67),
                                    b = rep(dg, each = 1),
                                    d = buffer)

    id <- rep(1, times = length(dg))

    lst <- split(data.frame(buff_XY), f = id)

    # Make SpatialPolygons out of the list of coordinates
    lst <- lapply(lst, as.matrix)
    ref <- sapply(lst, terra::vect, crs = wgs84, type = "polygons")
    ref <- terra::vect(ref)

    # point in polygon test; NA in the second extract column means the point
    # lies outside the buffer polygon, i.e. the record passes (TRUE)
    ext_dat <- terra::extract(ref, dat)
    out <- is.na(ext_dat[!duplicated(ext_dat[, 1]), 2])
  } else {
    # degree-based buffer directly around the headquarters coordinate
    ref_cen <- terra::vect(cbind(12.58, 55.67),
                           crs = wgs84)
    ref <- terra::buffer(ref_cen, width = buffer)
    # point in polygon test
    ext_dat <- terra::extract(ref, dat)
    out <- is.na(ext_dat[!duplicated(ext_dat[, 1]), 2])
  }

  # implement the verification
  # NOTE(review): the condition checks sum(out) (records that passed) rather
  # than sum(!out) (flagged records) -- confirm this is intended
  if(verify & sum(out) > 0){
    # get flagged coordinates
    ver <- x[!out,]

    # count the instances of all flagged records per coordinate
    ver_count <- aggregate(ver[[species]] ~ ver[[lon]] +
                             ver[[lat]] , FUN = "length")
    names(ver_count) <- c(lon, lat, "coord.count")

    # count flagged records per species
    ver_spec <- aggregate(ver[[lon]] ~ ver[[species]], FUN = "length")
    names(ver_spec) <- c(species, "species.count")

    # test which flagged x occur multiple times; 'ord' preserves row order
    tester <- data.frame(x, ord = seq_len(nrow(x)))
    tester <- merge(tester, ver_count, by = c(lon,lat), all = TRUE)
    tester <- merge(tester, ver_spec, by = species, all = TRUE)

    tester <- tester[order(tester$ord),]
    tester[is.na(tester)] <- 0

    # only flag those records that occur with only one coordinate in the buffer
    out <- tester$coord.count <= tester$species.count| out
  }

  if (verbose) {
    if(value == "clean"){
      message(sprintf("Removed %s records.", sum(!out)))
    }else{
      message(sprintf("Flagged %s records.", sum(!out)))
    }
  }

  switch(value, clean = return(x[out, ]), flagged = return(out))
}
--------------------------------------------------------------------------------
/R/cc_iucn.R: --------------------------------------------------------------------------------
#' Identify Records Outside Natural Ranges
#'
#' Removes or flags records outside of the provided natural range polygon, on a per species basis.
#' Expects one entry per species. See the example or
#' \url{https://www.iucnredlist.org/resources/spatial-data-download} for
#' the required polygon structure.
#'
#' Download natural range maps in suitable format for amphibians, birds,
#' mammals and reptiles
#' from \url{https://www.iucnredlist.org/resources/spatial-data-download}.
#' Note: the buffer radius is in degrees, thus will differ slightly between
#' different latitudes.
#'
#' @param range a SpatVector of natural ranges for species in x.
#' Must contain a column named as indicated by \code{species}. See details.
#' @param species a character string. The column with the species name.
#' Default = \dQuote{species}.
#' @param buffer numerical. The buffer around each species' range,
#' from where records should be flagged as problematic, in meters. Default = 0.
#' @inheritParams cc_cap
#'
#' @inherit cc_cap return
#'
#' @note See \url{https://ropensci.github.io/CoordinateCleaner/} for more
#' details and tutorials.
#'
#' @keywords Coordinate cleaning
#' @family Coordinates
#' @examples
#' library(terra)
#'
#' x <- data.frame(species = c("A", "B"),
#'                 decimalLongitude = runif(100, -170, 170),
#'                 decimalLatitude = runif(100, -80,80))
#'
#' range_species_A <- cbind(c(-45,-45,-60,-60,-45), c(-10,-25,-25,-10,-10))
#' rangeA <- terra::vect(range_species_A, "polygons")
#' range_species_B <- cbind(c(15,15,32,32,15), c(10,-10,-10,10,10))
#' rangeB <- terra::vect(range_species_B, "polygons")
#' range <- terra::vect(list(rangeA, rangeB))
#' range$binomial <- c("A", "B")
#'
#' cc_iucn(x = x, range = range, buffer = 0)
#'
#' @export
#' @importFrom dplyr bind_rows
#' @importFrom terra vect buffer extract geomtype subset crs

cc_iucn <- function(x,
                    range,
                    lon = "decimalLongitude",
                    lat = "decimalLatitude",
                    species = "species",
                    buffer = 0,
                    value = "clean",
                    verbose = TRUE){

  # Check value argument
  match.arg(value, choices = c("clean", "flagged"))

  if (verbose) {
    message("Testing natural ranges")
  }

  # Accept sp 'Spatial*' and sf objects by converting them to SpatVector
  if (any(is(range) == "Spatial") | inherits(range, "sf")) {
    range <- terra::vect(range)
  }
  # Check if object is a SpatVector
  # NOTE(review): the message says 'ref' but the argument is named 'range'
  if (!(inherits(range, "SpatVector") &
      terra::geomtype(range) == "polygons")) {
    stop("ref must be a SpatVector with geomtype 'polygons'")
  }

  # Prepare shape file
  ## Adapt to iucn polygons: IUCN shapefiles name the species column 'binomial'
  if("binomial" %in% names(range) &
     !species %in% names(range) &
     species %in% names(x)) {
    names(range)[names(range) == "binomial"] <- species
  }

  ## Reduce to species in dataset
  test_range <- range[[species]][, 1] %in% unique(unlist(x[, species]))
  range <- terra::subset(range, test_range)
  # Split records by species, remembering the original row order ('order')
  # so the per-species results can be reassembled in input order below
  dat <- data.frame(x, order = rownames(x))
  dat <- split(dat, f = dat[, species])

  # Apply buffer to ranges
  if (buffer != 0) {
    range <- terra::buffer(range, width = buffer)
  }

  # Check projection of ranges
  wgs84 <- "+proj=longlat +datum=WGS84 +no_defs"

  if (terra::crs(range) == "") {
    warning("no projection information for reference found,
            assuming '+proj=longlat +datum=WGS84 +no_defs'")
    terra::crs(range) <- wgs84
  }else if(terra::crs(range) != terra::crs(wgs84)) {
    range <- terra::project(range, wgs84)
    warning("reprojecting reference to '+proj=longlat +datum=WGS84 +no_defs'")
  }

  # Point-in-polygon-test, per species
  out <- lapply(dat, function(k){
    if (unique(k[, species]) %in% range[[species]][, 1]) {
      sub <- terra::vect(k[, c(lon, lat)],
                         crs = wgs84,
                         geom = c(lon, lat))
      # restrict the reference to this species' polygon(s)
      test_range_sub <- range[[species]][, 1] == unique(k[, species])
      range_sub <- terra::subset(range, test_range_sub)
      # point in polygon test; the first extract column is the point index,
      # NA in the second column means the point falls outside the range
      ext_dat <- terra::extract(range_sub, sub)
      flag <- !is.na(ext_dat[!duplicated(ext_dat[, 1]), 2])

      data.frame(order = k$order,
                 flag = flag)
    }else{
      # species without a range polygon are not tested and pass
      data.frame(order = k$order,
                 flag = TRUE)
    }
  })

  # reassemble in the original row order of x
  out <- dplyr::bind_rows(out)
  out <- out[order(as.numeric(as.character(out$order))), ]

  # Warning for species not in range
  tester <- unique(unlist(x[, species]))
  if(sum(!tester %in% range[[species]][, 1]) > 0){
    miss <- tester[!tester %in% range[[species]][, 1]]
    warning(sprintf("species not found in range and not tested %s\n", miss))
  }

  # Generate output
  if (verbose) {
    if(value == "clean"){
      message(sprintf("Removed %s records.", sum(!out$flag)))
    }else{
      message(sprintf("Flagged %s records.", sum(!out$flag)))
    }
  }

  switch(value, clean = return(x[out$flag, ]),
         flagged = return(out$flag))
}

--------------------------------------------------------------------------------
/R/cc_urb.R: --------------------------------------------------------------------------------
#' Identify Records Inside Urban Areas
#'
#' Removes or flags records from inside urban areas, based on a geographic
#' gazetteer. Often records from large databases span substantial time periods
#' (centuries) and old records might represent habitats which today are replaced
#' by city area.
#'
#'
#' @param ref a SpatVector. Providing the geographic gazetteer
#' with the urban areas. See details. By default
#' rnaturalearth::ne_download(scale = 'medium', type = 'urban_areas',
#' returnclass = "sf"). Can be any \code{SpatVector}, but the
#' structure must be identical to \code{rnaturalearth::ne_download()}.
#' @inheritParams cc_cap
#'
#' @inherit cc_cap return
#'
#' @note See \url{https://ropensci.github.io/CoordinateCleaner/} for more
#' details and tutorials.
#'
#' @keywords Coordinate cleaning
#' @family Coordinates
#'
#' @examples
#'
#' \dontrun{
#' x <- data.frame(species = letters[1:10],
#'                 decimalLongitude = runif(100, -180, 180),
#'                 decimalLatitude = runif(100, -90,90))
#'
#' cc_urb(x)
#' cc_urb(x, value = "flagged")
#' }
#'
#' @export
#' @importFrom terra vect crop project extract
#' @importFrom rnaturalearth ne_download ne_file_name

cc_urb <- function(x,
                   lon = "decimalLongitude",
                   lat = "decimalLatitude",
                   ref = NULL,
                   value = "clean",
                   verbose = TRUE) {

  # check value argument
  match.arg(value, choices = c("clean", "flagged"))

  if (verbose) {
    message("Testing urban areas")
  }

  # check for reference data.
  if (is.null(ref)) {
    # default gazetteer: Natural Earth urban areas, downloaded on demand
    message("Downloading urban areas via rnaturalearth")
    ref <-
      try(suppressWarnings(terra::vect(
        rnaturalearth::ne_download(scale = 'medium',
                                   type = 'urban_areas',
                                   returnclass = "sf")
      )),
      silent = TRUE)

    # If the download fails, warn and skip the test rather than erroring:
    # 'clean' returns x unchanged, 'flagged' returns all NA
    if (inherits(ref, "try-error")) {
      warning(sprintf("Gazetteer for urban areas not found at\n%s",
                      rnaturalearth::ne_file_name(scale = 'medium',
                                                  type = 'urban_areas',
                                                  full_url = TRUE)))
      warning("Skipping urban test")
      switch(value, clean = return(x), flagged = return(rep(NA, nrow(x))))
    }

  } else {
    # Enable sf formatted custom references
    if (any(is(ref) == c("Spatial")) | inherits(ref, "sf")) {
      ref <- terra::vect(ref)
    }
    # Check if object is a SpatVector
    if (!(inherits(ref, "SpatVector") &
        terra::geomtype(ref) == "polygons")) {
      stop("ref must be a SpatVector with geomtype 'polygons'")
    }
    ref <- reproj(ref)
  }

  # Prepare input points and extent
  wgs84 <- "+proj=longlat +datum=WGS84 +no_defs"

  dat <- terra::vect(x[, c(lon, lat)],
                     geom = c(lon, lat),
                     crs = wgs84)
  # crop the gazetteer to the point extent (+1 degree) to speed up extraction
  limits <- terra::ext(dat) + 1
  ref <- terra::crop(ref, limits)
  ref <- terra::project(ref, wgs84)

  # test if any points fall within the buffer in case no urban areas are found
  # in the study area
  # NOTE(review): terra::crop() returns an empty SpatVector rather than NULL
  # when nothing overlaps -- confirm this branch can actually trigger
  if (is.null(ref)) {
    out <- rep(TRUE, nrow(x))
  } else {
    # point in polygon test; NA in the second extract column means the point
    # is outside all urban polygons, i.e. the record passes (TRUE)
    ext_dat <- terra::extract(ref, dat)
    out <- is.na(ext_dat[!duplicated(ext_dat[, 1]), 2])
  }

  if (verbose) {
    if (value == "clean") {
      message(sprintf("Removed %s records.", sum(!out)))
    }else{
      message(sprintf("Flagged %s records.", sum(!out)))
    }
  }

  switch(value, clean = return(x[out, ]), flagged = return(out))
}
--------------------------------------------------------------------------------
/R/cc_val.R:
--------------------------------------------------------------------------------
#' Identify Invalid lat/lon Coordinates
#'
#' Removes or flags non-numeric and not available coordinates
#' as well as lat >90, lat <-90, lon > 180 and lon < -180 are flagged.
#'
#' This test is obligatory before running any further tests of
#' CoordinateCleaner, as additional tests only run with valid coordinates.
#'
#' @inheritParams cc_cap
#'
#' @inherit cc_cap return
#'
#' @note See \url{https://ropensci.github.io/CoordinateCleaner/} for more
#' details and tutorials.
#'
#' @keywords Coordinate cleaning
#' @family Coordinates
#'
#' @examples
#'
#' x <- data.frame(species = letters[1:10],
#'                 decimalLongitude = c(runif(106, -180, 180), NA, "13W33'", "67,09", 305),
#'                 decimalLatitude = runif(110, -90,90))
#'
#' cc_val(x)
#' cc_val(x, value = "flagged")
#'
#' @export
cc_val <- function(x,
                   lon = "decimalLongitude",
                   lat = "decimalLatitude",
                   value = "clean",
                   verbose = TRUE) {

  # validate the requested output type
  match.arg(value, choices = c("clean", "flagged"))

  if (verbose) {
    message("Testing coordinate validity")
  }

  # Coerce both coordinate columns to numeric; anything non-numeric
  # (e.g. "13W33'", "67,09") becomes NA and is flagged below. The coerced
  # columns are written back so the 'clean' output carries numeric coordinates.
  lon_num <- suppressWarnings(as.numeric(as.character(x[[lon]])))
  lat_num <- suppressWarnings(as.numeric(as.character(x[[lat]])))
  x[[lon]] <- lon_num
  x[[lat]] <- lat_num

  # A record passes (TRUE) only when both coordinates are available numbers
  # within the valid lat/lon domain.
  out <- !is.na(lon_num) & !is.na(lat_num) &
    lon_num >= -180 & lon_num <= 180 &
    lat_num >= -90 & lat_num <= 90

  if (verbose) {
    if (value == "clean") {
      message(sprintf("Removed %s records.", sum(!out)))
    } else {
      message(sprintf("Flagged %s records.", sum(!out)))
    }
  }

  switch(value, clean = return(x[out, ]), flagged = return(out))
}
--------------------------------------------------------------------------------
/R/cc_zero.R:
--------------------------------------------------------------------------------
#' Identify Zero Coordinates
#'
#' Removes or flags records with either zero longitude or latitude and a radius
#' around the point at zero longitude and zero latitude. These problems are
#' often due to erroneous data-entry or geo-referencing and can lead to typical
#' patterns of high diversity around the equator.
#'
#'
#' @param buffer numerical. The buffer around the 0/0 point,
#' where records should be flagged as problematic, in decimal
#' degrees. Default = 0.5.
#' @inheritParams cc_cap
#'
#' @inherit cc_cap return
#'
#' @note See \url{https://ropensci.github.io/CoordinateCleaner/} for more
#' details and tutorials.
#'
#' @keywords Coordinate cleaning
#' @family Coordinates
#'
#' @examples
#'
#' x <- data.frame(species = "A",
#'                 decimalLongitude = c(0,34.84, 0, 33.98),
#'                 decimalLatitude = c(23.08, 0, 0, 15.98))
#'
#' cc_zero(x)
#' cc_zero(x, value = "flagged")
#'
#' @export
#' @importFrom terra extract buffer vect
cc_zero <- function(x,
                    lon = "decimalLongitude",
                    lat = "decimalLatitude",
                    buffer = 0.5,
                    value = "clean",
                    verbose = TRUE) {

  # check value argument
  match.arg(value, choices = c("clean", "flagged"))

  if (verbose) {
    message("Testing zero coordinates")
  }

  # plain zero in coordinates (TRUE = record passes this sub-test)
  t1 <- !(x[[lon]] == 0 | x[[lat]] == 0)

  # radius around point 0/0
  dat <- terra::vect(x[, c(lon, lat)], geom = c(lon, lat))
  if (buffer == 0) { # error when buffer = 0
    buffer <- 0.00000000000001
  }
  # terra::vect() picks up the default 'lon'/'lat' column names as geometry
  buff <- terra::buffer(terra::vect(data.frame("lat" = 0, "lon" = 0)),
                        width = buffer)
  # NA in the second extract column means the point is outside the buffer
  ext_dat <- terra::extract(buff, dat)
  t2 <- is.na(ext_dat[!duplicated(ext_dat[, 1]), 2])

  # combine test results: a record passes only if it passes both sub-tests
  out <- Reduce("&", list(t1, t2))

  if (verbose) {
    if (value == "clean") {
      message(sprintf("Removed %s records.", sum(!out)))
    } else {
      message(sprintf("Flagged %s records.", sum(!out)))
    }
  }

  switch(value, clean = return(x[out, ]), flagged = return(out))
}
--------------------------------------------------------------------------------
/R/cf_equal.R: --------------------------------------------------------------------------------
#' Identify Fossils with equal min and max age
#'
#' Removes or flags records with equal minimum and maximum age.
#'
#' @inheritParams cf_age
#'
#' @inherit cc_cap return
#'
#' @note See \url{https://ropensci.github.io/CoordinateCleaner/} for more
#' details and tutorials.
#'
#' @keywords Temporal cleaning Fossils
#' @family fossils
#'
#' @examples
#'
#' minages <- runif(n = 10, min = 0.1, max = 25)
#' x <- data.frame(species = letters[1:10],
#'                 min_ma = minages,
#'                 max_ma = minages + runif(n = 10, min = 0, max = 10))
#' x <- rbind(x, data.frame(species = "z",
#'                          min_ma = 5,
#'                          max_ma = 5))
#'
#' cf_equal(x, value = "flagged")
#'
#' @export
cf_equal <- function(x, min_age = "min_ma",
                     max_age = "max_ma",
                     value = "clean",
                     verbose = TRUE) {
  match.arg(value, choices = c("clean", "flagged"))


  if (verbose) {
    message("Testing age validity")
  }

  # min_age == max_age
  t1 <- x[[max_age]] == x[[min_age]]

  # min_age > max_age

  t2 <- x[[min_age]] > x[[max_age]]

  flags <- t1 | t2

  # create output
  out <- rep(TRUE, nrow(x))
  out[flags] <- FALSE

  if (verbose) {
    if(value == "clean"){
      message(sprintf("Removed %s records.", sum(!out, na.rm = TRUE)))
    }else{
      message(sprintf("Flagged %s records.", sum(!out, na.rm = TRUE)))
    }
  }

  # value
switch(value, clean = return(x[out, ]), flagged = return(out))
}
--------------------------------------------------------------------------------
/R/internal_clean_coordinate.R: --------------------------------------------------------------------------------
# Check projection of custom reference and reproject to wgs84 if necessary.
# Returns the (possibly reprojected) reference; warns whenever it has to
# assume or change the CRS.
reproj <- function(ref) {
  wgs84 <- "+proj=longlat +datum=WGS84 +no_defs"
  ref_crs <- terra::crs(ref, proj = TRUE)
  # if no projection information is given assume wgs84
  if (ref_crs == "") {
    warning(
      "no projection information for reference found,
      assuming '+proj=longlat +datum=WGS84 +no_defs'"
    )
    terra::crs(ref) <- wgs84
  } else {
    if (is.na(ref_crs)) {
      warning(
        "no projection information for reference found,
        assuming '+proj=longlat +datum=WGS84 +no_defs'"
      )
      ref <- terra::project(ref, wgs84)
    } else if (ref_crs != wgs84) {
      # otherwise reproject
      ref <- terra::project(ref, wgs84)
      warning("reprojecting reference to '+proj=longlat +datum=WGS84 +no_defs'")
    }
  }
  return(ref)
}

# A function to create a raster from an input dataset, used in cc_outl for
# spatial thinning, based on a point dataset and a raster resolution.
# Returns a terra raster covering the (clamped) point extent whose cell
# values are the cell indices.

ras_create <- function(x, lat, lon, thinning_res){
  # get data extent, padded by two resolution steps
  ex <- terra::ext(terra::vect(x[, c(lon, lat)],
                               geom = c(lon, lat))) + thinning_res * 2

  # check for boundary conditions: clamp the padded extent to valid lat/lon
  if (ex[1] < -180 | ex[2] > 180 | ex[3] < -90 | ex[4] > 90) {
    warning("fixing raster boundaries, assuming lat/lon projection")

    if (ex[1] < -180) {
      ex[1] <- -180
    }

    if (ex[2] > 180) {
      ex[2] <- 180
    }

    if (ex[3] < -90) {
      ex[3] <- -90
    }

    if (ex[4] > 90) {
      ex[4] <- 90
    }
  }

  # create raster
  ras <- terra::rast(x = ex, resolution = thinning_res)

  # set cell ids as the layer values, so extraction yields cell indices
  vals <- seq_len(terra::ncell(ras))
  ras <- terra::setValues(ras, vals)

  return(ras)
}


# A function to get the distance between raster midpoints and
# output a data.frame with the distances and the cell IDs as row and column
# names for cc_outl.

ras_dist <- function(x, lat, lon, ras, weights) {
  # x = a data.frame of point coordinates, ras = a raster with cell IDs as
  # layer, weights = logical, shall the distance matrix be weighted by the
  # number of points per cell? First assign each point to a raster cell.
  pts <- terra::extract(x = ras,
                        y = terra::vect(x[, c(lon, lat)],
                                        geom = c(lon, lat),
                                        crs = ras))

  # convert to data.frame of cell ids plus cell-center coordinates
  midp <- data.frame(terra::as.points(ras),
                     terra::xyFromCell(ras, 1:terra::ncell(ras)))

  # retain only cells that contain points
  midp <- midp[midp$lyr.1 %in% unique(pts$lyr.1), , drop = FALSE]

  # order to match the order of first occurrence in 'pts'
  midp <- midp[match(unique(pts$lyr.1), midp$lyr.1), , drop = FALSE]


  # calculate geospheric distance between raster cells with points, in km
  dist <- geosphere::distm(midp[, c("x", "y")],
                           fun = geosphere::distHaversine) / 1000

  # set rownames and colnames to cell IDs
  dist <- as.data.frame(dist, row.names = as.integer(midp$lyr.1))
  names(dist) <- midp$lyr.1

  if (weights) {
    # approximate within-cell distance as half
    # the cell size, assuming 1 deg = 100km;
    # this is crude, but doesn't really matter
    dist[dist == 0] <- 100 * mean(terra::res(ras)) / 2

    # weight matrix to account for the number of points per cell
    ## the number of points in each cell
    cou <- table(pts$lyr.1)

    ## order to match the distance matrix rows/columns
    cou <- cou[match(unique(pts$lyr.1), names(cou))]

    # weight matrix, representing the number of point-pair distances between
    # or within the cells (points cell 1 * points cell 2)
    wm <- outer(cou, cou)

    # multiply matrix elements to get weighted sum
    dist <- round(dist * wm, 0)

    dist <- list(pts = pts, dist = dist, wm = wm)
  } else {
    # set diagonal to NA, so it does not influence the mean
    dist[dist == 0] <- NA

    dist <- list(pts = pts, dist = dist)
  }

  return(dist)
}
--------------------------------------------------------------------------------
/R/internal_write_pyrate.R: --------------------------------------------------------------------------------
# Recursively strip trailing characters from 'filename' until the character
# before the cut is a '.', i.e. return the name without its extension.
# NOTE(review): a filename containing no '.' never hits the base case and
# recurses past the empty string -- callers must pass names with an extension.
.NoExtension <- function(filename) {
  if (substr(filename, nchar(filename), nchar(filename)) == ".") {
    return(substr(filename, 1, nchar(filename) - 1))
  } else {
    .NoExtension(substr(filename, 1, nchar(filename) - 1))
  }
}
--------------------------------------------------------------------------------
/R/sysdata.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/CoordinateCleaner/9195fb64154dd20a921a412b5e4d24cc18d3803c/R/sysdata.rda
--------------------------------------------------------------------------------
/README.md: --------------------------------------------------------------------------------
# CoordinateCleaner v3.0
[![CRAN_Status_Badge](https://www.r-pkg.org/badges/version/CoordinateCleaner)](https://cranlogs.r-pkg.org:443/badges/CoordinateCleaner)
[![downloads](https://cranlogs.r-pkg.org/badges/grand-total/CoordinateCleaner)](https://cranlogs.r-pkg.org:443/badges/grand-total/CoordinateCleaner)
[![rstudio mirror downloads](https://cranlogs.r-pkg.org/badges/CoordinateCleaner)](https://cranlogs.r-pkg.org:443/badges/CoordinateCleaner)
[![Project Status: Active – The project has reached a stable, usable state and is being actively developed.](https://www.repostatus.org/badges/latest/active.svg)](https://www.repostatus.org/#active)
[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.2539408.svg)](https://doi.org/10.5281/zenodo.2539408) 7 | [![rOpenSci peer-review](https://badges.ropensci.org/210_status.svg)](https://github.com/ropensci/software-review/issues/210) 8 | 9 | **CoordinateCleaner has been updated to version 3.0 on github and on CRAN to adapt to the retirement of sp and raster. The update may not be compatible with analysis-pipelines build with version 2.x*** 10 | 11 | Automated flagging of common spatial and temporal errors in biological and palaeontological collection data, for the use in conservation, ecology and palaeontology. Specifically includes tests for 12 | 13 | * General coordinate validity 14 | * Country and province centroids 15 | * Capital coordinates 16 | * Coordinates of biodiversity institutions 17 | * Spatial outliers 18 | * Temporal outliers 19 | * Coordinate-country discordance 20 | * Duplicated coordinates per species 21 | * Assignment to the location of the GBIF headquarters 22 | * Urban areas 23 | * Seas 24 | * Plain zeros 25 | * Equal longitude and latitude 26 | * Rounded coordinates 27 | * DDMM to DD.DD coordinate conversion errors 28 | * Large temporal uncertainty (fossils) 29 | * Equal minimum and maximum ages (fossils) 30 | * Spatio-temporal outliers (fossils) 31 | 32 | CoordinateCleaner can be particularly useful to improve data quality when using data from GBIF (e.g. obtained with [rgbif]( https://github.com/ropensci/rgbif)) or the Paleobiology database (e.g. obtained with [paleobioDB](https://github.com/ropensci/paleobioDB)) for historical biogeography (e.g. with [BioGeoBEARS](https://CRAN.R-project.org/package=BioGeoBEARS) or [phytools](https://CRAN.R-project.org/package=phytools)), automated conservation assessment (e.g. with [speciesgeocodeR](https://github.com/azizka/speciesgeocodeR/wiki) or [conR](https://CRAN.R-project.org/package=ConR)) or species distribution modelling (e.g. 
with [dismo](https://CRAN.R-project.org/package=dismo) or [sdm](https://CRAN.R-project.org/package=sdm)). See [scrubr](https://github.com/ropensci-archive/scrubr) and [taxize](https://github.com/ropensci/taxize) for complementary taxonomic cleaning or [biogeo](https://github.com/cran/biogeo) for correcting spatial coordinate errors. 33 | 34 | See [News](https://github.com/ropensci/CoordinateCleaner/blob/master/NEWS.md) for update information. 35 | 36 | # Installation 37 | ## Stable from CRAN 38 | 39 | ```r 40 | install.packages("CoordinateCleaner") 41 | library(CoordinateCleaner) 42 | ``` 43 | 44 | ## Developmental from GitHub 45 | ```r 46 | devtools::install_github("ropensci/CoordinateCleaner") 47 | library(CoordinateCleaner) 48 | ``` 49 | 50 | # Usage 51 | A simple example: 52 | 53 | ```r 54 | # Simulate example data 55 | minages <- runif(250, 0, 65) 56 | exmpl <- data.frame(species = sample(letters, size = 250, replace = TRUE), 57 | decimalLongitude = runif(250, min = 42, max = 51), 58 | decimalLatitude = runif(250, min = -26, max = -11), 59 | min_ma = minages, 60 | max_ma = minages + runif(250, 0.1, 65), 61 | dataset = "clean") 62 | 63 | # Run record-level tests 64 | rl <- clean_coordinates(x = exmpl) 65 | summary(rl) 66 | plot(rl) 67 | 68 | # Dataset level 69 | dsl <- clean_dataset(exmpl) 70 | 71 | # For fossils 72 | fl <- clean_fossils(x = exmpl, 73 | taxon = "species", 74 | lon = "decimalLongitude", 75 | lat = "decimalLatitude") 76 | summary(fl) 77 | 78 | # Alternative example using the pipe 79 | library(tidyverse) 80 | 81 | cl <- exmpl %>% 82 | cc_val()%>% 83 | cc_cap()%>% 84 | cd_ddmm()%>% 85 | cf_range(lon = "decimalLongitude", 86 | lat = "decimalLatitude", 87 | taxon ="species") 88 | ``` 89 | 90 | # Documentation 91 | Pipelines for cleaning data from the Global Biodiversity Information Facility (GBIF) and the Paleobiology Database (PaleobioDB) are available in [here](https://ropensci.github.io/CoordinateCleaner/articles/). 
92 | 93 | 94 | # Contributing 95 | See the [CONTRIBUTING](https://github.com/ropensci/CoordinateCleaner/blob/master/CONTRIBUTING.md) document. 96 | 97 | # Citation 98 | Zizka A, Silvestro D, Andermann T, Azevedo J, Duarte Ritter C, Edler D, Farooq H, Herdean A, Ariza M, Scharn R, Svanteson S, Wengtrom N, Zizka V & Antonelli A (2019) CoordinateCleaner: standardized cleaning of occurrence records from biological collection databases. Methods in Ecology and Evolution, 10(5):744-751, doi:10.1111/2041-210X.13152, https://github.com/ropensci/CoordinateCleaner 99 | 100 | [![ropensci_footer](https://ropensci.org/public_images/ropensci_footer.png)](https://ropensci.org) 101 | 102 | -------------------------------------------------------------------------------- /_pkgdown.yml: -------------------------------------------------------------------------------- 1 | reference: 2 | - title: Wrapper functions 3 | contents: 4 | - has_concept("Wrapper functions") 5 | - title: Geographic coordinate cleaning 6 | contents: 7 | - has_concept("Coordinates") 8 | - title: Dataset-level cleaning 9 | contents: 10 | - has_concept("Datasets") 11 | - title: Fossil cleaning 12 | contents: 13 | - has_concept("fossils") 14 | - title: Data 15 | contents: 16 | - buffland 17 | - buffsea 18 | - countryref 19 | - institutions 20 | - pbdb_example 21 | - aohi 22 | - title: Visualization 23 | contents: 24 | - plot.spatialvalid 25 | - title: Check 26 | contents: 27 | - is.spatialvalid 28 | -------------------------------------------------------------------------------- /_site.yml: -------------------------------------------------------------------------------- 1 | name: CoordinateCleaner 2 | output_dir: docs 3 | navbar: 4 | title: CoordinateCleaner 5 | type: inverse 6 | left: 7 | - text: Home 8 | icon: fa-home 9 | href: index.html 10 | - text: Articles 11 | menu: 12 | - text: Cleaning point occurrence data (GBIF) 13 | href: vignettes/Cleaning_GBIF_data_with_CoordinateCleaner.html 14 | - text: Cleaning fossil 
records (PBDB) 15 | href: vignettes/Cleaning_PBDB_fossils_with_CoordinateCleaner.html 16 | - text: Using custom gazetteers 17 | href: vignettes/Using_custom_gazetters.html 18 | - text: Geographic outliers 19 | href: articles/Geographic_outliers.html 20 | - text: Dataset level cleaning 21 | href: articles/Dataset_level_cleaning.html 22 | - text: Comparison other software 23 | href: Comparison_other_software.html 24 | right: 25 | - text: GitHub 26 | href: https://github.com/ropensci/CoordinateCleaner 27 | output: 28 | html_document: 29 | lib_dir: site_libs 30 | self_contained: no 31 | -------------------------------------------------------------------------------- /cran-comments.md: -------------------------------------------------------------------------------- 1 | # Version 2.0-20 2 | 3 | Fixing a warning resulting from unbalanced code chunk delimiters in one of the vignettes in the previous version (2.0-18) on CRAN. 4 | 5 | Additionally, change of maintainer email due to a change in institution. 6 | 7 | The NOTE on spelling errors in the DESCRIPTION is spurious in my opinion, since it flags my last name and Latin abbreviation "et al" from the literature reference. 8 | 9 | The package has been removed from CRAN due to slow response time. Response time was slow because I am on parental leave. 
-------------------------------------------------------------------------------- /data/aohi.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/CoordinateCleaner/9195fb64154dd20a921a412b5e4d24cc18d3803c/data/aohi.rda -------------------------------------------------------------------------------- /data/buffland.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/CoordinateCleaner/9195fb64154dd20a921a412b5e4d24cc18d3803c/data/buffland.rda -------------------------------------------------------------------------------- /data/buffsea.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/CoordinateCleaner/9195fb64154dd20a921a412b5e4d24cc18d3803c/data/buffsea.rda -------------------------------------------------------------------------------- /data/countryref.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/CoordinateCleaner/9195fb64154dd20a921a412b5e4d24cc18d3803c/data/countryref.rda -------------------------------------------------------------------------------- /data/institutions.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/CoordinateCleaner/9195fb64154dd20a921a412b5e4d24cc18d3803c/data/institutions.rda -------------------------------------------------------------------------------- /data/pbdb_example.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/CoordinateCleaner/9195fb64154dd20a921a412b5e4d24cc18d3803c/data/pbdb_example.rda -------------------------------------------------------------------------------- 
/docs/articles/Cleaning_GBIF_data_with_CoordinateCleaner_files/accessible-code-block-0.0.1/empty-anchor.js: -------------------------------------------------------------------------------- 1 | // Hide empty tag within highlighted CodeBlock for screen reader accessibility (see https://github.com/jgm/pandoc/issues/6352#issuecomment-626106786) --> 2 | // v0.0.1 3 | // Written by JooYoung Seo (jooyoung@psu.edu) and Atsushi Yasumoto on June 1st, 2020. 4 | 5 | document.addEventListener('DOMContentLoaded', function() { 6 | const codeList = document.getElementsByClassName("sourceCode"); 7 | for (var i = 0; i < codeList.length; i++) { 8 | var linkList = codeList[i].getElementsByTagName('a'); 9 | for (var j = 0; j < linkList.length; j++) { 10 | if (linkList[j].innerHTML === "") { 11 | linkList[j].setAttribute('aria-hidden', 'true'); 12 | } 13 | } 14 | } 15 | }); 16 | -------------------------------------------------------------------------------- /docs/articles/Cleaning_GBIF_data_with_CoordinateCleaner_files/figure-html/unnamed-chunk-11-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/CoordinateCleaner/9195fb64154dd20a921a412b5e4d24cc18d3803c/docs/articles/Cleaning_GBIF_data_with_CoordinateCleaner_files/figure-html/unnamed-chunk-11-1.png -------------------------------------------------------------------------------- /docs/articles/Cleaning_GBIF_data_with_CoordinateCleaner_files/figure-html/unnamed-chunk-16-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/CoordinateCleaner/9195fb64154dd20a921a412b5e4d24cc18d3803c/docs/articles/Cleaning_GBIF_data_with_CoordinateCleaner_files/figure-html/unnamed-chunk-16-1.png -------------------------------------------------------------------------------- /docs/articles/Cleaning_GBIF_data_with_CoordinateCleaner_files/figure-html/unnamed-chunk-17-1.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/CoordinateCleaner/9195fb64154dd20a921a412b5e4d24cc18d3803c/docs/articles/Cleaning_GBIF_data_with_CoordinateCleaner_files/figure-html/unnamed-chunk-17-1.png -------------------------------------------------------------------------------- /docs/articles/Cleaning_GBIF_data_with_CoordinateCleaner_files/figure-html/unnamed-chunk-18-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/CoordinateCleaner/9195fb64154dd20a921a412b5e4d24cc18d3803c/docs/articles/Cleaning_GBIF_data_with_CoordinateCleaner_files/figure-html/unnamed-chunk-18-1.png -------------------------------------------------------------------------------- /docs/articles/Cleaning_GBIF_data_with_CoordinateCleaner_files/figure-html/unnamed-chunk-19-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/CoordinateCleaner/9195fb64154dd20a921a412b5e4d24cc18d3803c/docs/articles/Cleaning_GBIF_data_with_CoordinateCleaner_files/figure-html/unnamed-chunk-19-1.png -------------------------------------------------------------------------------- /docs/articles/Cleaning_GBIF_data_with_CoordinateCleaner_files/figure-html/unnamed-chunk-5-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/CoordinateCleaner/9195fb64154dd20a921a412b5e4d24cc18d3803c/docs/articles/Cleaning_GBIF_data_with_CoordinateCleaner_files/figure-html/unnamed-chunk-5-1.png -------------------------------------------------------------------------------- /docs/articles/Cleaning_GBIF_data_with_CoordinateCleaner_files/figure-html/unnamed-chunk-6-1.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ropensci/CoordinateCleaner/9195fb64154dd20a921a412b5e4d24cc18d3803c/docs/articles/Cleaning_GBIF_data_with_CoordinateCleaner_files/figure-html/unnamed-chunk-6-1.png -------------------------------------------------------------------------------- /docs/articles/Cleaning_PBDB_fossils_with_CoordinateCleaner_files/accessible-code-block-0.0.1/empty-anchor.js: -------------------------------------------------------------------------------- 1 | // Hide empty tag within highlighted CodeBlock for screen reader accessibility (see https://github.com/jgm/pandoc/issues/6352#issuecomment-626106786) --> 2 | // v0.0.1 3 | // Written by JooYoung Seo (jooyoung@psu.edu) and Atsushi Yasumoto on June 1st, 2020. 4 | 5 | document.addEventListener('DOMContentLoaded', function() { 6 | const codeList = document.getElementsByClassName("sourceCode"); 7 | for (var i = 0; i < codeList.length; i++) { 8 | var linkList = codeList[i].getElementsByTagName('a'); 9 | for (var j = 0; j < linkList.length; j++) { 10 | if (linkList[j].innerHTML === "") { 11 | linkList[j].setAttribute('aria-hidden', 'true'); 12 | } 13 | } 14 | } 15 | }); 16 | -------------------------------------------------------------------------------- /docs/articles/Cleaning_PBDB_fossils_with_CoordinateCleaner_files/figure-html/unnamed-chunk-16-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/CoordinateCleaner/9195fb64154dd20a921a412b5e4d24cc18d3803c/docs/articles/Cleaning_PBDB_fossils_with_CoordinateCleaner_files/figure-html/unnamed-chunk-16-1.png -------------------------------------------------------------------------------- /docs/articles/Cleaning_PBDB_fossils_with_CoordinateCleaner_files/figure-html/unnamed-chunk-17-1.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ropensci/CoordinateCleaner/9195fb64154dd20a921a412b5e4d24cc18d3803c/docs/articles/Cleaning_PBDB_fossils_with_CoordinateCleaner_files/figure-html/unnamed-chunk-17-1.png -------------------------------------------------------------------------------- /docs/articles/Cleaning_PBDB_fossils_with_CoordinateCleaner_files/figure-html/unnamed-chunk-25-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/CoordinateCleaner/9195fb64154dd20a921a412b5e4d24cc18d3803c/docs/articles/Cleaning_PBDB_fossils_with_CoordinateCleaner_files/figure-html/unnamed-chunk-25-1.png -------------------------------------------------------------------------------- /docs/articles/Cleaning_PBDB_fossils_with_CoordinateCleaner_files/figure-html/unnamed-chunk-25-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/CoordinateCleaner/9195fb64154dd20a921a412b5e4d24cc18d3803c/docs/articles/Cleaning_PBDB_fossils_with_CoordinateCleaner_files/figure-html/unnamed-chunk-25-2.png -------------------------------------------------------------------------------- /docs/articles/Cleaning_PBDB_fossils_with_CoordinateCleaner_files/figure-html/unnamed-chunk-27-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/CoordinateCleaner/9195fb64154dd20a921a412b5e4d24cc18d3803c/docs/articles/Cleaning_PBDB_fossils_with_CoordinateCleaner_files/figure-html/unnamed-chunk-27-1.png -------------------------------------------------------------------------------- /docs/articles/Cleaning_PBDB_fossils_with_CoordinateCleaner_files/figure-html/unnamed-chunk-27-2.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ropensci/CoordinateCleaner/9195fb64154dd20a921a412b5e4d24cc18d3803c/docs/articles/Cleaning_PBDB_fossils_with_CoordinateCleaner_files/figure-html/unnamed-chunk-27-2.png -------------------------------------------------------------------------------- /docs/articles/Cleaning_PBDB_fossils_with_CoordinateCleaner_files/figure-html/unnamed-chunk-7-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/CoordinateCleaner/9195fb64154dd20a921a412b5e4d24cc18d3803c/docs/articles/Cleaning_PBDB_fossils_with_CoordinateCleaner_files/figure-html/unnamed-chunk-7-1.png -------------------------------------------------------------------------------- /docs/articles/Comparison_other_software_files/accessible-code-block-0.0.1/empty-anchor.js: -------------------------------------------------------------------------------- 1 | // Hide empty tag within highlighted CodeBlock for screen reader accessibility (see https://github.com/jgm/pandoc/issues/6352#issuecomment-626106786) --> 2 | // v0.0.1 3 | // Written by JooYoung Seo (jooyoung@psu.edu) and Atsushi Yasumoto on June 1st, 2020. 
4 | 5 | document.addEventListener('DOMContentLoaded', function() { 6 | const codeList = document.getElementsByClassName("sourceCode"); 7 | for (var i = 0; i < codeList.length; i++) { 8 | var linkList = codeList[i].getElementsByTagName('a'); 9 | for (var j = 0; j < linkList.length; j++) { 10 | if (linkList[j].innerHTML === "") { 11 | linkList[j].setAttribute('aria-hidden', 'true'); 12 | } 13 | } 14 | } 15 | }); 16 | -------------------------------------------------------------------------------- /docs/articles/Using_custom_gazetteers_files/accessible-code-block-0.0.1/empty-anchor.js: -------------------------------------------------------------------------------- 1 | // Hide empty tag within highlighted CodeBlock for screen reader accessibility (see https://github.com/jgm/pandoc/issues/6352#issuecomment-626106786) --> 2 | // v0.0.1 3 | // Written by JooYoung Seo (jooyoung@psu.edu) and Atsushi Yasumoto on June 1st, 2020. 4 | 5 | document.addEventListener('DOMContentLoaded', function() { 6 | const codeList = document.getElementsByClassName("sourceCode"); 7 | for (var i = 0; i < codeList.length; i++) { 8 | var linkList = codeList[i].getElementsByTagName('a'); 9 | for (var j = 0; j < linkList.length; j++) { 10 | if (linkList[j].innerHTML === "") { 11 | linkList[j].setAttribute('aria-hidden', 'true'); 12 | } 13 | } 14 | } 15 | }); 16 | -------------------------------------------------------------------------------- /docs/articles/Using_custom_gazetteers_files/figure-html/unnamed-chunk-1-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/CoordinateCleaner/9195fb64154dd20a921a412b5e4d24cc18d3803c/docs/articles/Using_custom_gazetteers_files/figure-html/unnamed-chunk-1-1.png -------------------------------------------------------------------------------- /docs/articles/Using_custom_gazetteers_files/figure-html/unnamed-chunk-2-1.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/CoordinateCleaner/9195fb64154dd20a921a412b5e4d24cc18d3803c/docs/articles/Using_custom_gazetteers_files/figure-html/unnamed-chunk-2-1.png -------------------------------------------------------------------------------- /docs/articles/Using_custom_gazetteers_files/figure-html/unnamed-chunk-2-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/CoordinateCleaner/9195fb64154dd20a921a412b5e4d24cc18d3803c/docs/articles/Using_custom_gazetteers_files/figure-html/unnamed-chunk-2-2.png -------------------------------------------------------------------------------- /docs/articles/clgbif11-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/CoordinateCleaner/9195fb64154dd20a921a412b5e4d24cc18d3803c/docs/articles/clgbif11-1.png -------------------------------------------------------------------------------- /docs/articles/clgbif16-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/CoordinateCleaner/9195fb64154dd20a921a412b5e4d24cc18d3803c/docs/articles/clgbif16-1.png -------------------------------------------------------------------------------- /docs/articles/clgbif17-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/CoordinateCleaner/9195fb64154dd20a921a412b5e4d24cc18d3803c/docs/articles/clgbif17-1.png -------------------------------------------------------------------------------- /docs/articles/clgbif18-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/CoordinateCleaner/9195fb64154dd20a921a412b5e4d24cc18d3803c/docs/articles/clgbif18-1.png 
-------------------------------------------------------------------------------- /docs/articles/clgbif19-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/CoordinateCleaner/9195fb64154dd20a921a412b5e4d24cc18d3803c/docs/articles/clgbif19-1.png -------------------------------------------------------------------------------- /docs/articles/clgbif5-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/CoordinateCleaner/9195fb64154dd20a921a412b5e4d24cc18d3803c/docs/articles/clgbif5-1.png -------------------------------------------------------------------------------- /docs/articles/clgbif6-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/CoordinateCleaner/9195fb64154dd20a921a412b5e4d24cc18d3803c/docs/articles/clgbif6-1.png -------------------------------------------------------------------------------- /docs/articles/cusgaz1-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/CoordinateCleaner/9195fb64154dd20a921a412b5e4d24cc18d3803c/docs/articles/cusgaz1-1.png -------------------------------------------------------------------------------- /docs/articles/cusgaz2-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/CoordinateCleaner/9195fb64154dd20a921a412b5e4d24cc18d3803c/docs/articles/cusgaz2-1.png -------------------------------------------------------------------------------- /docs/articles/cusgaz2-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/CoordinateCleaner/9195fb64154dd20a921a412b5e4d24cc18d3803c/docs/articles/cusgaz2-2.png 
-------------------------------------------------------------------------------- /docs/bootstrap-toc.css: -------------------------------------------------------------------------------- 1 | /*! 2 | * Bootstrap Table of Contents v0.4.1 (http://afeld.github.io/bootstrap-toc/) 3 | * Copyright 2015 Aidan Feldman 4 | * Licensed under MIT (https://github.com/afeld/bootstrap-toc/blob/gh-pages/LICENSE.md) */ 5 | 6 | /* modified from https://github.com/twbs/bootstrap/blob/94b4076dd2efba9af71f0b18d4ee4b163aa9e0dd/docs/assets/css/src/docs.css#L548-L601 */ 7 | 8 | /* All levels of nav */ 9 | nav[data-toggle='toc'] .nav > li > a { 10 | display: block; 11 | padding: 4px 20px; 12 | font-size: 13px; 13 | font-weight: 500; 14 | color: #767676; 15 | } 16 | nav[data-toggle='toc'] .nav > li > a:hover, 17 | nav[data-toggle='toc'] .nav > li > a:focus { 18 | padding-left: 19px; 19 | color: #563d7c; 20 | text-decoration: none; 21 | background-color: transparent; 22 | border-left: 1px solid #563d7c; 23 | } 24 | nav[data-toggle='toc'] .nav > .active > a, 25 | nav[data-toggle='toc'] .nav > .active:hover > a, 26 | nav[data-toggle='toc'] .nav > .active:focus > a { 27 | padding-left: 18px; 28 | font-weight: bold; 29 | color: #563d7c; 30 | background-color: transparent; 31 | border-left: 2px solid #563d7c; 32 | } 33 | 34 | /* Nav: second level (shown on .active) */ 35 | nav[data-toggle='toc'] .nav .nav { 36 | display: none; /* Hide by default, but at >768px, show it */ 37 | padding-bottom: 10px; 38 | } 39 | nav[data-toggle='toc'] .nav .nav > li > a { 40 | padding-top: 1px; 41 | padding-bottom: 1px; 42 | padding-left: 30px; 43 | font-size: 12px; 44 | font-weight: normal; 45 | } 46 | nav[data-toggle='toc'] .nav .nav > li > a:hover, 47 | nav[data-toggle='toc'] .nav .nav > li > a:focus { 48 | padding-left: 29px; 49 | } 50 | nav[data-toggle='toc'] .nav .nav > .active > a, 51 | nav[data-toggle='toc'] .nav .nav > .active:hover > a, 52 | nav[data-toggle='toc'] .nav .nav > .active:focus > a { 53 | 
padding-left: 28px; 54 | font-weight: 500; 55 | } 56 | 57 | /* from https://github.com/twbs/bootstrap/blob/e38f066d8c203c3e032da0ff23cd2d6098ee2dd6/docs/assets/css/src/docs.css#L631-L634 */ 58 | nav[data-toggle='toc'] .nav > .active > ul { 59 | display: block; 60 | } 61 | -------------------------------------------------------------------------------- /docs/bootstrap-toc.js: -------------------------------------------------------------------------------- 1 | /*! 2 | * Bootstrap Table of Contents v0.4.1 (http://afeld.github.io/bootstrap-toc/) 3 | * Copyright 2015 Aidan Feldman 4 | * Licensed under MIT (https://github.com/afeld/bootstrap-toc/blob/gh-pages/LICENSE.md) */ 5 | (function() { 6 | 'use strict'; 7 | 8 | window.Toc = { 9 | helpers: { 10 | // return all matching elements in the set, or their descendants 11 | findOrFilter: function($el, selector) { 12 | // http://danielnouri.org/notes/2011/03/14/a-jquery-find-that-also-finds-the-root-element/ 13 | // http://stackoverflow.com/a/12731439/358804 14 | var $descendants = $el.find(selector); 15 | return $el.filter(selector).add($descendants).filter(':not([data-toc-skip])'); 16 | }, 17 | 18 | generateUniqueIdBase: function(el) { 19 | var text = $(el).text(); 20 | var anchor = text.trim().toLowerCase().replace(/[^A-Za-z0-9]+/g, '-'); 21 | return anchor || el.tagName.toLowerCase(); 22 | }, 23 | 24 | generateUniqueId: function(el) { 25 | var anchorBase = this.generateUniqueIdBase(el); 26 | for (var i = 0; ; i++) { 27 | var anchor = anchorBase; 28 | if (i > 0) { 29 | // add suffix 30 | anchor += '-' + i; 31 | } 32 | // check if ID already exists 33 | if (!document.getElementById(anchor)) { 34 | return anchor; 35 | } 36 | } 37 | }, 38 | 39 | generateAnchor: function(el) { 40 | if (el.id) { 41 | return el.id; 42 | } else { 43 | var anchor = this.generateUniqueId(el); 44 | el.id = anchor; 45 | return anchor; 46 | } 47 | }, 48 | 49 | createNavList: function() { 50 | return $(''); 51 | }, 52 | 53 | createChildNavList: 
function($parent) { 54 | var $childList = this.createNavList(); 55 | $parent.append($childList); 56 | return $childList; 57 | }, 58 | 59 | generateNavEl: function(anchor, text) { 60 | var $a = $(''); 61 | $a.attr('href', '#' + anchor); 62 | $a.text(text); 63 | var $li = $('
  • '); 64 | $li.append($a); 65 | return $li; 66 | }, 67 | 68 | generateNavItem: function(headingEl) { 69 | var anchor = this.generateAnchor(headingEl); 70 | var $heading = $(headingEl); 71 | var text = $heading.data('toc-text') || $heading.text(); 72 | return this.generateNavEl(anchor, text); 73 | }, 74 | 75 | // Find the first heading level (`

    `, then `

    `, etc.) that has more than one element. Defaults to 1 (for `

    `). 76 | getTopLevel: function($scope) { 77 | for (var i = 1; i <= 6; i++) { 78 | var $headings = this.findOrFilter($scope, 'h' + i); 79 | if ($headings.length > 1) { 80 | return i; 81 | } 82 | } 83 | 84 | return 1; 85 | }, 86 | 87 | // returns the elements for the top level, and the next below it 88 | getHeadings: function($scope, topLevel) { 89 | var topSelector = 'h' + topLevel; 90 | 91 | var secondaryLevel = topLevel + 1; 92 | var secondarySelector = 'h' + secondaryLevel; 93 | 94 | return this.findOrFilter($scope, topSelector + ',' + secondarySelector); 95 | }, 96 | 97 | getNavLevel: function(el) { 98 | return parseInt(el.tagName.charAt(1), 10); 99 | }, 100 | 101 | populateNav: function($topContext, topLevel, $headings) { 102 | var $context = $topContext; 103 | var $prevNav; 104 | 105 | var helpers = this; 106 | $headings.each(function(i, el) { 107 | var $newNav = helpers.generateNavItem(el); 108 | var navLevel = helpers.getNavLevel(el); 109 | 110 | // determine the proper $context 111 | if (navLevel === topLevel) { 112 | // use top level 113 | $context = $topContext; 114 | } else if ($prevNav && $context === $topContext) { 115 | // create a new level of the tree and switch to it 116 | $context = helpers.createChildNavList($prevNav); 117 | } // else use the current $context 118 | 119 | $context.append($newNav); 120 | 121 | $prevNav = $newNav; 122 | }); 123 | }, 124 | 125 | parseOps: function(arg) { 126 | var opts; 127 | if (arg.jquery) { 128 | opts = { 129 | $nav: arg 130 | }; 131 | } else { 132 | opts = arg; 133 | } 134 | opts.$scope = opts.$scope || $(document.body); 135 | return opts; 136 | } 137 | }, 138 | 139 | // accepts a jQuery object, or an options object 140 | init: function(opts) { 141 | opts = this.helpers.parseOps(opts); 142 | 143 | // ensure that the data attribute is in place for styling 144 | opts.$nav.attr('data-toggle', 'toc'); 145 | 146 | var $topContext = this.helpers.createChildNavList(opts.$nav); 147 | var topLevel = 
this.helpers.getTopLevel(opts.$scope); 148 | var $headings = this.helpers.getHeadings(opts.$scope, topLevel); 149 | this.helpers.populateNav($topContext, topLevel, $headings); 150 | } 151 | }; 152 | 153 | $(function() { 154 | $('nav[data-toggle="toc"]').each(function(i, el) { 155 | var $nav = $(el); 156 | Toc.init($nav); 157 | }); 158 | }); 159 | })(); 160 | -------------------------------------------------------------------------------- /docs/docsearch.js: -------------------------------------------------------------------------------- 1 | $(function() { 2 | 3 | // register a handler to move the focus to the search bar 4 | // upon pressing shift + "/" (i.e. "?") 5 | $(document).on('keydown', function(e) { 6 | if (e.shiftKey && e.keyCode == 191) { 7 | e.preventDefault(); 8 | $("#search-input").focus(); 9 | } 10 | }); 11 | 12 | $(document).ready(function() { 13 | // do keyword highlighting 14 | /* modified from https://jsfiddle.net/julmot/bL6bb5oo/ */ 15 | var mark = function() { 16 | 17 | var referrer = document.URL ; 18 | var paramKey = "q" ; 19 | 20 | if (referrer.indexOf("?") !== -1) { 21 | var qs = referrer.substr(referrer.indexOf('?') + 1); 22 | var qs_noanchor = qs.split('#')[0]; 23 | var qsa = qs_noanchor.split('&'); 24 | var keyword = ""; 25 | 26 | for (var i = 0; i < qsa.length; i++) { 27 | var currentParam = qsa[i].split('='); 28 | 29 | if (currentParam.length !== 2) { 30 | continue; 31 | } 32 | 33 | if (currentParam[0] == paramKey) { 34 | keyword = decodeURIComponent(currentParam[1].replace(/\+/g, "%20")); 35 | } 36 | } 37 | 38 | if (keyword !== "") { 39 | $(".contents").unmark({ 40 | done: function() { 41 | $(".contents").mark(keyword); 42 | } 43 | }); 44 | } 45 | } 46 | }; 47 | 48 | mark(); 49 | }); 50 | }); 51 | 52 | /* Search term highlighting ------------------------------*/ 53 | 54 | function matchedWords(hit) { 55 | var words = []; 56 | 57 | var hierarchy = hit._highlightResult.hierarchy; 58 | // loop to fetch from lvl0, lvl1, etc. 
59 | for (var idx in hierarchy) { 60 | words = words.concat(hierarchy[idx].matchedWords); 61 | } 62 | 63 | var content = hit._highlightResult.content; 64 | if (content) { 65 | words = words.concat(content.matchedWords); 66 | } 67 | 68 | // return unique words 69 | var words_uniq = [...new Set(words)]; 70 | return words_uniq; 71 | } 72 | 73 | function updateHitURL(hit) { 74 | 75 | var words = matchedWords(hit); 76 | var url = ""; 77 | 78 | if (hit.anchor) { 79 | url = hit.url_without_anchor + '?q=' + escape(words.join(" ")) + '#' + hit.anchor; 80 | } else { 81 | url = hit.url + '?q=' + escape(words.join(" ")); 82 | } 83 | 84 | return url; 85 | } 86 | -------------------------------------------------------------------------------- /docs/link.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 5 | 8 | 12 | 13 | -------------------------------------------------------------------------------- /docs/pkgdown.js: -------------------------------------------------------------------------------- 1 | /* http://gregfranko.com/blog/jquery-best-practices/ */ 2 | (function($) { 3 | $(function() { 4 | 5 | $('.navbar-fixed-top').headroom(); 6 | 7 | $('body').css('padding-top', $('.navbar').height() + 10); 8 | $(window).resize(function(){ 9 | $('body').css('padding-top', $('.navbar').height() + 10); 10 | }); 11 | 12 | $('[data-toggle="tooltip"]').tooltip(); 13 | 14 | var cur_path = paths(location.pathname); 15 | var links = $("#navbar ul li a"); 16 | var max_length = -1; 17 | var pos = -1; 18 | for (var i = 0; i < links.length; i++) { 19 | if (links[i].getAttribute("href") === "#") 20 | continue; 21 | // Ignore external links 22 | if (links[i].host !== location.host) 23 | continue; 24 | 25 | var nav_path = paths(links[i].pathname); 26 | 27 | var length = prefix_length(nav_path, cur_path); 28 | if (length > max_length) { 29 | max_length = length; 30 | pos = i; 31 | } 32 | } 33 | 34 | // Add class to parent
  • , and enclosing
  • if in dropdown 35 | if (pos >= 0) { 36 | var menu_anchor = $(links[pos]); 37 | menu_anchor.parent().addClass("active"); 38 | menu_anchor.closest("li.dropdown").addClass("active"); 39 | } 40 | }); 41 | 42 | function paths(pathname) { 43 | var pieces = pathname.split("/"); 44 | pieces.shift(); // always starts with / 45 | 46 | var end = pieces[pieces.length - 1]; 47 | if (end === "index.html" || end === "") 48 | pieces.pop(); 49 | return(pieces); 50 | } 51 | 52 | // Returns -1 if not found 53 | function prefix_length(needle, haystack) { 54 | if (needle.length > haystack.length) 55 | return(-1); 56 | 57 | // Special case for length-0 haystack, since for loop won't run 58 | if (haystack.length === 0) { 59 | return(needle.length === 0 ? 0 : -1); 60 | } 61 | 62 | for (var i = 0; i < haystack.length; i++) { 63 | if (needle[i] != haystack[i]) 64 | return(i); 65 | } 66 | 67 | return(haystack.length); 68 | } 69 | 70 | /* Clipboard --------------------------*/ 71 | 72 | function changeTooltipMessage(element, msg) { 73 | var tooltipOriginalTitle=element.getAttribute('data-original-title'); 74 | element.setAttribute('data-original-title', msg); 75 | $(element).tooltip('show'); 76 | element.setAttribute('data-original-title', tooltipOriginalTitle); 77 | } 78 | 79 | if(ClipboardJS.isSupported()) { 80 | $(document).ready(function() { 81 | var copyButton = ""; 82 | 83 | $("div.sourceCode").addClass("hasCopyButton"); 84 | 85 | // Insert copy buttons: 86 | $(copyButton).prependTo(".hasCopyButton"); 87 | 88 | // Initialize tooltips: 89 | $('.btn-copy-ex').tooltip({container: 'body'}); 90 | 91 | // Initialize clipboard: 92 | var clipboardBtnCopies = new ClipboardJS('[data-clipboard-copy]', { 93 | text: function(trigger) { 94 | return trigger.parentNode.textContent.replace(/\n#>[^\n]*/g, ""); 95 | } 96 | }); 97 | 98 | clipboardBtnCopies.on('success', function(e) { 99 | changeTooltipMessage(e.trigger, 'Copied!'); 100 | e.clearSelection(); 101 | }); 102 | 103 | 
clipboardBtnCopies.on('error', function(e) { 104 |       changeTooltipMessage(e.trigger,'Press Ctrl+C or  Command+C to copy'); 105 |     }); 106 |   }); 107 | } 108 | })(window.jQuery || window.$) 109 | -------------------------------------------------------------------------------- /docs/pkgdown.yml: -------------------------------------------------------------------------------- 1 | pandoc: 3.1.1 2 | pkgdown: 2.0.7 3 | pkgdown_sha: ~ 4 | articles: 5 |   Cleaning_GBIF_data_with_CoordinateCleaner: Cleaning_GBIF_data_with_CoordinateCleaner.html 6 |   Cleaning_PBDB_fossils_with_CoordinateCleaner: Cleaning_PBDB_fossils_with_CoordinateCleaner.html 7 |   Comparison_other_software: Comparison_other_software.html 8 |   Using_custom_gazetteers: Using_custom_gazetteers.html 9 | last_built: 2023-10-24T18:56Z 10 | 11 | -------------------------------------------------------------------------------- /docs/reference/plot.spatialvalid-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/CoordinateCleaner/9195fb64154dd20a921a412b5e4d24cc18d3803c/docs/reference/plot.spatialvalid-1.png -------------------------------------------------------------------------------- /inst/CITATION: -------------------------------------------------------------------------------- 1 | note <- sprintf("R package version %s", meta$Version) 2 | 3 | bibentry(bibtype = "article", 4 |          title = "CoordinateCleaner: standardized cleaning of occurrence records from biological collection databases", 5 |          author = c(person("Alexander", "Zizka"), 6 |                     person("Daniele", "Silvestro"), 7 |                     person("Tobias", "Andermann"), 8 |                     person("Josue", "Azevedo"), 9 |                     person("Camila", "Duarte Ritter"), 10 |                     person("Daniel", "Edler"), 11 |                     person("Harith", "Farooq"), 12 |                     person("Andrei", "Herdean"), 13 |                     person("Maria", "Ariza"), 14 |                     person("Ruud", "Scharn"), 15 |                     person("Sten", "Svantesson"), 16 |                     person("Niklas", "Wengstrom"), 17 |                     person("Vera", "Zizka"), 18 | 
person("Alexandre", "Antonelli")), 19 |          journal = "Methods in Ecology and Evolution", 20 |          volume = 10, 21 |          pages = "744-751", 22 |          year = 2019, 23 |          note = note, 24 |          doi = "10.1111/2041-210X.13152", 25 |          url = "https://github.com/ropensci/CoordinateCleaner") -------------------------------------------------------------------------------- /inst/WORDLIST: -------------------------------------------------------------------------------- 1 | Avicennia 2 | BioGeoBEARS 3 | CRS 4 | CleanCoordinates 5 | CleanCoordinatesDS 6 | CleanCoordinatesFOS 7 | DD 8 | DDMM 9 | DK 10 | Fabris 11 | Factbook 12 | GBIF 13 | Geonames 14 | Herbariorum 15 | IUCN 16 | MaxT 17 | MinT 18 | Myr 19 | Neotoma 20 | PBDB 21 | PaleobioDB 22 | Paleobiology 23 | Paleobiologydatabase 24 | Panthera 25 | Poisson 26 | PyRate 27 | Pyrate 28 | Sgarbi 29 | SpatialPointsDataFrame 30 | SpatialPolygonsDataFrame 31 | Spatialvalid 32 | Svantesson 33 | Varela 34 | WGS 35 | Wengtrom 36 | WritePyrate 37 | adm 38 | agesequal 39 | barcoding 40 | biogeo 41 | bookdown 42 | cen 43 | clgbif 44 | codecov 45 | color 46 | com 47 | conR 48 | coord 49 | coun 50 | countr 51 | countrycode 52 | countryref 53 | cusgaz 54 | cutoff 55 | dc 56 | dd 57 | ddmm 58 | decimalLatitude 59 | decimalLongitude 60 | dedup 61 | devtools 62 | dismo 63 | doi 64 | dupl 65 | duplicatesexclude 66 | emph 67 | equ 68 | errorcheck 69 | etc 70 | factbook 71 | gbif 72 | geo 73 | geod 74 | geospheric 75 | ggplot 76 | github 77 | interquantile 78 | io 79 | iso 80 | iucn 81 | lat 82 | leo 83 | lon 84 | macroecological 85 | magrittr 86 | migh 87 | missingvalsexclude 88 | mltpl 89 | naturalearth 90 | naturalearthdata 91 | ne 92 | neotomadb 93 | occs 94 | org 95 | outl 96 | palaebiological 97 | paleobioDB 98 | paleobiodb 99 | paleobiology 100 | paleontological 101 | paleontology 102 | phytools 103 | plaeobioDB 104 | precisioncheck 105 | pvalue 106 | pyrate 107 | quickclean 108 | rOpenSci 109 | ras 110 | refcol 111 | referes 112 | rgbif 113 | rgdal 114 | 
rnaturalearth 115 | ropensci 116 | roxygen 117 | rstudio 118 | sapply 119 | scrubr 120 | sdm 121 | seethedatablog 122 | sp 123 | spatialvalid 124 | spatiotemp 125 | speciesgeocodeR 126 | speedup 127 | taxize 128 | tc 129 | temprange 130 | thres 131 | urb 132 | urbanareas 133 | val 134 | vapply 135 | wordpress 136 | www 137 | -------------------------------------------------------------------------------- /man/CoordinateCleaner-package.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/CoordinateCleaner-package.R 3 | \docType{package} 4 | \name{CoordinateCleaner-package} 5 | \alias{CoordinateCleaner-package} 6 | \alias{CoordinateCleaner} 7 | \title{CoordinateCleaner} 8 | \description{ 9 | Automated Cleaning of Occurrence Records from Biological Collections 10 | } 11 | \details{ 12 | Automated flagging of common spatial and temporal errors in biological and 13 | paleontological collection data, for the use in conservation, ecology and 14 | paleontology. Includes automated tests to easily flag (and exclude) records 15 | assigned to country or province centroid, the open ocean, the headquarters of 16 | the Global Biodiversity Information Facility, urban areas or the location of 17 | biodiversity institutions (museums, zoos, botanical gardens, universities). 18 | Furthermore identifies per species outlier coordinates, zero coordinates, 19 | identical latitude/longitude and invalid coordinates. Also implements an 20 | algorithm to identify data sets with a significant proportion of rounded 21 | coordinates. Especially suited for large data sets. See 22 | \url{https://ropensci.github.io/CoordinateCleaner/} for more details and 23 | tutorials. 
24 | } 25 | \author{ 26 | Alexander Zizka, Daniele Silvestro, Tobias Andermann, Josue Azevedo, 27 | Camila Duarte Ritter, Daniel Edler, Harith Farooq, Andrei Herdean, Maria Ariza, 28 | Ruud Scharn, Sten Svantesson, Niklas Wengstrom, Vera Zizka 29 | } 30 | \keyword{internal} 31 | -------------------------------------------------------------------------------- /man/aohi.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/CoordinateCleaner-package.R 3 | \docType{data} 4 | \name{aohi} 5 | \alias{aohi} 6 | \title{Artificial Hotspot Occurrence Inventory} 7 | \source{ 8 | \url{https://onlinelibrary.wiley.com/doi/10.1111/jbi.14543} 9 | } 10 | \description{ 11 | A data frame with information on Artificial Hotspot Occurrence Inventory (AHOI) 12 | as available in Park et al 2022. For more details see reference. 13 | } 14 | \examples{ 15 | 16 | data("aohi") 17 | } 18 | \references{ 19 | Park, D. S., Xie, Y., Thammavong, H. T., Tulaiha, R., & Feng, X. 20 | (2023). Artificial Hotspot Occurrence Inventory (AHOI). Journal of 21 | Biogeography, 50, 441–449. \doi{10.1111/jbi.14543} 22 | } 23 | \keyword{gazetteers} 24 | -------------------------------------------------------------------------------- /man/buffland.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/CoordinateCleaner-package.R 3 | \docType{data} 4 | \name{buffland} 5 | \alias{buffland} 6 | \title{Global Coastlines buffered by 1 degree} 7 | \source{ 8 | \url{https://www.naturalearthdata.com/downloads/10m-physical-vectors/} 9 | } 10 | \description{ 11 | A \code{SpatVector} with global coastlines, with a 1 degree buffer to extent coastlines as alternative reference for \code{\link{cc_sea}}. Can be useful to identify species in the sea, without flagging records in mangroves, marshes, etc. 
12 | } 13 | \examples{ 14 | 15 | data("buffland") 16 | } 17 | \keyword{gazetteers} 18 | -------------------------------------------------------------------------------- /man/buffsea.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/CoordinateCleaner-package.R 3 | \docType{data} 4 | \name{buffsea} 5 | \alias{buffsea} 6 | \title{Global Coastlines buffered by -1 degree} 7 | \source{ 8 | \url{https://www.naturalearthdata.com/downloads/10m-physical-vectors/} 9 | } 10 | \description{ 11 | A \code{SpatVector} with global coastlines, with a -1 degree buffer to extent coastlines as alternative reference for \code{\link{cc_sea}}. Can be useful to identify marine species on land without flagging records in estuaries, etc. 12 | } 13 | \examples{ 14 | 15 | data("buffsea") 16 | } 17 | \keyword{gazetteers} 18 | -------------------------------------------------------------------------------- /man/cc_aohi.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/cc_aohi.R 3 | \name{cc_aohi} 4 | \alias{cc_aohi} 5 | \title{Identify Coordinates in Artificial Hotspot Occurrence Inventory} 6 | \usage{ 7 | cc_aohi( 8 | x, 9 | lon = "decimalLongitude", 10 | lat = "decimalLatitude", 11 | species = "species", 12 | taxa = c("Aves", "Insecta", "Mammalia", "Plantae"), 13 | buffer = 10000, 14 | geod = TRUE, 15 | value = "clean", 16 | verbose = TRUE 17 | ) 18 | } 19 | \arguments{ 20 | \item{x}{data.frame. Containing geographical coordinates and species names.} 21 | 22 | \item{lon}{character string. The column with the longitude coordinates. 23 | Default = \dQuote{decimalLongitude}.} 24 | 25 | \item{lat}{character string. The column with the latitude coordinates. 26 | Default = \dQuote{decimalLatitude}.} 27 | 28 | \item{species}{character string. 
The column with the species identity. Only 29 | required if verify = TRUE.} 30 | 31 | \item{taxa}{Artificial Hotspot Occurrence Inventory (AHOI) were created based 32 | on four different taxa, birds, insecta, mammalia, and plantae. Users can 33 | choose to keep all, or any specific taxa subset to define the AHOI locations. 34 | Default is to keep all: c("Aves", "Insecta", "Mammalia", "Plantae").} 35 | 36 | \item{buffer}{The buffer around each capital coordinate (the centre of the 37 | city), where records should be flagged as problematic. Units depend on 38 | geod. Default = 10 kilometres.} 39 | 40 | \item{geod}{logical. If TRUE the radius around each capital is calculated 41 | based on a sphere, buffer is in meters and independent of latitude. If 42 | FALSE the radius is calculated assuming planar coordinates and varies 43 | slightly with latitude. Default = TRUE. 44 | See https://seethedatablog.wordpress.com/ for detail and credits.} 45 | 46 | \item{value}{character string. Defining the output value. See value.} 47 | 48 | \item{verbose}{logical. If TRUE reports the name of the test and the number 49 | of records flagged.} 50 | } 51 | \value{ 52 | Depending on the \sQuote{value} argument, either a \code{data.frame} 53 | containing the records considered correct by the test (\dQuote{clean}) or a 54 | logical vector (\dQuote{flagged}), with TRUE = test passed and FALSE = test 55 | failed/potentially problematic . Default = \dQuote{clean}. 56 | } 57 | \description{ 58 | Removes or flags records within Artificial Hotspot Occurrence Inventory. 59 | Poorly geo-referenced occurrence records in biological databases are often 60 | erroneously geo-referenced to highly recurring coordinates that were assessed 61 | by Park et al 2022. See the reference for more details. 62 | } 63 | \note{ 64 | See \url{https://ropensci.github.io/CoordinateCleaner/} for more 65 | details and tutorials. 
66 | } 67 | \examples{ 68 | 69 | x <- data.frame(species = letters[1:10], 70 | decimalLongitude = c(runif(99, -180, 180), -47.92), 71 | decimalLatitude = c(runif(99, -90,90), -15.78)) 72 | cc_aohi(x) 73 | 74 | } 75 | \references{ 76 | Park, D. S., Xie, Y., Thammavong, H. T., Tulaiha, R., & Feng, X. 77 | (2023). Artificial Hotspot Occurrence Inventory (AHOI). Journal of 78 | Biogeography, 50, 441–449. \doi{10.1111/jbi.14543} 79 | } 80 | \seealso{ 81 | Other Coordinates: 82 | \code{\link{cc_cap}()}, 83 | \code{\link{cc_cen}()}, 84 | \code{\link{cc_coun}()}, 85 | \code{\link{cc_dupl}()}, 86 | \code{\link{cc_equ}()}, 87 | \code{\link{cc_gbif}()}, 88 | \code{\link{cc_inst}()}, 89 | \code{\link{cc_iucn}()}, 90 | \code{\link{cc_outl}()}, 91 | \code{\link{cc_sea}()}, 92 | \code{\link{cc_urb}()}, 93 | \code{\link{cc_val}()}, 94 | \code{\link{cc_zero}()} 95 | } 96 | \concept{Coordinates} 97 | \keyword{Coordinate} 98 | \keyword{cleaning} 99 | -------------------------------------------------------------------------------- /man/cc_cap.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/cc_cap.R 3 | \name{cc_cap} 4 | \alias{cc_cap} 5 | \title{Identify Coordinates in Vicinity of Country Capitals.} 6 | \usage{ 7 | cc_cap( 8 | x, 9 | lon = "decimalLongitude", 10 | lat = "decimalLatitude", 11 | species = "species", 12 | buffer = 10000, 13 | geod = TRUE, 14 | ref = NULL, 15 | verify = FALSE, 16 | value = "clean", 17 | verbose = TRUE 18 | ) 19 | } 20 | \arguments{ 21 | \item{x}{data.frame. Containing geographical coordinates and species names.} 22 | 23 | \item{lon}{character string. The column with the longitude coordinates. 24 | Default = \dQuote{decimalLongitude}.} 25 | 26 | \item{lat}{character string. The column with the latitude coordinates. 27 | Default = \dQuote{decimalLatitude}.} 28 | 29 | \item{species}{character string. The column with the species identity. 
Only 30 | required if verify = TRUE.} 31 | 32 | \item{buffer}{The buffer around each capital coordinate (the centre of the 33 | city), where records should be flagged as problematic. Units depend on 34 | geod. Default = 10 kilometres.} 35 | 36 | \item{geod}{logical. If TRUE the radius around each capital is calculated 37 | based on a sphere, buffer is in meters and independent of latitude. If 38 | FALSE the radius is calculated assuming planar coordinates and varies 39 | slightly with latitude. Default = TRUE. 40 | See https://seethedatablog.wordpress.com/ for detail and credits.} 41 | 42 | \item{ref}{SpatVector (geometry: polygons). Providing the geographic 43 | gazetteer. Can be any SpatVector (geometry: polygons), but the structure 44 | must be identical to \code{\link{countryref}}. Default = 45 | \code{\link{countryref}}.} 46 | 47 | \item{verify}{logical. If TRUE records are only flagged if they are the only 48 | record in a given species flagged close to a given reference. If FALSE, the 49 | distance is the only criterion} 50 | 51 | \item{value}{character string. Defining the output value. See value.} 52 | 53 | \item{verbose}{logical. If TRUE reports the name of the test and the number 54 | of records flagged.} 55 | } 56 | \value{ 57 | Depending on the \sQuote{value} argument, either a \code{data.frame} 58 | containing the records considered correct by the test (\dQuote{clean}) or a 59 | logical vector (\dQuote{flagged}), with TRUE = test passed and FALSE = test 60 | failed/potentially problematic . Default = \dQuote{clean}. 61 | } 62 | \description{ 63 | Removes or flags records within a certain radius around country capitals. 64 | Poorly geo-referenced occurrence records in biological databases are often 65 | erroneously geo-referenced to capitals. 66 | } 67 | \note{ 68 | See \url{https://ropensci.github.io/CoordinateCleaner/} for more 69 | details and tutorials. 
70 | } 71 | \examples{ 72 | \dontrun{ 73 | x <- data.frame(species = letters[1:10], 74 | decimalLongitude = c(runif(99, -180, 180), -47.882778), 75 | decimalLatitude = c(runif(99, -90, 90), -15.793889)) 76 | 77 | cc_cap(x) 78 | cc_cap(x, value = "flagged") 79 | } 80 | } 81 | \seealso{ 82 | Other Coordinates: 83 | \code{\link{cc_aohi}()}, 84 | \code{\link{cc_cen}()}, 85 | \code{\link{cc_coun}()}, 86 | \code{\link{cc_dupl}()}, 87 | \code{\link{cc_equ}()}, 88 | \code{\link{cc_gbif}()}, 89 | \code{\link{cc_inst}()}, 90 | \code{\link{cc_iucn}()}, 91 | \code{\link{cc_outl}()}, 92 | \code{\link{cc_sea}()}, 93 | \code{\link{cc_urb}()}, 94 | \code{\link{cc_val}()}, 95 | \code{\link{cc_zero}()} 96 | } 97 | \concept{Coordinates} 98 | \keyword{Coordinate} 99 | \keyword{cleaning} 100 | -------------------------------------------------------------------------------- /man/cc_cen.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/cc_cen.R 3 | \name{cc_cen} 4 | \alias{cc_cen} 5 | \title{Identify Coordinates in Vicinity of Country and Province Centroids} 6 | \usage{ 7 | cc_cen( 8 | x, 9 | lon = "decimalLongitude", 10 | lat = "decimalLatitude", 11 | species = "species", 12 | buffer = 1000, 13 | geod = TRUE, 14 | test = "both", 15 | ref = NULL, 16 | verify = FALSE, 17 | value = "clean", 18 | verbose = TRUE 19 | ) 20 | } 21 | \arguments{ 22 | \item{x}{data.frame. Containing geographical coordinates and species names.} 23 | 24 | \item{lon}{character string. The column with the longitude coordinates. 25 | Default = \dQuote{decimalLongitude}.} 26 | 27 | \item{lat}{character string. The column with the latitude coordinates. 28 | Default = \dQuote{decimalLatitude}.} 29 | 30 | \item{species}{character string. The column with the species identity. Only 31 | required if verify = TRUE.} 32 | 33 | \item{buffer}{numerical. 
The buffer around each province or country 34 | centroid, where records should be flagged as problematic. Units depend on geod. 35 | Default = 1 kilometre.} 36 | 37 | \item{geod}{logical. If TRUE the radius around each capital is calculated 38 | based on a sphere, buffer is in meters and independent of latitude. If 39 | FALSE the radius is calculated assuming planar coordinates and varies 40 | slightly with latitude. Default = TRUE. 41 | See https://seethedatablog.wordpress.com/ for detail and credits.} 42 | 43 | \item{test}{a character string. Specifying the details of the test. One of 44 | c(\dQuote{both}, \dQuote{country}, \dQuote{provinces}). If both tests for 45 | country and province centroids.} 46 | 47 | \item{ref}{SpatVector (geometry: polygons). Providing the geographic 48 | gazetteer. Can be any SpatVector (geometry: polygons), but the structure 49 | must be identical to \code{\link{countryref}}. Default = 50 | \code{\link{countryref}}.} 51 | 52 | \item{verify}{logical. If TRUE records are only flagged if they are the only 53 | record in a given species flagged close to a given reference. If FALSE, the 54 | distance is the only criterion} 55 | 56 | \item{value}{character string. Defining the output value. See value.} 57 | 58 | \item{verbose}{logical. If TRUE reports the name of the test and the number 59 | of records flagged.} 60 | } 61 | \value{ 62 | Depending on the \sQuote{value} argument, either a \code{data.frame} 63 | containing the records considered correct by the test (\dQuote{clean}) or a 64 | logical vector (\dQuote{flagged}), with TRUE = test passed and FALSE = test 65 | failed/potentially problematic . Default = \dQuote{clean}. 66 | } 67 | \description{ 68 | Removes or flags records within a radius around the geographic centroids of political 69 | countries and provinces. Poorly geo-referenced occurrence records in 70 | biological databases are often erroneously geo-referenced to centroids. 
71 | } 72 | \note{ 73 | See \url{https://ropensci.github.io/CoordinateCleaner/} for more 74 | details and tutorials. 75 | } 76 | \examples{ 77 | 78 | x <- data.frame(species = letters[1:10], 79 | decimalLongitude = c(runif(99, -180, 180), -47.92), 80 | decimalLatitude = c(runif(99, -90,90), -15.78)) 81 | cc_cen(x, geod = FALSE) 82 | 83 | \dontrun{ 84 | cc_inst(x, value = "flagged", buffer = 50000) #geod = T 85 | } 86 | 87 | } 88 | \seealso{ 89 | Other Coordinates: 90 | \code{\link{cc_aohi}()}, 91 | \code{\link{cc_cap}()}, 92 | \code{\link{cc_coun}()}, 93 | \code{\link{cc_dupl}()}, 94 | \code{\link{cc_equ}()}, 95 | \code{\link{cc_gbif}()}, 96 | \code{\link{cc_inst}()}, 97 | \code{\link{cc_iucn}()}, 98 | \code{\link{cc_outl}()}, 99 | \code{\link{cc_sea}()}, 100 | \code{\link{cc_urb}()}, 101 | \code{\link{cc_val}()}, 102 | \code{\link{cc_zero}()} 103 | } 104 | \concept{Coordinates} 105 | \keyword{Coordinate} 106 | \keyword{cleaning} 107 | -------------------------------------------------------------------------------- /man/cc_coun.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/cc_coun.R 3 | \name{cc_coun} 4 | \alias{cc_coun} 5 | \title{Identify Coordinates Outside their Reported Country} 6 | \usage{ 7 | cc_coun( 8 | x, 9 | lon = "decimalLongitude", 10 | lat = "decimalLatitude", 11 | iso3 = "countrycode", 12 | value = "clean", 13 | ref = NULL, 14 | ref_col = "iso_a3", 15 | verbose = TRUE, 16 | buffer = NULL 17 | ) 18 | } 19 | \arguments{ 20 | \item{x}{data.frame. Containing geographical coordinates and species names.} 21 | 22 | \item{lon}{character string. The column with the longitude coordinates. 23 | Default = \dQuote{decimalLongitude}.} 24 | 25 | \item{lat}{character string. The column with the latitude coordinates. 26 | Default = \dQuote{decimalLatitude}.} 27 | 28 | \item{iso3}{a character string. 
The column with the country assignment of 29 | each record in three letter ISO code. Default = \dQuote{countrycode}.} 30 | 31 | \item{value}{character string. Defining the output value. See value.} 32 | 33 | \item{ref}{SpatVector (geometry: polygons). Providing the geographic 34 | gazetteer. Can be any SpatVector (geometry: polygons), but the structure 35 | must be identical to \code{rnaturalearth::ne_countries(scale = "medium", 36 | returnclass = "sf")}. 37 | Default = \code{rnaturalearth::ne_countries(scale = "medium", returnclass = 38 | "sf")}} 39 | 40 | \item{ref_col}{the column name in the reference dataset, containing the 41 | relevant ISO codes for matching. Default is to "iso_a3_eh" which refers to 42 | the ISO-3 codes in the reference dataset. See notes.} 43 | 44 | \item{verbose}{logical. If TRUE reports the name of the test and the number 45 | of records flagged.} 46 | 47 | \item{buffer}{numeric. Units are in meters. If provided, a buffer is 48 | created around each country polygon.} 49 | } 50 | \value{ 51 | Depending on the \sQuote{value} argument, either a \code{data.frame} 52 | containing the records considered correct by the test (\dQuote{clean}) or a 53 | logical vector (\dQuote{flagged}), with TRUE = test passed and FALSE = test 54 | failed/potentially problematic . Default = \dQuote{clean}. 55 | } 56 | \description{ 57 | Removes or flags mismatches between geographic coordinates and additional 58 | country information (usually this information is reliably reported with 59 | specimens). Such a mismatch can occur for example, if latitude and longitude 60 | are switched. 61 | } 62 | \note{ 63 | The ref_col argument allows to adapt the function to the structure of 64 | alternative reference datasets. For instance, for 65 | \code{rnaturalearth::ne_countries(scale = "small")}, the default will fail, 66 | but ref_col = "iso_a3" will work. 
67 | 68 | With the default reference, records are flagged if they fall outside 69 | the terrestrial territory of countries, hence records in territorial waters 70 | might be flagged. See \url{https://ropensci.github.io/CoordinateCleaner/} 71 | for more details and tutorials. 72 | } 73 | \examples{ 74 | 75 | \dontrun{ 76 | x <- data.frame(species = letters[1:10], 77 | decimalLongitude = runif(100, -20, 30), 78 | decimalLatitude = runif(100, 35,60), 79 | countrycode = "RUS") 80 | 81 | cc_coun(x, value = "flagged")#non-terrestrial records are flagged as wrong. 82 | } 83 | 84 | } 85 | \seealso{ 86 | Other Coordinates: 87 | \code{\link{cc_aohi}()}, 88 | \code{\link{cc_cap}()}, 89 | \code{\link{cc_cen}()}, 90 | \code{\link{cc_dupl}()}, 91 | \code{\link{cc_equ}()}, 92 | \code{\link{cc_gbif}()}, 93 | \code{\link{cc_inst}()}, 94 | \code{\link{cc_iucn}()}, 95 | \code{\link{cc_outl}()}, 96 | \code{\link{cc_sea}()}, 97 | \code{\link{cc_urb}()}, 98 | \code{\link{cc_val}()}, 99 | \code{\link{cc_zero}()} 100 | } 101 | \concept{Coordinates} 102 | \keyword{Coordinate} 103 | \keyword{cleaning} 104 | -------------------------------------------------------------------------------- /man/cc_dupl.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/cc_dupl.R 3 | \name{cc_dupl} 4 | \alias{cc_dupl} 5 | \title{Identify Duplicated Records} 6 | \usage{ 7 | cc_dupl( 8 | x, 9 | lon = "decimalLongitude", 10 | lat = "decimalLatitude", 11 | species = "species", 12 | additions = NULL, 13 | value = "clean", 14 | verbose = TRUE 15 | ) 16 | } 17 | \arguments{ 18 | \item{x}{data.frame. Containing geographical coordinates and species names.} 19 | 20 | \item{lon}{character string. The column with the longitude coordinates. 21 | Default = \dQuote{decimalLongitude}.} 22 | 23 | \item{lat}{character string. The column with the latitude coordinates. 
24 | Default = \dQuote{decimalLatitude}.} 25 | 26 | \item{species}{a character string. The column with the species name. Default 27 | = \dQuote{species}.} 28 | 29 | \item{additions}{a vector of character strings. Additional columns to be 30 | included in the test for duplication. For example as below, collector name 31 | and collector number.} 32 | 33 | \item{value}{character string. Defining the output value. See value.} 34 | 35 | \item{verbose}{logical. If TRUE reports the name of the test and the number 36 | of records flagged.} 37 | } 38 | \value{ 39 | Depending on the \sQuote{value} argument, either a \code{data.frame} 40 | containing the records considered correct by the test (\dQuote{clean}) or a 41 | logical vector (\dQuote{flagged}), with TRUE = test passed and FALSE = test 42 | failed/potentially problematic . Default = \dQuote{clean}. 43 | } 44 | \description{ 45 | Removes or flags duplicated records based on species name and coordinates, as well as 46 | user-defined additional columns. True (specimen) duplicates or duplicates 47 | from the same species can make up the bulk of records in a biological 48 | collection database, but are undesirable for many analyses. Both can be 49 | flagged with this function, the former given enough additional information. 
50 | } 51 | \examples{ 52 | 53 | x <- data.frame(species = letters[1:10], 54 | decimalLongitude = sample(x = 0:10, size = 100, replace = TRUE), 55 | decimalLatitude = sample(x = 0:10, size = 100, replace = TRUE), 56 | collector = "Bonpl", 57 | collector.number = c(1001, 354), 58 | collection = rep(c("K", "WAG","FR", "P", "S"), 20)) 59 | 60 | cc_dupl(x, value = "flagged") 61 | cc_dupl(x, additions = c("collector", "collector.number")) 62 | 63 | } 64 | \seealso{ 65 | Other Coordinates: 66 | \code{\link{cc_aohi}()}, 67 | \code{\link{cc_cap}()}, 68 | \code{\link{cc_cen}()}, 69 | \code{\link{cc_coun}()}, 70 | \code{\link{cc_equ}()}, 71 | \code{\link{cc_gbif}()}, 72 | \code{\link{cc_inst}()}, 73 | \code{\link{cc_iucn}()}, 74 | \code{\link{cc_outl}()}, 75 | \code{\link{cc_sea}()}, 76 | \code{\link{cc_urb}()}, 77 | \code{\link{cc_val}()}, 78 | \code{\link{cc_zero}()} 79 | } 80 | \concept{Coordinates} 81 | \keyword{Coordinate} 82 | \keyword{cleaning} 83 | -------------------------------------------------------------------------------- /man/cc_equ.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/cc_equ.R 3 | \name{cc_equ} 4 | \alias{cc_equ} 5 | \title{Identify Records with Identical lat/lon} 6 | \usage{ 7 | cc_equ( 8 | x, 9 | lon = "decimalLongitude", 10 | lat = "decimalLatitude", 11 | test = "absolute", 12 | value = "clean", 13 | verbose = TRUE 14 | ) 15 | } 16 | \arguments{ 17 | \item{x}{data.frame. Containing geographical coordinates and species names.} 18 | 19 | \item{lon}{character string. The column with the longitude coordinates. 20 | Default = \dQuote{decimalLongitude}.} 21 | 22 | \item{lat}{character string. The column with the latitude coordinates. 23 | Default = \dQuote{decimalLatitude}.} 24 | 25 | \item{test}{character string. Defines if coordinates are compared exactly 26 | (\dQuote{identical}) or on the absolute scale (i.e. 
-1 = 1, 27 | \dQuote{absolute}). Default is to \dQuote{absolute}.} 28 | 29 | \item{value}{character string. Defining the output value. See value.} 30 | 31 | \item{verbose}{logical. If TRUE reports the name of the test and the number 32 | of records flagged.} 33 | } 34 | \value{ 35 | Depending on the \sQuote{value} argument, either a \code{data.frame} 36 | containing the records considered correct by the test (\dQuote{clean}) or a 37 | logical vector (\dQuote{flagged}), with TRUE = test passed and FALSE = test 38 | failed/potentially problematic . Default = \dQuote{clean}. 39 | } 40 | \description{ 41 | Removes or flags records with equal latitude and longitude coordinates, 42 | either exact or absolute. Equal coordinates can often indicate data entry 43 | errors. 44 | } 45 | \examples{ 46 | 47 | x <- data.frame(species = letters[1:10], 48 | decimalLongitude = runif(100, -180, 180), 49 | decimalLatitude = runif(100, -90,90)) 50 | 51 | cc_equ(x) 52 | cc_equ(x, value = "flagged") 53 | 54 | } 55 | \seealso{ 56 | Other Coordinates: 57 | \code{\link{cc_aohi}()}, 58 | \code{\link{cc_cap}()}, 59 | \code{\link{cc_cen}()}, 60 | \code{\link{cc_coun}()}, 61 | \code{\link{cc_dupl}()}, 62 | \code{\link{cc_gbif}()}, 63 | \code{\link{cc_inst}()}, 64 | \code{\link{cc_iucn}()}, 65 | \code{\link{cc_outl}()}, 66 | \code{\link{cc_sea}()}, 67 | \code{\link{cc_urb}()}, 68 | \code{\link{cc_val}()}, 69 | \code{\link{cc_zero}()} 70 | } 71 | \concept{Coordinates} 72 | \keyword{Coordinate} 73 | \keyword{cleaning} 74 | -------------------------------------------------------------------------------- /man/cc_gbif.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/cc_gbif.R 3 | \name{cc_gbif} 4 | \alias{cc_gbif} 5 | \title{Identify Records Assigned to GBIF Headquarters} 6 | \usage{ 7 | cc_gbif( 8 | x, 9 | lon = "decimalLongitude", 10 | lat = "decimalLatitude", 11 | species = 
"species", 12 | buffer = 1000, 13 | geod = TRUE, 14 | verify = FALSE, 15 | value = "clean", 16 | verbose = TRUE 17 | ) 18 | } 19 | \arguments{ 20 | \item{x}{data.frame. Containing geographical coordinates and species names.} 21 | 22 | \item{lon}{character string. The column with the longitude coordinates. 23 | Default = \dQuote{decimalLongitude}.} 24 | 25 | \item{lat}{character string. The column with the latitude coordinates. 26 | Default = \dQuote{decimalLatitude}.} 27 | 28 | \item{species}{character string. The column with the species identity. Only 29 | required if verify = TRUE.} 30 | 31 | \item{buffer}{numerical. The buffer around the GBIF headquarters, 32 | where records should be flagged as problematic. Units depend on geod. Default = 100 m.} 33 | 34 | \item{geod}{logical. If TRUE the radius is calculated 35 | based on a sphere, buffer is in meters. If FALSE 36 | the radius is calculated in degrees. Default = T.} 37 | 38 | \item{verify}{logical. If TRUE records are only flagged if they are the only 39 | record in a given species flagged close to a given reference. If FALSE, the 40 | distance is the only criterion} 41 | 42 | \item{value}{character string. Defining the output value. See value.} 43 | 44 | \item{verbose}{logical. If TRUE reports the name of the test and the number 45 | of records flagged.} 46 | } 47 | \value{ 48 | Depending on the \sQuote{value} argument, either a \code{data.frame} 49 | containing the records considered correct by the test (\dQuote{clean}) or a 50 | logical vector (\dQuote{flagged}), with TRUE = test passed and FALSE = test 51 | failed/potentially problematic . Default = \dQuote{clean}. 52 | } 53 | \description{ 54 | Removes or flags records within 0.5 degree radius around the GBIF headquarters in 55 | Copenhagen, DK. 56 | } 57 | \details{ 58 | Not recommended if working with records from Denmark or the Copenhagen area. 
59 | } 60 | \examples{ 61 | 62 | x <- data.frame(species = "A", 63 | decimalLongitude = c(12.58, 12.58), 64 | decimalLatitude = c(55.67, 30.00)) 65 | 66 | cc_gbif(x) 67 | cc_gbif(x, value = "flagged") 68 | 69 | } 70 | \seealso{ 71 | Other Coordinates: 72 | \code{\link{cc_aohi}()}, 73 | \code{\link{cc_cap}()}, 74 | \code{\link{cc_cen}()}, 75 | \code{\link{cc_coun}()}, 76 | \code{\link{cc_dupl}()}, 77 | \code{\link{cc_equ}()}, 78 | \code{\link{cc_inst}()}, 79 | \code{\link{cc_iucn}()}, 80 | \code{\link{cc_outl}()}, 81 | \code{\link{cc_sea}()}, 82 | \code{\link{cc_urb}()}, 83 | \code{\link{cc_val}()}, 84 | \code{\link{cc_zero}()} 85 | } 86 | \concept{Coordinates} 87 | \keyword{Coordinate} 88 | \keyword{cleaning} 89 | -------------------------------------------------------------------------------- /man/cc_inst.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/cc_inst.R 3 | \name{cc_inst} 4 | \alias{cc_inst} 5 | \title{Identify Records in the Vicinity of Biodiversity Institutions} 6 | \usage{ 7 | cc_inst( 8 | x, 9 | lon = "decimalLongitude", 10 | lat = "decimalLatitude", 11 | species = "species", 12 | buffer = 100, 13 | geod = FALSE, 14 | ref = NULL, 15 | verify = FALSE, 16 | verify_mltpl = 10, 17 | value = "clean", 18 | verbose = TRUE 19 | ) 20 | } 21 | \arguments{ 22 | \item{x}{data.frame. Containing geographical coordinates and species names.} 23 | 24 | \item{lon}{character string. The column with the longitude coordinates. 25 | Default = \dQuote{decimalLongitude}.} 26 | 27 | \item{lat}{character string. The column with the latitude coordinates. 28 | Default = \dQuote{decimalLatitude}.} 29 | 30 | \item{species}{character string. The column with the species identity. Only 31 | required if verify = TRUE.} 32 | 33 | \item{buffer}{numerical. The buffer around each institution, where records 34 | should be flagged as problematic, in decimal degrees. 
Default = 100.} 35 | 36 | \item{geod}{logical. If TRUE the radius around each institution is calculated 37 | based on a sphere, buffer is in meters and independent of latitude. If 38 | FALSE the radius is calculated assuming planar coordinates and varies 39 | slightly with latitude. Default = FALSE. 40 | See https://seethedatablog.wordpress.com/ for detail and credits.} 41 | 42 | \item{ref}{SpatVector (geometry: polygons). Providing the geographic 43 | gazetteer. Can be any SpatVector (geometry: polygons), but the structure 44 | must be identical to \code{\link{institutions}}. Default = 45 | \code{\link{institutions}}} 46 | 47 | \item{verify}{logical. If TRUE, records close to institutions are only 48 | flagged, if there are no other records of the same species in the greater 49 | vicinity (a radius of buffer * verify_mltpl).} 50 | 51 | \item{verify_mltpl}{numerical. Indicates the factor by which the radius for 52 | verify exceeds the radius of the initial test. Default = 10, which might be 53 | suitable if geod is TRUE, but might be too large otherwise.} 54 | 55 | \item{value}{character string. Defining the output value. See value.} 56 | 57 | \item{verbose}{logical. If TRUE reports the name of the test and the number 58 | of records flagged.} 59 | } 60 | \value{ 61 | Depending on the \sQuote{value} argument, either a \code{data.frame} 62 | containing the records considered correct by the test (\dQuote{clean}) or a 63 | logical vector (\dQuote{flagged}), with TRUE = test passed and FALSE = test 64 | failed/potentially problematic. Default = \dQuote{clean}. 65 | } 66 | \description{ 67 | Removes or flags records assigned to the location of zoos, botanical gardens, 68 | herbaria, universities and museums, based on a global database of ~10,000 such 69 | biodiversity institutions. Coordinates from these locations can be related to 70 | data-entry errors, false automated geo-reference or individuals in 71 | captivity/horticulture.
72 | } 73 | \details{ 74 | Note: the buffer radius is in degrees, thus will differ slightly between 75 | different latitudes. 76 | } 77 | \examples{ 78 | 79 | x <- data.frame(species = letters[1:10], 80 | decimalLongitude = c(runif(99, -180, 180), 37.577800), 81 | decimalLatitude = c(runif(99, -90,90), 55.710800)) 82 | 83 | #large buffer for demonstration, using geod = FALSE for shorter runtime 84 | cc_inst(x, value = "flagged", buffer = 10, geod = FALSE) 85 | 86 | \dontrun{ 87 | #' cc_inst(x, value = "flagged", buffer = 50000) #geod = T 88 | } 89 | 90 | } 91 | \seealso{ 92 | Other Coordinates: 93 | \code{\link{cc_aohi}()}, 94 | \code{\link{cc_cap}()}, 95 | \code{\link{cc_cen}()}, 96 | \code{\link{cc_coun}()}, 97 | \code{\link{cc_dupl}()}, 98 | \code{\link{cc_equ}()}, 99 | \code{\link{cc_gbif}()}, 100 | \code{\link{cc_iucn}()}, 101 | \code{\link{cc_outl}()}, 102 | \code{\link{cc_sea}()}, 103 | \code{\link{cc_urb}()}, 104 | \code{\link{cc_val}()}, 105 | \code{\link{cc_zero}()} 106 | } 107 | \concept{Coordinates} 108 | \keyword{Coordinate} 109 | \keyword{cleaning} 110 | -------------------------------------------------------------------------------- /man/cc_iucn.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/cc_iucn.R 3 | \name{cc_iucn} 4 | \alias{cc_iucn} 5 | \title{Identify Records Outside Natural Ranges} 6 | \usage{ 7 | cc_iucn( 8 | x, 9 | range, 10 | lon = "decimalLongitude", 11 | lat = "decimalLatitude", 12 | species = "species", 13 | buffer = 0, 14 | value = "clean", 15 | verbose = TRUE 16 | ) 17 | } 18 | \arguments{ 19 | \item{x}{data.frame. Containing geographical coordinates and species names.} 20 | 21 | \item{range}{a SpatVector of natural ranges for species in x. 22 | Must contain a column named as indicated by \code{species}. See details.} 23 | 24 | \item{lon}{character string. The column with the longitude coordinates. 
25 | Default = \dQuote{decimalLongitude}.} 26 | 27 | \item{lat}{character string. The column with the latitude coordinates. 28 | Default = \dQuote{decimalLatitude}.} 29 | 30 | \item{species}{a character string. The column with the species name. 31 | Default = \dQuote{species}.} 32 | 33 | \item{buffer}{numerical. The buffer around each species' range, 34 | from where records should be flagged as problematic, in meters. Default = 0.} 35 | 36 | \item{value}{character string. Defining the output value. See value.} 37 | 38 | \item{verbose}{logical. If TRUE reports the name of the test and the number 39 | of records flagged.} 40 | } 41 | \value{ 42 | Depending on the \sQuote{value} argument, either a \code{data.frame} 43 | containing the records considered correct by the test (\dQuote{clean}) or a 44 | logical vector (\dQuote{flagged}), with TRUE = test passed and FALSE = test 45 | failed/potentially problematic. Default = \dQuote{clean}. 46 | } 47 | \description{ 48 | Removes or flags records outside of the provided natural range polygon, on a per species basis. 49 | Expects one entry per species. See the example or 50 | \url{https://www.iucnredlist.org/resources/spatial-data-download} for 51 | the required polygon structure. 52 | } 53 | \details{ 54 | Download natural range maps in suitable format for amphibians, birds, 55 | mammals and reptiles 56 | from \url{https://www.iucnredlist.org/resources/spatial-data-download}. 57 | Note: the buffer radius is in meters, as specified by the 58 | \code{buffer} argument. 59 | } 60 | \note{ 61 | See \url{https://ropensci.github.io/CoordinateCleaner/} for more 62 | details and tutorials.
63 | } 64 | \examples{ 65 | library(terra) 66 | 67 | x <- data.frame(species = c("A", "B"), 68 | decimalLongitude = runif(100, -170, 170), 69 | decimalLatitude = runif(100, -80,80)) 70 | 71 | range_species_A <- cbind(c(-45,-45,-60,-60,-45), c(-10,-25,-25,-10,-10)) 72 | rangeA <- terra::vect(range_species_A, "polygons") 73 | range_species_B <- cbind(c(15,15,32,32,15), c(10,-10,-10,10,10)) 74 | rangeB <- terra::vect(range_species_B, "polygons") 75 | range <- terra::vect(list(rangeA, rangeB)) 76 | range$binomial <- c("A", "B") 77 | 78 | cc_iucn(x = x, range = range, buffer = 0) 79 | 80 | } 81 | \seealso{ 82 | Other Coordinates: 83 | \code{\link{cc_aohi}()}, 84 | \code{\link{cc_cap}()}, 85 | \code{\link{cc_cen}()}, 86 | \code{\link{cc_coun}()}, 87 | \code{\link{cc_dupl}()}, 88 | \code{\link{cc_equ}()}, 89 | \code{\link{cc_gbif}()}, 90 | \code{\link{cc_inst}()}, 91 | \code{\link{cc_outl}()}, 92 | \code{\link{cc_sea}()}, 93 | \code{\link{cc_urb}()}, 94 | \code{\link{cc_val}()}, 95 | \code{\link{cc_zero}()} 96 | } 97 | \concept{Coordinates} 98 | \keyword{Coordinate} 99 | \keyword{cleaning} 100 | -------------------------------------------------------------------------------- /man/cc_sea.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/cc_sea.R 3 | \name{cc_sea} 4 | \alias{cc_sea} 5 | \title{Identify Non-terrestrial Coordinates} 6 | \usage{ 7 | cc_sea( 8 | x, 9 | lon = "decimalLongitude", 10 | lat = "decimalLatitude", 11 | ref = NULL, 12 | scale = 110, 13 | value = "clean", 14 | speedup = TRUE, 15 | verbose = TRUE, 16 | buffer = NULL 17 | ) 18 | } 19 | \arguments{ 20 | \item{x}{data.frame. Containing geographical coordinates and species names.} 21 | 22 | \item{lon}{character string. The column with the longitude coordinates. 23 | Default = \dQuote{decimalLongitude}.} 24 | 25 | \item{lat}{character string. The column with the latitude coordinates. 
26 | Default = \dQuote{decimalLatitude}.} 27 | 28 | \item{ref}{SpatVector (geometry: polygons). Providing the geographic 29 | gazetteer. Can be any SpatVector (geometry: polygons), but the structure 30 | must be identical to rnaturalearth::ne_download(scale = 110, type = 'land', 31 | category = 'physical', returnclass = 'sf'). Default = 32 | rnaturalearth::ne_download(scale = 110, type = 'land', category = 33 | 'physical', returnclass = 'sf').} 34 | 35 | \item{scale}{the scale of the default reference, as downloaded from natural 36 | earth. Must be one of 10, 50, 110. Higher numbers equal higher detail. 37 | Default = 110.} 38 | 39 | \item{value}{character string. Defining the output value. See value.} 40 | 41 | \item{speedup}{logical. Using heuristic to speed up the analysis for large 42 | data sets with many records per location.} 43 | 44 | \item{verbose}{logical. If TRUE reports the name of the test and the number 45 | of records flagged.} 46 | 47 | \item{buffer}{numeric. Units are in meters. If provided, a buffer is 48 | created around the sea polygon, or ref provided.} 49 | } 50 | \value{ 51 | Depending on the \sQuote{value} argument, either a \code{data.frame} 52 | containing the records considered correct by the test (\dQuote{clean}) or a 53 | logical vector (\dQuote{flagged}), with TRUE = test passed and FALSE = test 54 | failed/potentially problematic . Default = \dQuote{clean}. 55 | } 56 | \description{ 57 | Removes or flags coordinates outside the reference landmass. Can be used to 58 | restrict datasets to terrestrial taxa, or exclude records from the open 59 | ocean, when depending on the reference (see details). Often records of 60 | terrestrial taxa can be found in the open ocean, mostly due to switched 61 | latitude and longitude. 
62 | } 63 | \details{ 64 | In some cases flagging records close of the coastline is not recommendable, 65 | because of the low precision of the reference dataset, minor GPS imprecision 66 | or because a dataset might include coast or marshland species. If you only 67 | want to flag records in the open ocean, consider using a buffered landmass 68 | reference, e.g.: \code{\link{buffland}}. 69 | } 70 | \note{ 71 | See \url{https://ropensci.github.io/CoordinateCleaner/} for more 72 | details and tutorials. 73 | } 74 | \examples{ 75 | x <- data.frame(species = letters[1:10], 76 | decimalLongitude = runif(10, -30, 30), 77 | decimalLatitude = runif(10, -30, 30)) 78 | 79 | cc_sea(x, value = "flagged") 80 | 81 | } 82 | \seealso{ 83 | Other Coordinates: 84 | \code{\link{cc_aohi}()}, 85 | \code{\link{cc_cap}()}, 86 | \code{\link{cc_cen}()}, 87 | \code{\link{cc_coun}()}, 88 | \code{\link{cc_dupl}()}, 89 | \code{\link{cc_equ}()}, 90 | \code{\link{cc_gbif}()}, 91 | \code{\link{cc_inst}()}, 92 | \code{\link{cc_iucn}()}, 93 | \code{\link{cc_outl}()}, 94 | \code{\link{cc_urb}()}, 95 | \code{\link{cc_val}()}, 96 | \code{\link{cc_zero}()} 97 | } 98 | \concept{Coordinates} 99 | \keyword{Coordinate} 100 | \keyword{cleaning} 101 | -------------------------------------------------------------------------------- /man/cc_urb.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/cc_urb.R 3 | \name{cc_urb} 4 | \alias{cc_urb} 5 | \title{Identify Records Inside Urban Areas} 6 | \usage{ 7 | cc_urb( 8 | x, 9 | lon = "decimalLongitude", 10 | lat = "decimalLatitude", 11 | ref = NULL, 12 | value = "clean", 13 | verbose = TRUE 14 | ) 15 | } 16 | \arguments{ 17 | \item{x}{data.frame. Containing geographical coordinates and species names.} 18 | 19 | \item{lon}{character string. The column with the longitude coordinates. 
20 | Default = \dQuote{decimalLongitude}.} 21 | 22 | \item{lat}{character string. The column with the latitude coordinates. 23 | Default = \dQuote{decimalLatitude}.} 24 | 25 | \item{ref}{a SpatVector. Providing the geographic gazetteer 26 | with the urban areas. See details. By default 27 | rnaturalearth::ne_download(scale = 'medium', type = 'urban_areas', 28 | returnclass = "sf"). Can be any \code{SpatVector}, but the 29 | structure must be identical to \code{rnaturalearth::ne_download()}.} 30 | 31 | \item{value}{character string. Defining the output value. See value.} 32 | 33 | \item{verbose}{logical. If TRUE reports the name of the test and the number 34 | of records flagged.} 35 | } 36 | \value{ 37 | Depending on the \sQuote{value} argument, either a \code{data.frame} 38 | containing the records considered correct by the test (\dQuote{clean}) or a 39 | logical vector (\dQuote{flagged}), with TRUE = test passed and FALSE = test 40 | failed/potentially problematic . Default = \dQuote{clean}. 41 | } 42 | \description{ 43 | Removes or flags records from inside urban areas, based on a geographic 44 | gazetteer. Often records from large databases span substantial time periods 45 | (centuries) and old records might represent habitats which today are replaced 46 | by city area. 47 | } 48 | \note{ 49 | See \url{https://ropensci.github.io/CoordinateCleaner/} for more 50 | details and tutorials. 
51 | } 52 | \examples{ 53 | 54 | \dontrun{ 55 | x <- data.frame(species = letters[1:10], 56 | decimalLongitude = runif(100, -180, 180), 57 | decimalLatitude = runif(100, -90,90)) 58 | 59 | cc_urb(x) 60 | cc_urb(x, value = "flagged") 61 | } 62 | 63 | } 64 | \seealso{ 65 | Other Coordinates: 66 | \code{\link{cc_aohi}()}, 67 | \code{\link{cc_cap}()}, 68 | \code{\link{cc_cen}()}, 69 | \code{\link{cc_coun}()}, 70 | \code{\link{cc_dupl}()}, 71 | \code{\link{cc_equ}()}, 72 | \code{\link{cc_gbif}()}, 73 | \code{\link{cc_inst}()}, 74 | \code{\link{cc_iucn}()}, 75 | \code{\link{cc_outl}()}, 76 | \code{\link{cc_sea}()}, 77 | \code{\link{cc_val}()}, 78 | \code{\link{cc_zero}()} 79 | } 80 | \concept{Coordinates} 81 | \keyword{Coordinate} 82 | \keyword{cleaning} 83 | -------------------------------------------------------------------------------- /man/cc_val.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/cc_val.R 3 | \name{cc_val} 4 | \alias{cc_val} 5 | \title{Identify Invalid lat/lon Coordinates} 6 | \usage{ 7 | cc_val( 8 | x, 9 | lon = "decimalLongitude", 10 | lat = "decimalLatitude", 11 | value = "clean", 12 | verbose = TRUE 13 | ) 14 | } 15 | \arguments{ 16 | \item{x}{data.frame. Containing geographical coordinates and species names.} 17 | 18 | \item{lon}{character string. The column with the longitude coordinates. 19 | Default = \dQuote{decimalLongitude}.} 20 | 21 | \item{lat}{character string. The column with the latitude coordinates. 22 | Default = \dQuote{decimalLatitude}.} 23 | 24 | \item{value}{character string. Defining the output value. See value.} 25 | 26 | \item{verbose}{logical. 
If TRUE reports the name of the test and the number 27 | of records flagged.} 28 | } 29 | \value{ 30 | Depending on the \sQuote{value} argument, either a \code{data.frame} 31 | containing the records considered correct by the test (\dQuote{clean}) or a 32 | logical vector (\dQuote{flagged}), with TRUE = test passed and FALSE = test 33 | failed/potentially problematic. Default = \dQuote{clean}. 34 | } 35 | \description{ 36 | Removes or flags non-numeric and not available coordinates, 37 | as well as lat > 90, lat < -90, lon > 180 and lon < -180. 38 | } 39 | \details{ 40 | This test is obligatory before running any further tests of 41 | CoordinateCleaner, as additional tests only run with valid coordinates. 42 | } 43 | \note{ 44 | See \url{https://ropensci.github.io/CoordinateCleaner/} for more 45 | details and tutorials. 46 | } 47 | \examples{ 48 | 49 | x <- data.frame(species = letters[1:10], 50 | decimalLongitude = c(runif(106, -180, 180), NA, "13W33'", "67,09", 305), 51 | decimalLatitude = runif(110, -90,90)) 52 | 53 | cc_val(x) 54 | cc_val(x, value = "flagged") 55 | 56 | } 57 | \seealso{ 58 | Other Coordinates: 59 | \code{\link{cc_aohi}()}, 60 | \code{\link{cc_cap}()}, 61 | \code{\link{cc_cen}()}, 62 | \code{\link{cc_coun}()}, 63 | \code{\link{cc_dupl}()}, 64 | \code{\link{cc_equ}()}, 65 | \code{\link{cc_gbif}()}, 66 | \code{\link{cc_inst}()}, 67 | \code{\link{cc_iucn}()}, 68 | \code{\link{cc_outl}()}, 69 | \code{\link{cc_sea}()}, 70 | \code{\link{cc_urb}()}, 71 | \code{\link{cc_zero}()} 72 | } 73 | \concept{Coordinates} 74 | \keyword{Coordinate} 75 | \keyword{cleaning} 76 | -------------------------------------------------------------------------------- /man/cc_zero.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/cc_zero.R 3 | \name{cc_zero} 4 | \alias{cc_zero} 5 | \title{Identify Zero Coordinates} 6 | \usage{ 7 | cc_zero( 8 | x,
9 | lon = "decimalLongitude", 10 | lat = "decimalLatitude", 11 | buffer = 0.5, 12 | value = "clean", 13 | verbose = TRUE 14 | ) 15 | } 16 | \arguments{ 17 | \item{x}{data.frame. Containing geographical coordinates and species names.} 18 | 19 | \item{lon}{character string. The column with the longitude coordinates. 20 | Default = \dQuote{decimalLongitude}.} 21 | 22 | \item{lat}{character string. The column with the latitude coordinates. 23 | Default = \dQuote{decimalLatitude}.} 24 | 25 | \item{buffer}{numerical. The buffer around the 0/0 point, 26 | where records should be flagged as problematic, in decimal 27 | degrees. Default = 0.5.} 28 | 29 | \item{value}{character string. Defining the output value. See value.} 30 | 31 | \item{verbose}{logical. If TRUE reports the name of the test and the number 32 | of records flagged.} 33 | } 34 | \value{ 35 | Depending on the \sQuote{value} argument, either a \code{data.frame} 36 | containing the records considered correct by the test (\dQuote{clean}) or a 37 | logical vector (\dQuote{flagged}), with TRUE = test passed and FALSE = test 38 | failed/potentially problematic . Default = \dQuote{clean}. 39 | } 40 | \description{ 41 | Removes or flags records with either zero longitude or latitude and a radius 42 | around the point at zero longitude and zero latitude. These problems are 43 | often due to erroneous data-entry or geo-referencing and can lead to typical 44 | patterns of high diversity around the equator. 45 | } 46 | \note{ 47 | See \url{https://ropensci.github.io/CoordinateCleaner/} for more 48 | details and tutorials. 
49 | } 50 | \examples{ 51 | 52 | x <- data.frame(species = "A", 53 | decimalLongitude = c(0,34.84, 0, 33.98), 54 | decimalLatitude = c(23.08, 0, 0, 15.98)) 55 | 56 | cc_zero(x) 57 | cc_zero(x, value = "flagged") 58 | 59 | } 60 | \seealso{ 61 | Other Coordinates: 62 | \code{\link{cc_aohi}()}, 63 | \code{\link{cc_cap}()}, 64 | \code{\link{cc_cen}()}, 65 | \code{\link{cc_coun}()}, 66 | \code{\link{cc_dupl}()}, 67 | \code{\link{cc_equ}()}, 68 | \code{\link{cc_gbif}()}, 69 | \code{\link{cc_inst}()}, 70 | \code{\link{cc_iucn}()}, 71 | \code{\link{cc_outl}()}, 72 | \code{\link{cc_sea}()}, 73 | \code{\link{cc_urb}()}, 74 | \code{\link{cc_val}()} 75 | } 76 | \concept{Coordinates} 77 | \keyword{Coordinate} 78 | \keyword{cleaning} 79 | -------------------------------------------------------------------------------- /man/cd_ddmm.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/cd_ddmm.R 3 | \name{cd_ddmm} 4 | \alias{cd_ddmm} 5 | \title{Identify Datasets with a Degree Conversion Error} 6 | \usage{ 7 | cd_ddmm( 8 | x, 9 | lon = "decimalLongitude", 10 | lat = "decimalLatitude", 11 | ds = "dataset", 12 | pvalue = 0.025, 13 | diff = 1, 14 | mat_size = 1000, 15 | min_span = 2, 16 | value = "clean", 17 | verbose = TRUE, 18 | diagnostic = FALSE 19 | ) 20 | } 21 | \arguments{ 22 | \item{x}{data.frame. Containing geographical coordinates and species names.} 23 | 24 | \item{lon}{character string. The column with the longitude coordinates. 25 | Default = \dQuote{decimalLongitude}.} 26 | 27 | \item{lat}{character string. The column with the latitude coordinates. 28 | Default = \dQuote{decimalLatitude}.} 29 | 30 | \item{ds}{a character string. The column with the dataset of each record. In 31 | case \code{x} should be treated as a single dataset, identical for all 32 | records. Default = \dQuote{dataset}.} 33 | 34 | \item{pvalue}{numeric. 
The p-value for the one-sided binomial test to flag the test 35 | as passed or not. Both ddmm.pvalue and diff must be met. Default = 0.025.} 36 | 37 | \item{diff}{numeric. The threshold difference for the ddmm test. Indicates 38 | by which fraction the records with decimals below 0.6 must outnumber the 39 | records with decimals above 0.6. Default = 1.} 40 | 41 | \item{mat_size}{numeric. The size of the matrix for the binomial test. Must 42 | be changed in decimals (e.g. 100, 1000, 10000). Adapt to dataset size, 43 | generally 100 is better for datasets < 10000 records, 1000 is better for 44 | datasets with 10000 - 1M records. Higher values also work reasonably well 45 | for smaller datasets, therefore, default = 1000. For large datasets try 46 | 10000.} 47 | 48 | \item{min_span}{numeric. The minimum geographic extent of datasets to be 49 | tested. Default = 2.} 50 | 51 | \item{value}{character string. Defining the output value. See value.} 52 | 53 | \item{verbose}{logical. If TRUE reports the name of the test and the number 54 | of records flagged.} 55 | 56 | \item{diagnostic}{logical. If TRUE plots the analyses matrix for each 57 | dataset.} 58 | } 59 | \value{ 60 | Depending on the \sQuote{value} argument, either a \code{data.frame} 61 | with summary statistics and flags for each dataset (\dQuote{dataset}) or a 62 | \code{data.frame} containing the records considered correct by the test 63 | (\dQuote{clean}) or a logical vector (\dQuote{flagged}), with TRUE = test passed and FALSE = 64 | test failed/potentially problematic. Default = 65 | \dQuote{clean}. 66 | } 67 | \description{ 68 | This test flags datasets where a significant fraction of records has 69 | been subject to a common degree minute to decimal degree conversion error, 70 | where the degree sign is recognized as decimal delimiter. 71 | } 72 | \details{ 73 | If the degree sign is recognized as decimal delimiter during coordinate 74 | conversion, no coordinate decimals above 0.59 (59') are possible.
The test 75 | here uses a binomial test to test if a significant proportion of records in 76 | a dataset have been subject to this problem. The test is best adjusted via 77 | the diff argument. The lower \code{diff}, the stricter the test. Also scales 78 | with dataset size. Empirically, for datasets with < 5,000 unique coordinate 79 | records \code{diff = 0.1} has proven reasonable flagging most datasets with 80 | >25\% problematic records and all dataset with >50\% problematic records. 81 | For datasets between 5,000 and 100,000 geographic unique records \code{diff 82 | = 0.01} is recommended, for datasets between 100,000 and 1 M records diff = 83 | 0.001, and so on. 84 | } 85 | \note{ 86 | See \url{https://ropensci.github.io/CoordinateCleaner/} for more 87 | details and tutorials. 88 | } 89 | \examples{ 90 | 91 | clean <- data.frame(species = letters[1:10], 92 | decimalLongitude = runif(100, -180, 180), 93 | decimalLatitude = runif(100, -90,90), 94 | dataset = "FR") 95 | 96 | cd_ddmm(x = clean, value = "flagged") 97 | 98 | #problematic dataset 99 | lon <- sample(0:180, size = 100, replace = TRUE) + runif(100, 0,0.59) 100 | lat <- sample(0:90, size = 100, replace = TRUE) + runif(100, 0,0.59) 101 | 102 | prob <- data.frame(species = letters[1:10], 103 | decimalLongitude = lon, 104 | decimalLatitude = lat, 105 | dataset = "FR") 106 | 107 | cd_ddmm(x = prob, value = "flagged") 108 | 109 | } 110 | \seealso{ 111 | Other Datasets: 112 | \code{\link{cd_round}()} 113 | } 114 | \concept{Datasets} 115 | \keyword{"Coordinate} 116 | \keyword{"Dataset} 117 | \keyword{cleaning"} 118 | \keyword{level} 119 | -------------------------------------------------------------------------------- /man/cd_round.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/cd_round.R 3 | \name{cd_round} 4 | \alias{cd_round} 5 | \title{Identify Datasets with Rasterized Coordinates} 6 
| \usage{ 7 | cd_round( 8 | x, 9 | lon = "decimalLongitude", 10 | lat = "decimalLatitude", 11 | ds = "dataset", 12 | T1 = 7, 13 | reg_out_thresh = 2, 14 | reg_dist_min = 0.1, 15 | reg_dist_max = 2, 16 | min_unique_ds_size = 4, 17 | graphs = TRUE, 18 | test = "both", 19 | value = "clean", 20 | verbose = TRUE 21 | ) 22 | } 23 | \arguments{ 24 | \item{x}{data.frame. Containing geographical coordinates and species names.} 25 | 26 | \item{lon}{character string. The column with the longitude coordinates. 27 | Default = \dQuote{decimalLongitude}.} 28 | 29 | \item{lat}{character string. The column with the latitude coordinates. 30 | Default = \dQuote{decimalLatitude}.} 31 | 32 | \item{ds}{a character string. The column with the dataset of each record. In 33 | case \code{x} should be treated as a single dataset, identical for all 34 | records. Default = \dQuote{dataset}.} 35 | 36 | \item{T1}{numeric. The threshold for outlier detection in an 37 | interquantile range based test. This is the major parameter to specify the 38 | sensitivity of the test: lower values equal higher detection rate. Values 39 | between 7-11 are recommended. Default = 7.} 40 | 41 | \item{reg_out_thresh}{numeric. Threshold on the number of equal distances 42 | between outlier points. See details. Default = 2.} 43 | 44 | \item{reg_dist_min}{numeric. The minimum detection distance between 45 | outliers in degrees (the minimum resolution of grids that will be flagged). 46 | Default = 0.1.} 47 | 48 | \item{reg_dist_max}{numeric. The maximum detection distance between 49 | outliers in degrees (the maximum resolution of grids that will be flagged). 50 | Default = 2.} 51 | 52 | \item{min_unique_ds_size}{numeric. The minimum number of unique locations 53 | (values in the tested column) for datasets to be included in the test. 54 | Default = 4.} 55 | 56 | \item{graphs}{logical. If TRUE, diagnostic plots are produced. Default = 57 | TRUE.} 58 | 59 | \item{test}{character string.
Indicates which column to test. Either 60 | \dQuote{lat} for latitude, \dQuote{lon} for longitude, or \dQuote{both} for 61 | both. In the latter case datasets are only flagged if both test are failed. 62 | Default = \dQuote{both}} 63 | 64 | \item{value}{character string. Defining the output value. See value.} 65 | 66 | \item{verbose}{logical. If TRUE reports the name of the test and the number 67 | of records flagged.} 68 | } 69 | \value{ 70 | Depending on the \sQuote{value} argument, either a \code{data.frame} 71 | with summary statistics and flags for each dataset (\dQuote{dataset}) or a 72 | \code{data.frame} containing the records considered correct by the test 73 | (\dQuote{clean}) or a logical vector (\dQuote{flagged}), with TRUE = test passed and FALSE = 74 | test failed/potentially problematic. Default = 75 | \dQuote{clean}. 76 | } 77 | \description{ 78 | Flags datasets with periodicity patterns indicative of a rasterized 79 | (lattice) collection scheme, as often obtain from e.g. atlas data. Using a 80 | combination of autocorrelation and sliding-window outlier detection to 81 | identify periodicity patterns in the data. See 82 | \url{https://besjournals.onlinelibrary.wiley.com/doi/full/10.1111/2041-210X.13152} 83 | for further details and 84 | a description of the algorithm 85 | } 86 | \note{ 87 | See \url{https://ropensci.github.io/CoordinateCleaner/} for more 88 | details and tutorials. 
89 | } 90 | \examples{ 91 | 92 | #simulate bias grid, one degree resolution, 10\% error on a 1000 records dataset 93 | #simulate biased fraction of the data, grid resolution = 1 degree 94 | #simulate non-biased fraction of the data 95 | bi <- sample(3 + 0:5, size = 100, replace = TRUE) 96 | mu <- runif(3, 0, 15) 97 | sig <- runif(3, 0.1, 5) 98 | cl <- rnorm(n = 900, mean = mu, sd = sig) 99 | lon <- c(cl, bi) 100 | 101 | bi <- sample(9:13, size = 100, replace = TRUE) 102 | mu <- runif(3, 0, 15) 103 | sig <- runif(3, 0.1, 5) 104 | cl <- rnorm(n = 900, mean = mu, sd = sig) 105 | lat <- c(cl, bi) 106 | 107 | #add biased data 108 | 109 | inp <- data.frame(decimalLongitude = lon, 110 | decimalLatitude = lat, 111 | dataset = "test") 112 | 113 | 114 | #run test 115 | \dontrun{ 116 | cd_round(inp, value = "dataset") 117 | } 118 | 119 | 120 | } 121 | \seealso{ 122 | Other Datasets: 123 | \code{\link{cd_ddmm}()} 124 | } 125 | \concept{Datasets} 126 | \keyword{"Coordinate} 127 | \keyword{"Dataset} 128 | \keyword{cleaning"} 129 | \keyword{level} 130 | -------------------------------------------------------------------------------- /man/cf_age.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/cf_age.R 3 | \name{cf_age} 4 | \alias{cf_age} 5 | \title{Identify Fossils with Outlier Age} 6 | \usage{ 7 | cf_age( 8 | x, 9 | lon = "decimalLongitude", 10 | lat = "decimalLatitude", 11 | min_age = "min_ma", 12 | max_age = "max_ma", 13 | taxon = "accepted_name", 14 | method = "quantile", 15 | size_thresh = 7, 16 | mltpl = 5, 17 | replicates = 5, 18 | flag_thresh = 0.5, 19 | uniq_loc = FALSE, 20 | value = "clean", 21 | verbose = TRUE 22 | ) 23 | } 24 | \arguments{ 25 | \item{x}{data.frame. Containing fossil records with taxon names, ages, 26 | and geographic coordinates.} 27 | 28 | \item{lon}{character string. The column with the longitude coordinates. 
29 | To identify unique records if \code{uniq_loc = TRUE}. 30 | Default = \dQuote{decimalLongitude}.} 31 | 32 | \item{lat}{character string. The column with the latitude coordinates. 33 | Default = \dQuote{decimalLatitude}. To identify unique records if \code{uniq_loc = TRUE}.} 34 | 35 | \item{min_age}{character string. The column with the minimum age. Default 36 | = \dQuote{min_ma}.} 37 | 38 | \item{max_age}{character string. The column with the maximum age. Default 39 | = \dQuote{max_ma}.} 40 | 41 | \item{taxon}{character string. The column with the taxon name. If 42 | \dQuote{}, searches for outliers over the entire dataset, otherwise per 43 | specified taxon. Default = \dQuote{accepted_name}.} 44 | 45 | \item{method}{character string. Defining the method for outlier 46 | selection. See details. Either \dQuote{quantile} or \dQuote{mad}. Default 47 | = \dQuote{quantile}.} 48 | 49 | \item{size_thresh}{numeric. The minimum number of records needed for a 50 | dataset to be tested. Default = 7.} 51 | 52 | \item{mltpl}{numeric. The multiplier of the interquartile range 53 | (\code{method == 'quantile'}) or median absolute deviation (\code{method == 54 | 'mad'}) to identify outliers. See details. Default = 5.} 55 | 56 | \item{replicates}{numeric. The number of replications for the distance 57 | matrix calculation. See details. Default = 5.} 58 | 59 | \item{flag_thresh}{numeric. The fraction of passed replicates necessary to pass the test. 60 | See details. Default = 0.5.} 61 | 62 | \item{uniq_loc}{logical. If TRUE only single records per location and time 63 | point (and taxon if \code{taxon} != "") are used for the outlier testing. 64 | Default = FALSE.} 65 | 66 | \item{value}{character string. Defining the output value. See value.} 67 | 68 | \item{verbose}{logical.
If TRUE reports the name of the test and the number 69 | of records flagged.} 70 | } 71 | \value{ 72 | Depending on the \sQuote{value} argument, either a \code{data.frame} 73 | containing the records considered correct by the test (\dQuote{clean}) or a 74 | logical vector (\dQuote{flagged}), with TRUE = test passed and FALSE = test 75 | failed/potentially problematic . Default = \dQuote{clean}. 76 | } 77 | \description{ 78 | Removes or flags records that are temporal outliers based on 79 | interquantile ranges. 80 | } 81 | \details{ 82 | The outlier detection is based on an interquantile range test. A temporal 83 | distance matrix among all records is calculated based on a single point selected by random 84 | between the minimum and maximum age for each record. The mean distance for 85 | each point to all neighbours is calculated and the sum of these distances 86 | is then tested against the interquantile range and flagged as an outlier if 87 | \eqn{x > IQR(x) + q_75 * mltpl}. The test is replicated \sQuote{replicates} 88 | times, to account for dating uncertainty. Records are flagged as outliers 89 | if they are flagged by a fraction of more than \sQuote{flag.thresh} 90 | replicates. Only datasets/taxa comprising more than \sQuote{size_thresh} 91 | records are tested. Distance are calculated as Euclidean distance. 92 | } 93 | \note{ 94 | See \url{https://ropensci.github.io/CoordinateCleaner/} for more 95 | details and tutorials. 
96 | } 97 | \examples{ 98 | 99 | minages <- c(runif(n = 11, min = 10, max = 25), 62.5) 100 | x <- data.frame(species = c(letters[1:10], rep("z", 2)), 101 | min_ma = minages, 102 | max_ma = c(minages[1:11] + runif(n = 11, min = 0, max = 5), 65)) 103 | 104 | cf_age(x, value = "flagged", taxon = "") 105 | 106 | # unique locations only 107 | x <- data.frame(species = c(letters[1:10], rep("z", 2)), 108 | decimalLongitude = c(runif(n = 10, min = 4, max = 16), 75, 7), 109 | decimalLatitude = c(runif(n = 12, min = -5, max = 5)), 110 | min_ma = minages, 111 | max_ma = c(minages[1:11] + runif(n = 11, min = 0, max = 5), 65)) 112 | 113 | cf_age(x, value = "flagged", taxon = "", uniq_loc = TRUE) 114 | 115 | } 116 | \seealso{ 117 | Other fossils: 118 | \code{\link{cf_equal}()}, 119 | \code{\link{cf_outl}()}, 120 | \code{\link{cf_range}()}, 121 | \code{\link{write_pyrate}()} 122 | } 123 | \concept{fossils} 124 | \keyword{Coordinate} 125 | \keyword{Fossil} 126 | \keyword{Temporal} 127 | \keyword{cleaning} 128 | -------------------------------------------------------------------------------- /man/cf_equal.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/cf_equal.R 3 | \name{cf_equal} 4 | \alias{cf_equal} 5 | \title{Identify Fossils with equal min and max age} 6 | \usage{ 7 | cf_equal( 8 | x, 9 | min_age = "min_ma", 10 | max_age = "max_ma", 11 | value = "clean", 12 | verbose = TRUE 13 | ) 14 | } 15 | \arguments{ 16 | \item{x}{data.frame. Containing fossil records with taxon names, ages, 17 | and geographic coordinates.} 18 | 19 | \item{min_age}{character string. The column with the minimum age. Default 20 | = \dQuote{min_ma}.} 21 | 22 | \item{max_age}{character string. The column with the maximum age. Default 23 | = \dQuote{max_ma}.} 24 | 25 | \item{value}{character string. Defining the output value. See value.} 26 | 27 | \item{verbose}{logical. 
If TRUE reports the name of the test and the number 28 | of records flagged.} 29 | } 30 | \value{ 31 | Depending on the \sQuote{value} argument, either a \code{data.frame} 32 | containing the records considered correct by the test (\dQuote{clean}) or a 33 | logical vector (\dQuote{flagged}), with TRUE = test passed and FALSE = test 34 | failed/potentially problematic . Default = \dQuote{clean}. 35 | } 36 | \description{ 37 | Removes or flags records with equal minimum and maximum age. 38 | } 39 | \note{ 40 | See \url{https://ropensci.github.io/CoordinateCleaner/} for more 41 | details and tutorials. 42 | } 43 | \examples{ 44 | 45 | minages <- runif(n = 10, min = 0.1, max = 25) 46 | x <- data.frame(species = letters[1:10], 47 | min_ma = minages, 48 | max_ma = minages + runif(n = 10, min = 0, max = 10)) 49 | x <- rbind(x, data.frame(species = "z", 50 | min_ma = 5, 51 | max_ma = 5)) 52 | 53 | cf_equal(x, value = "flagged") 54 | 55 | } 56 | \seealso{ 57 | Other fossils: 58 | \code{\link{cf_age}()}, 59 | \code{\link{cf_outl}()}, 60 | \code{\link{cf_range}()}, 61 | \code{\link{write_pyrate}()} 62 | } 63 | \concept{fossils} 64 | \keyword{Fossils} 65 | \keyword{Temporal} 66 | \keyword{cleaning} 67 | -------------------------------------------------------------------------------- /man/cf_outl.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/cf_outl.R 3 | \name{cf_outl} 4 | \alias{cf_outl} 5 | \title{Identify Outlier Records in Space and Time} 6 | \usage{ 7 | cf_outl( 8 | x, 9 | lon = "decimalLongitude", 10 | lat = "decimalLatitude", 11 | min_age = "min_ma", 12 | max_age = "max_ma", 13 | taxon = "accepted_name", 14 | method = "quantile", 15 | size_thresh = 7, 16 | mltpl = 5, 17 | replicates = 5, 18 | flag_thresh = 0.5, 19 | uniq_loc = FALSE, 20 | value = "clean", 21 | verbose = TRUE 22 | ) 23 | } 24 | \arguments{ 25 | \item{x}{data.frame. 
Containing fossil records with taxon names, ages, 26 | and geographic coordinates.} 27 | 28 | \item{lon}{character string. The column with the longitude coordinates. 29 | To identify unique records if \code{uniq_loc = TRUE}. 30 | Default = \dQuote{decimalLongitude}.} 31 | 32 | \item{lat}{character string. The column with the longitude coordinates. 33 | Default = \dQuote{decimalLatitude}. To identify unique records if \code{uniq_loc = T}.} 34 | 35 | \item{min_age}{character string. The column with the minimum age. Default 36 | = \dQuote{min_ma}.} 37 | 38 | \item{max_age}{character string. The column with the maximum age. Default 39 | = \dQuote{max_ma}.} 40 | 41 | \item{taxon}{character string. The column with the taxon name. If 42 | \dQuote{}, searches for outliers over the entire dataset, otherwise per 43 | specified taxon. Default = \dQuote{accepted_name}.} 44 | 45 | \item{method}{character string. Defining the method for outlier 46 | selection. See details. Either \dQuote{quantile} or \dQuote{mad}. Default 47 | = \dQuote{quantile}.} 48 | 49 | \item{size_thresh}{numeric. The minimum number of records needed for a 50 | dataset to be tested. Default = 10.} 51 | 52 | \item{mltpl}{numeric. The multiplier of the interquartile range 53 | (\code{method == 'quantile'}) or median absolute deviation (\code{method == 54 | 'mad'}) to identify outliers. See details. Default = 5.} 55 | 56 | \item{replicates}{numeric. The number of replications for the distance 57 | matrix calculation. See details. Default = 5.} 58 | 59 | \item{flag_thresh}{numeric. The fraction of passed replicates necessary to pass the test. 60 | See details. Default = 0.5.} 61 | 62 | \item{uniq_loc}{logical. If TRUE only single records per location and time 63 | point (and taxon if \code{taxon} != "") are used for the outlier testing. 64 | Default = T.} 65 | 66 | \item{value}{character string. Defining the output value. See value.} 67 | 68 | \item{verbose}{logical. 
If TRUE reports the name of the test and the number 69 | of records flagged.} 70 | } 71 | \value{ 72 | Depending on the \sQuote{value} argument, either a \code{data.frame} 73 | containing the records considered correct by the test (\dQuote{clean}) or a 74 | logical vector (\dQuote{flagged}), with TRUE = test passed and FALSE = test 75 | failed/potentially problematic . Default = \dQuote{clean}. 76 | } 77 | \description{ 78 | Removes or flags records of fossils that are spatio-temporal outliers based on 79 | interquantile ranges. Records are flagged if they are either extreme in time 80 | or space, or both. 81 | } 82 | \details{ 83 | The outlier detection is based on an interquantile range test. In a first 84 | step a distance matrix of geographic distances among all records is 85 | calculate. Subsequently a similar distance matrix of temporal distances 86 | among all records is calculated based on a single point selected by random 87 | between the minimum and maximum age for each record. The mean distance for 88 | each point to all neighbours is calculated for both matrices and spatial and 89 | temporal distances are scaled to the same range. The sum of these distanced 90 | is then tested against the interquantile range and flagged as an outlier if 91 | \eqn{x > IQR(x) + q_75 * mltpl}. The test is replicated \sQuote{replicates} 92 | times, to account for temporal uncertainty. Records are flagged as outliers 93 | if they are flagged by a fraction of more than \sQuote{flag.thres} 94 | replicates. Only datasets/taxa comprising more than \sQuote{size_thresh} 95 | records are tested. Note that geographic distances are calculated as 96 | geospheric distances for datasets (or taxa) with fewer than 10,000 records 97 | and approximated as Euclidean distances for datasets/taxa with 10,000 to 98 | 25,000 records. Datasets/taxa comprising more than 25,000 records are 99 | skipped. 
100 | } 101 | \note{ 102 | See \url{https://ropensci.github.io/CoordinateCleaner/} for more 103 | details and tutorials. 104 | } 105 | \examples{ 106 | 107 | minages <- c(runif(n = 11, min = 10, max = 25), 62.5) 108 | x <- data.frame(species = c(letters[1:10], rep("z", 2)), 109 | lng = c(runif(n = 10, min = 4, max = 16), 75, 7), 110 | lat = c(runif(n = 12, min = -5, max = 5)), 111 | min_ma = minages, 112 | max_ma = c(minages[1:11] + runif(n = 11, min = 0, max = 5), 65)) 113 | 114 | cf_outl(x, value = "flagged", taxon = "") 115 | 116 | } 117 | \seealso{ 118 | Other fossils: 119 | \code{\link{cf_age}()}, 120 | \code{\link{cf_equal}()}, 121 | \code{\link{cf_range}()}, 122 | \code{\link{write_pyrate}()} 123 | } 124 | \concept{fossils} 125 | \keyword{Coordinate} 126 | \keyword{Fossil} 127 | \keyword{Temporal} 128 | \keyword{cleaning} 129 | -------------------------------------------------------------------------------- /man/cf_range.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/cf_range.R 3 | \name{cf_range} 4 | \alias{cf_range} 5 | \title{Identify Fossils with Extreme Age Ranges} 6 | \usage{ 7 | cf_range( 8 | x, 9 | lon = "decimalLongitude", 10 | lat = "decimalLatitude", 11 | min_age = "min_ma", 12 | max_age = "max_ma", 13 | taxon = "accepted_name", 14 | method = "quantile", 15 | mltpl = 5, 16 | size_thresh = 7, 17 | max_range = 500, 18 | uniq_loc = FALSE, 19 | value = "clean", 20 | verbose = TRUE 21 | ) 22 | } 23 | \arguments{ 24 | \item{x}{data.frame. Containing fossil records with taxon names, ages, 25 | and geographic coordinates.} 26 | 27 | \item{lon}{character string. The column with the longitude coordinates. 28 | To identify unique records if \code{uniq_loc = TRUE}. 29 | Default = \dQuote{decimalLongitude}.} 30 | 31 | \item{lat}{character string. The column with the longitude coordinates. 32 | Default = \dQuote{decimalLatitude}. 
To identify unique records if \code{uniq_loc = T}.} 33 | 34 | \item{min_age}{character string. The column with the minimum age. Default 35 | = \dQuote{min_ma}.} 36 | 37 | \item{max_age}{character string. The column with the maximum age. Default 38 | = \dQuote{max_ma}.} 39 | 40 | \item{taxon}{character string. The column with the taxon name. If 41 | \dQuote{}, searches for outliers over the entire dataset, otherwise per 42 | specified taxon. Default = \dQuote{accepted_name}.} 43 | 44 | \item{method}{character string. Defining the method for outlier 45 | selection. See details. Either \dQuote{quantile} or \dQuote{mad}. Default 46 | = \dQuote{quantile}.} 47 | 48 | \item{mltpl}{numeric. The multiplier of the interquartile range 49 | (\code{method == 'quantile'}) or median absolute deviation (\code{method == 50 | 'mad'}) to identify outliers. See details. Default = 5.} 51 | 52 | \item{size_thresh}{numeric. The minimum number of records needed for a 53 | dataset to be tested. Default = 10.} 54 | 55 | \item{max_range}{numeric. A absolute maximum time interval between min age 56 | and max age. Only relevant for \code{method} = \dQuote{time}.} 57 | 58 | \item{uniq_loc}{logical. If TRUE only single records per location and time 59 | point (and taxon if \code{taxon} != "") are used for the outlier testing. 60 | Default = T.} 61 | 62 | \item{value}{character string. Defining the output value. See value.} 63 | 64 | \item{verbose}{logical. If TRUE reports the name of the test and the number 65 | of records flagged.} 66 | } 67 | \value{ 68 | Depending on the \sQuote{value} argument, either a \code{data.frame} 69 | containing the records considered correct by the test (\dQuote{clean}) or a 70 | logical vector (\dQuote{flagged}), with TRUE = test passed and FALSE = test 71 | failed/potentially problematic . Default = \dQuote{clean}. 72 | } 73 | \description{ 74 | Removes or flags records with an unexpectedly large temporal range, based on a quantile 75 | outlier test. 
76 | } 77 | \note{ 78 | See \url{https://ropensci.github.io/CoordinateCleaner/} for more 79 | details and tutorials. 80 | } 81 | \examples{ 82 | 83 | minages <- runif(n = 11, min = 0.1, max = 25) 84 | x <- data.frame(species = c(letters[1:10], "z"), 85 | lng = c(runif(n = 9, min = 4, max = 16), 75, 7), 86 | lat = c(runif(n = 11, min = -5, max = 5)), 87 | min_ma = minages, 88 | max_ma = minages + c(runif(n = 10, min = 0, max = 5), 25)) 89 | 90 | cf_range(x, value = "flagged", taxon = "") 91 | 92 | } 93 | \seealso{ 94 | Other fossils: 95 | \code{\link{cf_age}()}, 96 | \code{\link{cf_equal}()}, 97 | \code{\link{cf_outl}()}, 98 | \code{\link{write_pyrate}()} 99 | } 100 | \concept{fossils} 101 | \keyword{Fossil} 102 | \keyword{Temporal} 103 | \keyword{cleaning} 104 | -------------------------------------------------------------------------------- /man/clean_dataset.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/clean_dataset.R 3 | \name{clean_dataset} 4 | \alias{clean_dataset} 5 | \title{Coordinate Cleaning using Dataset Properties} 6 | \usage{ 7 | clean_dataset( 8 | x, 9 | lon = "decimalLongitude", 10 | lat = "decimalLatitude", 11 | ds = "dataset", 12 | tests = c("ddmm", "periodicity"), 13 | value = "dataset", 14 | verbose = TRUE, 15 | ... 16 | ) 17 | } 18 | \arguments{ 19 | \item{x}{data.frame. Containing geographical coordinates and species names.} 20 | 21 | \item{lon}{character string. The column with the longitude coordinates. 22 | Default = \dQuote{decimalLongitude}.} 23 | 24 | \item{lat}{character string. The column with the latitude coordinates. 25 | Default = \dQuote{decimalLatitude}.} 26 | 27 | \item{ds}{a character string. The column with the dataset of each record. In 28 | case \code{x} should be treated as a single dataset, identical for all 29 | records. 
Default = \dQuote{dataset}.} 30 | 31 | \item{tests}{a vector of character strings, indicating which tests to run. 32 | See details for all tests available. Default = c("ddmm", "periodicity")} 33 | 34 | \item{value}{a character string. Defining the output value. See value. 35 | Default = \dQuote{dataset}.} 36 | 37 | \item{verbose}{logical. If TRUE reports the name of the test and the number 38 | of records flagged.} 39 | 40 | \item{...}{additional arguments to be passed to \code{\link{cd_ddmm}} and 41 | \code{\link{cd_round}} to customize test sensitivity.} 42 | } 43 | \value{ 44 | Depending on the \sQuote{value} argument: 45 | \describe{ 46 | \item{\dQuote{dataset}}{a \code{data.frame} with the 47 | the test summary statistics for each dataset in \code{x}} 48 | \item{\dQuote{clean}}{a \code{data.frame} containing only 49 | records from datasets in \code{x} that passed the tests} 50 | \item{\dQuote{flagged}}{a logical vector of the same length as 51 | rows in \code{x}, with TRUE = test passed and 52 | FALSE = test failed/potentially problematic.} 53 | } 54 | } 55 | \description{ 56 | Tests for problems associated with coordinate conversions and rounding, 57 | based on dataset properties. Includes test to identify contributing datasets with 58 | potential errors with converting ddmm to dd.dd, and 59 | periodicity in the data decimals indicating rounding or a raster basis 60 | linked to low coordinate precision. Specifically: 61 | \itemize{ 62 | \item ddmm tests for erroneous conversion from a degree 63 | minute format (ddmm) to a decimal degree (dd.dd) format 64 | \item periodicity test for periodicity in the data, 65 | which can indicate imprecise coordinates, due to rounding or rasterization. 66 | } 67 | } 68 | \details{ 69 | These tests are based on the statistical distribution of coordinates and 70 | their decimals within 71 | datasets of geographic distribution records to identify datasets with 72 | potential errors/biases. 
Three potential error sources can be identified. 73 | The ddmm flag tests for the particular pattern that emerges if geographical 74 | coordinates in a degree minute annotation are transferred into decimal 75 | degrees, simply replacing the degree symbol with the decimal point. This 76 | kind of problem has been observed by in older datasets first recorded on 77 | paper using typewriters, where e.g. a floating point was used as symbol for 78 | degrees. The function uses a binomial test to check if more records than 79 | expected have decimals below 0.6 (which is the maximum that can be obtained 80 | in minutes, as one degree has 60 minutes) and if the number of these records 81 | is higher than those above 0.59 by a certain proportion. The periodicity 82 | test uses rate estimation in a Poisson process to estimate if there is 83 | periodicity in the decimals of a dataset (as would be expected by for 84 | example rounding or data that was collected in a raster format) and if there 85 | is an over proportional number of records with the decimal 0 (full degrees) 86 | which indicates rounding and thus low precision. The default values are 87 | empirically optimized by with GBIF data, but should probably be adapted. 88 | } 89 | \note{ 90 | See \url{https://ropensci.github.io/CoordinateCleaner/} for more details 91 | and tutorials. 
92 | } 93 | \examples{ 94 | #Create test dataset 95 | clean <- data.frame(dataset = rep("clean", 1000), 96 | decimalLongitude = runif(min = -43, max = -40, n = 1000), 97 | decimalLatitude = runif(min = -13, max = -10, n = 1000)) 98 | 99 | bias.long <- c(round(runif(min = -42, max = -40, n = 500), 1), 100 | round(runif(min = -42, max = -40, n = 300), 0), 101 | runif(min = -42, max = -40, n = 200)) 102 | bias.lat <- c(round(runif(min = -12, max = -10, n = 500), 1), 103 | round(runif(min = -12, max = -10, n = 300), 0), 104 | runif(min = -12, max = -10, n = 200)) 105 | bias <- data.frame(dataset = rep("biased", 1000), 106 | decimalLongitude = bias.long, 107 | decimalLatitude = bias.lat) 108 | test <- rbind(clean, bias) 109 | 110 | \dontrun{ 111 | #run clean_dataset 112 | flags <- clean_dataset(test) 113 | 114 | #check problems 115 | #clean 116 | hist(test[test$dataset == rownames(flags[flags$summary,]), "decimalLongitude"]) 117 | #biased 118 | hist(test[test$dataset == rownames(flags[!flags$summary,]), "decimalLongitude"]) 119 | 120 | } 121 | } 122 | \seealso{ 123 | \code{\link{cd_ddmm}} \code{\link{cd_round}} 124 | 125 | Other Wrapper functions: 126 | \code{\link{clean_coordinates}()}, 127 | \code{\link{clean_fossils}()} 128 | } 129 | \concept{Wrapper functions} 130 | \keyword{Coordinate} 131 | \keyword{cleaning} 132 | \keyword{wrapper} 133 | -------------------------------------------------------------------------------- /man/countryref.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/CoordinateCleaner-package.R 3 | \docType{data} 4 | \name{countryref} 5 | \alias{countryref} 6 | \title{Country Centroids and Country Capitals} 7 | \format{ 8 | A data frame with 5,305 observations on 13 variables. 
\describe{
40 | } 41 | \examples{ 42 | 43 | data(countryref) 44 | head(countryref) 45 | } 46 | \keyword{gazetteers} 47 | -------------------------------------------------------------------------------- /man/institutions.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/CoordinateCleaner-package.R 3 | \docType{data} 4 | \name{institutions} 5 | \alias{institutions} 6 | \title{Global Locations of Biodiversity Institutions} 7 | \format{ 8 | A data frame with 12170 observations on 12 variables. 9 | } 10 | \source{ 11 | Compiled from various sources: \itemize{ \item Global Biodiversity 12 | Information Facility \url{https://www.gbif.org/} \item Wikipedia 13 | \url{https://www.wikipedia.org/} \item Geonames \url{https://www.geonames.org/} \item The Global 14 | Registry of Biodiversity Repositories \item Index 15 | Herbariorum \url{https://sweetgum.nybg.org/science/ih/} 16 | \item Botanic Gardens Conservation International \url{https://www.bgci.org/} 17 | } 18 | } 19 | \description{ 20 | A global gazetteer for biodiversity institutions from various sources, 21 | including zoos, museums, botanical gardens, GBIF contributors, herbaria, 22 | university collections. 
23 | } 24 | \examples{ 25 | 26 | data(institutions) 27 | str(institutions) 28 | 29 | } 30 | \keyword{gazetteers} 31 | -------------------------------------------------------------------------------- /man/is.spatialvalid.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/methods.spatialvalid.R 3 | \name{is.spatialvalid} 4 | \alias{is.spatialvalid} 5 | \title{Check spatialvalid object} 6 | \usage{ 7 | is.spatialvalid(x) 8 | } 9 | \arguments{ 10 | \item{x}{the object to be tested} 11 | } 12 | \value{ 13 | returns \code{TRUE} if its argument is a spatialvalid 14 | } 15 | \description{ 16 | Test if its argument is a spatialvalid object 17 | } 18 | \keyword{Check} 19 | -------------------------------------------------------------------------------- /man/pbdb_example.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/CoordinateCleaner-package.R 3 | \docType{data} 4 | \name{pbdb_example} 5 | \alias{pbdb_example} 6 | \title{Example data from the Paleobiologydatabase} 7 | \format{ 8 | A data frame with 5000 observations on 36 variables. 9 | } 10 | \source{ 11 | \itemize{ 12 | \item The Paleobiology database \url{https://paleobiodb.org/} 13 | \item Sara Varela, Javier Gonzalez Hernandez and Luciano Fabris Sgarbi (2016). 14 | paleobioDB: Download and Process Data from the Paleobiology Database. 15 | R package version 0.5.0. \url{https://CRAN.R-project.org/package=paleobioDB}. 16 | } 17 | } 18 | \description{ 19 | A dataset of 5000 flowering plant fossil occurrences as example for data of the paleobiology Database, downloaded using the paleobioDB packages as specified in the vignette \dQuote{Cleaning_PBDB_fossils_with_CoordinateCleaner}. 
20 | } 21 | \examples{ 22 | 23 | data(institutions) 24 | str(institutions) 25 | 26 | } 27 | \keyword{gazetteers} 28 | -------------------------------------------------------------------------------- /man/plot.spatialvalid.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/methods.spatialvalid.R 3 | \name{plot.spatialvalid} 4 | \alias{plot.spatialvalid} 5 | \title{Plot Method for Class Spatialvalid} 6 | \usage{ 7 | \method{plot}{spatialvalid}( 8 | x, 9 | lon = "decimalLongitude", 10 | lat = "decimalLatitude", 11 | bgmap = NULL, 12 | clean = TRUE, 13 | details = FALSE, 14 | pts_size = 1, 15 | font_size = 10, 16 | zoom_f = 0.1, 17 | ... 18 | ) 19 | } 20 | \arguments{ 21 | \item{x}{an object of the class \code{spatialvalid} as from 22 | \code{\link{clean_coordinates}}.} 23 | 24 | \item{lon}{character string. The column with the longitude coordinates. 25 | Default = \dQuote{decimalLongitude}.} 26 | 27 | \item{lat}{character string. The column with the latitude coordinates. 28 | Default = \dQuote{decimalLatitude}.} 29 | 30 | \item{bgmap}{an object of the class \code{SpatVector} or \code{sf} used as 31 | background map. Default = ggplot::borders()} 32 | 33 | \item{clean}{logical. If TRUE, non-flagged coordinates are included in the 34 | map.} 35 | 36 | \item{details}{logical. If TRUE, occurrences are color-coded by the type of 37 | flag.} 38 | 39 | \item{pts_size}{numeric. The point size for the plot.} 40 | 41 | \item{font_size}{numeric. The font size for the legend and axes} 42 | 43 | \item{zoom_f}{numeric. the fraction by which to expand the plotting area 44 | from the occurrence records. Increase, if countries do not show 45 | up on the background map.} 46 | 47 | \item{\dots}{arguments to be passed to methods.} 48 | } 49 | \value{ 50 | A plot of the records flagged as potentially erroneous by 51 | \code{\link{clean_coordinates}}. 
52 | } 53 | \description{ 54 | A set of plots to explore objects of the class \code{spatialvalid}. A plot 55 | to visualize the flags from clean_coordinates 56 | } 57 | \examples{ 58 | 59 | 60 | exmpl <- data.frame(species = sample(letters, size = 250, replace = TRUE), 61 | decimalLongitude = runif(250, min = 42, max = 51), 62 | decimalLatitude = runif(250, min = -26, max = -11)) 63 | 64 | test <- clean_coordinates(exmpl, species = "species", 65 | tests = c("sea", "gbif", "zeros"), 66 | verbose = FALSE) 67 | 68 | summary(test) 69 | plot(test) 70 | } 71 | \seealso{ 72 | \code{\link{clean_coordinates}} 73 | } 74 | \keyword{Visualisation} 75 | -------------------------------------------------------------------------------- /man/write_pyrate.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/write_pyrate.R 3 | \name{write_pyrate} 4 | \alias{write_pyrate} 5 | \title{Create Input Files for PyRate} 6 | \usage{ 7 | write_pyrate( 8 | x, 9 | status, 10 | fname, 11 | taxon = "accepted_name", 12 | min_age = "min_ma", 13 | max_age = "max_ma", 14 | trait = NULL, 15 | path = getwd(), 16 | replicates = 1, 17 | cutoff = NULL, 18 | random = TRUE 19 | ) 20 | } 21 | \arguments{ 22 | \item{x}{data.frame. Containing fossil records with taxon names, ages, 23 | and geographic coordinates.} 24 | 25 | \item{status}{a vector of character strings of length \code{nrow(x)}. 26 | Indicating for each record \dQuote{extinct} or \dQuote{extant}.} 27 | 28 | \item{fname}{a character string. The prefix to use for the output files.} 29 | 30 | \item{taxon}{character string. The column with the taxon name. 31 | Default = \dQuote{accepted_name}.} 32 | 33 | \item{min_age}{character string. The column with the minimum age. Default 34 | = \dQuote{min_ma}.} 35 | 36 | \item{max_age}{character string. The column with the maximum age. 
Default 37 | = \dQuote{max_ma}.} 38 | 39 | \item{trait}{a numeric vector of length \code{nrow(x)}. Indicating trait 40 | values for each record. Optional. Default = NULL.} 41 | 42 | \item{path}{a character string. giving the absolute path to write the output 43 | files. Default is the working directory.} 44 | 45 | \item{replicates}{a numerical. The number of replicates for the randomized 46 | age generation. See details. Default = 1.} 47 | 48 | \item{cutoff}{a numerical. Specify a threshold to exclude fossil occurrences 49 | with a high temporal uncertainty, i.e. with a wide temporal range between 50 | min_age and max_age. Examples: cutoff=NULL (default; all occurrences are 51 | kept in the data set) cutoff=5 (all occurrences with a temporal range of 5 52 | Myr or higher are excluded from the data set)} 53 | 54 | \item{random}{logical. Specify whether to take a random age (between MinT 55 | and MaxT) for each occurrence or the midpoint age. Note that this option 56 | defaults to TRUE if several replicates are generated (i.e. replicates > 1). 57 | Examples: random = TRUE (default) random = FALSE (use midpoint ages)} 58 | } 59 | \value{ 60 | PyRate input files in the working directory. 61 | } 62 | \description{ 63 | Creates the input necessary to run Pyrate, based on a data.frame with fossil 64 | ages (as derived e.g. from clean_fossils) and a vector of the 65 | extinction status for each sample. Creates files in the working directory! 66 | } 67 | \details{ 68 | The replicate option allows the user to generate several replicates of the 69 | data set in a single input file, each time re-drawing the ages of the 70 | occurrences at random from uniform distributions with boundaries MinT and 71 | MaxT. The replicates can be analysed in different runs (see PyRate command 72 | -j) and combining the results of these replicates is a way to account for 73 | the uncertainty of the true ages of the fossil occurrences. 
Examples: 74 | replicates=1 (default, generates 1 data set), replicates=10 (generates 10 75 | random replicates of the data set). 76 | } 77 | \note{ 78 | See \url{https://github.com/dsilvestro/PyRate/wiki} for more details 79 | and tutorials on PyRate and PyRate input. 80 | } 81 | \examples{ 82 | 83 | minages <- runif(250, 0, 65) 84 | exmpl <- data.frame(accepted_name = sample(letters, size = 250, replace = TRUE), 85 | lng = runif(250, min = 42, max = 51), 86 | lat = runif(250, min = -26, max = -11), 87 | min_ma = minages, 88 | max_ma = minages + runif(250, 0.1, 65)) 89 | 90 | #a vector with the status for each record, 91 | #make sure species are only classified as either extinct or extant, 92 | #otherwise the function will drop an error 93 | 94 | status <- sample(c("extinct", "extant"), size = nrow(exmpl), replace = TRUE) 95 | 96 | #or from a list of species 97 | status <- sample(c("extinct", "extant"), size = length(letters), replace = TRUE) 98 | names(status) <- letters 99 | status <- status[exmpl$accepted_name] 100 | 101 | \dontrun{ 102 | write_pyrate(x = exmpl,fname = "test", status = status) 103 | } 104 | 105 | } 106 | \seealso{ 107 | Other fossils: 108 | \code{\link{cf_age}()}, 109 | \code{\link{cf_equal}()}, 110 | \code{\link{cf_outl}()}, 111 | \code{\link{cf_range}()} 112 | } 113 | \concept{fossils} 114 | \keyword{Fossil} 115 | -------------------------------------------------------------------------------- /tests/testthat.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(CoordinateCleaner) 3 | 4 | test_check("CoordinateCleaner") 5 | -------------------------------------------------------------------------------- /tests/testthat/Rplots.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/CoordinateCleaner/9195fb64154dd20a921a412b5e4d24cc18d3803c/tests/testthat/Rplots.pdf 
-------------------------------------------------------------------------------- /tests/testthat/test_datasetlevel_functions.R: -------------------------------------------------------------------------------- 1 | context("Dataset level functions ds_*") 2 | 3 | #Create test dataset 4 | set.seed(1) 5 | clean <- data.frame(dataset = rep("clean", 1000), 6 | decimalLongitude = runif(min = -43, max = -40, n = 1000), 7 | decimalLatitude = runif(min = -13, max = -10, n = 1000)) 8 | bias.long <- c(round(runif(min = -42, max = -40, n = 500), 1), 9 | round(runif(min = -42, max = -40, n = 300), 0), 10 | runif(min = -42, max = -40, n = 200)) 11 | bias.lat <- c(round(runif(min = -12, max = -10, n = 500), 1), 12 | round(runif(min = -12, max = -10, n = 300), 0), 13 | runif(min = -12, max = -10, n = 200)) 14 | bias <- data.frame(dataset = rep("biased", 1000), 15 | decimalLongitude = bias.long, 16 | decimalLatitude = bias.lat) 17 | test <- rbind(clean, bias) 18 | 19 | #cd_round 20 | test_that("cd_round identifies existing bias", { 21 | skip_on_cran() 22 | #test target 23 | ## multiple datasets 24 | expect_equal(mean(cd_round(test, value = "dataset", 25 | graphs = F, test = "both")$summary), 0.5) 26 | expect_equal(mean(cd_round(test, value = "dataset", 27 | graphs = F, test = "lat")$summary), 0.5) 28 | expect_equal(mean(cd_round(test, value = "dataset", 29 | graphs = F, test = "lon")$summary), 0.5) 30 | 31 | # single dataset 32 | 33 | expect_equal(mean(cd_round(bias, value = "dataset", 34 | graphs = F, test = "both")$summary), 0) 35 | expect_equal(mean(cd_round(bias, value = "dataset", 36 | graphs = F, test = "lat")$summary), 0) 37 | expect_equal(mean(cd_round(bias, value = "dataset", 38 | graphs = F, test = "lon")$summary), 0) 39 | 40 | #dataset output 41 | t1 <- cd_round(test, value = "dataset", graphs = F) 42 | expect_is(t1, "data.frame") 43 | expect_equal(sum(t1$summary), 1) 44 | 45 | #flags output 46 | t2 <- cd_round(test, value = "flagged", graphs = F) 47 | expect_is(t2, 
"logical") 48 | expect_equal(mean(t2), 0.5) 49 | 50 | #graphs 51 | expect_equal(mean(cd_round(test, value = "flagged", graphs = T)), 0.5) 52 | 53 | # test targets 54 | 55 | 56 | #column specification 57 | expect_error(cd_round(x = test, lat = "latitude")) 58 | expect_error(cd_round(x = test, lon = "longitude")) 59 | expect_error(cd_round(x = test, ds = "source")) 60 | }) 61 | 62 | 63 | # cd_ddmm 64 | set.seed(1) 65 | clean <- data.frame(species = letters[1:10], 66 | decimalLongitude = runif(100, -180, 180), 67 | decimalLatitude = runif(100, -90,90), 68 | dataset = "clean") 69 | #problematic dataset 70 | lon <- sample(0:180, size = 100, replace = TRUE) + runif(100, 0,0.59) 71 | lat <- sample(0:90, size = 100, replace = TRUE) + runif(100, 0,0.59) 72 | 73 | prob <- data.frame(species = letters[1:10], 74 | decimalLongitude = lon, 75 | decimalLatitude = lat, 76 | dataset = "prob") 77 | 78 | test <- rbind(prob,clean) 79 | 80 | test_that("cd_ddmm identifies existing bias", { 81 | skip_on_cran() 82 | t1 <- cd_ddmm(test, value = "dataset") 83 | expect_is(t1, "data.frame") 84 | expect_equal(sum(t1$pass), 1) 85 | 86 | t2 <- cd_ddmm(test, value = "flagged") 87 | expect_is(t2, "logical") 88 | expect_equal(mean(t2), 0.5) 89 | 90 | expect_equal(mean(cd_ddmm(test, value = "flagged")), 0.5) 91 | 92 | expect_error(cd_ddmm(x = test, lat = "latitude")) 93 | expect_error(cd_ddmm(x = test, lon = "longitude")) 94 | expect_error(cd_ddmm(x = test, ds = "source")) 95 | }) 96 | -------------------------------------------------------------------------------- /tests/testthat/test_fossillevel_functions.R: -------------------------------------------------------------------------------- 1 | context("Fossil cleaning tc_*") 2 | 3 | set.seed(1) 4 | #cf_range 5 | minages <- runif(n = 100, min = 0.1, max = 25) 6 | set.seed(1) 7 | maxages <- minages + c(runif(n = 99, min = 0, max = 5), 25) 8 | 9 | test <- data.frame( 10 | species = c(letters[1:9], "z"), 11 | decimalLongitude = c(runif( 12 | n = 98, min 
= 4, max = 16 13 | ), 75, 7), 14 | decimalLatitude = c(runif( 15 | n = 100, min = -5, max = 5 16 | )), 17 | min_ma = minages, 18 | max_ma = maxages 19 | ) 20 | 21 | 22 | 23 | # cf_range 24 | test_that("cf_range identifies existing bias", { 25 | # skip_on_cran() 26 | #return value 27 | expect_is(cf_range(test, value = "flagged", taxon = ""), "logical") 28 | expect_is(cf_range(test, value = "clean", taxon = ""), "data.frame") 29 | 30 | #outlier method 31 | expect_equal(sum( 32 | cf_range( 33 | test, 34 | value = "flagged", 35 | method = "quantile", 36 | taxon = "" 37 | ) 38 | ), 99) 39 | expect_equal(sum(cf_range( 40 | test, 41 | value = "flagged", 42 | method = "mad", 43 | taxon = "" 44 | )), 99) 45 | expect_equal(sum( 46 | cf_range( 47 | test, 48 | value = "flagged", 49 | method = "time", 50 | taxon = "", 51 | max_range = 20 52 | ) 53 | ), 99) 54 | 55 | expect_equal(nrow( 56 | cf_range( 57 | test, 58 | value = "clean", 59 | method = "quantile", 60 | taxon = "", 61 | uniq_loc = TRUE 62 | ) 63 | ), 99) 64 | expect_equal(nrow( 65 | cf_range( 66 | test, 67 | value = "clean", 68 | method = "mad", 69 | taxon = "", 70 | uniq_loc = TRUE 71 | ) 72 | ), 99) 73 | expect_equal(nrow( 74 | cf_range( 75 | test, 76 | value = "clean", 77 | method = "time", 78 | taxon = "", 79 | uniq_loc = FALSE 80 | ) 81 | ), 100) 82 | 83 | expect_equal(nrow( 84 | cf_range( 85 | test, 86 | value = "clean", 87 | method = "quantile", 88 | taxon = "species", 89 | uniq_loc = TRUE 90 | ) 91 | ), 99) 92 | 93 | expect_equal(nrow( 94 | cf_range( 95 | test, 96 | value = "clean", 97 | method = "quantile", 98 | taxon = "species", 99 | uniq_loc = TRUE 100 | ) 101 | ), 99) 102 | }) 103 | 104 | #cf_age 105 | test_that("cf_age runs", { 106 | # skip_on_cran() 107 | #return value 108 | expect_is(cf_age(test, value = "flagged", taxon = ""), "logical") 109 | expect_is(cf_age(test, value = "clean", taxon = ""), "data.frame") 110 | 111 | #outlier method 112 | expect_equal(sum( 113 | cf_age( 114 | test, 115 | value = 
"flagged", 116 | method = "quantile", 117 | taxon = "", 118 | replicates = 10 119 | ) 120 | ), 100) 121 | expect_equal(sum( 122 | cf_age( 123 | test, 124 | value = "flagged", 125 | method = "quantile", 126 | taxon = "", 127 | uniq_loc = F, 128 | replicates = 10 129 | ) 130 | ), 100) 131 | expect_equal(sum( 132 | cf_age( 133 | test, 134 | value = "flagged", 135 | method = "quantile", 136 | taxon = "species", 137 | uniq_loc = F, 138 | replicates = 10 139 | ) 140 | ), 100) 141 | expect_equal(sum( 142 | cf_age( 143 | test, 144 | value = "flagged", 145 | method = "mad", 146 | taxon = "", 147 | replicates = 10, 148 | flag_thresh = 0.1, 149 | mltpl = 10 150 | ) 151 | ), 100) 152 | expect_equal(sum( 153 | cf_age( 154 | test, 155 | value = "flagged", 156 | method = "mad", 157 | taxon = "species", 158 | replicates = 10, 159 | flag_thresh = 0.1, 160 | mltpl = 10 161 | ) 162 | ), 100) 163 | expect_equal(sum( 164 | cf_age( 165 | test, 166 | value = "flagged", 167 | method = "mad", 168 | taxon = "", 169 | uniq_loc = F 170 | ) 171 | ), 100) 172 | }) 173 | 174 | 175 | #cf_outl 176 | set.seed(1) 177 | minages <- c(runif(n = 11, min = 10, max = 25), 62.5) 178 | test <- data.frame( 179 | species = c(letters[1:10], rep("z", 2)), 180 | decimalLongitude = c(runif( 181 | n = 10, min = 4, max = 16 182 | ), 75, 7), 183 | decimalLatitude = c(runif( 184 | n = 12, min = -5, max = 5 185 | )), 186 | min_ma = minages, 187 | max_ma = c(minages[1:11] + 188 | runif( 189 | n = 11, min = 0, max = 5 190 | ), 65) 191 | ) 192 | 193 | test_that("cf_outl identifies existing bias", { 194 | # skip_on_cran() 195 | #return value 196 | expect_is(cf_outl(test, value = "flagged", taxon = ""), "logical") 197 | expect_is(cf_outl(test, value = "clean", taxon = ""), "data.frame") 198 | 199 | #outlier method 200 | expect_equal(sum(cf_outl( 201 | test, 202 | value = "flagged", 203 | method = "quantile", 204 | taxon = "" 205 | )), 10) 206 | expect_equal(sum(cf_outl( 207 | test, 208 | taxon = "", 209 | value = 
"flagged", 210 | method = "quantile" 211 | )), 10) 212 | expect_equal(sum(cf_outl( 213 | test, 214 | value = "flagged", 215 | method = "mad", 216 | taxon = "" 217 | )), 10) 218 | expect_equal(sum(cf_outl( 219 | test, 220 | value = "flagged", 221 | method = "mad", 222 | taxon = "" 223 | )), 10) 224 | 225 | }) 226 | -------------------------------------------------------------------------------- /tests/testthat/test_wrapper_functions.R: -------------------------------------------------------------------------------- 1 | context("Wrapper functions") 2 | 3 | # Coordinate level cleaning 4 | set.seed(1) 5 | sp <- sample(letters, size = 250, replace = TRUE) 6 | set.seed(1) 7 | lon <- runif(250, min = 42, max = 51) 8 | set.seed(1) 9 | lat <- runif(250, min = -26, max = -11) 10 | 11 | exmpl <- data.frame(species = sp, 12 | decimalLongitude = lon, 13 | decimalLatitude = lat, 14 | ISO3 = "RUS") 15 | 16 | 17 | test_that("clean_coordinates produces correct output", { 18 | skip("message") 19 | t1 <- clean_coordinates(x = exmpl) 20 | expect_equal(ncol(t1), 14) 21 | expect_equal(nrow(t1), 250) 22 | expect_equal(sum(t1$.summary), 185) 23 | 24 | expect_is(plot(t1), "gg") 25 | expect_is(plot(t1, clean = FALSE), "gg") 26 | expect_is(plot(t1, details = FALSE), "gg") 27 | expect_is(plot(t1, details = FALSE, clean = FALSE), "gg") 28 | 29 | expect_is(summary(t1), "integer") 30 | 31 | expect_equal(is(t1), "spatialvalid") 32 | 33 | }) 34 | 35 | test_that("clean_coordinates countries argument produces correct output", { 36 | skip("message") 37 | #skip_on_cran() 38 | expect_equal(sum( 39 | clean_coordinates(x = exmpl, countries = "ISO3", 40 | tests = c("countries", "seas"))$.summary), 0) 41 | }) 42 | 43 | #Dataset level cleaning 44 | #Create test dataset 45 | clean <- data.frame(dataset = rep("clean", 1000), 46 | decimalLongitude = runif(min = -43, max = -40, n = 1000), 47 | decimalLatitude = runif(min = -13, max = -10, n = 1000)) 48 | 49 | bias.long <- c(round(runif(min = -42, max = -40, n = 
500), 1), 50 | round(runif(min = -42, max = -40, n = 300), 0), 51 | runif(min = -42, max = -40, n = 200)) 52 | bias.lat <- c(round(runif(min = -12, max = -10, n = 500), 1), 53 | round(runif(min = -12, max = -10, n = 300), 0), 54 | runif(min = -12, max = -10, n = 200)) 55 | bias <- data.frame(dataset = rep("biased", 1000), 56 | decimalLongitude = bias.long, 57 | decimalLatitude = bias.lat) 58 | test <- rbind(clean, bias) 59 | 60 | 61 | test_that("dataset level cleaning works", { 62 | skip("message") 63 | #test activated 64 | expect_is(clean_dataset(test), "data.frame") 65 | expect_is(clean_dataset(test, tests = c("ddmm")), "data.frame") 66 | expect_is(clean_dataset(test, tests = c("periodicity")), "data.frame") 67 | 68 | #Output value 69 | expect_is(clean_dataset(test, value = "clean"), "data.frame") 70 | expect_is(clean_dataset(test, value = "flagged"), "data.frame") 71 | 72 | expect_equal(sum(clean_dataset(test)$summary), 1) 73 | }) 74 | 75 | # test_that("CleanCoordinatesDS work", { 76 | # expect_equal(CleanCoordinatesDS(test), 250) 77 | # }) 78 | 79 | 80 | 81 | #Fossil wrapper function 82 | set.seed(1) 83 | minages <- runif(250, 0, 65) 84 | set.seed(1) 85 | lat <- runif(250, min = -26, max = -11) 86 | set.seed(1) 87 | lng <- runif(250, min = 42, max = 51) 88 | set.seed(1) 89 | age <- runif(250, 0.1, 65) 90 | 91 | exmpl <- data.frame(accepted_name = sample(letters, size = 250, replace = TRUE), 92 | decimalLongitude = lng, 93 | decimalLatitude = lat, 94 | min_ma = minages, 95 | max_ma = minages + age) 96 | 97 | 98 | test_that("fossil wrapper cleaning works", { 99 | skip("message") 100 | expect_is(clean_fossils(exmpl), "spatialvalid") 101 | expect_equal(sum(clean_fossils(exmpl)$.summary), 250) 102 | }) 103 | 104 | # test_that("CleanCoordinatesFOS work", { 105 | # expect_equal(sum(CleanCoordinatesFOS(exmpl)$summary), 249) 106 | # }) 107 | 108 | #Write Pyrate output 109 | 110 | test.str1 <- "test.pdf" 111 | 112 | test_that("WritePyRate interal functions work", { 113 | 
skip("message") 114 | expect_is(CoordinateCleaner:::.NoExtension(test.str1), "character") 115 | expect_equal(CoordinateCleaner:::.NoExtension(test.str1), "test") 116 | }) 117 | -------------------------------------------------------------------------------- /vignettes/Cleaning_GBIF_data_with_CoordinateCleaner_files/header-attrs-2.21/header-attrs.js: -------------------------------------------------------------------------------- 1 | // Pandoc 2.9 adds attributes on both header and div. We remove the former (to 2 | // be compatible with the behavior of Pandoc < 2.8). 3 | document.addEventListener('DOMContentLoaded', function(e) { 4 | var hs = document.querySelectorAll("div.section[class*='level'] > :first-child"); 5 | var i, h, a; 6 | for (i = 0; i < hs.length; i++) { 7 | h = hs[i]; 8 | if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6 9 | a = h.attributes; 10 | while (a.length > 0) h.removeAttribute(a[0].name); 11 | } 12 | }); 13 | -------------------------------------------------------------------------------- /vignettes/Using_custom_gazetteers.R: -------------------------------------------------------------------------------- 1 | ## ----options, echo = FALSE---------------------------------------------------- 2 | knitr::opts_chunk$set(eval = FALSE) 3 | 4 | ## ----------------------------------------------------------------------------- 5 | # library(CoordinateCleaner) 6 | # library(dplyr) 7 | # library(ggplot2) 8 | # library(rgbif) 9 | # library(viridis) 10 | # library(terra) 11 | # 12 | # #download data from GBIF 13 | # dat <- rgbif::occ_search(scientificName = "Avicennia", limit = 1000, 14 | # hasCoordinate = T) 15 | # 16 | # dat <- dat$data 17 | # 18 | # dat <- dat %>% 19 | # dplyr::select(species = name, decimalLongitude = decimalLongitude, 20 | # decimalLatitude = decimalLatitude, countryCode) 21 | # 22 | # # run with default gazetteer 23 | # outl <- cc_sea(dat, value = "flagged") 24 | # ## OGR data source with driver: ESRI Shapefile 
25 | # ## Source: "C:\Users\az64mycy\AppData\Local\Temp\Rtmp4SRhHV", layer: "ne_110m_land" 26 | # ## with 127 features 27 | # ## It has 3 fields 28 | # 29 | # plo <- data.frame(dat, outlier = as.factor(!outl)) 30 | # 31 | # #plot results 32 | # ggplot() + 33 | # borders(fill = "grey60") + 34 | # geom_point(data = plo, 35 | # aes(x = decimalLongitude, y = decimalLatitude, col = outlier)) + 36 | # scale_color_viridis(discrete = T, name = "Flagged outlier") + 37 | # coord_fixed() + 38 | # theme_bw() + 39 | # theme(legend.position = "bottom") 40 | 41 | ## ----------------------------------------------------------------------------- 42 | # # The buffered custom gazetteer 43 | # data("buffland") 44 | # buffland <- terra::vect(buffland) 45 | # plot(buffland) 46 | 47 | ## ----------------------------------------------------------------------------- 48 | # 49 | # # run with custom gazetteer 50 | # outl <- cc_sea(dat, value = "flagged", ref = buffland) 51 | # 52 | # plo <- data.frame(dat, outlier = as.factor(!outl)) 53 | # 54 | # #plot results 55 | # ggplot()+ 56 | # borders(fill = "grey60")+ 57 | # geom_point(data = plo, 58 | # aes(x = decimalLongitude, y = decimalLatitude, col = outlier))+ 59 | # scale_color_viridis(discrete = T, name = "Flagged outlier")+ 60 | # coord_fixed()+ 61 | # theme_bw()+ 62 | # theme(legend.position = "bottom") 63 | 64 | -------------------------------------------------------------------------------- /vignettes/Using_custom_gazetteers.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Using customized gazetteers" 3 | output: rmarkdown::html_vignette 4 | vignette: > 5 | %\VignetteIndexEntry{Using customized gazetteers} 6 | %\VignetteEngine{knitr::rmarkdown} 7 | %\VignetteSuggests{rgbif} 8 | \usepackage[utf8]{inputenc} 9 | --- 10 | 11 | ```{r options, echo = FALSE} 12 | knitr::opts_chunk$set(eval = FALSE) 13 | ``` 14 | 15 | CoordinateCleaner identifies potentially erroneous geographic records 
with 16 | coordinates assigned to the sea, country centroids, country capitals, urban 17 | areas, institutions, the GBIF headquarters and countries based on the comparison 18 | with geographic gazetteers (i.e. reference databases). All of these functions 19 | include default reference databases compiled from various sources. These default 20 | references have been selected suitable for regional to global analyses. They 21 | will also work for smaller scale analyses, but in some cases different references 22 | might be desirable and available. This could be for instance centroids of small 23 | scale political units, a different set of urban areas, or a different coastline 24 | when working with coastal species. To account for this, each *CoordinateCleaner* 25 | function using a gazetteer has a `ref` argument to specify custom gazetteers. 26 | 27 | We will use the case of coastlines and a coastal species to demonstrate the 28 | application of custom gazetteers. The purpose of `cc_sea` is to flag records in 29 | the sea, since these often represent erroneous and undesired records for 30 | terrestrial organisms. The standard gazetteer for this function is fetched from 31 | naturalearthdata.com at a 1:50m scale. However, often coordinates available from 32 | public databases are only precise at the scale of kilometres, which might lead 33 | to an overly critical flagging of coordinates close to the coastline, which is a 34 | problem especially for coastal or intertidal species. We illustrate the issue 35 | for the mangrove tree genus *Avicennia*. 
36 | 37 | 38 | ```{r} 39 | library(CoordinateCleaner) 40 | library(dplyr) 41 | library(ggplot2) 42 | library(rgbif) 43 | library(viridis) 44 | library(terra) 45 | 46 | #download data from GBIF 47 | dat <- rgbif::occ_search(scientificName = "Avicennia", limit = 1000, 48 | hasCoordinate = T) 49 | 50 | dat <- dat$data 51 | 52 | dat <- dat %>% 53 | dplyr::select(species = name, decimalLongitude = decimalLongitude, 54 | decimalLatitude = decimalLatitude, countryCode) 55 | 56 | # run with default gazetteer 57 | outl <- cc_sea(dat, value = "flagged") 58 | ## OGR data source with driver: ESRI Shapefile 59 | ## Source: "C:\Users\az64mycy\AppData\Local\Temp\Rtmp4SRhHV", layer: "ne_110m_land" 60 | ## with 127 features 61 | ## It has 3 fields 62 | 63 | plo <- data.frame(dat, outlier = as.factor(!outl)) 64 | 65 | #plot results 66 | ggplot() + 67 | borders(fill = "grey60") + 68 | geom_point(data = plo, 69 | aes(x = decimalLongitude, y = decimalLatitude, col = outlier)) + 70 | scale_color_viridis(discrete = T, name = "Flagged outlier") + 71 | coord_fixed() + 72 | theme_bw() + 73 | theme(legend.position = "bottom") 74 | ``` 75 | 76 | ![plot of chunk cusgaz1](cusgaz-cusgaz1-1.png) 77 | 78 | A large number of the coastal records gets flagged, which in this case is undesirable, because it is not a function of the records being wrong, but rather of the precision of the coordinates and the resolution of the reference. To avoid this problem you can use a buffered reference, which avoids flagging records close to the coast line and only flags records from the open ocean. *CoordinateCleaner* comes with a one degree buffered reference (`buffland`). In case a narrower or distance true buffer is necessary, you can provide any SpatVector similar in structure to `buffland` via the `ref` argument. 
79 | 80 | 81 | ```{r} 82 | # The buffered custom gazetteer 83 | data("buffland") 84 | buffland <- terra::vect(buffland) 85 | plot(buffland) 86 | ``` 87 | 88 | ![plot of chunk cusgaz2](cusgaz-cusgaz2-1.png) 89 | 90 | ```{r} 91 | 92 | # run with custom gazetteer 93 | outl <- cc_sea(dat, value = "flagged", ref = buffland) 94 | 95 | plo <- data.frame(dat, outlier = as.factor(!outl)) 96 | 97 | #plot results 98 | ggplot()+ 99 | borders(fill = "grey60")+ 100 | geom_point(data = plo, 101 | aes(x = decimalLongitude, y = decimalLatitude, col = outlier))+ 102 | scale_color_viridis(discrete = T, name = "Flagged outlier")+ 103 | coord_fixed()+ 104 | theme_bw()+ 105 | theme(legend.position = "bottom") 106 | ``` 107 | 108 | ![plot of chunk cusgaz2](cusgaz-cusgaz2-2.png) 109 | 110 | -------------------------------------------------------------------------------- /vignettes/Using_custom_gazetteers.Rmd.orig: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Using customized gazetteers" 3 | output: rmarkdown::html_vignette 4 | vignette: > 5 | %\VignetteIndexEntry{Using customized gazetteers} 6 | %\VignetteEngine{knitr::rmarkdown} 7 | %\VignetteSuggests{rgbif} 8 | \usepackage[utf8]{inputenc} 9 | --- 10 | 11 | ```{r clgbif1, echo = F, eval = T} 12 | knitr::opts_chunk$set(fig.width = 7, fig.path = "cusgaz-") 13 | ``` 14 | 15 | 16 | CoordinateCleaner identifies potentially erroneous geographic records with coordinates assigned to the sea, country centroids, country capitals, urban areas, institutions, the GBIF headquarters and countries based on the comparison with geographic gazetteers (i.e. reference databases). All of these functions include default reference databases compiled from various sources. These default references have been selected suitable for regional to global analyses. They will also work for smaller scale analyses, but in some cases different references might be desirable and available. 
This could be for instance centroids of small scale political units, a different set of urban areas, or a different coastline when working with coastal species. To account for this, each *CoordinateCleaner* function using a gazetteer has a `ref` argument to specify custom gazetteers. 17 | 18 | We will use the case of coastlines and a coastal species to demonstrate the application of custom gazetteers. The purpose of `cc_sea` is to flag records in the sea, since these often represent erroneous and undesired records for terrestrial organisms. The standard gazetteer for this function is fetched from naturalearthdata.com at a 1:50m scale. However, often coordinates available from public databases are only precise at the scale of kilometres, which might lead to an overly critical flagging of coordinates close to the coastline, which is a problem especially for coastal or intertidal species. We illustrate the issue for the mangrove tree genus *Avicennia*. 19 | 20 | ```{r cusgaz1, warning = F, message = F, collapse = T, fig.width=8, fig.height=6} 21 | library(CoordinateCleaner) 22 | library(dplyr) 23 | library(ggplot2) 24 | library(rgbif) 25 | library(sp) 26 | library(viridis) 27 | 28 | 29 | #download data from GBIF 30 | dat <- rgbif::occ_search(scientificName = "Avicennia", limit = 1000, 31 | hasCoordinate = T) 32 | 33 | dat <- dat$data 34 | 35 | dat <- dat %>% 36 | dplyr::select(species = name, decimalLongitude = decimalLongitude, 37 | decimalLatitude = decimalLatitude, countryCode) 38 | 39 | # run with default gazetteer 40 | outl <- cc_sea(dat, value = "flagged") 41 | 42 | plo <- data.frame(dat, outlier = as.factor(!outl)) 43 | 44 | #plot results 45 | ggplot()+ 46 | borders(fill = "grey60")+ 47 | geom_point(data = plo, 48 | aes(x = decimalLongitude, y = decimalLatitude, col = outlier))+ 49 | scale_color_viridis(discrete = T, name = "Flagged outlier")+ 50 | coord_fixed()+ 51 | theme_bw()+ 52 | theme(legend.position = "bottom") 53 | 54 | ``` 55 | 56 | A large number 
of the coastal records gets flagged, which in this case is undesirable, because it is not a function of the records being wrong, but rather of the precision of the coordinates and the resolution of the reference. To avoid this problem you can use a buffered reference, which avoids flagging records close to the coast line and only flags records from the open ocean. *CoordinateCleaner* comes with a one degree buffered reference (`buffland`). In case a narrower or distance true buffer is necessary, you can provide any SpatVector similar in structure to `buffland` via the `ref` argument. 57 | 58 | ```{r cusgaz2, warning = F, message = F, collapse = T, fig.width=8, fig.height=6} 59 | # The buffered custom gazetteer 60 | data("buffland") 61 | plot(buffland) 62 | 63 | # run with custom gazetteer 64 | outl <- cc_sea(dat, value = "flagged", ref = buffland) 65 | 66 | plo <- data.frame(dat, outlier = as.factor(!outl)) 67 | 68 | #plot results 69 | ggplot()+ 70 | borders(fill = "grey60")+ 71 | geom_point(data = plo, 72 | aes(x = decimalLongitude, y = decimalLatitude, col = outlier))+ 73 | scale_color_viridis(discrete = T, name = "Flagged outlier")+ 74 | coord_fixed()+ 75 | theme_bw()+ 76 | theme(legend.position = "bottom") 77 | ``` 78 | 79 | -------------------------------------------------------------------------------- /vignettes/cusgaz-cusgaz1-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/CoordinateCleaner/9195fb64154dd20a921a412b5e4d24cc18d3803c/vignettes/cusgaz-cusgaz1-1.png -------------------------------------------------------------------------------- /vignettes/cusgaz-cusgaz2-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/CoordinateCleaner/9195fb64154dd20a921a412b5e4d24cc18d3803c/vignettes/cusgaz-cusgaz2-1.png -------------------------------------------------------------------------------- 
/vignettes/cusgaz-cusgaz2-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/CoordinateCleaner/9195fb64154dd20a921a412b5e4d24cc18d3803c/vignettes/cusgaz-cusgaz2-2.png -------------------------------------------------------------------------------- /vignettes/gbif-clgbif11-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/CoordinateCleaner/9195fb64154dd20a921a412b5e4d24cc18d3803c/vignettes/gbif-clgbif11-1.png -------------------------------------------------------------------------------- /vignettes/gbif-clgbif16-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/CoordinateCleaner/9195fb64154dd20a921a412b5e4d24cc18d3803c/vignettes/gbif-clgbif16-1.png -------------------------------------------------------------------------------- /vignettes/gbif-clgbif17-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/CoordinateCleaner/9195fb64154dd20a921a412b5e4d24cc18d3803c/vignettes/gbif-clgbif17-1.png -------------------------------------------------------------------------------- /vignettes/gbif-clgbif18-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/CoordinateCleaner/9195fb64154dd20a921a412b5e4d24cc18d3803c/vignettes/gbif-clgbif18-1.png -------------------------------------------------------------------------------- /vignettes/gbif-clgbif19-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/CoordinateCleaner/9195fb64154dd20a921a412b5e4d24cc18d3803c/vignettes/gbif-clgbif19-1.png -------------------------------------------------------------------------------- /vignettes/gbif-clgbif5-1.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/CoordinateCleaner/9195fb64154dd20a921a412b5e4d24cc18d3803c/vignettes/gbif-clgbif5-1.png -------------------------------------------------------------------------------- /vignettes/gbif-clgbif6-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/CoordinateCleaner/9195fb64154dd20a921a412b5e4d24cc18d3803c/vignettes/gbif-clgbif6-1.png -------------------------------------------------------------------------------- /vignettes/pbdb-unnamed-chunk-15-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/CoordinateCleaner/9195fb64154dd20a921a412b5e4d24cc18d3803c/vignettes/pbdb-unnamed-chunk-15-1.png -------------------------------------------------------------------------------- /vignettes/pbdb-unnamed-chunk-16-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/CoordinateCleaner/9195fb64154dd20a921a412b5e4d24cc18d3803c/vignettes/pbdb-unnamed-chunk-16-1.png -------------------------------------------------------------------------------- /vignettes/pbdb-unnamed-chunk-17-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/CoordinateCleaner/9195fb64154dd20a921a412b5e4d24cc18d3803c/vignettes/pbdb-unnamed-chunk-17-1.png -------------------------------------------------------------------------------- /vignettes/pbdb-unnamed-chunk-24-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/CoordinateCleaner/9195fb64154dd20a921a412b5e4d24cc18d3803c/vignettes/pbdb-unnamed-chunk-24-1.png -------------------------------------------------------------------------------- 
/vignettes/pbdb-unnamed-chunk-24-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/CoordinateCleaner/9195fb64154dd20a921a412b5e4d24cc18d3803c/vignettes/pbdb-unnamed-chunk-24-2.png -------------------------------------------------------------------------------- /vignettes/pbdb-unnamed-chunk-25-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/CoordinateCleaner/9195fb64154dd20a921a412b5e4d24cc18d3803c/vignettes/pbdb-unnamed-chunk-25-1.png -------------------------------------------------------------------------------- /vignettes/pbdb-unnamed-chunk-25-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/CoordinateCleaner/9195fb64154dd20a921a412b5e4d24cc18d3803c/vignettes/pbdb-unnamed-chunk-25-2.png -------------------------------------------------------------------------------- /vignettes/pbdb-unnamed-chunk-26-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/CoordinateCleaner/9195fb64154dd20a921a412b5e4d24cc18d3803c/vignettes/pbdb-unnamed-chunk-26-1.png -------------------------------------------------------------------------------- /vignettes/pbdb-unnamed-chunk-26-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/CoordinateCleaner/9195fb64154dd20a921a412b5e4d24cc18d3803c/vignettes/pbdb-unnamed-chunk-26-2.png -------------------------------------------------------------------------------- /vignettes/pbdb-unnamed-chunk-27-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/CoordinateCleaner/9195fb64154dd20a921a412b5e4d24cc18d3803c/vignettes/pbdb-unnamed-chunk-27-1.png 
-------------------------------------------------------------------------------- /vignettes/pbdb-unnamed-chunk-27-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/CoordinateCleaner/9195fb64154dd20a921a412b5e4d24cc18d3803c/vignettes/pbdb-unnamed-chunk-27-2.png -------------------------------------------------------------------------------- /vignettes/pbdb-unnamed-chunk-33-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/CoordinateCleaner/9195fb64154dd20a921a412b5e4d24cc18d3803c/vignettes/pbdb-unnamed-chunk-33-1.png -------------------------------------------------------------------------------- /vignettes/pbdb-unnamed-chunk-33-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/CoordinateCleaner/9195fb64154dd20a921a412b5e4d24cc18d3803c/vignettes/pbdb-unnamed-chunk-33-2.png -------------------------------------------------------------------------------- /vignettes/pbdb-unnamed-chunk-35-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/CoordinateCleaner/9195fb64154dd20a921a412b5e4d24cc18d3803c/vignettes/pbdb-unnamed-chunk-35-1.png -------------------------------------------------------------------------------- /vignettes/pbdb-unnamed-chunk-35-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/CoordinateCleaner/9195fb64154dd20a921a412b5e4d24cc18d3803c/vignettes/pbdb-unnamed-chunk-35-2.png -------------------------------------------------------------------------------- /vignettes/pbdb-unnamed-chunk-7-1.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ropensci/CoordinateCleaner/9195fb64154dd20a921a412b5e4d24cc18d3803c/vignettes/pbdb-unnamed-chunk-7-1.png --------------------------------------------------------------------------------