├── .DS_Store ├── .Rbuildignore ├── .github ├── .gitignore └── workflows │ ├── R-CMD-check.yaml │ └── pkgdown.yaml ├── .gitignore ├── CONTRIBUTING.md ├── CRAN-RELEASE ├── CoordinateCleaner.Rproj ├── DESCRIPTION ├── NAMESPACE ├── NEWS.md ├── R ├── CoordinateCleaner-package.R ├── cc_aohi.R ├── cc_cap.R ├── cc_cen.R ├── cc_coun.R ├── cc_dupl.R ├── cc_equ.R ├── cc_gbif.R ├── cc_inst.R ├── cc_iucn.R ├── cc_outl.R ├── cc_sea.R ├── cc_urb.R ├── cc_val.R ├── cc_zero.R ├── cd_ddmm.R ├── cd_round.R ├── cf_age.R ├── cf_equal.R ├── cf_outl.R ├── cf_range.R ├── clean_coordinates.R ├── clean_dataset.R ├── clean_fossils.R ├── internal_clean_coordinate.R ├── internal_clean_dataset.R ├── internal_write_pyrate.R ├── methods.spatialvalid.R ├── sysdata.rda └── write_pyrate.R ├── README.md ├── _pkgdown.yml ├── _site.yml ├── articles ├── CoordinateCleaner.bib ├── Dataset_level_cleaning.Rmd ├── Geographic_outliers.Rmd ├── The_institutions_database.Rmd ├── The_institutions_database.html └── apa.csl ├── codemeta.json ├── cran-comments.md ├── data ├── aohi.rda ├── buffland.rda ├── buffsea.rda ├── countryref.rda ├── institutions.rda └── pbdb_example.rda ├── docs ├── 404.html ├── CONTRIBUTING.html ├── articles │ ├── Cleaning_GBIF_data_with_CoordinateCleaner.html │ ├── Cleaning_GBIF_data_with_CoordinateCleaner_files │ │ ├── accessible-code-block-0.0.1 │ │ │ └── empty-anchor.js │ │ └── figure-html │ │ │ ├── unnamed-chunk-11-1.png │ │ │ ├── unnamed-chunk-16-1.png │ │ │ ├── unnamed-chunk-17-1.png │ │ │ ├── unnamed-chunk-18-1.png │ │ │ ├── unnamed-chunk-19-1.png │ │ │ ├── unnamed-chunk-5-1.png │ │ │ └── unnamed-chunk-6-1.png │ ├── Cleaning_PBDB_fossils_with_CoordinateCleaner.html │ ├── Cleaning_PBDB_fossils_with_CoordinateCleaner_files │ │ ├── accessible-code-block-0.0.1 │ │ │ └── empty-anchor.js │ │ └── figure-html │ │ │ ├── unnamed-chunk-16-1.png │ │ │ ├── unnamed-chunk-17-1.png │ │ │ ├── unnamed-chunk-25-1.png │ │ │ ├── unnamed-chunk-25-2.png │ │ │ ├── unnamed-chunk-27-1.png │ │ │ ├── 
unnamed-chunk-27-2.png │ │ │ └── unnamed-chunk-7-1.png │ ├── Comparison_other_software.html │ ├── Comparison_other_software_files │ │ └── accessible-code-block-0.0.1 │ │ │ └── empty-anchor.js │ ├── Using_custom_gazetteers.html │ ├── Using_custom_gazetteers_files │ │ ├── accessible-code-block-0.0.1 │ │ │ └── empty-anchor.js │ │ └── figure-html │ │ │ ├── unnamed-chunk-1-1.png │ │ │ ├── unnamed-chunk-2-1.png │ │ │ └── unnamed-chunk-2-2.png │ ├── clgbif11-1.png │ ├── clgbif16-1.png │ ├── clgbif17-1.png │ ├── clgbif18-1.png │ ├── clgbif19-1.png │ ├── clgbif5-1.png │ ├── clgbif6-1.png │ ├── cusgaz1-1.png │ ├── cusgaz2-1.png │ ├── cusgaz2-2.png │ └── index.html ├── authors.html ├── bootstrap-toc.css ├── bootstrap-toc.js ├── docsearch.css ├── docsearch.js ├── index.html ├── link.svg ├── news │ └── index.html ├── pkgdown.css ├── pkgdown.js ├── pkgdown.yml └── reference │ ├── CoordinateCleaner-defunct.html │ ├── CoordinateCleaner-package.html │ ├── buffland.html │ ├── cc_cap.html │ ├── cc_cen.html │ ├── cc_coun.html │ ├── cc_dupl.html │ ├── cc_equ.html │ ├── cc_gbif.html │ ├── cc_inst.html │ ├── cc_iucn.html │ ├── cc_outl.html │ ├── cc_sea.html │ ├── cc_urb.html │ ├── cc_val.html │ ├── cc_zero.html │ ├── cd_ddmm.html │ ├── cd_round.html │ ├── cf_age.html │ ├── cf_equal.html │ ├── cf_outl.html │ ├── cf_range.html │ ├── clean_coordinates.html │ ├── clean_dataset.html │ ├── clean_fossils.html │ ├── countryref.html │ ├── index.html │ ├── institutions.html │ ├── pbdb_example.html │ ├── plot.spatialvalid-1.png │ ├── plot.spatialvalid.html │ └── write_pyrate.html ├── index.Rmd ├── inst ├── CITATION └── WORDLIST ├── man ├── CoordinateCleaner-package.Rd ├── aohi.Rd ├── buffland.Rd ├── buffsea.Rd ├── cc_aohi.Rd ├── cc_cap.Rd ├── cc_cen.Rd ├── cc_coun.Rd ├── cc_dupl.Rd ├── cc_equ.Rd ├── cc_gbif.Rd ├── cc_inst.Rd ├── cc_iucn.Rd ├── cc_outl.Rd ├── cc_sea.Rd ├── cc_urb.Rd ├── cc_val.Rd ├── cc_zero.Rd ├── cd_ddmm.Rd ├── cd_round.Rd ├── cf_age.Rd ├── cf_equal.Rd ├── cf_outl.Rd ├── 
cf_range.Rd ├── clean_coordinates.Rd ├── clean_dataset.Rd ├── clean_fossils.Rd ├── countryref.Rd ├── institutions.Rd ├── is.spatialvalid.Rd ├── pbdb_example.Rd ├── plot.spatialvalid.Rd └── write_pyrate.Rd ├── tests ├── testthat.R └── testthat │ ├── Rplots.pdf │ ├── test_coordinatelevel_functions.R │ ├── test_datasetlevel_functions.R │ ├── test_fossillevel_functions.R │ └── test_wrapper_functions.R └── vignettes ├── Cleaning_GBIF_data_with_CoordinateCleaner.R ├── Cleaning_GBIF_data_with_CoordinateCleaner.Rmd ├── Cleaning_GBIF_data_with_CoordinateCleaner.html ├── Cleaning_GBIF_data_with_CoordinateCleaner_files └── header-attrs-2.21 │ └── header-attrs.js ├── Cleaning_PBDB_fossils_with_CoordinateCleaner.R ├── Cleaning_PBDB_fossils_with_CoordinateCleaner.Rmd ├── Cleaning_PBDB_fossils_with_CoordinateCleaner.html ├── Comparison_other_software.Rmd ├── Comparison_other_software.html ├── CoordinateCleaner.bib ├── Using_custom_gazetteers.R ├── Using_custom_gazetteers.Rmd ├── Using_custom_gazetteers.Rmd.orig ├── Using_custom_gazetteers.html ├── apa.csl ├── cusgaz-cusgaz1-1.png ├── cusgaz-cusgaz2-1.png ├── cusgaz-cusgaz2-2.png ├── gbif-clgbif11-1.png ├── gbif-clgbif16-1.png ├── gbif-clgbif17-1.png ├── gbif-clgbif18-1.png ├── gbif-clgbif19-1.png ├── gbif-clgbif5-1.png ├── gbif-clgbif6-1.png ├── paleobioDB_angiosperms_PyRate.py ├── paleobioDB_angiosperms_TaxonList.txt ├── pbdb-unnamed-chunk-15-1.png ├── pbdb-unnamed-chunk-16-1.png ├── pbdb-unnamed-chunk-17-1.png ├── pbdb-unnamed-chunk-24-1.png ├── pbdb-unnamed-chunk-24-2.png ├── pbdb-unnamed-chunk-25-1.png ├── pbdb-unnamed-chunk-25-2.png ├── pbdb-unnamed-chunk-26-1.png ├── pbdb-unnamed-chunk-26-2.png ├── pbdb-unnamed-chunk-27-1.png ├── pbdb-unnamed-chunk-27-2.png ├── pbdb-unnamed-chunk-33-1.png ├── pbdb-unnamed-chunk-33-2.png ├── pbdb-unnamed-chunk-35-1.png ├── pbdb-unnamed-chunk-35-2.png └── pbdb-unnamed-chunk-7-1.png /.DS_Store: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ropensci/CoordinateCleaner/9195fb64154dd20a921a412b5e4d24cc18d3803c/.DS_Store -------------------------------------------------------------------------------- /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^CRAN-RELEASE$ 2 | ^codemeta\.json$ 3 | ^.*\.Rproj$ 4 | ^\.Rproj\.user$ 5 | extra_gazetteers 6 | README.Rmd 7 | CoordinateCleaner.Rcheck 8 | old 9 | .Rhistory 10 | .travis.yml 11 | Code.Rproj 12 | Tutorials 13 | CONTRIBUTING.md 14 | docs/ 15 | ^_pkgdown\.yml$ 16 | ^docs$ 17 | cran-comments.md 18 | pre_submission_tests.R 19 | ^articles$ 20 | _site.yml 21 | index.Rmd 22 | vignettes/Cleaning_GBIF_data_with_CoordinateCleaner.Rmd.orig 23 | Using_custom_gazetteers.Rmd.orig 24 | ^\.github$ 25 | ^pkgdown$ 26 | -------------------------------------------------------------------------------- /.github/.gitignore: -------------------------------------------------------------------------------- 1 | *.html 2 | -------------------------------------------------------------------------------- /.github/workflows/R-CMD-check.yaml: -------------------------------------------------------------------------------- 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples 2 | # Need help debugging build failures? 
Start at https://github.com/r-lib/actions#where-to-find-help 3 | on: 4 | push: 5 | branches: [main, master] 6 | pull_request: 7 | branches: [main, master] 8 | 9 | name: R-CMD-check 10 | 11 | jobs: 12 | R-CMD-check: 13 | runs-on: ${{ matrix.config.os }} 14 | 15 | name: ${{ matrix.config.os }} (${{ matrix.config.r }}) 16 | 17 | strategy: 18 | fail-fast: false 19 | matrix: 20 | config: 21 | - {os: macos-latest, r: 'release'} 22 | - {os: windows-latest, r: 'release'} 23 | - {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'} 24 | - {os: ubuntu-latest, r: 'release'} 25 | - {os: ubuntu-latest, r: 'oldrel-1'} 26 | 27 | env: 28 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 29 | R_KEEP_PKG_SOURCE: yes 30 | 31 | steps: 32 | - uses: actions/checkout@v3 33 | 34 | - uses: r-lib/actions/setup-pandoc@v2 35 | 36 | - uses: r-lib/actions/setup-r@v2 37 | with: 38 | r-version: ${{ matrix.config.r }} 39 | http-user-agent: ${{ matrix.config.http-user-agent }} 40 | use-public-rspm: true 41 | 42 | - uses: r-lib/actions/setup-r-dependencies@v2 43 | with: 44 | extra-packages: any::rcmdcheck 45 | needs: check 46 | 47 | - uses: r-lib/actions/check-r-package@v2 48 | with: 49 | upload-snapshots: true 50 | -------------------------------------------------------------------------------- /.github/workflows/pkgdown.yaml: -------------------------------------------------------------------------------- 1 | # Workflow derived from https://github.com/r-lib/actions/tree/master/examples 2 | # Need help debugging build failures? 
Start at https://github.com/r-lib/actions#where-to-find-help 3 | on: 4 | push: 5 | branches: [main, master] 6 | tags: ['*'] 7 | 8 | name: pkgdown 9 | 10 | jobs: 11 | pkgdown: 12 | runs-on: ubuntu-latest 13 | # Only restrict concurrency for non-PR jobs 14 | concurrency: 15 | group: pkgdown-${{ github.event_name != 'pull_request' || github.run_id }} 16 | env: 17 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 18 | permissions: 19 | contents: write 20 | steps: 21 | - uses: actions/checkout@v3 22 | 23 | - uses: r-lib/actions/setup-pandoc@v2 24 | 25 | - uses: r-lib/actions/setup-r@v2 26 | with: 27 | use-public-rspm: true 28 | 29 | - uses: r-lib/actions/setup-r-dependencies@v2 30 | with: 31 | extra-packages: any::pkgdown, local::. 32 | needs: website 33 | 34 | - name: Build site 35 | run: pkgdown::build_site_github_pages(new_process = FALSE, install = FALSE) 36 | shell: Rscript {0} 37 | 38 | - name: Deploy to GitHub pages 🚀 39 | if: github.event_name != 'pull_request' 40 | uses: JamesIves/github-pages-deploy-action@v4.4.1 41 | with: 42 | clean: false 43 | branch: gh-pages 44 | folder: docs -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .Rproj.user 2 | .Rhistory 3 | .RData 4 | .Ruserdata 5 | .Rhistory 6 | Package.Rproj 7 | articles/inst 8 | pre_submission_tests.R 9 | inst/doc 10 | /docs/articles/inst/ 11 | docs 12 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # CONTRIBUTING # 2 | 3 | ## Bugs, suggestions or feature requests? 4 | 5 | * Submit an issue on the [Issues page](https://github.com/azizka/CoordinateCleaner/issues) - be sure to include R session information and a reproducible example. 6 | 7 | ## Code contribution 8 | 9 | * If you want to contribute to the package - awesome. 
Please get in touch with [zizka.alexander@gmail.com](mailto:zizka.alexander@gmail.com). -------------------------------------------------------------------------------- /CRAN-RELEASE: -------------------------------------------------------------------------------- 1 | This package was submitted to CRAN on 2020-10-13. 2 | Once it is accepted, delete this file and tag the release (commit f876093). 3 | -------------------------------------------------------------------------------- /CoordinateCleaner.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: Default 4 | SaveWorkspace: Default 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | 15 | BuildType: Package 16 | PackageUseDevtools: Yes 17 | PackageInstallArgs: --no-multiarch --with-keep.source 18 | PackageCheckArgs: --as-cran _SP_EVOLUTION_STATUS_=2 R CMD check _R_CHECK_S3_METHODS_SHOW_POSSIBLE_ISSUES_=true 19 | PackageRoxygenize: rd,collate,namespace,vignette 20 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Type: Package 2 | Package: CoordinateCleaner 3 | Title: Automated Cleaning of Occurrence Records from Biological 4 | Collections 5 | Version: 3.0.1 6 | Authors@R: c(person(given = "Alexander", family = "Zizka", email = "zizka.alexander@gmail.com", 7 | role = c("aut", "cre")), 8 | person(given = "Daniele", family = "Silvestro", role = "ctb"), 9 | person(given = "Tobias", family = "Andermann", role = "ctb"), 10 | person(given = "Josue", family = "Azevedo", role = "ctb"), 11 | person(given = "Camila", family = "Duarte Ritter", role = "ctb"), 12 | person(given = "Daniel", family = "Edler", role = "ctb"), 13 | person(given = "Harith", family = "Farooq", role = "ctb"), 14 | 
person(given = "Andrei", family = "Herdean", role = "ctb"), 15 | person(given = "Maria", family = "Ariza", role = "ctb"), 16 | person(given = "Ruud", family = "Scharn", role = "ctb"), 17 | person(given = "Sten", family = "Svanteson", role = "ctb"), 18 | person(given = "Niklas", family = "Wengstrom", role = "ctb"), 19 | person(given = "Vera", family = "Zizka", role = "ctb"), 20 | person(given = "Alexandre", family ="Antonelli", role = "ctb"), 21 | person(given = "Bruno", family = "Vilela", role = "ctb", 22 | comment = "Bruno updated the package to remove dependencies on sp, raster, rgdal, maptools, and rgeos packages"), 23 | person("Irene", "Steves", role = "rev", 24 | comment = "Irene reviewed the package for ropensci, see "), 25 | person("Francisco", "Rodriguez-Sanchez", role = "rev", 26 | comment = "Francisco reviewed the package for ropensci, see ")) 27 | Description: Automated flagging of common spatial and temporal 28 | errors in biological and paleontological collection data, for the use 29 | in conservation, ecology and paleontology. Includes automated tests to 30 | easily flag (and exclude) records assigned to country or province 31 | centroid, the open ocean, the headquarters of the Global Biodiversity 32 | Information Facility, urban areas or the location of biodiversity 33 | institutions (museums, zoos, botanical gardens, universities). 34 | Furthermore identifies per species outlier coordinates, zero 35 | coordinates, identical latitude/longitude and invalid coordinates. 36 | Also implements an algorithm to identify data sets with a significant 37 | proportion of rounded coordinates. Especially suited for large data 38 | sets. The reference for the methodology is: Zizka et al. (2019) 39 | . 
40 | License: GPL-3 41 | URL: https://ropensci.github.io/CoordinateCleaner/ 42 | BugReports: https://github.com/ropensci/CoordinateCleaner/issues 43 | Depends: 44 | R (>= 3.5.0) 45 | Imports: 46 | dplyr, 47 | geosphere, 48 | ggplot2, 49 | graphics, 50 | grDevices, 51 | methods, 52 | rgbif, 53 | rnaturalearth (>= 0.3.2), 54 | stats, 55 | terra, 56 | tidyselect, 57 | utils 58 | Suggests: 59 | countrycode, 60 | covr, 61 | knitr, 62 | magrittr, 63 | maps, 64 | rmarkdown, 65 | rnaturalearthdata, 66 | sf, 67 | testthat, 68 | viridis 69 | Config/Needs/website: tidyverse, viridis, caret, msm, countrycode, cran/speciesgeocodeR 70 | VignetteBuilder: 71 | knitr 72 | Encoding: UTF-8 73 | Language: en-gb 74 | LazyData: true 75 | RoxygenNote: 7.2.3 76 | SystemRequirements: GDAL (>= 2.0.1) 77 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | S3method(plot,spatialvalid) 4 | S3method(summary,spatialvalid) 5 | export(cc_aohi) 6 | export(cc_cap) 7 | export(cc_cen) 8 | export(cc_coun) 9 | export(cc_dupl) 10 | export(cc_equ) 11 | export(cc_gbif) 12 | export(cc_inst) 13 | export(cc_iucn) 14 | export(cc_outl) 15 | export(cc_sea) 16 | export(cc_urb) 17 | export(cc_val) 18 | export(cc_zero) 19 | export(cd_ddmm) 20 | export(cd_round) 21 | export(cf_age) 22 | export(cf_equal) 23 | export(cf_outl) 24 | export(cf_range) 25 | export(clean_coordinates) 26 | export(clean_dataset) 27 | export(clean_fossils) 28 | export(is.spatialvalid) 29 | export(write_pyrate) 30 | importFrom(dplyr,bind_rows) 31 | importFrom(dplyr,inner_join) 32 | importFrom(dplyr,left_join) 33 | importFrom(dplyr,select) 34 | importFrom(geosphere,destPoint) 35 | importFrom(geosphere,distHaversine) 36 | importFrom(geosphere,distm) 37 | importFrom(ggplot2,aes_string) 38 | importFrom(ggplot2,borders) 39 | importFrom(ggplot2,coord_fixed) 40 | 
importFrom(ggplot2,element_blank) 41 | importFrom(ggplot2,element_text) 42 | importFrom(ggplot2,fortify) 43 | importFrom(ggplot2,geom_point) 44 | importFrom(ggplot2,geom_polygon) 45 | importFrom(ggplot2,map_data) 46 | importFrom(ggplot2,scale_colour_manual) 47 | importFrom(ggplot2,scale_shape_manual) 48 | importFrom(ggplot2,theme) 49 | importFrom(ggplot2,theme_bw) 50 | importFrom(grDevices,extendrange) 51 | importFrom(graphics,abline) 52 | importFrom(graphics,hist) 53 | importFrom(graphics,plot) 54 | importFrom(graphics,segments) 55 | importFrom(graphics,title) 56 | importFrom(methods,as) 57 | importFrom(methods,is) 58 | importFrom(rgbif,occ_count) 59 | importFrom(rnaturalearth,ne_download) 60 | importFrom(rnaturalearth,ne_file_name) 61 | importFrom(stats,IQR) 62 | importFrom(stats,aggregate) 63 | importFrom(stats,binom.test) 64 | importFrom(stats,complete.cases) 65 | importFrom(stats,cov) 66 | importFrom(stats,dist) 67 | importFrom(stats,mad) 68 | importFrom(stats,median) 69 | importFrom(stats,na.omit) 70 | importFrom(stats,quantile) 71 | importFrom(stats,runif) 72 | importFrom(terra,buffer) 73 | importFrom(terra,crop) 74 | importFrom(terra,crs) 75 | importFrom(terra,expanse) 76 | importFrom(terra,ext) 77 | importFrom(terra,extract) 78 | importFrom(terra,geom) 79 | importFrom(terra,geomtype) 80 | importFrom(terra,plot) 81 | importFrom(terra,project) 82 | importFrom(terra,rast) 83 | importFrom(terra,subset) 84 | importFrom(terra,union) 85 | importFrom(terra,vect) 86 | importFrom(tidyselect,starts_with) 87 | importFrom(utils,data) 88 | importFrom(utils,write.table) 89 | -------------------------------------------------------------------------------- /R/cc_aohi.R: -------------------------------------------------------------------------------- 1 | #' Identify Coordinates in Artificial Hotspot Occurrence Inventory 2 | #' 3 | #' Removes or flags records within Artificial Hotspot Occurrence Inventory. 
4 | #' Poorly geo-referenced occurrence records in biological databases are often 5 | #' erroneously geo-referenced to highly recurring coordinates that were assessed 6 | #' by Park et al 2022. See the reference for more details. 7 | #' 8 | #' 9 | #' @inheritParams cc_cap 10 | #' @param taxa Artificial Hotspot Occurrence Inventory (AHOI) were created based 11 | #' on four different taxa, birds, insecta, mammalia, and plantae. Users can 12 | #' choose to keep all, or any specific taxa subset to define the AHOI locations. 13 | #' Default is to keep all: c("Aves", "Insecta", "Mammalia", "Plantae"). 14 | #' @inherit cc_cap return 15 | #' 16 | #' @note See \url{https://ropensci.github.io/CoordinateCleaner/} for more 17 | #' details and tutorials. 18 | #' 19 | #' @keywords Coordinate cleaning 20 | #' @family Coordinates 21 | #' 22 | #' @references Park, D. S., Xie, Y., Thammavong, H. T., Tulaiha, R., & Feng, X. 23 | #' (2023). Artificial Hotspot Occurrence Inventory (AHOI). Journal of 24 | #' Biogeography, 50, 441–449. 
\doi{10.1111/jbi.14543} 25 | #' 26 | #' @examples 27 | #' 28 | #' x <- data.frame(species = letters[1:10], 29 | #' decimalLongitude = c(runif(99, -180, 180), -47.92), 30 | #' decimalLatitude = c(runif(99, -90,90), -15.78)) 31 | #' cc_aohi(x) 32 | #' 33 | #' @export 34 | #' @importFrom geosphere destPoint 35 | #' @importFrom terra vect ext crop buffer geom 36 | #' @importFrom utils data 37 | 38 | cc_aohi <- function(x, 39 | lon = "decimalLongitude", 40 | lat = "decimalLatitude", 41 | species = "species", 42 | taxa = c("Aves", "Insecta", "Mammalia", "Plantae"), 43 | buffer = 10000, 44 | geod = TRUE, 45 | value = "clean", 46 | verbose = TRUE) { 47 | 48 | # check value argument 49 | match.arg(value, choices = c("clean", "flagged")) 50 | 51 | if (verbose) { 52 | message("Testing Artificial Hotspot Occurrence Inventory") 53 | } 54 | if (buffer > 10 & !geod) { 55 | warnings("Using large buffer check 'geod'") 56 | } 57 | if (buffer < 100 & geod) { 58 | warnings("Using small buffer check 'geod'") 59 | } 60 | 61 | # set default projection 62 | wgs84 <- "+proj=longlat +datum=WGS84 +no_defs" 63 | 64 | # select relevant columns 65 | dat <- terra::vect(x[, c(lon, lat), drop = FALSE], 66 | geom = c(lon, lat), 67 | crs = wgs84) 68 | 69 | # Load ref 70 | aohi <- get0("aohi", envir = asNamespace("CoordinateCleaner")) 71 | aohi <- aohi[aohi$taxa %in% taxa, ] 72 | lon_lat <- c("decimalLongitude", "decimalLatitude") 73 | ref <- terra::vect(aohi[, lon_lat], 74 | geom = lon_lat, 75 | crs = wgs84) 76 | 77 | # fix buffer 0 78 | buffer <- ifelse(buffer == 0, 0.00000000001, buffer) 79 | 80 | if (geod) { 81 | # credits to https://seethedatablog.wordpress.com 82 | dg <- seq(from = 0, to = 360, by = 5) 83 | 84 | buff_XY <- 85 | geosphere::destPoint( 86 | p = terra::geom(ref)[, c("x", "y")], 87 | b = rep(dg, each = length(ref)), 88 | d = buffer 89 | ) 90 | 91 | id <- rep(seq_along(ref), times = length(dg)) 92 | 93 | 94 | lst <- split(data.frame(buff_XY), f = id) 95 | 96 | # Make SpatialPolygons 
out of the list of coordinates 97 | lst <- lapply(lst, as.matrix) 98 | ref <- 99 | sapply(lst, terra::vect, crs = wgs84, type = "polygons") 100 | ref <- terra::vect(ref) 101 | 102 | #point in polygon test 103 | ext_dat <- terra::extract(ref, dat) 104 | out <- is.na(ext_dat[!duplicated(ext_dat[, 1]), 2]) 105 | } else { 106 | ref_buff <- terra::buffer(ref, buffer) 107 | # There is a weird bug in terra, so I did this work around 108 | ref <- terra::vect(stats::na.omit(terra::geom(ref_buff)), 109 | type = "polygon", crs = ref) 110 | terra::values(ref) <- terra::values(ref_buff) 111 | 112 | ext_dat <- terra::extract(ref, dat) 113 | out <- is.na(ext_dat[!duplicated(ext_dat[, 1]), 2]) 114 | } 115 | 116 | # create output based on value argument 117 | if (verbose) { 118 | if (value == "clean") { 119 | message(sprintf("Removed %s records.", sum(!out))) 120 | } else { 121 | message(sprintf("Flagged %s records.", sum(!out))) 122 | } 123 | } 124 | 125 | switch(value, clean = return(x[out, ]), flagged = return(out)) 126 | } 127 | -------------------------------------------------------------------------------- /R/cc_cen.R: -------------------------------------------------------------------------------- 1 | #' Identify Coordinates in Vicinity of Country and Province Centroids 2 | #' 3 | #' Removes or flags records within a radius around the geographic centroids of political 4 | #' countries and provinces. Poorly geo-referenced occurrence records in 5 | #' biological databases are often erroneously geo-referenced to centroids. 6 | #' 7 | #' @param buffer numerical. The buffer around each province or country 8 | #' centroid, where records should be flagged as problematic. Units depend on geod. 9 | #' Default = 1 kilometre. 10 | #' @param test a character string. Specifying the details of the test. One of 11 | #' c(\dQuote{both}, \dQuote{country}, \dQuote{provinces}). If both tests for 12 | #' country and province centroids. 
13 | #' @inheritParams cc_cap 14 | #' 15 | #' @inherit cc_cap return 16 | #' 17 | #' @note See \url{https://ropensci.github.io/CoordinateCleaner/} for more 18 | #' details and tutorials. 19 | #' 20 | #' @keywords Coordinate cleaning 21 | #' @family Coordinates 22 | #' 23 | #' @examples 24 | #' 25 | #' x <- data.frame(species = letters[1:10], 26 | #' decimalLongitude = c(runif(99, -180, 180), -47.92), 27 | #' decimalLatitude = c(runif(99, -90,90), -15.78)) 28 | #' cc_cen(x, geod = FALSE) 29 | #' 30 | #' \dontrun{ 31 | #' cc_inst(x, value = "flagged", buffer = 50000) #geod = T 32 | #' } 33 | #' 34 | #' @export 35 | #' @importFrom geosphere destPoint 36 | #' @importFrom terra vect ext crop buffer geom 37 | 38 | cc_cen <- function(x, 39 | lon = "decimalLongitude", 40 | lat = "decimalLatitude", 41 | species = "species", 42 | buffer = 1000, 43 | geod = TRUE, 44 | test = "both", 45 | ref = NULL, 46 | verify = FALSE, 47 | value = "clean", 48 | verbose = TRUE) { 49 | 50 | # check value argument 51 | match.arg(value, choices = c("clean", "flagged")) 52 | match.arg(test, choices = c("both", "country", "provinces")) 53 | 54 | if (verbose) { 55 | message("Testing country centroids") 56 | } 57 | if (buffer > 10 & !geod) { 58 | warnings("Using large buffer check 'geod'") 59 | } 60 | if (buffer < 100 & geod) { 61 | warnings("Using small buffer check 'geod'") 62 | } 63 | 64 | # set default projection 65 | wgs84 <- "+proj=longlat +datum=WGS84 +no_defs" 66 | 67 | # select relevant columns 68 | dat <- terra::vect(x[, c(lon, lat), drop = FALSE], 69 | geom = c(lon, lat), 70 | crs = wgs84) 71 | 72 | if (is.null(ref)) { 73 | ref <- CoordinateCleaner::countryref 74 | 75 | switch(test, country = { 76 | ref <- ref[ref$type == "country", ] 77 | }, province = { 78 | ref <- ref[ref$type == "province", ] 79 | }) 80 | } else { 81 | #proj4string(ref) <- wgs84 82 | warning("assuming lat/lon for centroids.ref") 83 | } 84 | buffer <- ifelse(buffer == 0, 0.00000000001, buffer) 85 | limits <- 
terra::ext(terra::buffer(dat, width = buffer)) 86 | 87 | # subset of testdatset according to speed up buffer 88 | lon_lat <- c("centroid.lon", "centroid.lat") 89 | ref <- terra::crop( 90 | terra::vect(ref[, lon_lat], 91 | geom = lon_lat, 92 | crs = wgs84), 93 | limits) 94 | 95 | # run buffering incase no centroids are found in the study area 96 | if (is.null(ref) | nrow(ref) == 0) { 97 | out <- rep(TRUE, nrow(x)) 98 | } else { 99 | if (geod) { 100 | # credits to https://seethedatablog.wordpress.com 101 | dg <- seq(from = 0, to = 360, by = 5) 102 | 103 | buff_XY <- geosphere::destPoint(p = terra::geom(ref)[, c("x", "y")], 104 | b = rep(dg, each = length(ref)), 105 | d = buffer) 106 | 107 | id <- rep(seq_along(ref), times = length(dg)) 108 | 109 | 110 | lst <- split(data.frame(buff_XY), f = id) 111 | 112 | # Make SpatialPolygons out of the list of coordinates 113 | lst <- lapply(lst, as.matrix) 114 | ref <- sapply(lst, terra::vect, crs = wgs84, type = "polygons") 115 | ref <- terra::vect(ref) 116 | 117 | #point in polygon test 118 | ext_dat <- terra::extract(ref, dat) 119 | out <- is.na(ext_dat[!duplicated(ext_dat[, 1]), 2]) 120 | } else { 121 | ref <- terra::buffer(ref, width = buffer) 122 | ext_dat <- terra::extract(ref, dat) 123 | out <- is.na(ext_dat[!duplicated(ext_dat[, 1]), 2]) 124 | } 125 | } 126 | 127 | # implement the verification 128 | if (verify & sum(out) > 0) { 129 | # get flagged coordinates 130 | ver <- x[!out,] 131 | 132 | #count the instances of all flagged records 133 | ver_count <- aggregate(ver[[species]] ~ ver[[lon]] + 134 | ver[[lat]] , FUN = "length") 135 | names(ver_count) <- c(lon, lat, "coord.count") 136 | 137 | ver_spec <- aggregate(ver[[lon]] ~ ver[[species]], FUN = "length") 138 | names(ver_spec) <- c(species, "species.count") 139 | 140 | #test which flagged x occur multiple times 141 | tester <- data.frame(x, ord = seq_len(nrow(x))) 142 | tester <- merge(tester, ver_count, by = c(lon,lat), all = TRUE) 143 | tester <- merge(tester, 
ver_spec, by = species, all = TRUE) 144 | 145 | tester <- tester[order(tester$ord),] 146 | tester[is.na(tester)] <- 0 147 | 148 | #only flag those records that occure with only one coordinate in the buffer 149 | out <- tester$coord.count <= tester$species.count | out 150 | } 151 | # create output based on value argument 152 | if (verbose) { 153 | if (value == "clean") { 154 | message(sprintf("Removed %s records.", sum(!out))) 155 | } else { 156 | message(sprintf("Flagged %s records.", sum(!out))) 157 | } 158 | } 159 | 160 | switch(value, clean = return(x[out, ]), flagged = return(out)) 161 | } 162 | -------------------------------------------------------------------------------- /R/cc_coun.R: -------------------------------------------------------------------------------- 1 | #' Identify Coordinates Outside their Reported Country 2 | #' 3 | #' Removes or flags mismatches between geographic coordinates and additional 4 | #' country information (usually this information is reliably reported with 5 | #' specimens). Such a mismatch can occur for example, if latitude and longitude 6 | #' are switched. 7 | #' 8 | #' 9 | #' @param iso3 a character string. The column with the country assignment of 10 | #' each record in three letter ISO code. Default = \dQuote{countrycode}. 11 | #' @param ref SpatVector (geometry: polygons). Providing the geographic 12 | #' gazetteer. Can be any SpatVector (geometry: polygons), but the structure 13 | #' must be identical to \code{rnaturalearth::ne_countries(scale = "medium", 14 | #' returnclass = "sf")}. 15 | #' Default = \code{rnaturalearth::ne_countries(scale = "medium", returnclass = 16 | #' "sf")} 17 | #' @param ref_col the column name in the reference dataset, containing the 18 | #' relevant ISO codes for matching. Default is to "iso_a3_eh" which refers to 19 | #' the ISO-3 codes in the reference dataset. See notes. 20 | #' @param buffer numeric. Units are in meters. 
If provided, a buffer is 21 | #' created around each country polygon. 22 | #' @inheritParams cc_cen 23 | #' 24 | #' @inherit cc_cap return 25 | #' 26 | #' @note The ref_col argument allows to adapt the function to the structure of 27 | #' alternative reference datasets. For instance, for 28 | #' \code{rnaturalearth::ne_countries(scale = "small")}, the default will fail, 29 | #' but ref_col = "iso_a3" will work. 30 | #' 31 | #' @note With the default reference, records are flagged if they fall outside 32 | #' the terrestrial territory of countries, hence records in territorial waters 33 | #' might be flagged. See \url{https://ropensci.github.io/CoordinateCleaner/} 34 | #' for more details and tutorials. 35 | #' 36 | #' @keywords Coordinate cleaning 37 | #' @family Coordinates 38 | #' 39 | #' @examples 40 | #' 41 | #' \dontrun{ 42 | #' x <- data.frame(species = letters[1:10], 43 | #' decimalLongitude = runif(100, -20, 30), 44 | #' decimalLatitude = runif(100, 35,60), 45 | #' countrycode = "RUS") 46 | #' 47 | #' cc_coun(x, value = "flagged")#non-terrestrial records are flagged as wrong. 48 | #' } 49 | #' 50 | #' @export 51 | #' @importFrom terra vect geomtype extract 52 | #' @importFrom stats na.omit 53 | 54 | cc_coun <- function(x, 55 | lon = "decimalLongitude", 56 | lat = "decimalLatitude", 57 | iso3 = "countrycode", 58 | value = "clean", 59 | ref = NULL, 60 | ref_col = "iso_a3", 61 | verbose = TRUE, 62 | buffer = NULL) { 63 | 64 | # check function arguments for validity 65 | match.arg(value, choices = c("clean", "flagged")) 66 | if (!iso3 %in% names(x)) { 67 | stop("iso3 argument missing, please specify") 68 | } 69 | 70 | if (verbose) { 71 | message("Testing country identity") 72 | } 73 | 74 | # set reference and check for dependency 75 | if (is.null(ref)) { 76 | if (!requireNamespace("rnaturalearth", quietly = TRUE)) { 77 | stop("Install the 'rnaturalearth' package or provide a custom reference", 78 | call. 
= FALSE 79 | ) 80 | } 81 | ref <- terra::vect(rnaturalearth::ne_countries(scale = "medium", 82 | returnclass = "sf")) 83 | } else { 84 | #Enable sf formatted custom references 85 | if (any(is(ref) == "Spatial") | inherits(ref, "sf")) { 86 | ref <- terra::vect(ref) 87 | } 88 | # Check if object is a SpatVector 89 | if (!(inherits(ref, "SpatVector") & 90 | terra::geomtype(ref) == "polygons")) { 91 | stop("ref must be a SpatVector with geomtype 'polygons'") 92 | } 93 | #Check projection of custom reference and reproject if necessary 94 | ref <- reproj(ref) 95 | } 96 | 97 | # prepare data 98 | dat <- terra::vect(x[, c(lon, lat)], 99 | geom = c(lon, lat), 100 | crs = ref) 101 | 102 | # Buffer around countries 103 | if (is.numeric(buffer)) { 104 | buffer <- ifelse(buffer == 0, 0.00000000001, buffer) 105 | ref_buff <- terra::buffer(ref, buffer) 106 | # There is a weird bug in terra, so I did this work around 107 | ref <- terra::vect(stats::na.omit(terra::geom(ref_buff)), 108 | type = "polygon", crs = ref) 109 | terra::values(ref) <- terra::values(ref_buff) 110 | } 111 | 112 | # get country from coordinates and compare with provided country 113 | country <- terra::extract(ref, dat) 114 | count_dat <- as.character(unlist(x[, iso3])) 115 | 116 | if (is.numeric(buffer)) { 117 | out <- logical(length(dat)) 118 | for (i in seq_along(dat)) { 119 | out[i] <- count_dat[i] %in% country[country[, 1] == i, ref_col] 120 | } 121 | } else { 122 | country <- country[, ref_col] 123 | out <- as.character(country) == count_dat 124 | out[is.na(out)] <- FALSE # marine records are set to False 125 | } 126 | # return output 127 | if (verbose) { 128 | if (value == "clean") { 129 | message(sprintf("Removed %s records.", sum(!out))) 130 | } else { 131 | message(sprintf("Flagged %s records.", sum(!out))) 132 | } 133 | } 134 | 135 | switch(value, clean = return(x[out, ]), flagged = return(out)) 136 | } 137 | -------------------------------------------------------------------------------- 
/R/cc_dupl.R: --------------------------------------------------------------------------------
#' Identify Duplicated Records
#'
#' Removes or flags duplicated records based on species name and coordinates, as well as
#' user-defined additional columns. True (specimen) duplicates or duplicates
#' from the same species can make up the bulk of records in a biological
#' collection database, but are undesirable for many analyses. Both can be
#' flagged with this function, the former given enough additional information.
#'
#'
#' @param species a character string. The column with the species name. Default
#' = \dQuote{species}.
#' @param additions a vector of character strings. Additional columns to be
#' included in the test for duplication. For example as below, collector name
#' and collector number.
#' @inheritParams cc_cap
#'
#' @inherit cc_cap return
#'
#' @keywords Coordinate cleaning
#' @family Coordinates
#'
#' @examples
#'
#' x <- data.frame(species = letters[1:10],
#'                 decimalLongitude = sample(x = 0:10, size = 100, replace = TRUE),
#'                 decimalLatitude = sample(x = 0:10, size = 100, replace = TRUE),
#'                 collector = "Bonpl",
#'                 collector.number = c(1001, 354),
#'                 collection = rep(c("K", "WAG","FR", "P", "S"), 20))
#'
#' cc_dupl(x, value = "flagged")
#' cc_dupl(x, additions = c("collector", "collector.number"))
#'
#' @export
cc_dupl <- function(x,
                    lon = "decimalLongitude",
                    lat = "decimalLatitude",
                    species = "species",
                    additions = NULL,
                    value = "clean",
                    verbose = TRUE) {

  # validate the requested output type
  match.arg(value, choices = c("clean", "flagged"))

  if (verbose) {
    message("Testing duplicates")
  }

  # A record passes (TRUE) when its combination of coordinates, species name
  # and any user-supplied extra columns has not occurred earlier in x.
  dup_keys <- x[, c(lon, lat, species, additions)]
  out <- !duplicated(dup_keys)

  # report and return according to 'value'
  if (verbose) {
    n_removed <- sum(!out)
    if (value == "clean") {
      message(sprintf("Removed %s records.", n_removed))
    } else {
      message(sprintf("Flagged %s records.", n_removed))
    }
  }

  switch(value, clean = return(x[out, ]), flagged = return(out))
}
--------------------------------------------------------------------------------
/R/cc_equ.R: --------------------------------------------------------------------------------
#' Identify Records with Identical lat/lon
#'
#' Removes or flags records with equal latitude and longitude coordinates,
#' either exact or absolute. Equal coordinates can often indicate data entry
#' errors.
#'
#'
#' @param test character string. Defines if coordinates are compared exactly
#' (\dQuote{identical}) or on the absolute scale (i.e. -1 = 1,
#' \dQuote{absolute}). Default is to \dQuote{absolute}.
#' @inheritParams cc_cap
#'
#' @inherit cc_cap return
#'
#' @keywords Coordinate cleaning
#' @family Coordinates
#'
#' @examples
#'
#' x <- data.frame(species = letters[1:10],
#'                 decimalLongitude = runif(100, -180, 180),
#'                 decimalLatitude = runif(100, -90,90))
#'
#' cc_equ(x)
#' cc_equ(x, value = "flagged")
#'
#' @export
cc_equ <- function(x,
                   lon = "decimalLongitude",
                   lat = "decimalLatitude",
                   test = "absolute",
                   value = "clean",
                   verbose = TRUE) {

  # check value and test arguments
  match.arg(test, choices = c("absolute", "identical"))
  match.arg(value, choices = c("clean", "flagged"))

  if (verbose) {
    message("Testing equal lat/lon")
  }

  switch(test, absolute = {
    out <- !(abs(x[[lon]]) == abs(x[[lat]]))
  }, identical = {
    out <- !(x[[lon]] == x[[lat]])
  })

  if (verbose) {
    if (value == "clean"){
      message(sprintf("Removed %s records.", sum(!out)))
    } else {
      message(sprintf("Flagged %s records.",
sum(!out)))
    }
  }

  switch(value, clean = return(x[out, ]), flagged = return(out))
}
--------------------------------------------------------------------------------
/R/cc_gbif.R: --------------------------------------------------------------------------------
#' Identify Records Assigned to GBIF Headquarters
#'
#' Removes or flags records within 0.5 degree radius around the GBIF headquarters in
#' Copenhagen, DK.
#'
#' Not recommended if working with records from Denmark or the Copenhagen area.
#'
#' @param buffer numerical. The buffer around the GBIF headquarters,
#' where records should be flagged as problematic. Units depend on geod.
#' Default = 1000 m.
#' @param geod logical. If TRUE the radius is calculated
#' based on a sphere, buffer is in meters. If FALSE
#' the radius is calculated in degrees. Default = T.
#' @param species a character string. The column with the species name.
#' Only used when \code{verify = TRUE}. Default = \dQuote{species}.
#' @param verify logical. If TRUE, records inside the buffer are
#' cross-checked against the other flagged records: a record is only flagged
#' when its species occurs with a single coordinate inside the buffer (a
#' heuristic to retain genuine Copenhagen-area occurrences). Default = FALSE.
#' @inheritParams cc_cap
#'
#' @inherit cc_cap return
#'
#' @keywords Coordinate cleaning
#' @family Coordinates
#'
#' @examples
#'
#' x <- data.frame(species = "A",
#'                 decimalLongitude = c(12.58, 12.58),
#'                 decimalLatitude = c(55.67, 30.00))
#'
#' cc_gbif(x)
#' cc_gbif(x, value = "flagged")
#'
#' @export
#' @importFrom geosphere destPoint
#' @importFrom terra vect buffer extract

cc_gbif <- function(x,
                    lon = "decimalLongitude",
                    lat = "decimalLatitude",
                    species = "species",
                    buffer = 1000,
                    geod = TRUE,
                    verify = FALSE,
                    value = "clean",
                    verbose = TRUE) {

  # check function argument validity
  match.arg(value, choices = c("clean", "flagged"))

  if (verbose) {
    message("Testing GBIF headquarters, flagging records around Copenhagen")
  }

  # Warn on likely buffer/geod unit mismatches: with geod = FALSE the buffer
  # is in degrees (so > 10 is suspicious), with geod = TRUE it is in meters
  # (so < 100 is suspicious).
  # BUG FIX: these previously called warnings(), which only re-prints past
  # warnings and silently ignores its arguments; warning() is the function
  # that actually signals a warning condition.
  if (buffer > 10 & !geod) {
    warning("Using large buffer check 'geod'")
  }
  if (buffer < 100 & geod) {
    warning("Using small buffer check 'geod'")
  }

  # Fix buffer when equals 0 (terra::buffer misbehaves with width 0)
  buffer <- ifelse(buffer == 0, 0.00000000001, buffer)

  # set default projection
  wgs84 <- "+proj=longlat +datum=WGS84 +no_defs"
  dat <- terra::vect(x[, c(lon, lat)],
                     geom = c(lon, lat),
                     crs = wgs84)
  if (geod) {
    # Build an approximately circular polygon of geodesic radius 'buffer'
    # (meters) around the GBIF headquarters at 12.58 E / 55.67 N, from
    # destination points every 5 degrees of bearing.
    # credits to https://seethedatablog.wordpress.com
    dg <- seq(from = 0, to = 360, by = 5)

    buff_XY <- geosphere::destPoint(p = cbind(12.58, 55.67),
                                    b = rep(dg, each = 1),
                                    d = buffer)

    id <- rep(1, times = length(dg))

    lst <- split(data.frame(buff_XY), f = id)

    # Make SpatialPolygons out of the list of coordinates
    lst <- lapply(lst, as.matrix)
    ref <- sapply(lst, terra::vect, crs = wgs84, type = "polygons")
    ref <- terra::vect(ref)

    # point in polygon test; NA in the second extract column means the point
    # lies outside the buffer polygon, i.e. the record passes (TRUE)
    ext_dat <- terra::extract(ref, dat)
    out <- is.na(ext_dat[!duplicated(ext_dat[, 1]), 2])
  } else {
    # degree-based buffer directly around the headquarters coordinate
    ref_cen <- terra::vect(cbind(12.58, 55.67),
                           crs = wgs84)
    ref <- terra::buffer(ref_cen, width = buffer)
    # point in polygon test
    ext_dat <- terra::extract(ref, dat)
    out <- is.na(ext_dat[!duplicated(ext_dat[, 1]), 2])
  }

  # implement the verification
  # NOTE(review): the condition checks sum(out) (records that passed) rather
  # than sum(!out) (flagged records) -- confirm this is intended
  if(verify & sum(out) > 0){
    # get flagged coordinates
    ver <- x[!out,]

    # count the instances of all flagged records per coordinate
    ver_count <- aggregate(ver[[species]] ~ ver[[lon]] +
                             ver[[lat]] , FUN = "length")
    names(ver_count) <- c(lon, lat, "coord.count")

    # count flagged records per species
    ver_spec <- aggregate(ver[[lon]] ~ ver[[species]], FUN = "length")
    names(ver_spec) <- c(species, "species.count")

    # test which flagged x occur multiple times; 'ord' preserves row order
    tester <- data.frame(x, ord = seq_len(nrow(x)))
    tester <- merge(tester, ver_count, by = c(lon,lat), all = TRUE)
    tester <- merge(tester, ver_spec, by = species, all = TRUE)

    tester <- tester[order(tester$ord),]
    tester[is.na(tester)] <- 0

    # only flag those records that occur with only one coordinate in the buffer
    out <- tester$coord.count <= tester$species.count| out
  }

  if (verbose) {
    if(value == "clean"){
      message(sprintf("Removed %s records.", sum(!out)))
    }else{
      message(sprintf("Flagged %s records.", sum(!out)))
    }
  }

  switch(value, clean = return(x[out, ]), flagged = return(out))
}
--------------------------------------------------------------------------------
/R/cc_iucn.R: --------------------------------------------------------------------------------
#' Identify Records Outside Natural Ranges
#'
#' Removes or flags records outside of the provided natural range polygon, on a per species basis.
#' Expects one entry per species. See the example or
#' \url{https://www.iucnredlist.org/resources/spatial-data-download} for
#' the required polygon structure.
#'
#' Download natural range maps in suitable format for amphibians, birds,
#' mammals and reptiles
#' from \url{https://www.iucnredlist.org/resources/spatial-data-download}.
#' Note: the buffer radius is in degrees, thus will differ slightly between
#' different latitudes.
#'
#' @param range a SpatVector of natural ranges for species in x.
#' Must contain a column named as indicated by \code{species}. See details.
#' @param species a character string. The column with the species name.
#' Default = \dQuote{species}.
#' @param buffer numerical. The buffer around each species' range,
#' from where records should be flagged as problematic, in meters. Default = 0.
#' @inheritParams cc_cap
#'
#' @inherit cc_cap return
#'
#' @note See \url{https://ropensci.github.io/CoordinateCleaner/} for more
#' details and tutorials.
#'
#' @keywords Coordinate cleaning
#' @family Coordinates
#' @examples
#' library(terra)
#'
#' x <- data.frame(species = c("A", "B"),
#'                 decimalLongitude = runif(100, -170, 170),
#'                 decimalLatitude = runif(100, -80,80))
#'
#' range_species_A <- cbind(c(-45,-45,-60,-60,-45), c(-10,-25,-25,-10,-10))
#' rangeA <- terra::vect(range_species_A, "polygons")
#' range_species_B <- cbind(c(15,15,32,32,15), c(10,-10,-10,10,10))
#' rangeB <- terra::vect(range_species_B, "polygons")
#' range <- terra::vect(list(rangeA, rangeB))
#' range$binomial <- c("A", "B")
#'
#' cc_iucn(x = x, range = range, buffer = 0)
#'
#' @export
#' @importFrom dplyr bind_rows
#' @importFrom terra vect buffer extract geomtype subset crs

cc_iucn <- function(x,
                    range,
                    lon = "decimalLongitude",
                    lat = "decimalLatitude",
                    species = "species",
                    buffer = 0,
                    value = "clean",
                    verbose = TRUE){

  # Check value argument
  match.arg(value, choices = c("clean", "flagged"))

  if (verbose) {
    message("Testing natural ranges")
  }

  # Accept sp 'Spatial*' and sf objects by converting them to SpatVector
  if (any(is(range) == "Spatial") | inherits(range, "sf")) {
    range <- terra::vect(range)
  }
  # Check if object is a SpatVector
  # NOTE(review): the message says 'ref' but the argument is named 'range'
  if (!(inherits(range, "SpatVector") &
      terra::geomtype(range) == "polygons")) {
    stop("ref must be a SpatVector with geomtype 'polygons'")
  }

  # Prepare shape file
  ## Adapt to iucn polygons: IUCN shapefiles name the species column 'binomial'
  if("binomial" %in% names(range) &
     !species %in% names(range) &
     species %in% names(x)) {
    names(range)[names(range) == "binomial"] <- species
  }

  ## Reduce to species in dataset
  test_range <- range[[species]][, 1] %in% unique(unlist(x[, species]))
  range <- terra::subset(range, test_range)
  # Split records by species, remembering the original row order ('order')
  # so the per-species results can be reassembled in input order below
  dat <- data.frame(x, order = rownames(x))
  dat <- split(dat, f = dat[, species])

  # Apply buffer to ranges
  if (buffer != 0) {
    range <- terra::buffer(range, width = buffer)
  }

  # Check projection of ranges
  wgs84 <- "+proj=longlat +datum=WGS84 +no_defs"

  if (terra::crs(range) == "") {
    warning("no projection information for reference found,
            assuming '+proj=longlat +datum=WGS84 +no_defs'")
    terra::crs(range) <- wgs84
  }else if(terra::crs(range) != terra::crs(wgs84)) {
    range <- terra::project(range, wgs84)
    warning("reprojecting reference to '+proj=longlat +datum=WGS84 +no_defs'")
  }

  # Point-in-polygon-test, per species
  out <- lapply(dat, function(k){
    if (unique(k[, species]) %in% range[[species]][, 1]) {
      sub <- terra::vect(k[, c(lon, lat)],
                         crs = wgs84,
                         geom = c(lon, lat))
      # restrict the reference to this species' polygon(s)
      test_range_sub <- range[[species]][, 1] == unique(k[, species])
      range_sub <- terra::subset(range, test_range_sub)
      # point in polygon test; the first extract column is the point index,
      # NA in the second column means the point falls outside the range
      ext_dat <- terra::extract(range_sub, sub)
      flag <- !is.na(ext_dat[!duplicated(ext_dat[, 1]), 2])

      data.frame(order = k$order,
                 flag = flag)
    }else{
      # species without a range polygon are not tested and pass
      data.frame(order = k$order,
                 flag = TRUE)
    }
  })

  # reassemble in the original row order of x
  out <- dplyr::bind_rows(out)
  out <- out[order(as.numeric(as.character(out$order))), ]

  # Warning for species not in range
  tester <- unique(unlist(x[, species]))
  if(sum(!tester %in% range[[species]][, 1]) > 0){
    miss <- tester[!tester %in% range[[species]][, 1]]
    warning(sprintf("species not found in range and not tested %s\n", miss))
  }

  # Generate output
  if (verbose) {
    if(value == "clean"){
      message(sprintf("Removed %s records.", sum(!out$flag)))
    }else{
      message(sprintf("Flagged %s records.", sum(!out$flag)))
    }
  }

  switch(value, clean = return(x[out$flag, ]),
         flagged = return(out$flag))
}

--------------------------------------------------------------------------------
/R/cc_urb.R: --------------------------------------------------------------------------------
#' Identify Records Inside Urban Areas
#'
#' Removes or flags records from inside urban areas, based on a geographic
#' gazetteer. Often records from large databases span substantial time periods
#' (centuries) and old records might represent habitats which today are replaced
#' by city area.
#'
#'
#' @param ref a SpatVector. Providing the geographic gazetteer
#' with the urban areas. See details. By default
#' rnaturalearth::ne_download(scale = 'medium', type = 'urban_areas',
#' returnclass = "sf"). Can be any \code{SpatVector}, but the
#' structure must be identical to \code{rnaturalearth::ne_download()}.
#' @inheritParams cc_cap
#'
#' @inherit cc_cap return
#'
#' @note See \url{https://ropensci.github.io/CoordinateCleaner/} for more
#' details and tutorials.
#'
#' @keywords Coordinate cleaning
#' @family Coordinates
#'
#' @examples
#'
#' \dontrun{
#' x <- data.frame(species = letters[1:10],
#'                 decimalLongitude = runif(100, -180, 180),
#'                 decimalLatitude = runif(100, -90,90))
#'
#' cc_urb(x)
#' cc_urb(x, value = "flagged")
#' }
#'
#' @export
#' @importFrom terra vect crop project extract
#' @importFrom rnaturalearth ne_download ne_file_name

cc_urb <- function(x,
                   lon = "decimalLongitude",
                   lat = "decimalLatitude",
                   ref = NULL,
                   value = "clean",
                   verbose = TRUE) {

  # check value argument
  match.arg(value, choices = c("clean", "flagged"))

  if (verbose) {
    message("Testing urban areas")
  }

  # check for reference data.
  if (is.null(ref)) {
    # default gazetteer: Natural Earth urban areas, downloaded on demand
    message("Downloading urban areas via rnaturalearth")
    ref <-
      try(suppressWarnings(terra::vect(
        rnaturalearth::ne_download(scale = 'medium',
                                   type = 'urban_areas',
                                   returnclass = "sf")
      )),
      silent = TRUE)

    # If the download fails, warn and skip the test rather than erroring:
    # 'clean' returns x unchanged, 'flagged' returns all NA
    if (inherits(ref, "try-error")) {
      warning(sprintf("Gazetteer for urban areas not found at\n%s",
                      rnaturalearth::ne_file_name(scale = 'medium',
                                                  type = 'urban_areas',
                                                  full_url = TRUE)))
      warning("Skipping urban test")
      switch(value, clean = return(x), flagged = return(rep(NA, nrow(x))))
    }

  } else {
    # Enable sf formatted custom references
    if (any(is(ref) == c("Spatial")) | inherits(ref, "sf")) {
      ref <- terra::vect(ref)
    }
    # Check if object is a SpatVector
    if (!(inherits(ref, "SpatVector") &
        terra::geomtype(ref) == "polygons")) {
      stop("ref must be a SpatVector with geomtype 'polygons'")
    }
    ref <- reproj(ref)
  }

  # Prepare input points and extent
  wgs84 <- "+proj=longlat +datum=WGS84 +no_defs"

  dat <- terra::vect(x[, c(lon, lat)],
                     geom = c(lon, lat),
                     crs = wgs84)
  # crop the gazetteer to the point extent (+1 degree) to speed up extraction
  limits <- terra::ext(dat) + 1
  ref <- terra::crop(ref, limits)
  ref <- terra::project(ref, wgs84)

  # test if any points fall within the buffer in case no urban areas are found
  # in the study area
  # NOTE(review): terra::crop() returns an empty SpatVector rather than NULL
  # when nothing overlaps -- confirm this branch can actually trigger
  if (is.null(ref)) {
    out <- rep(TRUE, nrow(x))
  } else {
    # point in polygon test; NA in the second extract column means the point
    # is outside all urban polygons, i.e. the record passes (TRUE)
    ext_dat <- terra::extract(ref, dat)
    out <- is.na(ext_dat[!duplicated(ext_dat[, 1]), 2])
  }

  if (verbose) {
    if (value == "clean") {
      message(sprintf("Removed %s records.", sum(!out)))
    }else{
      message(sprintf("Flagged %s records.", sum(!out)))
    }
  }

  switch(value, clean = return(x[out, ]), flagged = return(out))
}
--------------------------------------------------------------------------------
/R/cc_val.R:
--------------------------------------------------------------------------------
#' Identify Invalid lat/lon Coordinates
#'
#' Removes or flags non-numeric and not available coordinates
#' as well as lat >90, lat <-90, lon > 180 and lon < -180 are flagged.
#'
#' This test is obligatory before running any further tests of
#' CoordinateCleaner, as additional tests only run with valid coordinates.
#'
#' @inheritParams cc_cap
#'
#' @inherit cc_cap return
#'
#' @note See \url{https://ropensci.github.io/CoordinateCleaner/} for more
#' details and tutorials.
#'
#' @keywords Coordinate cleaning
#' @family Coordinates
#'
#' @examples
#'
#' x <- data.frame(species = letters[1:10],
#'                 decimalLongitude = c(runif(106, -180, 180), NA, "13W33'", "67,09", 305),
#'                 decimalLatitude = runif(110, -90,90))
#'
#' cc_val(x)
#' cc_val(x, value = "flagged")
#'
#' @export
cc_val <- function(x,
                   lon = "decimalLongitude",
                   lat = "decimalLatitude",
                   value = "clean",
                   verbose = TRUE) {

  # validate the requested output type
  match.arg(value, choices = c("clean", "flagged"))

  if (verbose) {
    message("Testing coordinate validity")
  }

  # Coerce both coordinate columns to numeric; anything non-numeric
  # (e.g. "13W33'", "67,09") becomes NA and is flagged below. The coerced
  # columns are written back so the 'clean' output carries numeric coordinates.
  lon_num <- suppressWarnings(as.numeric(as.character(x[[lon]])))
  lat_num <- suppressWarnings(as.numeric(as.character(x[[lat]])))
  x[[lon]] <- lon_num
  x[[lat]] <- lat_num

  # A record passes (TRUE) only when both coordinates are available numbers
  # within the valid lat/lon domain.
  out <- !is.na(lon_num) & !is.na(lat_num) &
    lon_num >= -180 & lon_num <= 180 &
    lat_num >= -90 & lat_num <= 90

  if (verbose) {
    if (value == "clean") {
      message(sprintf("Removed %s records.", sum(!out)))
    } else {
      message(sprintf("Flagged %s records.", sum(!out)))
    }
  }

  switch(value, clean = return(x[out, ]), flagged = return(out))
}
--------------------------------------------------------------------------------
/R/cc_zero.R:
--------------------------------------------------------------------------------
#' Identify Zero Coordinates
#'
#' Removes or flags records with either zero longitude or latitude and a radius
#' around the point at zero longitude and zero latitude. These problems are
#' often due to erroneous data-entry or geo-referencing and can lead to typical
#' patterns of high diversity around the equator.
#'
#'
#' @param buffer numerical. The buffer around the 0/0 point,
#' where records should be flagged as problematic, in decimal
#' degrees. Default = 0.5.
#' @inheritParams cc_cap
#'
#' @inherit cc_cap return
#'
#' @note See \url{https://ropensci.github.io/CoordinateCleaner/} for more
#' details and tutorials.
#'
#' @keywords Coordinate cleaning
#' @family Coordinates
#'
#' @examples
#'
#' x <- data.frame(species = "A",
#'                 decimalLongitude = c(0,34.84, 0, 33.98),
#'                 decimalLatitude = c(23.08, 0, 0, 15.98))
#'
#' cc_zero(x)
#' cc_zero(x, value = "flagged")
#'
#' @export
#' @importFrom terra extract buffer vect
cc_zero <- function(x,
                    lon = "decimalLongitude",
                    lat = "decimalLatitude",
                    buffer = 0.5,
                    value = "clean",
                    verbose = TRUE) {

  # check value argument
  match.arg(value, choices = c("clean", "flagged"))

  if (verbose) {
    message("Testing zero coordinates")
  }

  # plain zero in coordinates (TRUE = record passes this sub-test)
  t1 <- !(x[[lon]] == 0 | x[[lat]] == 0)

  # radius around point 0/0
  dat <- terra::vect(x[, c(lon, lat)], geom = c(lon, lat))
  if (buffer == 0) { # error when buffer = 0
    buffer <- 0.00000000000001
  }
  # terra::vect() picks up the default 'lon'/'lat' column names as geometry
  buff <- terra::buffer(terra::vect(data.frame("lat" = 0, "lon" = 0)),
                        width = buffer)
  # NA in the second extract column means the point is outside the buffer
  ext_dat <- terra::extract(buff, dat)
  t2 <- is.na(ext_dat[!duplicated(ext_dat[, 1]), 2])

  # combine test results: a record passes only if it passes both sub-tests
  out <- Reduce("&", list(t1, t2))

  if (verbose) {
    if (value == "clean") {
      message(sprintf("Removed %s records.", sum(!out)))
    } else {
      message(sprintf("Flagged %s records.", sum(!out)))
    }
  }

  switch(value, clean = return(x[out, ]), flagged = return(out))
}
--------------------------------------------------------------------------------
/R/cf_equal.R: --------------------------------------------------------------------------------
#' Identify Fossils with equal min and max age
#'
#' Removes or flags records with equal minimum and maximum age.
#'
#' @inheritParams cf_age
#'
#' @inherit cc_cap return
#'
#' @note See \url{https://ropensci.github.io/CoordinateCleaner/} for more
#' details and tutorials.
#'
#' @keywords Temporal cleaning Fossils
#' @family fossils
#'
#' @examples
#'
#' minages <- runif(n = 10, min = 0.1, max = 25)
#' x <- data.frame(species = letters[1:10],
#'                 min_ma = minages,
#'                 max_ma = minages + runif(n = 10, min = 0, max = 10))
#' x <- rbind(x, data.frame(species = "z",
#'                          min_ma = 5,
#'                          max_ma = 5))
#'
#' cf_equal(x, value = "flagged")
#'
#' @export
cf_equal <- function(x, min_age = "min_ma",
                     max_age = "max_ma",
                     value = "clean",
                     verbose = TRUE) {
  match.arg(value, choices = c("clean", "flagged"))


  if (verbose) {
    message("Testing age validity")
  }

  # min_age == max_age
  t1 <- x[[max_age]] == x[[min_age]]

  # min_age > max_age

  t2 <- x[[min_age]] > x[[max_age]]

  flags <- t1 | t2

  # create output
  out <- rep(TRUE, nrow(x))
  out[flags] <- FALSE

  if (verbose) {
    if(value == "clean"){
      message(sprintf("Removed %s records.", sum(!out, na.rm = TRUE)))
    }else{
      message(sprintf("Flagged %s records.", sum(!out, na.rm = TRUE)))
    }
  }

  # value
switch(value, clean = return(x[out, ]), flagged = return(out))
}
--------------------------------------------------------------------------------
/R/internal_clean_coordinate.R: --------------------------------------------------------------------------------
# Check projection of custom reference and reproject to wgs84 if necessary.
# Returns the (possibly reprojected) reference; warns whenever it has to
# assume or change the CRS.
reproj <- function(ref) {
  wgs84 <- "+proj=longlat +datum=WGS84 +no_defs"
  ref_crs <- terra::crs(ref, proj = TRUE)
  # if no projection information is given assume wgs84
  if (ref_crs == "") {
    warning(
      "no projection information for reference found,
      assuming '+proj=longlat +datum=WGS84 +no_defs'"
    )
    terra::crs(ref) <- wgs84
  } else {
    if (is.na(ref_crs)) {
      warning(
        "no projection information for reference found,
        assuming '+proj=longlat +datum=WGS84 +no_defs'"
      )
      ref <- terra::project(ref, wgs84)
    } else if (ref_crs != wgs84) {
      # otherwise reproject
      ref <- terra::project(ref, wgs84)
      warning("reprojecting reference to '+proj=longlat +datum=WGS84 +no_defs'")
    }
  }
  return(ref)
}

# A function to create a raster from an input dataset, used in cc_outl for
# spatial thinning, based on a point dataset and a raster resolution.
# Returns a terra raster covering the (clamped) point extent whose cell
# values are the cell indices.

ras_create <- function(x, lat, lon, thinning_res){
  # get data extent, padded by two resolution steps
  ex <- terra::ext(terra::vect(x[, c(lon, lat)],
                               geom = c(lon, lat))) + thinning_res * 2

  # check for boundary conditions: clamp the padded extent to valid lat/lon
  if (ex[1] < -180 | ex[2] > 180 | ex[3] < -90 | ex[4] > 90) {
    warning("fixing raster boundaries, assuming lat/lon projection")

    if (ex[1] < -180) {
      ex[1] <- -180
    }

    if (ex[2] > 180) {
      ex[2] <- 180
    }

    if (ex[3] < -90) {
      ex[3] <- -90
    }

    if (ex[4] > 90) {
      ex[4] <- 90
    }
  }

  # create raster
  ras <- terra::rast(x = ex, resolution = thinning_res)

  # set cell ids as the layer values, so extraction yields cell indices
  vals <- seq_len(terra::ncell(ras))
  ras <- terra::setValues(ras, vals)

  return(ras)
}


# A function to get the distance between raster midpoints and
# output a data.frame with the distances and the cell IDs as row and column
# names for cc_outl.

ras_dist <- function(x, lat, lon, ras, weights) {
  # x = a data.frame of point coordinates, ras = a raster with cell IDs as
  # layer, weights = logical, shall the distance matrix be weighted by the
  # number of points per cell? First assign each point to a raster cell.
  pts <- terra::extract(x = ras,
                        y = terra::vect(x[, c(lon, lat)],
                                        geom = c(lon, lat),
                                        crs = ras))

  # convert to data.frame of cell ids plus cell-center coordinates
  midp <- data.frame(terra::as.points(ras),
                     terra::xyFromCell(ras, 1:terra::ncell(ras)))

  # retain only cells that contain points
  midp <- midp[midp$lyr.1 %in% unique(pts$lyr.1), , drop = FALSE]

  # order to match the order of first occurrence in 'pts'
  midp <- midp[match(unique(pts$lyr.1), midp$lyr.1), , drop = FALSE]


  # calculate geospheric distance between raster cells with points, in km
  dist <- geosphere::distm(midp[, c("x", "y")],
                           fun = geosphere::distHaversine) / 1000

  # set rownames and colnames to cell IDs
  dist <- as.data.frame(dist, row.names = as.integer(midp$lyr.1))
  names(dist) <- midp$lyr.1

  if (weights) {
    # approximate within-cell distance as half
    # the cell size, assuming 1 deg = 100km;
    # this is crude, but doesn't really matter
    dist[dist == 0] <- 100 * mean(terra::res(ras)) / 2

    # weight matrix to account for the number of points per cell
    ## the number of points in each cell
    cou <- table(pts$lyr.1)

    ## order to match the distance matrix rows/columns
    cou <- cou[match(unique(pts$lyr.1), names(cou))]

    # weight matrix, representing the number of point-pair distances between
    # or within the cells (points cell 1 * points cell 2)
    wm <- outer(cou, cou)

    # multiply matrix elements to get weighted sum
    dist <- round(dist * wm, 0)

    dist <- list(pts = pts, dist = dist, wm = wm)
  } else {
    # set diagonal to NA, so it does not influence the mean
    dist[dist == 0] <- NA

    dist <- list(pts = pts, dist = dist)
  }

  return(dist)
}
--------------------------------------------------------------------------------
/R/internal_write_pyrate.R: --------------------------------------------------------------------------------
# Recursively strip trailing characters from 'filename' until the character
# before the cut is a '.', i.e. return the name without its extension.
# NOTE(review): a filename containing no '.' never hits the base case and
# recurses past the empty string -- callers must pass names with an extension.
.NoExtension <- function(filename) {
  if (substr(filename, nchar(filename), nchar(filename)) == ".") {
    return(substr(filename, 1, nchar(filename) - 1))
  } else {
    .NoExtension(substr(filename, 1, nchar(filename) - 1))
  }
}
--------------------------------------------------------------------------------
/R/sysdata.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/CoordinateCleaner/9195fb64154dd20a921a412b5e4d24cc18d3803c/R/sysdata.rda
--------------------------------------------------------------------------------
/README.md: --------------------------------------------------------------------------------
# CoordinateCleaner v3.0
[![CRAN_Status_Badge](https://www.r-pkg.org/badges/version/CoordinateCleaner)](https://cranlogs.r-pkg.org:443/badges/CoordinateCleaner)
[![downloads](https://cranlogs.r-pkg.org/badges/grand-total/CoordinateCleaner)](https://cranlogs.r-pkg.org:443/badges/grand-total/CoordinateCleaner)
[![rstudio mirror downloads](https://cranlogs.r-pkg.org/badges/CoordinateCleaner)](https://cranlogs.r-pkg.org:443/badges/CoordinateCleaner)
[![Project Status: Active – The project has reached a stable, usable state and is being actively developed.](https://www.repostatus.org/badges/latest/active.svg)](https://www.repostatus.org/#active)
[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.2539408.svg)](https://doi.org/10.5281/zenodo.2539408) 7 | [![rOpenSci peer-review](https://badges.ropensci.org/210_status.svg)](https://github.com/ropensci/software-review/issues/210) 8 | 9 | **CoordinateCleaner has been updated to version 3.0 on github and on CRAN to adapt to the retirement of sp and raster. The update may not be compatible with analysis-pipelines build with version 2.x*** 10 | 11 | Automated flagging of common spatial and temporal errors in biological and palaeontological collection data, for the use in conservation, ecology and palaeontology. Specifically includes tests for 12 | 13 | * General coordinate validity 14 | * Country and province centroids 15 | * Capital coordinates 16 | * Coordinates of biodiversity institutions 17 | * Spatial outliers 18 | * Temporal outliers 19 | * Coordinate-country discordance 20 | * Duplicated coordinates per species 21 | * Assignment to the location of the GBIF headquarters 22 | * Urban areas 23 | * Seas 24 | * Plain zeros 25 | * Equal longitude and latitude 26 | * Rounded coordinates 27 | * DDMM to DD.DD coordinate conversion errors 28 | * Large temporal uncertainty (fossils) 29 | * Equal minimum and maximum ages (fossils) 30 | * Spatio-temporal outliers (fossils) 31 | 32 | CoordinateCleaner can be particularly useful to improve data quality when using data from GBIF (e.g. obtained with [rgbif]( https://github.com/ropensci/rgbif)) or the Paleobiology database (e.g. obtained with [paleobioDB](https://github.com/ropensci/paleobioDB)) for historical biogeography (e.g. with [BioGeoBEARS](https://CRAN.R-project.org/package=BioGeoBEARS) or [phytools](https://CRAN.R-project.org/package=phytools)), automated conservation assessment (e.g. with [speciesgeocodeR](https://github.com/azizka/speciesgeocodeR/wiki) or [conR](https://CRAN.R-project.org/package=ConR)) or species distribution modelling (e.g. 
with [dismo](https://CRAN.R-project.org/package=dismo) or [sdm](https://CRAN.R-project.org/package=sdm)). See [scrubr](https://github.com/ropensci-archive/scrubr) and [taxize](https://github.com/ropensci/taxize) for complementary taxonomic cleaning or [biogeo](https://github.com/cran/biogeo) for correcting spatial coordinate errors. 33 | 34 | See [News](https://github.com/ropensci/CoordinateCleaner/blob/master/NEWS.md) for update information. 35 | 36 | # Installation 37 | ## Stable from CRAN 38 | 39 | ```r 40 | install.packages("CoordinateCleaner") 41 | library(CoordinateCleaner) 42 | ``` 43 | 44 | ## Developmental from GitHub 45 | ```r 46 | devtools::install_github("ropensci/CoordinateCleaner") 47 | library(CoordinateCleaner) 48 | ``` 49 | 50 | # Usage 51 | A simple example: 52 | 53 | ```r 54 | # Simulate example data 55 | minages <- runif(250, 0, 65) 56 | exmpl <- data.frame(species = sample(letters, size = 250, replace = TRUE), 57 | decimalLongitude = runif(250, min = 42, max = 51), 58 | decimalLatitude = runif(250, min = -26, max = -11), 59 | min_ma = minages, 60 | max_ma = minages + runif(250, 0.1, 65), 61 | dataset = "clean") 62 | 63 | # Run record-level tests 64 | rl <- clean_coordinates(x = exmpl) 65 | summary(rl) 66 | plot(rl) 67 | 68 | # Dataset level 69 | dsl <- clean_dataset(exmpl) 70 | 71 | # For fossils 72 | fl <- clean_fossils(x = exmpl, 73 | taxon = "species", 74 | lon = "decimalLongitude", 75 | lat = "decimalLatitude") 76 | summary(fl) 77 | 78 | # Alternative example using the pipe 79 | library(tidyverse) 80 | 81 | cl <- exmpl %>% 82 | cc_val()%>% 83 | cc_cap()%>% 84 | cd_ddmm()%>% 85 | cf_range(lon = "decimalLongitude", 86 | lat = "decimalLatitude", 87 | taxon ="species") 88 | ``` 89 | 90 | # Documentation 91 | Pipelines for cleaning data from the Global Biodiversity Information Facility (GBIF) and the Paleobiology Database (PaleobioDB) are available in [here](https://ropensci.github.io/CoordinateCleaner/articles/). 
92 | 93 | 94 | # Contributing 95 | See the [CONTRIBUTING](https://github.com/ropensci/CoordinateCleaner/blob/master/CONTRIBUTING.md) document. 96 | 97 | # Citation 98 | Zizka A, Silvestro D, Andermann T, Azevedo J, Duarte Ritter C, Edler D, Farooq H, Herdean A, Ariza M, Scharn R, Svanteson S, Wengtrom N, Zizka V & Antonelli A (2019) CoordinateCleaner: standardized cleaning of occurrence records from biological collection databases. Methods in Ecology and Evolution, 10(5):744-751, doi:10.1111/2041-210X.13152, https://github.com/ropensci/CoordinateCleaner 99 | 100 | [![ropensci_footer](https://ropensci.org/public_images/ropensci_footer.png)](https://ropensci.org) 101 | 102 | -------------------------------------------------------------------------------- /_pkgdown.yml: -------------------------------------------------------------------------------- 1 | reference: 2 | - title: Wrapper functions 3 | contents: 4 | - has_concept("Wrapper functions") 5 | - title: Geographic coordinate cleaning 6 | contents: 7 | - has_concept("Coordinates") 8 | - title: Dataset-level cleaning 9 | contents: 10 | - has_concept("Datasets") 11 | - title: Fossil cleaning 12 | contents: 13 | - has_concept("fossils") 14 | - title: Data 15 | contents: 16 | - buffland 17 | - buffsea 18 | - countryref 19 | - institutions 20 | - pbdb_example 21 | - aohi 22 | - title: Visualization 23 | contents: 24 | - plot.spatialvalid 25 | - title: Check 26 | contents: 27 | - is.spatialvalid 28 | -------------------------------------------------------------------------------- /_site.yml: -------------------------------------------------------------------------------- 1 | name: CoordinateCleaner 2 | output_dir: docs 3 | navbar: 4 | title: CoordinateCleaner 5 | type: inverse 6 | left: 7 | - text: Home 8 | icon: fa-home 9 | href: index.html 10 | - text: Articles 11 | menu: 12 | - text: Cleaning point occurrence data (GBIF) 13 | href: vignettes/Cleaning_GBIF_data_with_CoordinateCleaner.html 14 | - text: Cleaning fossil 
records (PBDB) 15 | href: vignettes/Cleaning_PBDB_fossils_with_CoordinateCleaner.html 16 | - text: Using custom gazetteers 17 | href: vignettes/Using_custom_gazetters.html 18 | - text: Geographic outliers 19 | href: articles/Geographic_outliers.html 20 | - text: Dataset level cleaning 21 | href: articles/Dataset_level_cleaning.html 22 | - text: Comparison other software 23 | href: Comparison_other_software.html 24 | right: 25 | - text: GitHub 26 | href: https://github.com/ropensci/CoordinateCleaner 27 | output: 28 | html_document: 29 | lib_dir: site_libs 30 | self_contained: no 31 | -------------------------------------------------------------------------------- /cran-comments.md: -------------------------------------------------------------------------------- 1 | # Version 2.0-20 2 | 3 | Fixing a warning resulting from unbalanced code chunk delimiters in one of the vignettes in the previous version (2.0-18) on CRAN. 4 | 5 | Additionally, change of maintainer email due to a change in institution. 6 | 7 | The NOTE on spelling errors in the DESCRIPTION is spurious in my opinion, since it flags my last name and Latin abbreviation "et al" from the literature reference. 8 | 9 | The package has been removed from CRAN due to slow response time. Response time was slow because I am on parental leave. 
-------------------------------------------------------------------------------- /data/aohi.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/CoordinateCleaner/9195fb64154dd20a921a412b5e4d24cc18d3803c/data/aohi.rda -------------------------------------------------------------------------------- /data/buffland.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/CoordinateCleaner/9195fb64154dd20a921a412b5e4d24cc18d3803c/data/buffland.rda -------------------------------------------------------------------------------- /data/buffsea.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/CoordinateCleaner/9195fb64154dd20a921a412b5e4d24cc18d3803c/data/buffsea.rda -------------------------------------------------------------------------------- /data/countryref.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/CoordinateCleaner/9195fb64154dd20a921a412b5e4d24cc18d3803c/data/countryref.rda -------------------------------------------------------------------------------- /data/institutions.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/CoordinateCleaner/9195fb64154dd20a921a412b5e4d24cc18d3803c/data/institutions.rda -------------------------------------------------------------------------------- /data/pbdb_example.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/CoordinateCleaner/9195fb64154dd20a921a412b5e4d24cc18d3803c/data/pbdb_example.rda -------------------------------------------------------------------------------- 
/docs/articles/Cleaning_GBIF_data_with_CoordinateCleaner_files/accessible-code-block-0.0.1/empty-anchor.js: -------------------------------------------------------------------------------- 1 | // Hide empty tag within highlighted CodeBlock for screen reader accessibility (see https://github.com/jgm/pandoc/issues/6352#issuecomment-626106786) --> 2 | // v0.0.1 3 | // Written by JooYoung Seo (jooyoung@psu.edu) and Atsushi Yasumoto on June 1st, 2020. 4 | 5 | document.addEventListener('DOMContentLoaded', function() { 6 | const codeList = document.getElementsByClassName("sourceCode"); 7 | for (var i = 0; i < codeList.length; i++) { 8 | var linkList = codeList[i].getElementsByTagName('a'); 9 | for (var j = 0; j < linkList.length; j++) { 10 | if (linkList[j].innerHTML === "") { 11 | linkList[j].setAttribute('aria-hidden', 'true'); 12 | } 13 | } 14 | } 15 | }); 16 | -------------------------------------------------------------------------------- /docs/articles/Cleaning_GBIF_data_with_CoordinateCleaner_files/figure-html/unnamed-chunk-11-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/CoordinateCleaner/9195fb64154dd20a921a412b5e4d24cc18d3803c/docs/articles/Cleaning_GBIF_data_with_CoordinateCleaner_files/figure-html/unnamed-chunk-11-1.png -------------------------------------------------------------------------------- /docs/articles/Cleaning_GBIF_data_with_CoordinateCleaner_files/figure-html/unnamed-chunk-16-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/CoordinateCleaner/9195fb64154dd20a921a412b5e4d24cc18d3803c/docs/articles/Cleaning_GBIF_data_with_CoordinateCleaner_files/figure-html/unnamed-chunk-16-1.png -------------------------------------------------------------------------------- /docs/articles/Cleaning_GBIF_data_with_CoordinateCleaner_files/figure-html/unnamed-chunk-17-1.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/CoordinateCleaner/9195fb64154dd20a921a412b5e4d24cc18d3803c/docs/articles/Cleaning_GBIF_data_with_CoordinateCleaner_files/figure-html/unnamed-chunk-17-1.png -------------------------------------------------------------------------------- /docs/articles/Cleaning_GBIF_data_with_CoordinateCleaner_files/figure-html/unnamed-chunk-18-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/CoordinateCleaner/9195fb64154dd20a921a412b5e4d24cc18d3803c/docs/articles/Cleaning_GBIF_data_with_CoordinateCleaner_files/figure-html/unnamed-chunk-18-1.png -------------------------------------------------------------------------------- /docs/articles/Cleaning_GBIF_data_with_CoordinateCleaner_files/figure-html/unnamed-chunk-19-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/CoordinateCleaner/9195fb64154dd20a921a412b5e4d24cc18d3803c/docs/articles/Cleaning_GBIF_data_with_CoordinateCleaner_files/figure-html/unnamed-chunk-19-1.png -------------------------------------------------------------------------------- /docs/articles/Cleaning_GBIF_data_with_CoordinateCleaner_files/figure-html/unnamed-chunk-5-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/CoordinateCleaner/9195fb64154dd20a921a412b5e4d24cc18d3803c/docs/articles/Cleaning_GBIF_data_with_CoordinateCleaner_files/figure-html/unnamed-chunk-5-1.png -------------------------------------------------------------------------------- /docs/articles/Cleaning_GBIF_data_with_CoordinateCleaner_files/figure-html/unnamed-chunk-6-1.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ropensci/CoordinateCleaner/9195fb64154dd20a921a412b5e4d24cc18d3803c/docs/articles/Cleaning_GBIF_data_with_CoordinateCleaner_files/figure-html/unnamed-chunk-6-1.png -------------------------------------------------------------------------------- /docs/articles/Cleaning_PBDB_fossils_with_CoordinateCleaner_files/accessible-code-block-0.0.1/empty-anchor.js: -------------------------------------------------------------------------------- 1 | // Hide empty tag within highlighted CodeBlock for screen reader accessibility (see https://github.com/jgm/pandoc/issues/6352#issuecomment-626106786) --> 2 | // v0.0.1 3 | // Written by JooYoung Seo (jooyoung@psu.edu) and Atsushi Yasumoto on June 1st, 2020. 4 | 5 | document.addEventListener('DOMContentLoaded', function() { 6 | const codeList = document.getElementsByClassName("sourceCode"); 7 | for (var i = 0; i < codeList.length; i++) { 8 | var linkList = codeList[i].getElementsByTagName('a'); 9 | for (var j = 0; j < linkList.length; j++) { 10 | if (linkList[j].innerHTML === "") { 11 | linkList[j].setAttribute('aria-hidden', 'true'); 12 | } 13 | } 14 | } 15 | }); 16 | -------------------------------------------------------------------------------- /docs/articles/Cleaning_PBDB_fossils_with_CoordinateCleaner_files/figure-html/unnamed-chunk-16-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/CoordinateCleaner/9195fb64154dd20a921a412b5e4d24cc18d3803c/docs/articles/Cleaning_PBDB_fossils_with_CoordinateCleaner_files/figure-html/unnamed-chunk-16-1.png -------------------------------------------------------------------------------- /docs/articles/Cleaning_PBDB_fossils_with_CoordinateCleaner_files/figure-html/unnamed-chunk-17-1.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ropensci/CoordinateCleaner/9195fb64154dd20a921a412b5e4d24cc18d3803c/docs/articles/Cleaning_PBDB_fossils_with_CoordinateCleaner_files/figure-html/unnamed-chunk-17-1.png -------------------------------------------------------------------------------- /docs/articles/Cleaning_PBDB_fossils_with_CoordinateCleaner_files/figure-html/unnamed-chunk-25-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/CoordinateCleaner/9195fb64154dd20a921a412b5e4d24cc18d3803c/docs/articles/Cleaning_PBDB_fossils_with_CoordinateCleaner_files/figure-html/unnamed-chunk-25-1.png -------------------------------------------------------------------------------- /docs/articles/Cleaning_PBDB_fossils_with_CoordinateCleaner_files/figure-html/unnamed-chunk-25-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/CoordinateCleaner/9195fb64154dd20a921a412b5e4d24cc18d3803c/docs/articles/Cleaning_PBDB_fossils_with_CoordinateCleaner_files/figure-html/unnamed-chunk-25-2.png -------------------------------------------------------------------------------- /docs/articles/Cleaning_PBDB_fossils_with_CoordinateCleaner_files/figure-html/unnamed-chunk-27-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/CoordinateCleaner/9195fb64154dd20a921a412b5e4d24cc18d3803c/docs/articles/Cleaning_PBDB_fossils_with_CoordinateCleaner_files/figure-html/unnamed-chunk-27-1.png -------------------------------------------------------------------------------- /docs/articles/Cleaning_PBDB_fossils_with_CoordinateCleaner_files/figure-html/unnamed-chunk-27-2.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ropensci/CoordinateCleaner/9195fb64154dd20a921a412b5e4d24cc18d3803c/docs/articles/Cleaning_PBDB_fossils_with_CoordinateCleaner_files/figure-html/unnamed-chunk-27-2.png -------------------------------------------------------------------------------- /docs/articles/Cleaning_PBDB_fossils_with_CoordinateCleaner_files/figure-html/unnamed-chunk-7-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/CoordinateCleaner/9195fb64154dd20a921a412b5e4d24cc18d3803c/docs/articles/Cleaning_PBDB_fossils_with_CoordinateCleaner_files/figure-html/unnamed-chunk-7-1.png -------------------------------------------------------------------------------- /docs/articles/Comparison_other_software_files/accessible-code-block-0.0.1/empty-anchor.js: -------------------------------------------------------------------------------- 1 | // Hide empty tag within highlighted CodeBlock for screen reader accessibility (see https://github.com/jgm/pandoc/issues/6352#issuecomment-626106786) --> 2 | // v0.0.1 3 | // Written by JooYoung Seo (jooyoung@psu.edu) and Atsushi Yasumoto on June 1st, 2020. 
4 | 5 | document.addEventListener('DOMContentLoaded', function() { 6 | const codeList = document.getElementsByClassName("sourceCode"); 7 | for (var i = 0; i < codeList.length; i++) { 8 | var linkList = codeList[i].getElementsByTagName('a'); 9 | for (var j = 0; j < linkList.length; j++) { 10 | if (linkList[j].innerHTML === "") { 11 | linkList[j].setAttribute('aria-hidden', 'true'); 12 | } 13 | } 14 | } 15 | }); 16 | -------------------------------------------------------------------------------- /docs/articles/Using_custom_gazetteers_files/accessible-code-block-0.0.1/empty-anchor.js: -------------------------------------------------------------------------------- 1 | // Hide empty tag within highlighted CodeBlock for screen reader accessibility (see https://github.com/jgm/pandoc/issues/6352#issuecomment-626106786) --> 2 | // v0.0.1 3 | // Written by JooYoung Seo (jooyoung@psu.edu) and Atsushi Yasumoto on June 1st, 2020. 4 | 5 | document.addEventListener('DOMContentLoaded', function() { 6 | const codeList = document.getElementsByClassName("sourceCode"); 7 | for (var i = 0; i < codeList.length; i++) { 8 | var linkList = codeList[i].getElementsByTagName('a'); 9 | for (var j = 0; j < linkList.length; j++) { 10 | if (linkList[j].innerHTML === "") { 11 | linkList[j].setAttribute('aria-hidden', 'true'); 12 | } 13 | } 14 | } 15 | }); 16 | -------------------------------------------------------------------------------- /docs/articles/Using_custom_gazetteers_files/figure-html/unnamed-chunk-1-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/CoordinateCleaner/9195fb64154dd20a921a412b5e4d24cc18d3803c/docs/articles/Using_custom_gazetteers_files/figure-html/unnamed-chunk-1-1.png -------------------------------------------------------------------------------- /docs/articles/Using_custom_gazetteers_files/figure-html/unnamed-chunk-2-1.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/CoordinateCleaner/9195fb64154dd20a921a412b5e4d24cc18d3803c/docs/articles/Using_custom_gazetteers_files/figure-html/unnamed-chunk-2-1.png -------------------------------------------------------------------------------- /docs/articles/Using_custom_gazetteers_files/figure-html/unnamed-chunk-2-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/CoordinateCleaner/9195fb64154dd20a921a412b5e4d24cc18d3803c/docs/articles/Using_custom_gazetteers_files/figure-html/unnamed-chunk-2-2.png -------------------------------------------------------------------------------- /docs/articles/clgbif11-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/CoordinateCleaner/9195fb64154dd20a921a412b5e4d24cc18d3803c/docs/articles/clgbif11-1.png -------------------------------------------------------------------------------- /docs/articles/clgbif16-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/CoordinateCleaner/9195fb64154dd20a921a412b5e4d24cc18d3803c/docs/articles/clgbif16-1.png -------------------------------------------------------------------------------- /docs/articles/clgbif17-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/CoordinateCleaner/9195fb64154dd20a921a412b5e4d24cc18d3803c/docs/articles/clgbif17-1.png -------------------------------------------------------------------------------- /docs/articles/clgbif18-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/CoordinateCleaner/9195fb64154dd20a921a412b5e4d24cc18d3803c/docs/articles/clgbif18-1.png 
-------------------------------------------------------------------------------- /docs/articles/clgbif19-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/CoordinateCleaner/9195fb64154dd20a921a412b5e4d24cc18d3803c/docs/articles/clgbif19-1.png -------------------------------------------------------------------------------- /docs/articles/clgbif5-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/CoordinateCleaner/9195fb64154dd20a921a412b5e4d24cc18d3803c/docs/articles/clgbif5-1.png -------------------------------------------------------------------------------- /docs/articles/clgbif6-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/CoordinateCleaner/9195fb64154dd20a921a412b5e4d24cc18d3803c/docs/articles/clgbif6-1.png -------------------------------------------------------------------------------- /docs/articles/cusgaz1-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/CoordinateCleaner/9195fb64154dd20a921a412b5e4d24cc18d3803c/docs/articles/cusgaz1-1.png -------------------------------------------------------------------------------- /docs/articles/cusgaz2-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/CoordinateCleaner/9195fb64154dd20a921a412b5e4d24cc18d3803c/docs/articles/cusgaz2-1.png -------------------------------------------------------------------------------- /docs/articles/cusgaz2-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/CoordinateCleaner/9195fb64154dd20a921a412b5e4d24cc18d3803c/docs/articles/cusgaz2-2.png 
-------------------------------------------------------------------------------- /docs/bootstrap-toc.css: -------------------------------------------------------------------------------- 1 | /*! 2 | * Bootstrap Table of Contents v0.4.1 (http://afeld.github.io/bootstrap-toc/) 3 | * Copyright 2015 Aidan Feldman 4 | * Licensed under MIT (https://github.com/afeld/bootstrap-toc/blob/gh-pages/LICENSE.md) */ 5 | 6 | /* modified from https://github.com/twbs/bootstrap/blob/94b4076dd2efba9af71f0b18d4ee4b163aa9e0dd/docs/assets/css/src/docs.css#L548-L601 */ 7 | 8 | /* All levels of nav */ 9 | nav[data-toggle='toc'] .nav > li > a { 10 | display: block; 11 | padding: 4px 20px; 12 | font-size: 13px; 13 | font-weight: 500; 14 | color: #767676; 15 | } 16 | nav[data-toggle='toc'] .nav > li > a:hover, 17 | nav[data-toggle='toc'] .nav > li > a:focus { 18 | padding-left: 19px; 19 | color: #563d7c; 20 | text-decoration: none; 21 | background-color: transparent; 22 | border-left: 1px solid #563d7c; 23 | } 24 | nav[data-toggle='toc'] .nav > .active > a, 25 | nav[data-toggle='toc'] .nav > .active:hover > a, 26 | nav[data-toggle='toc'] .nav > .active:focus > a { 27 | padding-left: 18px; 28 | font-weight: bold; 29 | color: #563d7c; 30 | background-color: transparent; 31 | border-left: 2px solid #563d7c; 32 | } 33 | 34 | /* Nav: second level (shown on .active) */ 35 | nav[data-toggle='toc'] .nav .nav { 36 | display: none; /* Hide by default, but at >768px, show it */ 37 | padding-bottom: 10px; 38 | } 39 | nav[data-toggle='toc'] .nav .nav > li > a { 40 | padding-top: 1px; 41 | padding-bottom: 1px; 42 | padding-left: 30px; 43 | font-size: 12px; 44 | font-weight: normal; 45 | } 46 | nav[data-toggle='toc'] .nav .nav > li > a:hover, 47 | nav[data-toggle='toc'] .nav .nav > li > a:focus { 48 | padding-left: 29px; 49 | } 50 | nav[data-toggle='toc'] .nav .nav > .active > a, 51 | nav[data-toggle='toc'] .nav .nav > .active:hover > a, 52 | nav[data-toggle='toc'] .nav .nav > .active:focus > a { 53 | 
padding-left: 28px; 54 | font-weight: 500; 55 | } 56 | 57 | /* from https://github.com/twbs/bootstrap/blob/e38f066d8c203c3e032da0ff23cd2d6098ee2dd6/docs/assets/css/src/docs.css#L631-L634 */ 58 | nav[data-toggle='toc'] .nav > .active > ul { 59 | display: block; 60 | } 61 | -------------------------------------------------------------------------------- /docs/bootstrap-toc.js: -------------------------------------------------------------------------------- 1 | /*! 2 | * Bootstrap Table of Contents v0.4.1 (http://afeld.github.io/bootstrap-toc/) 3 | * Copyright 2015 Aidan Feldman 4 | * Licensed under MIT (https://github.com/afeld/bootstrap-toc/blob/gh-pages/LICENSE.md) */ 5 | (function() { 6 | 'use strict'; 7 | 8 | window.Toc = { 9 | helpers: { 10 | // return all matching elements in the set, or their descendants 11 | findOrFilter: function($el, selector) { 12 | // http://danielnouri.org/notes/2011/03/14/a-jquery-find-that-also-finds-the-root-element/ 13 | // http://stackoverflow.com/a/12731439/358804 14 | var $descendants = $el.find(selector); 15 | return $el.filter(selector).add($descendants).filter(':not([data-toc-skip])'); 16 | }, 17 | 18 | generateUniqueIdBase: function(el) { 19 | var text = $(el).text(); 20 | var anchor = text.trim().toLowerCase().replace(/[^A-Za-z0-9]+/g, '-'); 21 | return anchor || el.tagName.toLowerCase(); 22 | }, 23 | 24 | generateUniqueId: function(el) { 25 | var anchorBase = this.generateUniqueIdBase(el); 26 | for (var i = 0; ; i++) { 27 | var anchor = anchorBase; 28 | if (i > 0) { 29 | // add suffix 30 | anchor += '-' + i; 31 | } 32 | // check if ID already exists 33 | if (!document.getElementById(anchor)) { 34 | return anchor; 35 | } 36 | } 37 | }, 38 | 39 | generateAnchor: function(el) { 40 | if (el.id) { 41 | return el.id; 42 | } else { 43 | var anchor = this.generateUniqueId(el); 44 | el.id = anchor; 45 | return anchor; 46 | } 47 | }, 48 | 49 | createNavList: function() { 50 | return $(''); 51 | }, 52 | 53 | createChildNavList: 
function($parent) { 54 | var $childList = this.createNavList(); 55 | $parent.append($childList); 56 | return $childList; 57 | }, 58 | 59 | generateNavEl: function(anchor, text) { 60 | var $a = $(''); 61 | $a.attr('href', '#' + anchor); 62 | $a.text(text); 63 | var $li = $('
  • '); 64 | $li.append($a); 65 | return $li; 66 | }, 67 | 68 | generateNavItem: function(headingEl) { 69 | var anchor = this.generateAnchor(headingEl); 70 | var $heading = $(headingEl); 71 | var text = $heading.data('toc-text') || $heading.text(); 72 | return this.generateNavEl(anchor, text); 73 | }, 74 | 75 | // Find the first heading level (`

    `, then `

    `, etc.) that has more than one element. Defaults to 1 (for `

    `). 76 | getTopLevel: function($scope) { 77 | for (var i = 1; i <= 6; i++) { 78 | var $headings = this.findOrFilter($scope, 'h' + i); 79 | if ($headings.length > 1) { 80 | return i; 81 | } 82 | } 83 | 84 | return 1; 85 | }, 86 | 87 | // returns the elements for the top level, and the next below it 88 | getHeadings: function($scope, topLevel) { 89 | var topSelector = 'h' + topLevel; 90 | 91 | var secondaryLevel = topLevel + 1; 92 | var secondarySelector = 'h' + secondaryLevel; 93 | 94 | return this.findOrFilter($scope, topSelector + ',' + secondarySelector); 95 | }, 96 | 97 | getNavLevel: function(el) { 98 | return parseInt(el.tagName.charAt(1), 10); 99 | }, 100 | 101 | populateNav: function($topContext, topLevel, $headings) { 102 | var $context = $topContext; 103 | var $prevNav; 104 | 105 | var helpers = this; 106 | $headings.each(function(i, el) { 107 | var $newNav = helpers.generateNavItem(el); 108 | var navLevel = helpers.getNavLevel(el); 109 | 110 | // determine the proper $context 111 | if (navLevel === topLevel) { 112 | // use top level 113 | $context = $topContext; 114 | } else if ($prevNav && $context === $topContext) { 115 | // create a new level of the tree and switch to it 116 | $context = helpers.createChildNavList($prevNav); 117 | } // else use the current $context 118 | 119 | $context.append($newNav); 120 | 121 | $prevNav = $newNav; 122 | }); 123 | }, 124 | 125 | parseOps: function(arg) { 126 | var opts; 127 | if (arg.jquery) { 128 | opts = { 129 | $nav: arg 130 | }; 131 | } else { 132 | opts = arg; 133 | } 134 | opts.$scope = opts.$scope || $(document.body); 135 | return opts; 136 | } 137 | }, 138 | 139 | // accepts a jQuery object, or an options object 140 | init: function(opts) { 141 | opts = this.helpers.parseOps(opts); 142 | 143 | // ensure that the data attribute is in place for styling 144 | opts.$nav.attr('data-toggle', 'toc'); 145 | 146 | var $topContext = this.helpers.createChildNavList(opts.$nav); 147 | var topLevel = 
this.helpers.getTopLevel(opts.$scope); 148 | var $headings = this.helpers.getHeadings(opts.$scope, topLevel); 149 | this.helpers.populateNav($topContext, topLevel, $headings); 150 | } 151 | }; 152 | 153 | $(function() { 154 | $('nav[data-toggle="toc"]').each(function(i, el) { 155 | var $nav = $(el); 156 | Toc.init($nav); 157 | }); 158 | }); 159 | })(); 160 | -------------------------------------------------------------------------------- /docs/docsearch.js: -------------------------------------------------------------------------------- 1 | $(function() { 2 | 3 | // register a handler to move the focus to the search bar 4 | // upon pressing shift + "/" (i.e. "?") 5 | $(document).on('keydown', function(e) { 6 | if (e.shiftKey && e.keyCode == 191) { 7 | e.preventDefault(); 8 | $("#search-input").focus(); 9 | } 10 | }); 11 | 12 | $(document).ready(function() { 13 | // do keyword highlighting 14 | /* modified from https://jsfiddle.net/julmot/bL6bb5oo/ */ 15 | var mark = function() { 16 | 17 | var referrer = document.URL ; 18 | var paramKey = "q" ; 19 | 20 | if (referrer.indexOf("?") !== -1) { 21 | var qs = referrer.substr(referrer.indexOf('?') + 1); 22 | var qs_noanchor = qs.split('#')[0]; 23 | var qsa = qs_noanchor.split('&'); 24 | var keyword = ""; 25 | 26 | for (var i = 0; i < qsa.length; i++) { 27 | var currentParam = qsa[i].split('='); 28 | 29 | if (currentParam.length !== 2) { 30 | continue; 31 | } 32 | 33 | if (currentParam[0] == paramKey) { 34 | keyword = decodeURIComponent(currentParam[1].replace(/\+/g, "%20")); 35 | } 36 | } 37 | 38 | if (keyword !== "") { 39 | $(".contents").unmark({ 40 | done: function() { 41 | $(".contents").mark(keyword); 42 | } 43 | }); 44 | } 45 | } 46 | }; 47 | 48 | mark(); 49 | }); 50 | }); 51 | 52 | /* Search term highlighting ------------------------------*/ 53 | 54 | function matchedWords(hit) { 55 | var words = []; 56 | 57 | var hierarchy = hit._highlightResult.hierarchy; 58 | // loop to fetch from lvl0, lvl1, etc. 
59 | for (var idx in hierarchy) { 60 | words = words.concat(hierarchy[idx].matchedWords); 61 | } 62 | 63 | var content = hit._highlightResult.content; 64 | if (content) { 65 | words = words.concat(content.matchedWords); 66 | } 67 | 68 | // return unique words 69 | var words_uniq = [...new Set(words)]; 70 | return words_uniq; 71 | } 72 | 73 | function updateHitURL(hit) { 74 | 75 | var words = matchedWords(hit); 76 | var url = ""; 77 | 78 | if (hit.anchor) { 79 | url = hit.url_without_anchor + '?q=' + escape(words.join(" ")) + '#' + hit.anchor; 80 | } else { 81 | url = hit.url + '?q=' + escape(words.join(" ")); 82 | } 83 | 84 | return url; 85 | } 86 | -------------------------------------------------------------------------------- /docs/link.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 5 | 8 | 12 | 13 | -------------------------------------------------------------------------------- /docs/pkgdown.js: -------------------------------------------------------------------------------- 1 | /* http://gregfranko.com/blog/jquery-best-practices/ */ 2 | (function($) { 3 | $(function() { 4 | 5 | $('.navbar-fixed-top').headroom(); 6 | 7 | $('body').css('padding-top', $('.navbar').height() + 10); 8 | $(window).resize(function(){ 9 | $('body').css('padding-top', $('.navbar').height() + 10); 10 | }); 11 | 12 | $('[data-toggle="tooltip"]').tooltip(); 13 | 14 | var cur_path = paths(location.pathname); 15 | var links = $("#navbar ul li a"); 16 | var max_length = -1; 17 | var pos = -1; 18 | for (var i = 0; i < links.length; i++) { 19 | if (links[i].getAttribute("href") === "#") 20 | continue; 21 | // Ignore external links 22 | if (links[i].host !== location.host) 23 | continue; 24 | 25 | var nav_path = paths(links[i].pathname); 26 | 27 | var length = prefix_length(nav_path, cur_path); 28 | if (length > max_length) { 29 | max_length = length; 30 | pos = i; 31 | } 32 | } 33 | 34 | // Add class to parent
  • , and enclosing
  • if in dropdown 35 | if (pos >= 0) { 36 | var menu_anchor = $(links[pos]); 37 | menu_anchor.parent().addClass("active"); 38 | menu_anchor.closest("li.dropdown").addClass("active"); 39 | } 40 | }); 41 | 42 | function paths(pathname) { 43 | var pieces = pathname.split("/"); 44 | pieces.shift(); // always starts with / 45 | 46 | var end = pieces[pieces.length - 1]; 47 | if (end === "index.html" || end === "") 48 | pieces.pop(); 49 | return(pieces); 50 | } 51 | 52 | // Returns -1 if not found 53 | function prefix_length(needle, haystack) { 54 | if (needle.length > haystack.length) 55 | return(-1); 56 | 57 | // Special case for length-0 haystack, since for loop won't run 58 | if (haystack.length === 0) { 59 | return(needle.length === 0 ? 0 : -1); 60 | } 61 | 62 | for (var i = 0; i < haystack.length; i++) { 63 | if (needle[i] != haystack[i]) 64 | return(i); 65 | } 66 | 67 | return(haystack.length); 68 | } 69 | 70 | /* Clipboard --------------------------*/ 71 | 72 | function changeTooltipMessage(element, msg) { 73 | var tooltipOriginalTitle=element.getAttribute('data-original-title'); 74 | element.setAttribute('data-original-title', msg); 75 | $(element).tooltip('show'); 76 | element.setAttribute('data-original-title', tooltipOriginalTitle); 77 | } 78 | 79 | if(ClipboardJS.isSupported()) { 80 | $(document).ready(function() { 81 | var copyButton = ""; 82 | 83 | $("div.sourceCode").addClass("hasCopyButton"); 84 | 85 | // Insert copy buttons: 86 | $(copyButton).prependTo(".hasCopyButton"); 87 | 88 | // Initialize tooltips: 89 | $('.btn-copy-ex').tooltip({container: 'body'}); 90 | 91 | // Initialize clipboard: 92 | var clipboardBtnCopies = new ClipboardJS('[data-clipboard-copy]', { 93 | text: function(trigger) { 94 | return trigger.parentNode.textContent.replace(/\n#>[^\n]*/g, ""); 95 | } 96 | }); 97 | 98 | clipboardBtnCopies.on('success', function(e) { 99 | changeTooltipMessage(e.trigger, 'Copied!'); 100 | e.clearSelection(); 101 | }); 102 | 103 | 
clipboardBtnCopies.on('error', function(e) { 104 |       changeTooltipMessage(e.trigger,'Press Ctrl+C or  Command+C to copy'); 105 |     }); 106 |   }); 107 | } 108 | })(window.jQuery || window.$) 109 | -------------------------------------------------------------------------------- /docs/pkgdown.yml: -------------------------------------------------------------------------------- 1 | pandoc: 3.1.1 2 | pkgdown: 2.0.7 3 | pkgdown_sha: ~ 4 | articles: 5 |   Cleaning_GBIF_data_with_CoordinateCleaner: Cleaning_GBIF_data_with_CoordinateCleaner.html 6 |   Cleaning_PBDB_fossils_with_CoordinateCleaner: Cleaning_PBDB_fossils_with_CoordinateCleaner.html 7 |   Comparison_other_software: Comparison_other_software.html 8 |   Using_custom_gazetteers: Using_custom_gazetteers.html 9 | last_built: 2023-10-24T18:56Z 10 | 11 | -------------------------------------------------------------------------------- /docs/reference/plot.spatialvalid-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/CoordinateCleaner/9195fb64154dd20a921a412b5e4d24cc18d3803c/docs/reference/plot.spatialvalid-1.png -------------------------------------------------------------------------------- /inst/CITATION: -------------------------------------------------------------------------------- 1 | note <- sprintf("R package version %s", meta$Version) 2 | 3 | bibentry(bibtype = "article", 4 |          title = "CoordinateCleaner: standardized cleaning of occurrence records from biological collection databases", 5 |          author = c(person("Alexander", "Zizka"), 6 |                     person("Daniele", "Silvestro"), 7 |                     person("Tobias", "Andermann"), 8 |                     person("Josue", "Azevedo"), 9 |                     person("Camila", "Duarte Ritter"), 10 |                     person("Daniel", "Edler"), 11 |                     person("Harith", "Farooq"), 12 |                     person("Andrei", "Herdean"), 13 |                     person("Maria", "Ariza"), 14 |                     person("Ruud", "Scharn"), 15 |                     person("Sten", "Svantesson"), 16 |                     person("Niklas", "Wengstrom"), 17 |                     person("Vera", "Zizka"), 18 | 
person("Alexandre", "Antonelli")), 19 |          journal = "Methods in Ecology and Evolution", 20 |          volume = 10, 21 |          pages = "744-751", 22 |          year = 2019, 23 |          note = note, 24 |          doi = "10.1111/2041-210X.13152", 25 |          url = "https://github.com/ropensci/CoordinateCleaner") -------------------------------------------------------------------------------- /inst/WORDLIST: -------------------------------------------------------------------------------- 1 | Avicennia 2 | BioGeoBEARS 3 | CRS 4 | CleanCoordinates 5 | CleanCoordinatesDS 6 | CleanCoordinatesFOS 7 | DD 8 | DDMM 9 | DK 10 | Fabris 11 | Factbook 12 | GBIF 13 | Geonames 14 | Herbariorum 15 | IUCN 16 | MaxT 17 | MinT 18 | Myr 19 | Neotoma 20 | PBDB 21 | PaleobioDB 22 | Paleobiology 23 | Paleobiologydatabase 24 | Panthera 25 | Poisson 26 | PyRate 27 | Pyrate 28 | Sgarbi 29 | SpatialPointsDataFrame 30 | SpatialPolygonsDataFrame 31 | Spatialvalid 32 | Svantesson 33 | Varela 34 | WGS 35 | Wengtrom 36 | WritePyrate 37 | adm 38 | agesequal 39 | barcoding 40 | biogeo 41 | bookdown 42 | cen 43 | clgbif 44 | codecov 45 | color 46 | com 47 | conR 48 | coord 49 | coun 50 | countr 51 | countrycode 52 | countryref 53 | cusgaz 54 | cutoff 55 | dc 56 | dd 57 | ddmm 58 | decimalLatitude 59 | decimalLongitude 60 | dedup 61 | devtools 62 | dismo 63 | doi 64 | dupl 65 | duplicatesexclude 66 | emph 67 | equ 68 | errorcheck 69 | etc 70 | factbook 71 | gbif 72 | geo 73 | geod 74 | geospheric 75 | ggplot 76 | github 77 | interquantile 78 | io 79 | iso 80 | iucn 81 | lat 82 | leo 83 | lon 84 | macroecological 85 | magrittr 86 | migh 87 | missingvalsexclude 88 | mltpl 89 | naturalearth 90 | naturalearthdata 91 | ne 92 | neotomadb 93 | occs 94 | org 95 | outl 96 | palaebiological 97 | paleobioDB 98 | paleobiodb 99 | paleobiology 100 | paleontological 101 | paleontology 102 | phytools 103 | plaeobioDB 104 | precisioncheck 105 | pvalue 106 | pyrate 107 | quickclean 108 | rOpenSci 109 | ras 110 | refcol 111 | referes 112 | rgbif 113 | rgdal 114 | 
rnaturalearth 115 | ropensci 116 | roxygen 117 | rstudio 118 | sapply 119 | scrubr 120 | sdm 121 | seethedatablog 122 | sp 123 | spatialvalid 124 | spatiotemp 125 | speciesgeocodeR 126 | speedup 127 | taxize 128 | tc 129 | temprange 130 | thres 131 | urb 132 | urbanareas 133 | val 134 | vapply 135 | wordpress 136 | www 137 | -------------------------------------------------------------------------------- /man/CoordinateCleaner-package.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/CoordinateCleaner-package.R 3 | \docType{package} 4 | \name{CoordinateCleaner-package} 5 | \alias{CoordinateCleaner-package} 6 | \alias{CoordinateCleaner} 7 | \title{CoordinateCleaner} 8 | \description{ 9 | Automated Cleaning of Occurrence Records from Biological Collections 10 | } 11 | \details{ 12 | Automated flagging of common spatial and temporal errors in biological and 13 | paleontological collection data, for the use in conservation, ecology and 14 | paleontology. Includes automated tests to easily flag (and exclude) records 15 | assigned to country or province centroid, the open ocean, the headquarters of 16 | the Global Biodiversity Information Facility, urban areas or the location of 17 | biodiversity institutions (museums, zoos, botanical gardens, universities). 18 | Furthermore identifies per species outlier coordinates, zero coordinates, 19 | identical latitude/longitude and invalid coordinates. Also implements an 20 | algorithm to identify data sets with a significant proportion of rounded 21 | coordinates. Especially suited for large data sets. See 22 | \url{https://ropensci.github.io/CoordinateCleaner/} for more details and 23 | tutorials. 
24 | } 25 | \author{ 26 | Alexander Zizka, Daniele Silvestro, Tobias Andermann, Josue Azevedo, 27 | Camila Duarte Ritter, Daniel Edler, Harith Farooq, Andrei Herdean, Maria Ariza, 28 | Ruud Scharn, Sten Svantesson, Niklas Wengstrom, Vera Zizka 29 | } 30 | \keyword{internal} 31 | -------------------------------------------------------------------------------- /man/aohi.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/CoordinateCleaner-package.R 3 | \docType{data} 4 | \name{aohi} 5 | \alias{aohi} 6 | \title{Artificial Hotspot Occurrence Inventory} 7 | \source{ 8 | \url{https://onlinelibrary.wiley.com/doi/10.1111/jbi.14543} 9 | } 10 | \description{ 11 | A data frame with information on Artificial Hotspot Occurrence Inventory (AHOI) 12 | as available in Park et al 2022. For more details see reference. 13 | } 14 | \examples{ 15 | 16 | data("aohi") 17 | } 18 | \references{ 19 | Park, D. S., Xie, Y., Thammavong, H. T., Tulaiha, R., & Feng, X. 20 | (2023). Artificial Hotspot Occurrence Inventory (AHOI). Journal of 21 | Biogeography, 50, 441–449. \doi{10.1111/jbi.14543} 22 | } 23 | \keyword{gazetteers} 24 | -------------------------------------------------------------------------------- /man/buffland.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/CoordinateCleaner-package.R 3 | \docType{data} 4 | \name{buffland} 5 | \alias{buffland} 6 | \title{Global Coastlines buffered by 1 degree} 7 | \source{ 8 | \url{https://www.naturalearthdata.com/downloads/10m-physical-vectors/} 9 | } 10 | \description{ 11 | A \code{SpatVector} with global coastlines, with a 1 degree buffer to extent coastlines as alternative reference for \code{\link{cc_sea}}. Can be useful to identify species in the sea, without flagging records in mangroves, marshes, etc. 
12 | } 13 | \examples{ 14 | 15 | data("buffland") 16 | } 17 | \keyword{gazetteers} 18 | -------------------------------------------------------------------------------- /man/buffsea.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/CoordinateCleaner-package.R 3 | \docType{data} 4 | \name{buffsea} 5 | \alias{buffsea} 6 | \title{Global Coastlines buffered by -1 degree} 7 | \source{ 8 | \url{https://www.naturalearthdata.com/downloads/10m-physical-vectors/} 9 | } 10 | \description{ 11 | A \code{SpatVector} with global coastlines, with a -1 degree buffer to extent coastlines as alternative reference for \code{\link{cc_sea}}. Can be useful to identify marine species on land without flagging records in estuaries, etc. 12 | } 13 | \examples{ 14 | 15 | data("buffsea") 16 | } 17 | \keyword{gazetteers} 18 | -------------------------------------------------------------------------------- /man/cc_aohi.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/cc_aohi.R 3 | \name{cc_aohi} 4 | \alias{cc_aohi} 5 | \title{Identify Coordinates in Artificial Hotspot Occurrence Inventory} 6 | \usage{ 7 | cc_aohi( 8 | x, 9 | lon = "decimalLongitude", 10 | lat = "decimalLatitude", 11 | species = "species", 12 | taxa = c("Aves", "Insecta", "Mammalia", "Plantae"), 13 | buffer = 10000, 14 | geod = TRUE, 15 | value = "clean", 16 | verbose = TRUE 17 | ) 18 | } 19 | \arguments{ 20 | \item{x}{data.frame. Containing geographical coordinates and species names.} 21 | 22 | \item{lon}{character string. The column with the longitude coordinates. 23 | Default = \dQuote{decimalLongitude}.} 24 | 25 | \item{lat}{character string. The column with the latitude coordinates. 26 | Default = \dQuote{decimalLatitude}.} 27 | 28 | \item{species}{character string. 
The column with the species identity. Only 29 | required if verify = TRUE.} 30 | 31 | \item{taxa}{Artificial Hotspot Occurrence Inventory (AHOI) were created based 32 | on four different taxa, birds, insecta, mammalia, and plantae. Users can 33 | choose to keep all, or any specific taxa subset to define the AHOI locations. 34 | Default is to keep all: c("Aves", "Insecta", "Mammalia", "Plantae").} 35 | 36 | \item{buffer}{The buffer around each capital coordinate (the centre of the 37 | city), where records should be flagged as problematic. Units depend on 38 | geod. Default = 10 kilometres.} 39 | 40 | \item{geod}{logical. If TRUE the radius around each capital is calculated 41 | based on a sphere, buffer is in meters and independent of latitude. If 42 | FALSE the radius is calculated assuming planar coordinates and varies 43 | slightly with latitude. Default = TRUE. 44 | See https://seethedatablog.wordpress.com/ for detail and credits.} 45 | 46 | \item{value}{character string. Defining the output value. See value.} 47 | 48 | \item{verbose}{logical. If TRUE reports the name of the test and the number 49 | of records flagged.} 50 | } 51 | \value{ 52 | Depending on the \sQuote{value} argument, either a \code{data.frame} 53 | containing the records considered correct by the test (\dQuote{clean}) or a 54 | logical vector (\dQuote{flagged}), with TRUE = test passed and FALSE = test 55 | failed/potentially problematic . Default = \dQuote{clean}. 56 | } 57 | \description{ 58 | Removes or flags records within Artificial Hotspot Occurrence Inventory. 59 | Poorly geo-referenced occurrence records in biological databases are often 60 | erroneously geo-referenced to highly recurring coordinates that were assessed 61 | by Park et al 2022. See the reference for more details. 62 | } 63 | \note{ 64 | See \url{https://ropensci.github.io/CoordinateCleaner/} for more 65 | details and tutorials. 
66 | } 67 | \examples{ 68 | 69 | x <- data.frame(species = letters[1:10], 70 | decimalLongitude = c(runif(99, -180, 180), -47.92), 71 | decimalLatitude = c(runif(99, -90,90), -15.78)) 72 | cc_aohi(x) 73 | 74 | } 75 | \references{ 76 | Park, D. S., Xie, Y., Thammavong, H. T., Tulaiha, R., & Feng, X. 77 | (2023). Artificial Hotspot Occurrence Inventory (AHOI). Journal of 78 | Biogeography, 50, 441–449. \doi{10.1111/jbi.14543} 79 | } 80 | \seealso{ 81 | Other Coordinates: 82 | \code{\link{cc_cap}()}, 83 | \code{\link{cc_cen}()}, 84 | \code{\link{cc_coun}()}, 85 | \code{\link{cc_dupl}()}, 86 | \code{\link{cc_equ}()}, 87 | \code{\link{cc_gbif}()}, 88 | \code{\link{cc_inst}()}, 89 | \code{\link{cc_iucn}()}, 90 | \code{\link{cc_outl}()}, 91 | \code{\link{cc_sea}()}, 92 | \code{\link{cc_urb}()}, 93 | \code{\link{cc_val}()}, 94 | \code{\link{cc_zero}()} 95 | } 96 | \concept{Coordinates} 97 | \keyword{Coordinate} 98 | \keyword{cleaning} 99 | -------------------------------------------------------------------------------- /man/cc_cap.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/cc_cap.R 3 | \name{cc_cap} 4 | \alias{cc_cap} 5 | \title{Identify Coordinates in Vicinity of Country Capitals.} 6 | \usage{ 7 | cc_cap( 8 | x, 9 | lon = "decimalLongitude", 10 | lat = "decimalLatitude", 11 | species = "species", 12 | buffer = 10000, 13 | geod = TRUE, 14 | ref = NULL, 15 | verify = FALSE, 16 | value = "clean", 17 | verbose = TRUE 18 | ) 19 | } 20 | \arguments{ 21 | \item{x}{data.frame. Containing geographical coordinates and species names.} 22 | 23 | \item{lon}{character string. The column with the longitude coordinates. 24 | Default = \dQuote{decimalLongitude}.} 25 | 26 | \item{lat}{character string. The column with the latitude coordinates. 27 | Default = \dQuote{decimalLatitude}.} 28 | 29 | \item{species}{character string. The column with the species identity. 
Only 30 | required if verify = TRUE.} 31 | 32 | \item{buffer}{The buffer around each capital coordinate (the centre of the 33 | city), where records should be flagged as problematic. Units depend on 34 | geod. Default = 10 kilometres.} 35 | 36 | \item{geod}{logical. If TRUE the radius around each capital is calculated 37 | based on a sphere, buffer is in meters and independent of latitude. If 38 | FALSE the radius is calculated assuming planar coordinates and varies 39 | slightly with latitude. Default = TRUE. 40 | See https://seethedatablog.wordpress.com/ for detail and credits.} 41 | 42 | \item{ref}{SpatVector (geometry: polygons). Providing the geographic 43 | gazetteer. Can be any SpatVector (geometry: polygons), but the structure 44 | must be identical to \code{\link{countryref}}. Default = 45 | \code{\link{countryref}}.} 46 | 47 | \item{verify}{logical. If TRUE records are only flagged if they are the only 48 | record in a given species flagged close to a given reference. If FALSE, the 49 | distance is the only criterion} 50 | 51 | \item{value}{character string. Defining the output value. See value.} 52 | 53 | \item{verbose}{logical. If TRUE reports the name of the test and the number 54 | of records flagged.} 55 | } 56 | \value{ 57 | Depending on the \sQuote{value} argument, either a \code{data.frame} 58 | containing the records considered correct by the test (\dQuote{clean}) or a 59 | logical vector (\dQuote{flagged}), with TRUE = test passed and FALSE = test 60 | failed/potentially problematic . Default = \dQuote{clean}. 61 | } 62 | \description{ 63 | Removes or flags records within a certain radius around country capitals. 64 | Poorly geo-referenced occurrence records in biological databases are often 65 | erroneously geo-referenced to capitals. 66 | } 67 | \note{ 68 | See \url{https://ropensci.github.io/CoordinateCleaner/} for more 69 | details and tutorials. 
70 | } 71 | \examples{ 72 | \dontrun{ 73 | x <- data.frame(species = letters[1:10], 74 | decimalLongitude = c(runif(99, -180, 180), -47.882778), 75 | decimalLatitude = c(runif(99, -90, 90), -15.793889)) 76 | 77 | cc_cap(x) 78 | cc_cap(x, value = "flagged") 79 | } 80 | } 81 | \seealso{ 82 | Other Coordinates: 83 | \code{\link{cc_aohi}()}, 84 | \code{\link{cc_cen}()}, 85 | \code{\link{cc_coun}()}, 86 | \code{\link{cc_dupl}()}, 87 | \code{\link{cc_equ}()}, 88 | \code{\link{cc_gbif}()}, 89 | \code{\link{cc_inst}()}, 90 | \code{\link{cc_iucn}()}, 91 | \code{\link{cc_outl}()}, 92 | \code{\link{cc_sea}()}, 93 | \code{\link{cc_urb}()}, 94 | \code{\link{cc_val}()}, 95 | \code{\link{cc_zero}()} 96 | } 97 | \concept{Coordinates} 98 | \keyword{Coordinate} 99 | \keyword{cleaning} 100 | -------------------------------------------------------------------------------- /man/cc_cen.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/cc_cen.R 3 | \name{cc_cen} 4 | \alias{cc_cen} 5 | \title{Identify Coordinates in Vicinity of Country and Province Centroids} 6 | \usage{ 7 | cc_cen( 8 | x, 9 | lon = "decimalLongitude", 10 | lat = "decimalLatitude", 11 | species = "species", 12 | buffer = 1000, 13 | geod = TRUE, 14 | test = "both", 15 | ref = NULL, 16 | verify = FALSE, 17 | value = "clean", 18 | verbose = TRUE 19 | ) 20 | } 21 | \arguments{ 22 | \item{x}{data.frame. Containing geographical coordinates and species names.} 23 | 24 | \item{lon}{character string. The column with the longitude coordinates. 25 | Default = \dQuote{decimalLongitude}.} 26 | 27 | \item{lat}{character string. The column with the latitude coordinates. 28 | Default = \dQuote{decimalLatitude}.} 29 | 30 | \item{species}{character string. The column with the species identity. Only 31 | required if verify = TRUE.} 32 | 33 | \item{buffer}{numerical. 
The buffer around each province or country 34 | centroid, where records should be flagged as problematic. Units depend on geod. 35 | Default = 1 kilometre.} 36 | 37 | \item{geod}{logical. If TRUE the radius around each capital is calculated 38 | based on a sphere, buffer is in meters and independent of latitude. If 39 | FALSE the radius is calculated assuming planar coordinates and varies 40 | slightly with latitude. Default = TRUE. 41 | See https://seethedatablog.wordpress.com/ for detail and credits.} 42 | 43 | \item{test}{a character string. Specifying the details of the test. One of 44 | c(\dQuote{both}, \dQuote{country}, \dQuote{provinces}). If both tests for 45 | country and province centroids.} 46 | 47 | \item{ref}{SpatVector (geometry: polygons). Providing the geographic 48 | gazetteer. Can be any SpatVector (geometry: polygons), but the structure 49 | must be identical to \code{\link{countryref}}. Default = 50 | \code{\link{countryref}}.} 51 | 52 | \item{verify}{logical. If TRUE records are only flagged if they are the only 53 | record in a given species flagged close to a given reference. If FALSE, the 54 | distance is the only criterion} 55 | 56 | \item{value}{character string. Defining the output value. See value.} 57 | 58 | \item{verbose}{logical. If TRUE reports the name of the test and the number 59 | of records flagged.} 60 | } 61 | \value{ 62 | Depending on the \sQuote{value} argument, either a \code{data.frame} 63 | containing the records considered correct by the test (\dQuote{clean}) or a 64 | logical vector (\dQuote{flagged}), with TRUE = test passed and FALSE = test 65 | failed/potentially problematic . Default = \dQuote{clean}. 66 | } 67 | \description{ 68 | Removes or flags records within a radius around the geographic centroids of political 69 | countries and provinces. Poorly geo-referenced occurrence records in 70 | biological databases are often erroneously geo-referenced to centroids. 
71 | } 72 | \note{ 73 | See \url{https://ropensci.github.io/CoordinateCleaner/} for more 74 | details and tutorials. 75 | } 76 | \examples{ 77 | 78 | x <- data.frame(species = letters[1:10], 79 | decimalLongitude = c(runif(99, -180, 180), -47.92), 80 | decimalLatitude = c(runif(99, -90,90), -15.78)) 81 | cc_cen(x, geod = FALSE) 82 | 83 | \dontrun{ 84 | cc_inst(x, value = "flagged", buffer = 50000) #geod = T 85 | } 86 | 87 | } 88 | \seealso{ 89 | Other Coordinates: 90 | \code{\link{cc_aohi}()}, 91 | \code{\link{cc_cap}()}, 92 | \code{\link{cc_coun}()}, 93 | \code{\link{cc_dupl}()}, 94 | \code{\link{cc_equ}()}, 95 | \code{\link{cc_gbif}()}, 96 | \code{\link{cc_inst}()}, 97 | \code{\link{cc_iucn}()}, 98 | \code{\link{cc_outl}()}, 99 | \code{\link{cc_sea}()}, 100 | \code{\link{cc_urb}()}, 101 | \code{\link{cc_val}()}, 102 | \code{\link{cc_zero}()} 103 | } 104 | \concept{Coordinates} 105 | \keyword{Coordinate} 106 | \keyword{cleaning} 107 | -------------------------------------------------------------------------------- /man/cc_coun.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/cc_coun.R 3 | \name{cc_coun} 4 | \alias{cc_coun} 5 | \title{Identify Coordinates Outside their Reported Country} 6 | \usage{ 7 | cc_coun( 8 | x, 9 | lon = "decimalLongitude", 10 | lat = "decimalLatitude", 11 | iso3 = "countrycode", 12 | value = "clean", 13 | ref = NULL, 14 | ref_col = "iso_a3", 15 | verbose = TRUE, 16 | buffer = NULL 17 | ) 18 | } 19 | \arguments{ 20 | \item{x}{data.frame. Containing geographical coordinates and species names.} 21 | 22 | \item{lon}{character string. The column with the longitude coordinates. 23 | Default = \dQuote{decimalLongitude}.} 24 | 25 | \item{lat}{character string. The column with the latitude coordinates. 26 | Default = \dQuote{decimalLatitude}.} 27 | 28 | \item{iso3}{a character string. 
The column with the country assignment of 29 | each record in three letter ISO code. Default = \dQuote{countrycode}.} 30 | 31 | \item{value}{character string. Defining the output value. See value.} 32 | 33 | \item{ref}{SpatVector (geometry: polygons). Providing the geographic 34 | gazetteer. Can be any SpatVector (geometry: polygons), but the structure 35 | must be identical to \code{rnaturalearth::ne_countries(scale = "medium", 36 | returnclass = "sf")}. 37 | Default = \code{rnaturalearth::ne_countries(scale = "medium", returnclass = 38 | "sf")}} 39 | 40 | \item{ref_col}{the column name in the reference dataset, containing the 41 | relevant ISO codes for matching. Default is to "iso_a3_eh" which refers to 42 | the ISO-3 codes in the reference dataset. See notes.} 43 | 44 | \item{verbose}{logical. If TRUE reports the name of the test and the number 45 | of records flagged.} 46 | 47 | \item{buffer}{numeric. Units are in meters. If provided, a buffer is 48 | created around each country polygon.} 49 | } 50 | \value{ 51 | Depending on the \sQuote{value} argument, either a \code{data.frame} 52 | containing the records considered correct by the test (\dQuote{clean}) or a 53 | logical vector (\dQuote{flagged}), with TRUE = test passed and FALSE = test 54 | failed/potentially problematic . Default = \dQuote{clean}. 55 | } 56 | \description{ 57 | Removes or flags mismatches between geographic coordinates and additional 58 | country information (usually this information is reliably reported with 59 | specimens). Such a mismatch can occur for example, if latitude and longitude 60 | are switched. 61 | } 62 | \note{ 63 | The ref_col argument allows to adapt the function to the structure of 64 | alternative reference datasets. For instance, for 65 | \code{rnaturalearth::ne_countries(scale = "small")}, the default will fail, 66 | but ref_col = "iso_a3" will work. 
67 | 68 | With the default reference, records are flagged if they fall outside 69 | the terrestrial territory of countries, hence records in territorial waters 70 | might be flagged. See \url{https://ropensci.github.io/CoordinateCleaner/} 71 | for more details and tutorials. 72 | } 73 | \examples{ 74 | 75 | \dontrun{ 76 | x <- data.frame(species = letters[1:10], 77 | decimalLongitude = runif(100, -20, 30), 78 | decimalLatitude = runif(100, 35,60), 79 | countrycode = "RUS") 80 | 81 | cc_coun(x, value = "flagged")#non-terrestrial records are flagged as wrong. 82 | } 83 | 84 | } 85 | \seealso{ 86 | Other Coordinates: 87 | \code{\link{cc_aohi}()}, 88 | \code{\link{cc_cap}()}, 89 | \code{\link{cc_cen}()}, 90 | \code{\link{cc_dupl}()}, 91 | \code{\link{cc_equ}()}, 92 | \code{\link{cc_gbif}()}, 93 | \code{\link{cc_inst}()}, 94 | \code{\link{cc_iucn}()}, 95 | \code{\link{cc_outl}()}, 96 | \code{\link{cc_sea}()}, 97 | \code{\link{cc_urb}()}, 98 | \code{\link{cc_val}()}, 99 | \code{\link{cc_zero}()} 100 | } 101 | \concept{Coordinates} 102 | \keyword{Coordinate} 103 | \keyword{cleaning} 104 | -------------------------------------------------------------------------------- /man/cc_dupl.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/cc_dupl.R 3 | \name{cc_dupl} 4 | \alias{cc_dupl} 5 | \title{Identify Duplicated Records} 6 | \usage{ 7 | cc_dupl( 8 | x, 9 | lon = "decimalLongitude", 10 | lat = "decimalLatitude", 11 | species = "species", 12 | additions = NULL, 13 | value = "clean", 14 | verbose = TRUE 15 | ) 16 | } 17 | \arguments{ 18 | \item{x}{data.frame. Containing geographical coordinates and species names.} 19 | 20 | \item{lon}{character string. The column with the longitude coordinates. 21 | Default = \dQuote{decimalLongitude}.} 22 | 23 | \item{lat}{character string. The column with the latitude coordinates. 
24 | Default = \dQuote{decimalLatitude}.} 25 | 26 | \item{species}{a character string. The column with the species name. Default 27 | = \dQuote{species}.} 28 | 29 | \item{additions}{a vector of character strings. Additional columns to be 30 | included in the test for duplication. For example as below, collector name 31 | and collector number.} 32 | 33 | \item{value}{character string. Defining the output value. See value.} 34 | 35 | \item{verbose}{logical. If TRUE reports the name of the test and the number 36 | of records flagged.} 37 | } 38 | \value{ 39 | Depending on the \sQuote{value} argument, either a \code{data.frame} 40 | containing the records considered correct by the test (\dQuote{clean}) or a 41 | logical vector (\dQuote{flagged}), with TRUE = test passed and FALSE = test 42 | failed/potentially problematic . Default = \dQuote{clean}. 43 | } 44 | \description{ 45 | Removes or flags duplicated records based on species name and coordinates, as well as 46 | user-defined additional columns. True (specimen) duplicates or duplicates 47 | from the same species can make up the bulk of records in a biological 48 | collection database, but are undesirable for many analyses. Both can be 49 | flagged with this function, the former given enough additional information. 
50 | } 51 | \examples{ 52 | 53 | x <- data.frame(species = letters[1:10], 54 | decimalLongitude = sample(x = 0:10, size = 100, replace = TRUE), 55 | decimalLatitude = sample(x = 0:10, size = 100, replace = TRUE), 56 | collector = "Bonpl", 57 | collector.number = c(1001, 354), 58 | collection = rep(c("K", "WAG","FR", "P", "S"), 20)) 59 | 60 | cc_dupl(x, value = "flagged") 61 | cc_dupl(x, additions = c("collector", "collector.number")) 62 | 63 | } 64 | \seealso{ 65 | Other Coordinates: 66 | \code{\link{cc_aohi}()}, 67 | \code{\link{cc_cap}()}, 68 | \code{\link{cc_cen}()}, 69 | \code{\link{cc_coun}()}, 70 | \code{\link{cc_equ}()}, 71 | \code{\link{cc_gbif}()}, 72 | \code{\link{cc_inst}()}, 73 | \code{\link{cc_iucn}()}, 74 | \code{\link{cc_outl}()}, 75 | \code{\link{cc_sea}()}, 76 | \code{\link{cc_urb}()}, 77 | \code{\link{cc_val}()}, 78 | \code{\link{cc_zero}()} 79 | } 80 | \concept{Coordinates} 81 | \keyword{Coordinate} 82 | \keyword{cleaning} 83 | -------------------------------------------------------------------------------- /man/cc_equ.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/cc_equ.R 3 | \name{cc_equ} 4 | \alias{cc_equ} 5 | \title{Identify Records with Identical lat/lon} 6 | \usage{ 7 | cc_equ( 8 | x, 9 | lon = "decimalLongitude", 10 | lat = "decimalLatitude", 11 | test = "absolute", 12 | value = "clean", 13 | verbose = TRUE 14 | ) 15 | } 16 | \arguments{ 17 | \item{x}{data.frame. Containing geographical coordinates and species names.} 18 | 19 | \item{lon}{character string. The column with the longitude coordinates. 20 | Default = \dQuote{decimalLongitude}.} 21 | 22 | \item{lat}{character string. The column with the latitude coordinates. 23 | Default = \dQuote{decimalLatitude}.} 24 | 25 | \item{test}{character string. Defines if coordinates are compared exactly 26 | (\dQuote{identical}) or on the absolute scale (i.e. 
-1 = 1, 27 | \dQuote{absolute}). Default is to \dQuote{absolute}.} 28 | 29 | \item{value}{character string. Defining the output value. See value.} 30 | 31 | \item{verbose}{logical. If TRUE reports the name of the test and the number 32 | of records flagged.} 33 | } 34 | \value{ 35 | Depending on the \sQuote{value} argument, either a \code{data.frame} 36 | containing the records considered correct by the test (\dQuote{clean}) or a 37 | logical vector (\dQuote{flagged}), with TRUE = test passed and FALSE = test 38 | failed/potentially problematic . Default = \dQuote{clean}. 39 | } 40 | \description{ 41 | Removes or flags records with equal latitude and longitude coordinates, 42 | either exact or absolute. Equal coordinates can often indicate data entry 43 | errors. 44 | } 45 | \examples{ 46 | 47 | x <- data.frame(species = letters[1:10], 48 | decimalLongitude = runif(100, -180, 180), 49 | decimalLatitude = runif(100, -90,90)) 50 | 51 | cc_equ(x) 52 | cc_equ(x, value = "flagged") 53 | 54 | } 55 | \seealso{ 56 | Other Coordinates: 57 | \code{\link{cc_aohi}()}, 58 | \code{\link{cc_cap}()}, 59 | \code{\link{cc_cen}()}, 60 | \code{\link{cc_coun}()}, 61 | \code{\link{cc_dupl}()}, 62 | \code{\link{cc_gbif}()}, 63 | \code{\link{cc_inst}()}, 64 | \code{\link{cc_iucn}()}, 65 | \code{\link{cc_outl}()}, 66 | \code{\link{cc_sea}()}, 67 | \code{\link{cc_urb}()}, 68 | \code{\link{cc_val}()}, 69 | \code{\link{cc_zero}()} 70 | } 71 | \concept{Coordinates} 72 | \keyword{Coordinate} 73 | \keyword{cleaning} 74 | -------------------------------------------------------------------------------- /man/cc_gbif.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/cc_gbif.R 3 | \name{cc_gbif} 4 | \alias{cc_gbif} 5 | \title{Identify Records Assigned to GBIF Headquarters} 6 | \usage{ 7 | cc_gbif( 8 | x, 9 | lon = "decimalLongitude", 10 | lat = "decimalLatitude", 11 | species = 
"species", 12 | buffer = 1000, 13 | geod = TRUE, 14 | verify = FALSE, 15 | value = "clean", 16 | verbose = TRUE 17 | ) 18 | } 19 | \arguments{ 20 | \item{x}{data.frame. Containing geographical coordinates and species names.} 21 | 22 | \item{lon}{character string. The column with the longitude coordinates. 23 | Default = \dQuote{decimalLongitude}.} 24 | 25 | \item{lat}{character string. The column with the latitude coordinates. 26 | Default = \dQuote{decimalLatitude}.} 27 | 28 | \item{species}{character string. The column with the species identity. Only 29 | required if verify = TRUE.} 30 | 31 | \item{buffer}{numerical. The buffer around the GBIF headquarters, 32 | where records should be flagged as problematic. Units depend on geod. Default = 100 m.} 33 | 34 | \item{geod}{logical. If TRUE the radius is calculated 35 | based on a sphere, buffer is in meters. If FALSE 36 | the radius is calculated in degrees. Default = T.} 37 | 38 | \item{verify}{logical. If TRUE records are only flagged if they are the only 39 | record in a given species flagged close to a given reference. If FALSE, the 40 | distance is the only criterion} 41 | 42 | \item{value}{character string. Defining the output value. See value.} 43 | 44 | \item{verbose}{logical. If TRUE reports the name of the test and the number 45 | of records flagged.} 46 | } 47 | \value{ 48 | Depending on the \sQuote{value} argument, either a \code{data.frame} 49 | containing the records considered correct by the test (\dQuote{clean}) or a 50 | logical vector (\dQuote{flagged}), with TRUE = test passed and FALSE = test 51 | failed/potentially problematic . Default = \dQuote{clean}. 52 | } 53 | \description{ 54 | Removes or flags records within 0.5 degree radius around the GBIF headquarters in 55 | Copenhagen, DK. 56 | } 57 | \details{ 58 | Not recommended if working with records from Denmark or the Copenhagen area. 
59 | } 60 | \examples{ 61 | 62 | x <- data.frame(species = "A", 63 | decimalLongitude = c(12.58, 12.58), 64 | decimalLatitude = c(55.67, 30.00)) 65 | 66 | cc_gbif(x) 67 | cc_gbif(x, value = "flagged") 68 | 69 | } 70 | \seealso{ 71 | Other Coordinates: 72 | \code{\link{cc_aohi}()}, 73 | \code{\link{cc_cap}()}, 74 | \code{\link{cc_cen}()}, 75 | \code{\link{cc_coun}()}, 76 | \code{\link{cc_dupl}()}, 77 | \code{\link{cc_equ}()}, 78 | \code{\link{cc_inst}()}, 79 | \code{\link{cc_iucn}()}, 80 | \code{\link{cc_outl}()}, 81 | \code{\link{cc_sea}()}, 82 | \code{\link{cc_urb}()}, 83 | \code{\link{cc_val}()}, 84 | \code{\link{cc_zero}()} 85 | } 86 | \concept{Coordinates} 87 | \keyword{Coordinate} 88 | \keyword{cleaning} 89 | -------------------------------------------------------------------------------- /man/cc_inst.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/cc_inst.R 3 | \name{cc_inst} 4 | \alias{cc_inst} 5 | \title{Identify Records in the Vicinity of Biodiversity Institutions} 6 | \usage{ 7 | cc_inst( 8 | x, 9 | lon = "decimalLongitude", 10 | lat = "decimalLatitude", 11 | species = "species", 12 | buffer = 100, 13 | geod = FALSE, 14 | ref = NULL, 15 | verify = FALSE, 16 | verify_mltpl = 10, 17 | value = "clean", 18 | verbose = TRUE 19 | ) 20 | } 21 | \arguments{ 22 | \item{x}{data.frame. Containing geographical coordinates and species names.} 23 | 24 | \item{lon}{character string. The column with the longitude coordinates. 25 | Default = \dQuote{decimalLongitude}.} 26 | 27 | \item{lat}{character string. The column with the latitude coordinates. 28 | Default = \dQuote{decimalLatitude}.} 29 | 30 | \item{species}{character string. The column with the species identity. Only 31 | required if verify = TRUE.} 32 | 33 | \item{buffer}{numerical. The buffer around each institution, where records 34 | should be flagged as problematic, in decimal degrees. 
Default = 100.} 35 | 36 | \item{geod}{logical. If TRUE the radius around each institution is calculated 37 | based on a sphere, buffer is in meters and independent of latitude. If 38 | FALSE the radius is calculated assuming planar coordinates and varies 39 | slightly with latitude. Default = FALSE. 40 | See https://seethedatablog.wordpress.com/ for detail and credits.} 41 | 42 | \item{ref}{SpatVector (geometry: polygons). Providing the geographic 43 | gazetteer. Can be any SpatVector (geometry: polygons), but the structure 44 | must be identical to \code{\link{institutions}}. Default = 45 | \code{\link{institutions}}} 46 | 47 | \item{verify}{logical. If TRUE, records close to institutions are only 48 | flagged, if there are no other records of the same species in the greater 49 | vicinity (a radius of buffer * verify_mltpl).} 50 | 51 | \item{verify_mltpl}{numerical. Indicates the factor by which the radius for 52 | verify exceeds the radius of the initial test. Default = 10, which might be 53 | suitable if geod is TRUE, but might be too large otherwise.} 54 | 55 | \item{value}{character string. Defining the output value. See value.} 56 | 57 | \item{verbose}{logical. If TRUE reports the name of the test and the number 58 | of records flagged.} 59 | } 60 | \value{ 61 | Depending on the \sQuote{value} argument, either a \code{data.frame} 62 | containing the records considered correct by the test (\dQuote{clean}) or a 63 | logical vector (\dQuote{flagged}), with TRUE = test passed and FALSE = test 64 | failed/potentially problematic. Default = \dQuote{clean}. 65 | } 66 | \description{ 67 | Removes or flags records assigned to the location of zoos, botanical gardens, 68 | herbaria, universities and museums, based on a global database of ~10,000 such 69 | biodiversity institutions. Coordinates from these locations can be related to 70 | data-entry errors, false automated geo-reference or individuals in 71 | captivity/horticulture.
72 | } 73 | \details{ 74 | Note: the buffer radius is in degrees, thus will differ slightly between 75 | different latitudes. 76 | } 77 | \examples{ 78 | 79 | x <- data.frame(species = letters[1:10], 80 | decimalLongitude = c(runif(99, -180, 180), 37.577800), 81 | decimalLatitude = c(runif(99, -90,90), 55.710800)) 82 | 83 | #large buffer for demonstration, using geod = FALSE for shorter runtime 84 | cc_inst(x, value = "flagged", buffer = 10, geod = FALSE) 85 | 86 | \dontrun{ 87 | #' cc_inst(x, value = "flagged", buffer = 50000) #geod = T 88 | } 89 | 90 | } 91 | \seealso{ 92 | Other Coordinates: 93 | \code{\link{cc_aohi}()}, 94 | \code{\link{cc_cap}()}, 95 | \code{\link{cc_cen}()}, 96 | \code{\link{cc_coun}()}, 97 | \code{\link{cc_dupl}()}, 98 | \code{\link{cc_equ}()}, 99 | \code{\link{cc_gbif}()}, 100 | \code{\link{cc_iucn}()}, 101 | \code{\link{cc_outl}()}, 102 | \code{\link{cc_sea}()}, 103 | \code{\link{cc_urb}()}, 104 | \code{\link{cc_val}()}, 105 | \code{\link{cc_zero}()} 106 | } 107 | \concept{Coordinates} 108 | \keyword{Coordinate} 109 | \keyword{cleaning} 110 | -------------------------------------------------------------------------------- /man/cc_iucn.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/cc_iucn.R 3 | \name{cc_iucn} 4 | \alias{cc_iucn} 5 | \title{Identify Records Outside Natural Ranges} 6 | \usage{ 7 | cc_iucn( 8 | x, 9 | range, 10 | lon = "decimalLongitude", 11 | lat = "decimalLatitude", 12 | species = "species", 13 | buffer = 0, 14 | value = "clean", 15 | verbose = TRUE 16 | ) 17 | } 18 | \arguments{ 19 | \item{x}{data.frame. Containing geographical coordinates and species names.} 20 | 21 | \item{range}{a SpatVector of natural ranges for species in x. 22 | Must contain a column named as indicated by \code{species}. See details.} 23 | 24 | \item{lon}{character string. The column with the longitude coordinates. 
25 | Default = \dQuote{decimalLongitude}.} 26 | 27 | \item{lat}{character string. The column with the latitude coordinates. 28 | Default = \dQuote{decimalLatitude}.} 29 | 30 | \item{species}{a character string. The column with the species name. 31 | Default = \dQuote{species}.} 32 | 33 | \item{buffer}{numerical. The buffer around each species' range, 34 | from where records should be flagged as problematic, in meters. Default = 0.} 35 | 36 | \item{value}{character string. Defining the output value. See value.} 37 | 38 | \item{verbose}{logical. If TRUE reports the name of the test and the number 39 | of records flagged.} 40 | } 41 | \value{ 42 | Depending on the \sQuote{value} argument, either a \code{data.frame} 43 | containing the records considered correct by the test (\dQuote{clean}) or a 44 | logical vector (\dQuote{flagged}), with TRUE = test passed and FALSE = test 45 | failed/potentially problematic. Default = \dQuote{clean}. 46 | } 47 | \description{ 48 | Removes or flags records outside of the provided natural range polygon, on a per species basis. 49 | Expects one entry per species. See the example or 50 | \url{https://www.iucnredlist.org/resources/spatial-data-download} for 51 | the required polygon structure. 52 | } 53 | \details{ 54 | Download natural range maps in suitable format for amphibians, birds, 55 | mammals and reptiles 56 | from \url{https://www.iucnredlist.org/resources/spatial-data-download}. 57 | Note: the buffer radius is in meters, as specified by the 58 | \code{buffer} argument. 59 | } 60 | \note{ 61 | See \url{https://ropensci.github.io/CoordinateCleaner/} for more 62 | details and tutorials.
63 | } 64 | \examples{ 65 | library(terra) 66 | 67 | x <- data.frame(species = c("A", "B"), 68 | decimalLongitude = runif(100, -170, 170), 69 | decimalLatitude = runif(100, -80,80)) 70 | 71 | range_species_A <- cbind(c(-45,-45,-60,-60,-45), c(-10,-25,-25,-10,-10)) 72 | rangeA <- terra::vect(range_species_A, "polygons") 73 | range_species_B <- cbind(c(15,15,32,32,15), c(10,-10,-10,10,10)) 74 | rangeB <- terra::vect(range_species_B, "polygons") 75 | range <- terra::vect(list(rangeA, rangeB)) 76 | range$binomial <- c("A", "B") 77 | 78 | cc_iucn(x = x, range = range, buffer = 0) 79 | 80 | } 81 | \seealso{ 82 | Other Coordinates: 83 | \code{\link{cc_aohi}()}, 84 | \code{\link{cc_cap}()}, 85 | \code{\link{cc_cen}()}, 86 | \code{\link{cc_coun}()}, 87 | \code{\link{cc_dupl}()}, 88 | \code{\link{cc_equ}()}, 89 | \code{\link{cc_gbif}()}, 90 | \code{\link{cc_inst}()}, 91 | \code{\link{cc_outl}()}, 92 | \code{\link{cc_sea}()}, 93 | \code{\link{cc_urb}()}, 94 | \code{\link{cc_val}()}, 95 | \code{\link{cc_zero}()} 96 | } 97 | \concept{Coordinates} 98 | \keyword{Coordinate} 99 | \keyword{cleaning} 100 | -------------------------------------------------------------------------------- /man/cc_sea.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/cc_sea.R 3 | \name{cc_sea} 4 | \alias{cc_sea} 5 | \title{Identify Non-terrestrial Coordinates} 6 | \usage{ 7 | cc_sea( 8 | x, 9 | lon = "decimalLongitude", 10 | lat = "decimalLatitude", 11 | ref = NULL, 12 | scale = 110, 13 | value = "clean", 14 | speedup = TRUE, 15 | verbose = TRUE, 16 | buffer = NULL 17 | ) 18 | } 19 | \arguments{ 20 | \item{x}{data.frame. Containing geographical coordinates and species names.} 21 | 22 | \item{lon}{character string. The column with the longitude coordinates. 23 | Default = \dQuote{decimalLongitude}.} 24 | 25 | \item{lat}{character string. The column with the latitude coordinates. 
26 | Default = \dQuote{decimalLatitude}.} 27 | 28 | \item{ref}{SpatVector (geometry: polygons). Providing the geographic 29 | gazetteer. Can be any SpatVector (geometry: polygons), but the structure 30 | must be identical to rnaturalearth::ne_download(scale = 110, type = 'land', 31 | category = 'physical', returnclass = 'sf'). Default = 32 | rnaturalearth::ne_download(scale = 110, type = 'land', category = 33 | 'physical', returnclass = 'sf').} 34 | 35 | \item{scale}{the scale of the default reference, as downloaded from natural 36 | earth. Must be one of 10, 50, 110. Higher numbers equal higher detail. 37 | Default = 110.} 38 | 39 | \item{value}{character string. Defining the output value. See value.} 40 | 41 | \item{speedup}{logical. Using heuristic to speed up the analysis for large 42 | data sets with many records per location.} 43 | 44 | \item{verbose}{logical. If TRUE reports the name of the test and the number 45 | of records flagged.} 46 | 47 | \item{buffer}{numeric. Units are in meters. If provided, a buffer is 48 | created around the sea polygon, or ref provided.} 49 | } 50 | \value{ 51 | Depending on the \sQuote{value} argument, either a \code{data.frame} 52 | containing the records considered correct by the test (\dQuote{clean}) or a 53 | logical vector (\dQuote{flagged}), with TRUE = test passed and FALSE = test 54 | failed/potentially problematic . Default = \dQuote{clean}. 55 | } 56 | \description{ 57 | Removes or flags coordinates outside the reference landmass. Can be used to 58 | restrict datasets to terrestrial taxa, or exclude records from the open 59 | ocean, when depending on the reference (see details). Often records of 60 | terrestrial taxa can be found in the open ocean, mostly due to switched 61 | latitude and longitude. 
62 | } 63 | \details{ 64 | In some cases flagging records close of the coastline is not recommendable, 65 | because of the low precision of the reference dataset, minor GPS imprecision 66 | or because a dataset might include coast or marshland species. If you only 67 | want to flag records in the open ocean, consider using a buffered landmass 68 | reference, e.g.: \code{\link{buffland}}. 69 | } 70 | \note{ 71 | See \url{https://ropensci.github.io/CoordinateCleaner/} for more 72 | details and tutorials. 73 | } 74 | \examples{ 75 | x <- data.frame(species = letters[1:10], 76 | decimalLongitude = runif(10, -30, 30), 77 | decimalLatitude = runif(10, -30, 30)) 78 | 79 | cc_sea(x, value = "flagged") 80 | 81 | } 82 | \seealso{ 83 | Other Coordinates: 84 | \code{\link{cc_aohi}()}, 85 | \code{\link{cc_cap}()}, 86 | \code{\link{cc_cen}()}, 87 | \code{\link{cc_coun}()}, 88 | \code{\link{cc_dupl}()}, 89 | \code{\link{cc_equ}()}, 90 | \code{\link{cc_gbif}()}, 91 | \code{\link{cc_inst}()}, 92 | \code{\link{cc_iucn}()}, 93 | \code{\link{cc_outl}()}, 94 | \code{\link{cc_urb}()}, 95 | \code{\link{cc_val}()}, 96 | \code{\link{cc_zero}()} 97 | } 98 | \concept{Coordinates} 99 | \keyword{Coordinate} 100 | \keyword{cleaning} 101 | -------------------------------------------------------------------------------- /man/cc_urb.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/cc_urb.R 3 | \name{cc_urb} 4 | \alias{cc_urb} 5 | \title{Identify Records Inside Urban Areas} 6 | \usage{ 7 | cc_urb( 8 | x, 9 | lon = "decimalLongitude", 10 | lat = "decimalLatitude", 11 | ref = NULL, 12 | value = "clean", 13 | verbose = TRUE 14 | ) 15 | } 16 | \arguments{ 17 | \item{x}{data.frame. Containing geographical coordinates and species names.} 18 | 19 | \item{lon}{character string. The column with the longitude coordinates. 
20 | Default = \dQuote{decimalLongitude}.} 21 | 22 | \item{lat}{character string. The column with the latitude coordinates. 23 | Default = \dQuote{decimalLatitude}.} 24 | 25 | \item{ref}{a SpatVector. Providing the geographic gazetteer 26 | with the urban areas. See details. By default 27 | rnaturalearth::ne_download(scale = 'medium', type = 'urban_areas', 28 | returnclass = "sf"). Can be any \code{SpatVector}, but the 29 | structure must be identical to \code{rnaturalearth::ne_download()}.} 30 | 31 | \item{value}{character string. Defining the output value. See value.} 32 | 33 | \item{verbose}{logical. If TRUE reports the name of the test and the number 34 | of records flagged.} 35 | } 36 | \value{ 37 | Depending on the \sQuote{value} argument, either a \code{data.frame} 38 | containing the records considered correct by the test (\dQuote{clean}) or a 39 | logical vector (\dQuote{flagged}), with TRUE = test passed and FALSE = test 40 | failed/potentially problematic . Default = \dQuote{clean}. 41 | } 42 | \description{ 43 | Removes or flags records from inside urban areas, based on a geographic 44 | gazetteer. Often records from large databases span substantial time periods 45 | (centuries) and old records might represent habitats which today are replaced 46 | by city area. 47 | } 48 | \note{ 49 | See \url{https://ropensci.github.io/CoordinateCleaner/} for more 50 | details and tutorials. 
51 | } 52 | \examples{ 53 | 54 | \dontrun{ 55 | x <- data.frame(species = letters[1:10], 56 | decimalLongitude = runif(100, -180, 180), 57 | decimalLatitude = runif(100, -90,90)) 58 | 59 | cc_urb(x) 60 | cc_urb(x, value = "flagged") 61 | } 62 | 63 | } 64 | \seealso{ 65 | Other Coordinates: 66 | \code{\link{cc_aohi}()}, 67 | \code{\link{cc_cap}()}, 68 | \code{\link{cc_cen}()}, 69 | \code{\link{cc_coun}()}, 70 | \code{\link{cc_dupl}()}, 71 | \code{\link{cc_equ}()}, 72 | \code{\link{cc_gbif}()}, 73 | \code{\link{cc_inst}()}, 74 | \code{\link{cc_iucn}()}, 75 | \code{\link{cc_outl}()}, 76 | \code{\link{cc_sea}()}, 77 | \code{\link{cc_val}()}, 78 | \code{\link{cc_zero}()} 79 | } 80 | \concept{Coordinates} 81 | \keyword{Coordinate} 82 | \keyword{cleaning} 83 | -------------------------------------------------------------------------------- /man/cc_val.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/cc_val.R 3 | \name{cc_val} 4 | \alias{cc_val} 5 | \title{Identify Invalid lat/lon Coordinates} 6 | \usage{ 7 | cc_val( 8 | x, 9 | lon = "decimalLongitude", 10 | lat = "decimalLatitude", 11 | value = "clean", 12 | verbose = TRUE 13 | ) 14 | } 15 | \arguments{ 16 | \item{x}{data.frame. Containing geographical coordinates and species names.} 17 | 18 | \item{lon}{character string. The column with the longitude coordinates. 19 | Default = \dQuote{decimalLongitude}.} 20 | 21 | \item{lat}{character string. The column with the latitude coordinates. 22 | Default = \dQuote{decimalLatitude}.} 23 | 24 | \item{value}{character string. Defining the output value. See value.} 25 | 26 | \item{verbose}{logical. 
If TRUE reports the name of the test and the number 27 | of records flagged.} 28 | } 29 | \value{ 30 | Depending on the \sQuote{value} argument, either a \code{data.frame} 31 | containing the records considered correct by the test (\dQuote{clean}) or a 32 | logical vector (\dQuote{flagged}), with TRUE = test passed and FALSE = test 33 | failed/potentially problematic. Default = \dQuote{clean}. 34 | } 35 | \description{ 36 | Removes or flags non-numeric and not available coordinates, 37 | as well as lat > 90, lat < -90, lon > 180 and lon < -180. 38 | } 39 | \details{ 40 | This test is obligatory before running any further tests of 41 | CoordinateCleaner, as additional tests only run with valid coordinates. 42 | } 43 | \note{ 44 | See \url{https://ropensci.github.io/CoordinateCleaner/} for more 45 | details and tutorials. 46 | } 47 | \examples{ 48 | 49 | x <- data.frame(species = letters[1:10], 50 | decimalLongitude = c(runif(106, -180, 180), NA, "13W33'", "67,09", 305), 51 | decimalLatitude = runif(110, -90,90)) 52 | 53 | cc_val(x) 54 | cc_val(x, value = "flagged") 55 | 56 | } 57 | \seealso{ 58 | Other Coordinates: 59 | \code{\link{cc_aohi}()}, 60 | \code{\link{cc_cap}()}, 61 | \code{\link{cc_cen}()}, 62 | \code{\link{cc_coun}()}, 63 | \code{\link{cc_dupl}()}, 64 | \code{\link{cc_equ}()}, 65 | \code{\link{cc_gbif}()}, 66 | \code{\link{cc_inst}()}, 67 | \code{\link{cc_iucn}()}, 68 | \code{\link{cc_outl}()}, 69 | \code{\link{cc_sea}()}, 70 | \code{\link{cc_urb}()}, 71 | \code{\link{cc_zero}()} 72 | } 73 | \concept{Coordinates} 74 | \keyword{Coordinate} 75 | \keyword{cleaning} 76 | -------------------------------------------------------------------------------- /man/cc_zero.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/cc_zero.R 3 | \name{cc_zero} 4 | \alias{cc_zero} 5 | \title{Identify Zero Coordinates} 6 | \usage{ 7 | cc_zero( 8 | x,
9 | lon = "decimalLongitude", 10 | lat = "decimalLatitude", 11 | buffer = 0.5, 12 | value = "clean", 13 | verbose = TRUE 14 | ) 15 | } 16 | \arguments{ 17 | \item{x}{data.frame. Containing geographical coordinates and species names.} 18 | 19 | \item{lon}{character string. The column with the longitude coordinates. 20 | Default = \dQuote{decimalLongitude}.} 21 | 22 | \item{lat}{character string. The column with the latitude coordinates. 23 | Default = \dQuote{decimalLatitude}.} 24 | 25 | \item{buffer}{numerical. The buffer around the 0/0 point, 26 | where records should be flagged as problematic, in decimal 27 | degrees. Default = 0.5.} 28 | 29 | \item{value}{character string. Defining the output value. See value.} 30 | 31 | \item{verbose}{logical. If TRUE reports the name of the test and the number 32 | of records flagged.} 33 | } 34 | \value{ 35 | Depending on the \sQuote{value} argument, either a \code{data.frame} 36 | containing the records considered correct by the test (\dQuote{clean}) or a 37 | logical vector (\dQuote{flagged}), with TRUE = test passed and FALSE = test 38 | failed/potentially problematic . Default = \dQuote{clean}. 39 | } 40 | \description{ 41 | Removes or flags records with either zero longitude or latitude and a radius 42 | around the point at zero longitude and zero latitude. These problems are 43 | often due to erroneous data-entry or geo-referencing and can lead to typical 44 | patterns of high diversity around the equator. 45 | } 46 | \note{ 47 | See \url{https://ropensci.github.io/CoordinateCleaner/} for more 48 | details and tutorials. 
49 | } 50 | \examples{ 51 | 52 | x <- data.frame(species = "A", 53 | decimalLongitude = c(0,34.84, 0, 33.98), 54 | decimalLatitude = c(23.08, 0, 0, 15.98)) 55 | 56 | cc_zero(x) 57 | cc_zero(x, value = "flagged") 58 | 59 | } 60 | \seealso{ 61 | Other Coordinates: 62 | \code{\link{cc_aohi}()}, 63 | \code{\link{cc_cap}()}, 64 | \code{\link{cc_cen}()}, 65 | \code{\link{cc_coun}()}, 66 | \code{\link{cc_dupl}()}, 67 | \code{\link{cc_equ}()}, 68 | \code{\link{cc_gbif}()}, 69 | \code{\link{cc_inst}()}, 70 | \code{\link{cc_iucn}()}, 71 | \code{\link{cc_outl}()}, 72 | \code{\link{cc_sea}()}, 73 | \code{\link{cc_urb}()}, 74 | \code{\link{cc_val}()} 75 | } 76 | \concept{Coordinates} 77 | \keyword{Coordinate} 78 | \keyword{cleaning} 79 | -------------------------------------------------------------------------------- /man/cd_ddmm.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/cd_ddmm.R 3 | \name{cd_ddmm} 4 | \alias{cd_ddmm} 5 | \title{Identify Datasets with a Degree Conversion Error} 6 | \usage{ 7 | cd_ddmm( 8 | x, 9 | lon = "decimalLongitude", 10 | lat = "decimalLatitude", 11 | ds = "dataset", 12 | pvalue = 0.025, 13 | diff = 1, 14 | mat_size = 1000, 15 | min_span = 2, 16 | value = "clean", 17 | verbose = TRUE, 18 | diagnostic = FALSE 19 | ) 20 | } 21 | \arguments{ 22 | \item{x}{data.frame. Containing geographical coordinates and species names.} 23 | 24 | \item{lon}{character string. The column with the longitude coordinates. 25 | Default = \dQuote{decimalLongitude}.} 26 | 27 | \item{lat}{character string. The column with the latitude coordinates. 28 | Default = \dQuote{decimalLatitude}.} 29 | 30 | \item{ds}{a character string. The column with the dataset of each record. In 31 | case \code{x} should be treated as a single dataset, identical for all 32 | records. Default = \dQuote{dataset}.} 33 | 34 | \item{pvalue}{numeric. 
The p-value for the one-sided binomial test to flag the test 35 | as passed or not. Both ddmm.pvalue and diff must be met. Default = 0.025.} 36 | 37 | \item{diff}{numeric. The threshold difference for the ddmm test. Indicates 38 | by which fraction the records with decimals below 0.6 must outnumber the 39 | records with decimals above 0.6. Default = 1.} 40 | 41 | \item{mat_size}{numeric. The size of the matrix for the binomial test. Must 42 | be changed in decimals (e.g. 100, 1000, 10000). Adapt to dataset size, 43 | generally 100 is better for datasets < 10000 records, 1000 is better for 44 | datasets with 10000 - 1M records. Higher values also work reasonably well 45 | for smaller datasets, therefore, default = 1000. For large datasets try 46 | 10000.} 47 | 48 | \item{min_span}{numeric. The minimum geographic extent of datasets to be 49 | tested. Default = 2.} 50 | 51 | \item{value}{character string. Defining the output value. See value.} 52 | 53 | \item{verbose}{logical. If TRUE reports the name of the test and the number 54 | of records flagged.} 55 | 56 | \item{diagnostic}{logical. If TRUE plots the analyses matrix for each 57 | dataset.} 58 | } 59 | \value{ 60 | Depending on the \sQuote{value} argument, either a \code{data.frame} 61 | with summary statistics and flags for each dataset (\dQuote{dataset}) or a 62 | \code{data.frame} containing the records considered correct by the test 63 | (\dQuote{clean}) or a logical vector (\dQuote{flagged}), with TRUE = test passed and FALSE = 64 | test failed/potentially problematic. Default = 65 | \dQuote{clean}. 66 | } 67 | \description{ 68 | This test flags datasets where a significant fraction of records has 69 | been subject to a common degree minute to decimal degree conversion error, 70 | where the degree sign is recognized as decimal delimiter. 71 | } 72 | \details{ 73 | If the degree sign is recognized as decimal delimiter during coordinate 74 | conversion, no coordinate decimals above 0.59 (59') are possible.
The test 75 | here uses a binomial test to test if a significant proportion of records in 76 | a dataset have been subject to this problem. The test is best adjusted via 77 | the diff argument. The lower \code{diff}, the stricter the test. Also scales 78 | with dataset size. Empirically, for datasets with < 5,000 unique coordinate 79 | records \code{diff = 0.1} has proven reasonable flagging most datasets with 80 | >25\% problematic records and all dataset with >50\% problematic records. 81 | For datasets between 5,000 and 100,000 geographic unique records \code{diff 82 | = 0.01} is recommended, for datasets between 100,000 and 1 M records diff = 83 | 0.001, and so on. 84 | } 85 | \note{ 86 | See \url{https://ropensci.github.io/CoordinateCleaner/} for more 87 | details and tutorials. 88 | } 89 | \examples{ 90 | 91 | clean <- data.frame(species = letters[1:10], 92 | decimalLongitude = runif(100, -180, 180), 93 | decimalLatitude = runif(100, -90,90), 94 | dataset = "FR") 95 | 96 | cd_ddmm(x = clean, value = "flagged") 97 | 98 | #problematic dataset 99 | lon <- sample(0:180, size = 100, replace = TRUE) + runif(100, 0,0.59) 100 | lat <- sample(0:90, size = 100, replace = TRUE) + runif(100, 0,0.59) 101 | 102 | prob <- data.frame(species = letters[1:10], 103 | decimalLongitude = lon, 104 | decimalLatitude = lat, 105 | dataset = "FR") 106 | 107 | cd_ddmm(x = prob, value = "flagged") 108 | 109 | } 110 | \seealso{ 111 | Other Datasets: 112 | \code{\link{cd_round}()} 113 | } 114 | \concept{Datasets} 115 | \keyword{"Coordinate} 116 | \keyword{"Dataset} 117 | \keyword{cleaning"} 118 | \keyword{level} 119 | -------------------------------------------------------------------------------- /man/cd_round.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/cd_round.R 3 | \name{cd_round} 4 | \alias{cd_round} 5 | \title{Identify Datasets with Rasterized Coordinates} 6 
| \usage{ 7 | cd_round( 8 | x, 9 | lon = "decimalLongitude", 10 | lat = "decimalLatitude", 11 | ds = "dataset", 12 | T1 = 7, 13 | reg_out_thresh = 2, 14 | reg_dist_min = 0.1, 15 | reg_dist_max = 2, 16 | min_unique_ds_size = 4, 17 | graphs = TRUE, 18 | test = "both", 19 | value = "clean", 20 | verbose = TRUE 21 | ) 22 | } 23 | \arguments{ 24 | \item{x}{data.frame. Containing geographical coordinates and species names.} 25 | 26 | \item{lon}{character string. The column with the longitude coordinates. 27 | Default = \dQuote{decimalLongitude}.} 28 | 29 | \item{lat}{character string. The column with the latitude coordinates. 30 | Default = \dQuote{decimalLatitude}.} 31 | 32 | \item{ds}{a character string. The column with the dataset of each record. In 33 | case \code{x} should be treated as a single dataset, identical for all 34 | records. Default = \dQuote{dataset}.} 35 | 36 | \item{T1}{numeric. The threshold for outlier detection in an 37 | interquantile range based test. This is the major parameter to specify the 38 | sensitivity of the test: lower values equal higher detection rate. Values 39 | between 7-11 are recommended. Default = 7.} 40 | 41 | \item{reg_out_thresh}{numeric. Threshold on the number of equal distances 42 | between outlier points. See details. Default = 2.} 43 | 44 | \item{reg_dist_min}{numeric. The minimum detection distance between 45 | outliers in degrees (the minimum resolution of grids that will be flagged). 46 | Default = 0.1.} 47 | 48 | \item{reg_dist_max}{numeric. The maximum detection distance between 49 | outliers in degrees (the maximum resolution of grids that will be flagged). 50 | Default = 2.} 51 | 52 | \item{min_unique_ds_size}{numeric. The minimum number of unique locations 53 | (values in the tested column) for datasets to be included in the test. 54 | Default = 4.} 55 | 56 | \item{graphs}{logical. If TRUE, diagnostic plots are produced. Default = 57 | TRUE.} 58 | 59 | \item{test}{character string.
Indicates which column to test. Either 60 | \dQuote{lat} for latitude, \dQuote{lon} for longitude, or \dQuote{both} for 61 | both. In the latter case datasets are only flagged if both test are failed. 62 | Default = \dQuote{both}} 63 | 64 | \item{value}{character string. Defining the output value. See value.} 65 | 66 | \item{verbose}{logical. If TRUE reports the name of the test and the number 67 | of records flagged.} 68 | } 69 | \value{ 70 | Depending on the \sQuote{value} argument, either a \code{data.frame} 71 | with summary statistics and flags for each dataset (\dQuote{dataset}) or a 72 | \code{data.frame} containing the records considered correct by the test 73 | (\dQuote{clean}) or a logical vector (\dQuote{flagged}), with TRUE = test passed and FALSE = 74 | test failed/potentially problematic. Default = 75 | \dQuote{clean}. 76 | } 77 | \description{ 78 | Flags datasets with periodicity patterns indicative of a rasterized 79 | (lattice) collection scheme, as often obtain from e.g. atlas data. Using a 80 | combination of autocorrelation and sliding-window outlier detection to 81 | identify periodicity patterns in the data. See 82 | \url{https://besjournals.onlinelibrary.wiley.com/doi/full/10.1111/2041-210X.13152} 83 | for further details and 84 | a description of the algorithm 85 | } 86 | \note{ 87 | See \url{https://ropensci.github.io/CoordinateCleaner/} for more 88 | details and tutorials. 
89 | } 90 | \examples{ 91 | 92 | #simulate bias grid, one degree resolution, 10\% error on a 1000 records dataset 93 | #simulate biased fraction of the data, grid resolution = 1 degree 94 | #simulate non-biased fraction of the data 95 | bi <- sample(3 + 0:5, size = 100, replace = TRUE) 96 | mu <- runif(3, 0, 15) 97 | sig <- runif(3, 0.1, 5) 98 | cl <- rnorm(n = 900, mean = mu, sd = sig) 99 | lon <- c(cl, bi) 100 | 101 | bi <- sample(9:13, size = 100, replace = TRUE) 102 | mu <- runif(3, 0, 15) 103 | sig <- runif(3, 0.1, 5) 104 | cl <- rnorm(n = 900, mean = mu, sd = sig) 105 | lat <- c(cl, bi) 106 | 107 | #add biased data 108 | 109 | inp <- data.frame(decimalLongitude = lon, 110 | decimalLatitude = lat, 111 | dataset = "test") 112 | 113 | 114 | #run test 115 | \dontrun{ 116 | cd_round(inp, value = "dataset") 117 | } 118 | 119 | 120 | } 121 | \seealso{ 122 | Other Datasets: 123 | \code{\link{cd_ddmm}()} 124 | } 125 | \concept{Datasets} 126 | \keyword{"Coordinate} 127 | \keyword{"Dataset} 128 | \keyword{cleaning"} 129 | \keyword{level} 130 | -------------------------------------------------------------------------------- /man/cf_age.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/cf_age.R 3 | \name{cf_age} 4 | \alias{cf_age} 5 | \title{Identify Fossils with Outlier Age} 6 | \usage{ 7 | cf_age( 8 | x, 9 | lon = "decimalLongitude", 10 | lat = "decimalLatitude", 11 | min_age = "min_ma", 12 | max_age = "max_ma", 13 | taxon = "accepted_name", 14 | method = "quantile", 15 | size_thresh = 7, 16 | mltpl = 5, 17 | replicates = 5, 18 | flag_thresh = 0.5, 19 | uniq_loc = FALSE, 20 | value = "clean", 21 | verbose = TRUE 22 | ) 23 | } 24 | \arguments{ 25 | \item{x}{data.frame. Containing fossil records with taxon names, ages, 26 | and geographic coordinates.} 27 | 28 | \item{lon}{character string. The column with the longitude coordinates. 
29 | To identify unique records if \code{uniq_loc = TRUE}. 30 | Default = \dQuote{decimalLongitude}.} 31 | 32 | \item{lat}{character string. The column with the latitude coordinates. 33 | Default = \dQuote{decimalLatitude}. To identify unique records if \code{uniq_loc = TRUE}.} 34 | 35 | \item{min_age}{character string. The column with the minimum age. Default 36 | = \dQuote{min_ma}.} 37 | 38 | \item{max_age}{character string. The column with the maximum age. Default 39 | = \dQuote{max_ma}.} 40 | 41 | \item{taxon}{character string. The column with the taxon name. If 42 | \dQuote{}, searches for outliers over the entire dataset, otherwise per 43 | specified taxon. Default = \dQuote{accepted_name}.} 44 | 45 | \item{method}{character string. Defining the method for outlier 46 | selection. See details. Either \dQuote{quantile} or \dQuote{mad}. Default 47 | = \dQuote{quantile}.} 48 | 49 | \item{size_thresh}{numeric. The minimum number of records needed for a 50 | dataset to be tested. Default = 7.} 51 | 52 | \item{mltpl}{numeric. The multiplier of the interquartile range 53 | (\code{method == 'quantile'}) or median absolute deviation (\code{method == 54 | 'mad'}) to identify outliers. See details. Default = 5.} 55 | 56 | \item{replicates}{numeric. The number of replications for the distance 57 | matrix calculation. See details. Default = 5.} 58 | 59 | \item{flag_thresh}{numeric. The fraction of passed replicates necessary to pass the test. 60 | See details. Default = 0.5.} 61 | 62 | \item{uniq_loc}{logical. If TRUE only single records per location and time 63 | point (and taxon if \code{taxon} != "") are used for the outlier testing. 64 | Default = FALSE.} 65 | 66 | \item{value}{character string. Defining the output value. See value.} 67 | 68 | \item{verbose}{logical.
If TRUE reports the name of the test and the number 69 | of records flagged.} 70 | } 71 | \value{ 72 | Depending on the \sQuote{value} argument, either a \code{data.frame} 73 | containing the records considered correct by the test (\dQuote{clean}) or a 74 | logical vector (\dQuote{flagged}), with TRUE = test passed and FALSE = test 75 | failed/potentially problematic . Default = \dQuote{clean}. 76 | } 77 | \description{ 78 | Removes or flags records that are temporal outliers based on 79 | interquantile ranges. 80 | } 81 | \details{ 82 | The outlier detection is based on an interquantile range test. A temporal 83 | distance matrix among all records is calculated based on a single point selected by random 84 | between the minimum and maximum age for each record. The mean distance for 85 | each point to all neighbours is calculated and the sum of these distances 86 | is then tested against the interquantile range and flagged as an outlier if 87 | \eqn{x > IQR(x) + q_75 * mltpl}. The test is replicated \sQuote{replicates} 88 | times, to account for dating uncertainty. Records are flagged as outliers 89 | if they are flagged by a fraction of more than \sQuote{flag.thresh} 90 | replicates. Only datasets/taxa comprising more than \sQuote{size_thresh} 91 | records are tested. Distance are calculated as Euclidean distance. 92 | } 93 | \note{ 94 | See \url{https://ropensci.github.io/CoordinateCleaner/} for more 95 | details and tutorials. 
96 | } 97 | \examples{ 98 | 99 | minages <- c(runif(n = 11, min = 10, max = 25), 62.5) 100 | x <- data.frame(species = c(letters[1:10], rep("z", 2)), 101 | min_ma = minages, 102 | max_ma = c(minages[1:11] + runif(n = 11, min = 0, max = 5), 65)) 103 | 104 | cf_age(x, value = "flagged", taxon = "") 105 | 106 | # unique locations only 107 | x <- data.frame(species = c(letters[1:10], rep("z", 2)), 108 | decimalLongitude = c(runif(n = 10, min = 4, max = 16), 75, 7), 109 | decimalLatitude = c(runif(n = 12, min = -5, max = 5)), 110 | min_ma = minages, 111 | max_ma = c(minages[1:11] + runif(n = 11, min = 0, max = 5), 65)) 112 | 113 | cf_age(x, value = "flagged", taxon = "", uniq_loc = TRUE) 114 | 115 | } 116 | \seealso{ 117 | Other fossils: 118 | \code{\link{cf_equal}()}, 119 | \code{\link{cf_outl}()}, 120 | \code{\link{cf_range}()}, 121 | \code{\link{write_pyrate}()} 122 | } 123 | \concept{fossils} 124 | \keyword{Coordinate} 125 | \keyword{Fossil} 126 | \keyword{Temporal} 127 | \keyword{cleaning} 128 | -------------------------------------------------------------------------------- /man/cf_equal.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/cf_equal.R 3 | \name{cf_equal} 4 | \alias{cf_equal} 5 | \title{Identify Fossils with equal min and max age} 6 | \usage{ 7 | cf_equal( 8 | x, 9 | min_age = "min_ma", 10 | max_age = "max_ma", 11 | value = "clean", 12 | verbose = TRUE 13 | ) 14 | } 15 | \arguments{ 16 | \item{x}{data.frame. Containing fossil records with taxon names, ages, 17 | and geographic coordinates.} 18 | 19 | \item{min_age}{character string. The column with the minimum age. Default 20 | = \dQuote{min_ma}.} 21 | 22 | \item{max_age}{character string. The column with the maximum age. Default 23 | = \dQuote{max_ma}.} 24 | 25 | \item{value}{character string. Defining the output value. See value.} 26 | 27 | \item{verbose}{logical. 
If TRUE reports the name of the test and the number 28 | of records flagged.} 29 | } 30 | \value{ 31 | Depending on the \sQuote{value} argument, either a \code{data.frame} 32 | containing the records considered correct by the test (\dQuote{clean}) or a 33 | logical vector (\dQuote{flagged}), with TRUE = test passed and FALSE = test 34 | failed/potentially problematic . Default = \dQuote{clean}. 35 | } 36 | \description{ 37 | Removes or flags records with equal minimum and maximum age. 38 | } 39 | \note{ 40 | See \url{https://ropensci.github.io/CoordinateCleaner/} for more 41 | details and tutorials. 42 | } 43 | \examples{ 44 | 45 | minages <- runif(n = 10, min = 0.1, max = 25) 46 | x <- data.frame(species = letters[1:10], 47 | min_ma = minages, 48 | max_ma = minages + runif(n = 10, min = 0, max = 10)) 49 | x <- rbind(x, data.frame(species = "z", 50 | min_ma = 5, 51 | max_ma = 5)) 52 | 53 | cf_equal(x, value = "flagged") 54 | 55 | } 56 | \seealso{ 57 | Other fossils: 58 | \code{\link{cf_age}()}, 59 | \code{\link{cf_outl}()}, 60 | \code{\link{cf_range}()}, 61 | \code{\link{write_pyrate}()} 62 | } 63 | \concept{fossils} 64 | \keyword{Fossils} 65 | \keyword{Temporal} 66 | \keyword{cleaning} 67 | -------------------------------------------------------------------------------- /man/cf_outl.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/cf_outl.R 3 | \name{cf_outl} 4 | \alias{cf_outl} 5 | \title{Identify Outlier Records in Space and Time} 6 | \usage{ 7 | cf_outl( 8 | x, 9 | lon = "decimalLongitude", 10 | lat = "decimalLatitude", 11 | min_age = "min_ma", 12 | max_age = "max_ma", 13 | taxon = "accepted_name", 14 | method = "quantile", 15 | size_thresh = 7, 16 | mltpl = 5, 17 | replicates = 5, 18 | flag_thresh = 0.5, 19 | uniq_loc = FALSE, 20 | value = "clean", 21 | verbose = TRUE 22 | ) 23 | } 24 | \arguments{ 25 | \item{x}{data.frame. 
Containing fossil records with taxon names, ages, 26 | and geographic coordinates.} 27 | 28 | \item{lon}{character string. The column with the longitude coordinates. 29 | To identify unique records if \code{uniq_loc = TRUE}. 30 | Default = \dQuote{decimalLongitude}.} 31 | 32 | \item{lat}{character string. The column with the longitude coordinates. 33 | Default = \dQuote{decimalLatitude}. To identify unique records if \code{uniq_loc = T}.} 34 | 35 | \item{min_age}{character string. The column with the minimum age. Default 36 | = \dQuote{min_ma}.} 37 | 38 | \item{max_age}{character string. The column with the maximum age. Default 39 | = \dQuote{max_ma}.} 40 | 41 | \item{taxon}{character string. The column with the taxon name. If 42 | \dQuote{}, searches for outliers over the entire dataset, otherwise per 43 | specified taxon. Default = \dQuote{accepted_name}.} 44 | 45 | \item{method}{character string. Defining the method for outlier 46 | selection. See details. Either \dQuote{quantile} or \dQuote{mad}. Default 47 | = \dQuote{quantile}.} 48 | 49 | \item{size_thresh}{numeric. The minimum number of records needed for a 50 | dataset to be tested. Default = 10.} 51 | 52 | \item{mltpl}{numeric. The multiplier of the interquartile range 53 | (\code{method == 'quantile'}) or median absolute deviation (\code{method == 54 | 'mad'}) to identify outliers. See details. Default = 5.} 55 | 56 | \item{replicates}{numeric. The number of replications for the distance 57 | matrix calculation. See details. Default = 5.} 58 | 59 | \item{flag_thresh}{numeric. The fraction of passed replicates necessary to pass the test. 60 | See details. Default = 0.5.} 61 | 62 | \item{uniq_loc}{logical. If TRUE only single records per location and time 63 | point (and taxon if \code{taxon} != "") are used for the outlier testing. 64 | Default = T.} 65 | 66 | \item{value}{character string. Defining the output value. See value.} 67 | 68 | \item{verbose}{logical. 
If TRUE reports the name of the test and the number 69 | of records flagged.} 70 | } 71 | \value{ 72 | Depending on the \sQuote{value} argument, either a \code{data.frame} 73 | containing the records considered correct by the test (\dQuote{clean}) or a 74 | logical vector (\dQuote{flagged}), with TRUE = test passed and FALSE = test 75 | failed/potentially problematic . Default = \dQuote{clean}. 76 | } 77 | \description{ 78 | Removes or flags records of fossils that are spatio-temporal outliers based on 79 | interquantile ranges. Records are flagged if they are either extreme in time 80 | or space, or both. 81 | } 82 | \details{ 83 | The outlier detection is based on an interquantile range test. In a first 84 | step a distance matrix of geographic distances among all records is 85 | calculate. Subsequently a similar distance matrix of temporal distances 86 | among all records is calculated based on a single point selected by random 87 | between the minimum and maximum age for each record. The mean distance for 88 | each point to all neighbours is calculated for both matrices and spatial and 89 | temporal distances are scaled to the same range. The sum of these distanced 90 | is then tested against the interquantile range and flagged as an outlier if 91 | \eqn{x > IQR(x) + q_75 * mltpl}. The test is replicated \sQuote{replicates} 92 | times, to account for temporal uncertainty. Records are flagged as outliers 93 | if they are flagged by a fraction of more than \sQuote{flag.thres} 94 | replicates. Only datasets/taxa comprising more than \sQuote{size_thresh} 95 | records are tested. Note that geographic distances are calculated as 96 | geospheric distances for datasets (or taxa) with fewer than 10,000 records 97 | and approximated as Euclidean distances for datasets/taxa with 10,000 to 98 | 25,000 records. Datasets/taxa comprising more than 25,000 records are 99 | skipped. 
100 | } 101 | \note{ 102 | See \url{https://ropensci.github.io/CoordinateCleaner/} for more 103 | details and tutorials. 104 | } 105 | \examples{ 106 | 107 | minages <- c(runif(n = 11, min = 10, max = 25), 62.5) 108 | x <- data.frame(species = c(letters[1:10], rep("z", 2)), 109 | lng = c(runif(n = 10, min = 4, max = 16), 75, 7), 110 | lat = c(runif(n = 12, min = -5, max = 5)), 111 | min_ma = minages, 112 | max_ma = c(minages[1:11] + runif(n = 11, min = 0, max = 5), 65)) 113 | 114 | cf_outl(x, value = "flagged", taxon = "") 115 | 116 | } 117 | \seealso{ 118 | Other fossils: 119 | \code{\link{cf_age}()}, 120 | \code{\link{cf_equal}()}, 121 | \code{\link{cf_range}()}, 122 | \code{\link{write_pyrate}()} 123 | } 124 | \concept{fossils} 125 | \keyword{Coordinate} 126 | \keyword{Fossil} 127 | \keyword{Temporal} 128 | \keyword{cleaning} 129 | -------------------------------------------------------------------------------- /man/cf_range.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/cf_range.R 3 | \name{cf_range} 4 | \alias{cf_range} 5 | \title{Identify Fossils with Extreme Age Ranges} 6 | \usage{ 7 | cf_range( 8 | x, 9 | lon = "decimalLongitude", 10 | lat = "decimalLatitude", 11 | min_age = "min_ma", 12 | max_age = "max_ma", 13 | taxon = "accepted_name", 14 | method = "quantile", 15 | mltpl = 5, 16 | size_thresh = 7, 17 | max_range = 500, 18 | uniq_loc = FALSE, 19 | value = "clean", 20 | verbose = TRUE 21 | ) 22 | } 23 | \arguments{ 24 | \item{x}{data.frame. Containing fossil records with taxon names, ages, 25 | and geographic coordinates.} 26 | 27 | \item{lon}{character string. The column with the longitude coordinates. 28 | To identify unique records if \code{uniq_loc = TRUE}. 29 | Default = \dQuote{decimalLongitude}.} 30 | 31 | \item{lat}{character string. The column with the longitude coordinates. 32 | Default = \dQuote{decimalLatitude}. 
To identify unique records if \code{uniq_loc = T}.} 33 | 34 | \item{min_age}{character string. The column with the minimum age. Default 35 | = \dQuote{min_ma}.} 36 | 37 | \item{max_age}{character string. The column with the maximum age. Default 38 | = \dQuote{max_ma}.} 39 | 40 | \item{taxon}{character string. The column with the taxon name. If 41 | \dQuote{}, searches for outliers over the entire dataset, otherwise per 42 | specified taxon. Default = \dQuote{accepted_name}.} 43 | 44 | \item{method}{character string. Defining the method for outlier 45 | selection. See details. Either \dQuote{quantile} or \dQuote{mad}. Default 46 | = \dQuote{quantile}.} 47 | 48 | \item{mltpl}{numeric. The multiplier of the interquartile range 49 | (\code{method == 'quantile'}) or median absolute deviation (\code{method == 50 | 'mad'}) to identify outliers. See details. Default = 5.} 51 | 52 | \item{size_thresh}{numeric. The minimum number of records needed for a 53 | dataset to be tested. Default = 10.} 54 | 55 | \item{max_range}{numeric. A absolute maximum time interval between min age 56 | and max age. Only relevant for \code{method} = \dQuote{time}.} 57 | 58 | \item{uniq_loc}{logical. If TRUE only single records per location and time 59 | point (and taxon if \code{taxon} != "") are used for the outlier testing. 60 | Default = T.} 61 | 62 | \item{value}{character string. Defining the output value. See value.} 63 | 64 | \item{verbose}{logical. If TRUE reports the name of the test and the number 65 | of records flagged.} 66 | } 67 | \value{ 68 | Depending on the \sQuote{value} argument, either a \code{data.frame} 69 | containing the records considered correct by the test (\dQuote{clean}) or a 70 | logical vector (\dQuote{flagged}), with TRUE = test passed and FALSE = test 71 | failed/potentially problematic . Default = \dQuote{clean}. 72 | } 73 | \description{ 74 | Removes or flags records with an unexpectedly large temporal range, based on a quantile 75 | outlier test. 
76 | } 77 | \note{ 78 | See \url{https://ropensci.github.io/CoordinateCleaner/} for more 79 | details and tutorials. 80 | } 81 | \examples{ 82 | 83 | minages <- runif(n = 11, min = 0.1, max = 25) 84 | x <- data.frame(species = c(letters[1:10], "z"), 85 | lng = c(runif(n = 9, min = 4, max = 16), 75, 7), 86 | lat = c(runif(n = 11, min = -5, max = 5)), 87 | min_ma = minages, 88 | max_ma = minages + c(runif(n = 10, min = 0, max = 5), 25)) 89 | 90 | cf_range(x, value = "flagged", taxon = "") 91 | 92 | } 93 | \seealso{ 94 | Other fossils: 95 | \code{\link{cf_age}()}, 96 | \code{\link{cf_equal}()}, 97 | \code{\link{cf_outl}()}, 98 | \code{\link{write_pyrate}()} 99 | } 100 | \concept{fossils} 101 | \keyword{Fossil} 102 | \keyword{Temporal} 103 | \keyword{cleaning} 104 | -------------------------------------------------------------------------------- /man/clean_dataset.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/clean_dataset.R 3 | \name{clean_dataset} 4 | \alias{clean_dataset} 5 | \title{Coordinate Cleaning using Dataset Properties} 6 | \usage{ 7 | clean_dataset( 8 | x, 9 | lon = "decimalLongitude", 10 | lat = "decimalLatitude", 11 | ds = "dataset", 12 | tests = c("ddmm", "periodicity"), 13 | value = "dataset", 14 | verbose = TRUE, 15 | ... 16 | ) 17 | } 18 | \arguments{ 19 | \item{x}{data.frame. Containing geographical coordinates and species names.} 20 | 21 | \item{lon}{character string. The column with the longitude coordinates. 22 | Default = \dQuote{decimalLongitude}.} 23 | 24 | \item{lat}{character string. The column with the latitude coordinates. 25 | Default = \dQuote{decimalLatitude}.} 26 | 27 | \item{ds}{a character string. The column with the dataset of each record. In 28 | case \code{x} should be treated as a single dataset, identical for all 29 | records. 
Default = \dQuote{dataset}.} 30 | 31 | \item{tests}{a vector of character strings, indicating which tests to run. 32 | See details for all tests available. Default = c("ddmm", "periodicity")} 33 | 34 | \item{value}{a character string. Defining the output value. See value. 35 | Default = \dQuote{dataset}.} 36 | 37 | \item{verbose}{logical. If TRUE reports the name of the test and the number 38 | of records flagged.} 39 | 40 | \item{...}{additional arguments to be passed to \code{\link{cd_ddmm}} and 41 | \code{\link{cd_round}} to customize test sensitivity.} 42 | } 43 | \value{ 44 | Depending on the \sQuote{value} argument: 45 | \describe{ 46 | \item{\dQuote{dataset}}{a \code{data.frame} with the 47 | the test summary statistics for each dataset in \code{x}} 48 | \item{\dQuote{clean}}{a \code{data.frame} containing only 49 | records from datasets in \code{x} that passed the tests} 50 | \item{\dQuote{flagged}}{a logical vector of the same length as 51 | rows in \code{x}, with TRUE = test passed and 52 | FALSE = test failed/potentially problematic.} 53 | } 54 | } 55 | \description{ 56 | Tests for problems associated with coordinate conversions and rounding, 57 | based on dataset properties. Includes test to identify contributing datasets with 58 | potential errors with converting ddmm to dd.dd, and 59 | periodicity in the data decimals indicating rounding or a raster basis 60 | linked to low coordinate precision. Specifically: 61 | \itemize{ 62 | \item ddmm tests for erroneous conversion from a degree 63 | minute format (ddmm) to a decimal degree (dd.dd) format 64 | \item periodicity test for periodicity in the data, 65 | which can indicate imprecise coordinates, due to rounding or rasterization. 66 | } 67 | } 68 | \details{ 69 | These tests are based on the statistical distribution of coordinates and 70 | their decimals within 71 | datasets of geographic distribution records to identify datasets with 72 | potential errors/biases. 
Three potential error sources can be identified. 73 | The ddmm flag tests for the particular pattern that emerges if geographical 74 | coordinates in a degree minute annotation are transferred into decimal 75 | degrees, simply replacing the degree symbol with the decimal point. This 76 | kind of problem has been observed by in older datasets first recorded on 77 | paper using typewriters, where e.g. a floating point was used as symbol for 78 | degrees. The function uses a binomial test to check if more records than 79 | expected have decimals below 0.6 (which is the maximum that can be obtained 80 | in minutes, as one degree has 60 minutes) and if the number of these records 81 | is higher than those above 0.59 by a certain proportion. The periodicity 82 | test uses rate estimation in a Poisson process to estimate if there is 83 | periodicity in the decimals of a dataset (as would be expected by for 84 | example rounding or data that was collected in a raster format) and if there 85 | is an over proportional number of records with the decimal 0 (full degrees) 86 | which indicates rounding and thus low precision. The default values are 87 | empirically optimized by with GBIF data, but should probably be adapted. 88 | } 89 | \note{ 90 | See \url{https://ropensci.github.io/CoordinateCleaner/} for more details 91 | and tutorials. 
92 | } 93 | \examples{ 94 | #Create test dataset 95 | clean <- data.frame(dataset = rep("clean", 1000), 96 | decimalLongitude = runif(min = -43, max = -40, n = 1000), 97 | decimalLatitude = runif(min = -13, max = -10, n = 1000)) 98 | 99 | bias.long <- c(round(runif(min = -42, max = -40, n = 500), 1), 100 | round(runif(min = -42, max = -40, n = 300), 0), 101 | runif(min = -42, max = -40, n = 200)) 102 | bias.lat <- c(round(runif(min = -12, max = -10, n = 500), 1), 103 | round(runif(min = -12, max = -10, n = 300), 0), 104 | runif(min = -12, max = -10, n = 200)) 105 | bias <- data.frame(dataset = rep("biased", 1000), 106 | decimalLongitude = bias.long, 107 | decimalLatitude = bias.lat) 108 | test <- rbind(clean, bias) 109 | 110 | \dontrun{ 111 | #run clean_dataset 112 | flags <- clean_dataset(test) 113 | 114 | #check problems 115 | #clean 116 | hist(test[test$dataset == rownames(flags[flags$summary,]), "decimalLongitude"]) 117 | #biased 118 | hist(test[test$dataset == rownames(flags[!flags$summary,]), "decimalLongitude"]) 119 | 120 | } 121 | } 122 | \seealso{ 123 | \code{\link{cd_ddmm}} \code{\link{cd_round}} 124 | 125 | Other Wrapper functions: 126 | \code{\link{clean_coordinates}()}, 127 | \code{\link{clean_fossils}()} 128 | } 129 | \concept{Wrapper functions} 130 | \keyword{Coordinate} 131 | \keyword{cleaning} 132 | \keyword{wrapper} 133 | -------------------------------------------------------------------------------- /man/countryref.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/CoordinateCleaner-package.R 3 | \docType{data} 4 | \name{countryref} 5 | \alias{countryref} 6 | \title{Country Centroids and Country Capitals} 7 | \format{ 8 | A data frame with 5,305 observations on 13 variables. 
\describe{
40 | } 41 | \examples{ 42 | 43 | data(countryref) 44 | head(countryref) 45 | } 46 | \keyword{gazetteers} 47 | -------------------------------------------------------------------------------- /man/institutions.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/CoordinateCleaner-package.R 3 | \docType{data} 4 | \name{institutions} 5 | \alias{institutions} 6 | \title{Global Locations of Biodiversity Institutions} 7 | \format{ 8 | A data frame with 12170 observations on 12 variables. 9 | } 10 | \source{ 11 | Compiled from various sources: \itemize{ \item Global Biodiversity 12 | Information Facility \url{https://www.gbif.org/} \item Wikipedia 13 | \url{https://www.wikipedia.org/} \item Geonames \url{https://www.geonames.org/} \item The Global 14 | Registry of Biodiversity Repositories \item Index 15 | Herbariorum \url{https://sweetgum.nybg.org/science/ih/} 16 | \item Botanic Gardens Conservation International \url{https://www.bgci.org/} 17 | } 18 | } 19 | \description{ 20 | A global gazetteer for biodiversity institutions from various sources, 21 | including zoos, museums, botanical gardens, GBIF contributors, herbaria, 22 | university collections. 
23 | } 24 | \examples{ 25 | 26 | data(institutions) 27 | str(institutions) 28 | 29 | } 30 | \keyword{gazetteers} 31 | -------------------------------------------------------------------------------- /man/is.spatialvalid.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/methods.spatialvalid.R 3 | \name{is.spatialvalid} 4 | \alias{is.spatialvalid} 5 | \title{Check spatialvalid object} 6 | \usage{ 7 | is.spatialvalid(x) 8 | } 9 | \arguments{ 10 | \item{x}{the object to be tested} 11 | } 12 | \value{ 13 | returns \code{TRUE} if its argument is a spatialvalid 14 | } 15 | \description{ 16 | Test if its argument is a spatialvalid object 17 | } 18 | \keyword{Check} 19 | -------------------------------------------------------------------------------- /man/pbdb_example.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/CoordinateCleaner-package.R 3 | \docType{data} 4 | \name{pbdb_example} 5 | \alias{pbdb_example} 6 | \title{Example data from the Paleobiologydatabase} 7 | \format{ 8 | A data frame with 5000 observations on 36 variables. 9 | } 10 | \source{ 11 | \itemize{ 12 | \item The Paleobiology database \url{https://paleobiodb.org/} 13 | \item Sara Varela, Javier Gonzalez Hernandez and Luciano Fabris Sgarbi (2016). 14 | paleobioDB: Download and Process Data from the Paleobiology Database. 15 | R package version 0.5.0. \url{https://CRAN.R-project.org/package=paleobioDB}. 16 | } 17 | } 18 | \description{ 19 | A dataset of 5000 flowering plant fossil occurrences as example for data of the paleobiology Database, downloaded using the paleobioDB packages as specified in the vignette \dQuote{Cleaning_PBDB_fossils_with_CoordinateCleaner}. 
20 | } 21 | \examples{ 22 | 23 | data(institutions) 24 | str(institutions) 25 | 26 | } 27 | \keyword{gazetteers} 28 | -------------------------------------------------------------------------------- /man/plot.spatialvalid.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/methods.spatialvalid.R 3 | \name{plot.spatialvalid} 4 | \alias{plot.spatialvalid} 5 | \title{Plot Method for Class Spatialvalid} 6 | \usage{ 7 | \method{plot}{spatialvalid}( 8 | x, 9 | lon = "decimalLongitude", 10 | lat = "decimalLatitude", 11 | bgmap = NULL, 12 | clean = TRUE, 13 | details = FALSE, 14 | pts_size = 1, 15 | font_size = 10, 16 | zoom_f = 0.1, 17 | ... 18 | ) 19 | } 20 | \arguments{ 21 | \item{x}{an object of the class \code{spatialvalid} as from 22 | \code{\link{clean_coordinates}}.} 23 | 24 | \item{lon}{character string. The column with the longitude coordinates. 25 | Default = \dQuote{decimalLongitude}.} 26 | 27 | \item{lat}{character string. The column with the latitude coordinates. 28 | Default = \dQuote{decimalLatitude}.} 29 | 30 | \item{bgmap}{an object of the class \code{SpatVector} or \code{sf} used as 31 | background map. Default = ggplot::borders()} 32 | 33 | \item{clean}{logical. If TRUE, non-flagged coordinates are included in the 34 | map.} 35 | 36 | \item{details}{logical. If TRUE, occurrences are color-coded by the type of 37 | flag.} 38 | 39 | \item{pts_size}{numeric. The point size for the plot.} 40 | 41 | \item{font_size}{numeric. The font size for the legend and axes} 42 | 43 | \item{zoom_f}{numeric. the fraction by which to expand the plotting area 44 | from the occurrence records. Increase, if countries do not show 45 | up on the background map.} 46 | 47 | \item{\dots}{arguments to be passed to methods.} 48 | } 49 | \value{ 50 | A plot of the records flagged as potentially erroneous by 51 | \code{\link{clean_coordinates}}. 
52 | } 53 | \description{ 54 | A set of plots to explore objects of the class \code{spatialvalid}. A plot 55 | to visualize the flags from clean_coordinates 56 | } 57 | \examples{ 58 | 59 | 60 | exmpl <- data.frame(species = sample(letters, size = 250, replace = TRUE), 61 | decimalLongitude = runif(250, min = 42, max = 51), 62 | decimalLatitude = runif(250, min = -26, max = -11)) 63 | 64 | test <- clean_coordinates(exmpl, species = "species", 65 | tests = c("sea", "gbif", "zeros"), 66 | verbose = FALSE) 67 | 68 | summary(test) 69 | plot(test) 70 | } 71 | \seealso{ 72 | \code{\link{clean_coordinates}} 73 | } 74 | \keyword{Visualisation} 75 | -------------------------------------------------------------------------------- /man/write_pyrate.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/write_pyrate.R 3 | \name{write_pyrate} 4 | \alias{write_pyrate} 5 | \title{Create Input Files for PyRate} 6 | \usage{ 7 | write_pyrate( 8 | x, 9 | status, 10 | fname, 11 | taxon = "accepted_name", 12 | min_age = "min_ma", 13 | max_age = "max_ma", 14 | trait = NULL, 15 | path = getwd(), 16 | replicates = 1, 17 | cutoff = NULL, 18 | random = TRUE 19 | ) 20 | } 21 | \arguments{ 22 | \item{x}{data.frame. Containing fossil records with taxon names, ages, 23 | and geographic coordinates.} 24 | 25 | \item{status}{a vector of character strings of length \code{nrow(x)}. 26 | Indicating for each record \dQuote{extinct} or \dQuote{extant}.} 27 | 28 | \item{fname}{a character string. The prefix to use for the output files.} 29 | 30 | \item{taxon}{character string. The column with the taxon name. 31 | Default = \dQuote{accepted_name}.} 32 | 33 | \item{min_age}{character string. The column with the minimum age. Default 34 | = \dQuote{min_ma}.} 35 | 36 | \item{max_age}{character string. The column with the maximum age. 
Default 37 | = \dQuote{max_ma}.} 38 | 39 | \item{trait}{a numeric vector of length \code{nrow(x)}. Indicating trait 40 | values for each record. Optional. Default = NULL.} 41 | 42 | \item{path}{a character string. giving the absolute path to write the output 43 | files. Default is the working directory.} 44 | 45 | \item{replicates}{a numerical. The number of replicates for the randomized 46 | age generation. See details. Default = 1.} 47 | 48 | \item{cutoff}{a numerical. Specify a threshold to exclude fossil occurrences 49 | with a high temporal uncertainty, i.e. with a wide temporal range between 50 | min_age and max_age. Examples: cutoff=NULL (default; all occurrences are 51 | kept in the data set) cutoff=5 (all occurrences with a temporal range of 5 52 | Myr or higher are excluded from the data set)} 53 | 54 | \item{random}{logical. Specify whether to take a random age (between MinT 55 | and MaxT) for each occurrence or the midpoint age. Note that this option 56 | defaults to TRUE if several replicates are generated (i.e. replicates > 1). 57 | Examples: random = TRUE (default) random = FALSE (use midpoint ages)} 58 | } 59 | \value{ 60 | PyRate input files in the working directory. 61 | } 62 | \description{ 63 | Creates the input necessary to run Pyrate, based on a data.frame with fossil 64 | ages (as derived e.g. from clean_fossils) and a vector of the 65 | extinction status for each sample. Creates files in the working directory! 66 | } 67 | \details{ 68 | The replicate option allows the user to generate several replicates of the 69 | data set in a single input file, each time re-drawing the ages of the 70 | occurrences at random from uniform distributions with boundaries MinT and 71 | MaxT. The replicates can be analysed in different runs (see PyRate command 72 | -j) and combining the results of these replicates is a way to account for 73 | the uncertainty of the true ages of the fossil occurrences. 
Examples: 74 | replicates=1 (default, generates 1 data set), replicates=10 (generates 10 75 | random replicates of the data set). 76 | } 77 | \note{ 78 | See \url{https://github.com/dsilvestro/PyRate/wiki} for more details 79 | and tutorials on PyRate and PyRate input. 80 | } 81 | \examples{ 82 | 83 | minages <- runif(250, 0, 65) 84 | exmpl <- data.frame(accepted_name = sample(letters, size = 250, replace = TRUE), 85 | lng = runif(250, min = 42, max = 51), 86 | lat = runif(250, min = -26, max = -11), 87 | min_ma = minages, 88 | max_ma = minages + runif(250, 0.1, 65)) 89 | 90 | #a vector with the status for each record, 91 | #make sure species are only classified as either extinct or extant, 92 | #otherwise the function will drop an error 93 | 94 | status <- sample(c("extinct", "extant"), size = nrow(exmpl), replace = TRUE) 95 | 96 | #or from a list of species 97 | status <- sample(c("extinct", "extant"), size = length(letters), replace = TRUE) 98 | names(status) <- letters 99 | status <- status[exmpl$accepted_name] 100 | 101 | \dontrun{ 102 | write_pyrate(x = exmpl,fname = "test", status = status) 103 | } 104 | 105 | } 106 | \seealso{ 107 | Other fossils: 108 | \code{\link{cf_age}()}, 109 | \code{\link{cf_equal}()}, 110 | \code{\link{cf_outl}()}, 111 | \code{\link{cf_range}()} 112 | } 113 | \concept{fossils} 114 | \keyword{Fossil} 115 | -------------------------------------------------------------------------------- /tests/testthat.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(CoordinateCleaner) 3 | 4 | test_check("CoordinateCleaner") 5 | -------------------------------------------------------------------------------- /tests/testthat/Rplots.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/CoordinateCleaner/9195fb64154dd20a921a412b5e4d24cc18d3803c/tests/testthat/Rplots.pdf 
-------------------------------------------------------------------------------- /tests/testthat/test_datasetlevel_functions.R: -------------------------------------------------------------------------------- 1 | context("Dataset level functions ds_*") 2 | 3 | #Create test dataset 4 | set.seed(1) 5 | clean <- data.frame(dataset = rep("clean", 1000), 6 | decimalLongitude = runif(min = -43, max = -40, n = 1000), 7 | decimalLatitude = runif(min = -13, max = -10, n = 1000)) 8 | bias.long <- c(round(runif(min = -42, max = -40, n = 500), 1), 9 | round(runif(min = -42, max = -40, n = 300), 0), 10 | runif(min = -42, max = -40, n = 200)) 11 | bias.lat <- c(round(runif(min = -12, max = -10, n = 500), 1), 12 | round(runif(min = -12, max = -10, n = 300), 0), 13 | runif(min = -12, max = -10, n = 200)) 14 | bias <- data.frame(dataset = rep("biased", 1000), 15 | decimalLongitude = bias.long, 16 | decimalLatitude = bias.lat) 17 | test <- rbind(clean, bias) 18 | 19 | #cd_round 20 | test_that("cd_round identifies existing bias", { 21 | skip_on_cran() 22 | #test target 23 | ## multiple datasets 24 | expect_equal(mean(cd_round(test, value = "dataset", 25 | graphs = F, test = "both")$summary), 0.5) 26 | expect_equal(mean(cd_round(test, value = "dataset", 27 | graphs = F, test = "lat")$summary), 0.5) 28 | expect_equal(mean(cd_round(test, value = "dataset", 29 | graphs = F, test = "lon")$summary), 0.5) 30 | 31 | # single dataset 32 | 33 | expect_equal(mean(cd_round(bias, value = "dataset", 34 | graphs = F, test = "both")$summary), 0) 35 | expect_equal(mean(cd_round(bias, value = "dataset", 36 | graphs = F, test = "lat")$summary), 0) 37 | expect_equal(mean(cd_round(bias, value = "dataset", 38 | graphs = F, test = "lon")$summary), 0) 39 | 40 | #dataset output 41 | t1 <- cd_round(test, value = "dataset", graphs = F) 42 | expect_is(t1, "data.frame") 43 | expect_equal(sum(t1$summary), 1) 44 | 45 | #flags output 46 | t2 <- cd_round(test, value = "flagged", graphs = F) 47 | expect_is(t2, 
"logical") 48 | expect_equal(mean(t2), 0.5) 49 | 50 | #graphs 51 | expect_equal(mean(cd_round(test, value = "flagged", graphs = T)), 0.5) 52 | 53 | # test targets 54 | 55 | 56 | #column specification 57 | expect_error(cd_round(x = test, lat = "latitude")) 58 | expect_error(cd_round(x = test, lon = "longitude")) 59 | expect_error(cd_round(x = test, ds = "source")) 60 | }) 61 | 62 | 63 | # cd_ddmm 64 | set.seed(1) 65 | clean <- data.frame(species = letters[1:10], 66 | decimalLongitude = runif(100, -180, 180), 67 | decimalLatitude = runif(100, -90,90), 68 | dataset = "clean") 69 | #problematic dataset 70 | lon <- sample(0:180, size = 100, replace = TRUE) + runif(100, 0,0.59) 71 | lat <- sample(0:90, size = 100, replace = TRUE) + runif(100, 0,0.59) 72 | 73 | prob <- data.frame(species = letters[1:10], 74 | decimalLongitude = lon, 75 | decimalLatitude = lat, 76 | dataset = "prob") 77 | 78 | test <- rbind(prob,clean) 79 | 80 | test_that("cd_ddmm identifies existing bias", { 81 | skip_on_cran() 82 | t1 <- cd_ddmm(test, value = "dataset") 83 | expect_is(t1, "data.frame") 84 | expect_equal(sum(t1$pass), 1) 85 | 86 | t2 <- cd_ddmm(test, value = "flagged") 87 | expect_is(t2, "logical") 88 | expect_equal(mean(t2), 0.5) 89 | 90 | expect_equal(mean(cd_ddmm(test, value = "flagged")), 0.5) 91 | 92 | expect_error(cd_ddmm(x = test, lat = "latitude")) 93 | expect_error(cd_ddmm(x = test, lon = "longitude")) 94 | expect_error(cd_ddmm(x = test, ds = "source")) 95 | }) 96 | -------------------------------------------------------------------------------- /tests/testthat/test_fossillevel_functions.R: -------------------------------------------------------------------------------- 1 | context("Fossil cleaning tc_*") 2 | 3 | set.seed(1) 4 | #cf_range 5 | minages <- runif(n = 100, min = 0.1, max = 25) 6 | set.seed(1) 7 | maxages <- minages + c(runif(n = 99, min = 0, max = 5), 25) 8 | 9 | test <- data.frame( 10 | species = c(letters[1:9], "z"), 11 | decimalLongitude = c(runif( 12 | n = 98, min 
= 4, max = 16 13 | ), 75, 7), 14 | decimalLatitude = c(runif( 15 | n = 100, min = -5, max = 5 16 | )), 17 | min_ma = minages, 18 | max_ma = maxages 19 | ) 20 | 21 | 22 | 23 | # cf_range 24 | test_that("cf_range identifies existing bias", { 25 | # skip_on_cran() 26 | #return value 27 | expect_is(cf_range(test, value = "flagged", taxon = ""), "logical") 28 | expect_is(cf_range(test, value = "clean", taxon = ""), "data.frame") 29 | 30 | #outlier method 31 | expect_equal(sum( 32 | cf_range( 33 | test, 34 | value = "flagged", 35 | method = "quantile", 36 | taxon = "" 37 | ) 38 | ), 99) 39 | expect_equal(sum(cf_range( 40 | test, 41 | value = "flagged", 42 | method = "mad", 43 | taxon = "" 44 | )), 99) 45 | expect_equal(sum( 46 | cf_range( 47 | test, 48 | value = "flagged", 49 | method = "time", 50 | taxon = "", 51 | max_range = 20 52 | ) 53 | ), 99) 54 | 55 | expect_equal(nrow( 56 | cf_range( 57 | test, 58 | value = "clean", 59 | method = "quantile", 60 | taxon = "", 61 | uniq_loc = TRUE 62 | ) 63 | ), 99) 64 | expect_equal(nrow( 65 | cf_range( 66 | test, 67 | value = "clean", 68 | method = "mad", 69 | taxon = "", 70 | uniq_loc = TRUE 71 | ) 72 | ), 99) 73 | expect_equal(nrow( 74 | cf_range( 75 | test, 76 | value = "clean", 77 | method = "time", 78 | taxon = "", 79 | uniq_loc = FALSE 80 | ) 81 | ), 100) 82 | 83 | expect_equal(nrow( 84 | cf_range( 85 | test, 86 | value = "clean", 87 | method = "quantile", 88 | taxon = "species", 89 | uniq_loc = TRUE 90 | ) 91 | ), 99) 92 | 93 | expect_equal(nrow( 94 | cf_range( 95 | test, 96 | value = "clean", 97 | method = "quantile", 98 | taxon = "species", 99 | uniq_loc = TRUE 100 | ) 101 | ), 99) 102 | }) 103 | 104 | #cf_age 105 | test_that("cf_age runs", { 106 | # skip_on_cran() 107 | #return value 108 | expect_is(cf_age(test, value = "flagged", taxon = ""), "logical") 109 | expect_is(cf_age(test, value = "clean", taxon = ""), "data.frame") 110 | 111 | #outlier method 112 | expect_equal(sum( 113 | cf_age( 114 | test, 115 | value = 
"flagged", 116 | method = "quantile", 117 | taxon = "", 118 | replicates = 10 119 | ) 120 | ), 100) 121 | expect_equal(sum( 122 | cf_age( 123 | test, 124 | value = "flagged", 125 | method = "quantile", 126 | taxon = "", 127 | uniq_loc = F, 128 | replicates = 10 129 | ) 130 | ), 100) 131 | expect_equal(sum( 132 | cf_age( 133 | test, 134 | value = "flagged", 135 | method = "quantile", 136 | taxon = "species", 137 | uniq_loc = F, 138 | replicates = 10 139 | ) 140 | ), 100) 141 | expect_equal(sum( 142 | cf_age( 143 | test, 144 | value = "flagged", 145 | method = "mad", 146 | taxon = "", 147 | replicates = 10, 148 | flag_thresh = 0.1, 149 | mltpl = 10 150 | ) 151 | ), 100) 152 | expect_equal(sum( 153 | cf_age( 154 | test, 155 | value = "flagged", 156 | method = "mad", 157 | taxon = "species", 158 | replicates = 10, 159 | flag_thresh = 0.1, 160 | mltpl = 10 161 | ) 162 | ), 100) 163 | expect_equal(sum( 164 | cf_age( 165 | test, 166 | value = "flagged", 167 | method = "mad", 168 | taxon = "", 169 | uniq_loc = F 170 | ) 171 | ), 100) 172 | }) 173 | 174 | 175 | #cf_outl 176 | set.seed(1) 177 | minages <- c(runif(n = 11, min = 10, max = 25), 62.5) 178 | test <- data.frame( 179 | species = c(letters[1:10], rep("z", 2)), 180 | decimalLongitude = c(runif( 181 | n = 10, min = 4, max = 16 182 | ), 75, 7), 183 | decimalLatitude = c(runif( 184 | n = 12, min = -5, max = 5 185 | )), 186 | min_ma = minages, 187 | max_ma = c(minages[1:11] + 188 | runif( 189 | n = 11, min = 0, max = 5 190 | ), 65) 191 | ) 192 | 193 | test_that("cf_outl identifies existing bias", { 194 | # skip_on_cran() 195 | #return value 196 | expect_is(cf_outl(test, value = "flagged", taxon = ""), "logical") 197 | expect_is(cf_outl(test, value = "clean", taxon = ""), "data.frame") 198 | 199 | #outlier method 200 | expect_equal(sum(cf_outl( 201 | test, 202 | value = "flagged", 203 | method = "quantile", 204 | taxon = "" 205 | )), 10) 206 | expect_equal(sum(cf_outl( 207 | test, 208 | taxon = "", 209 | value = 
"flagged", 210 | method = "quantile" 211 | )), 10) 212 | expect_equal(sum(cf_outl( 213 | test, 214 | value = "flagged", 215 | method = "mad", 216 | taxon = "" 217 | )), 10) 218 | expect_equal(sum(cf_outl( 219 | test, 220 | value = "flagged", 221 | method = "mad", 222 | taxon = "" 223 | )), 10) 224 | 225 | }) 226 | -------------------------------------------------------------------------------- /tests/testthat/test_wrapper_functions.R: -------------------------------------------------------------------------------- 1 | context("Wrapper functions") 2 | 3 | # Coordinate level cleaning 4 | set.seed(1) 5 | sp <- sample(letters, size = 250, replace = TRUE) 6 | set.seed(1) 7 | lon <- runif(250, min = 42, max = 51) 8 | set.seed(1) 9 | lat <- runif(250, min = -26, max = -11) 10 | 11 | exmpl <- data.frame(species = sp, 12 | decimalLongitude = lon, 13 | decimalLatitude = lat, 14 | ISO3 = "RUS") 15 | 16 | 17 | test_that("clean_coordinates produces correct output", { 18 | skip("message") 19 | t1 <- clean_coordinates(x = exmpl) 20 | expect_equal(ncol(t1), 14) 21 | expect_equal(nrow(t1), 250) 22 | expect_equal(sum(t1$.summary), 185) 23 | 24 | expect_is(plot(t1), "gg") 25 | expect_is(plot(t1, clean = FALSE), "gg") 26 | expect_is(plot(t1, details = FALSE), "gg") 27 | expect_is(plot(t1, details = FALSE, clean = FALSE), "gg") 28 | 29 | expect_is(summary(t1), "integer") 30 | 31 | expect_equal(is(t1), "spatialvalid") 32 | 33 | }) 34 | 35 | test_that("clean_coordinates countries argument produces correct output", { 36 | skip("message") 37 | #skip_on_cran() 38 | expect_equal(sum( 39 | clean_coordinates(x = exmpl, countries = "ISO3", 40 | tests = c("countries", "seas"))$.summary), 0) 41 | }) 42 | 43 | #Dataset level cleaning 44 | #Create test dataset 45 | clean <- data.frame(dataset = rep("clean", 1000), 46 | decimalLongitude = runif(min = -43, max = -40, n = 1000), 47 | decimalLatitude = runif(min = -13, max = -10, n = 1000)) 48 | 49 | bias.long <- c(round(runif(min = -42, max = -40, n = 
500), 1), 50 | round(runif(min = -42, max = -40, n = 300), 0), 51 | runif(min = -42, max = -40, n = 200)) 52 | bias.lat <- c(round(runif(min = -12, max = -10, n = 500), 1), 53 | round(runif(min = -12, max = -10, n = 300), 0), 54 | runif(min = -12, max = -10, n = 200)) 55 | bias <- data.frame(dataset = rep("biased", 1000), 56 | decimalLongitude = bias.long, 57 | decimalLatitude = bias.lat) 58 | test <- rbind(clean, bias) 59 | 60 | 61 | test_that("dataset level cleaning works", { 62 | skip("message") 63 | #test activated 64 | expect_is(clean_dataset(test), "data.frame") 65 | expect_is(clean_dataset(test, tests = c("ddmm")), "data.frame") 66 | expect_is(clean_dataset(test, tests = c("periodicity")), "data.frame") 67 | 68 | #Output value 69 | expect_is(clean_dataset(test, value = "clean"), "data.frame") 70 | expect_is(clean_dataset(test, value = "flagged"), "data.frame") 71 | 72 | expect_equal(sum(clean_dataset(test)$summary), 1) 73 | }) 74 | 75 | # test_that("CleanCoordinatesDS work", { 76 | # expect_equal(CleanCoordinatesDS(test), 250) 77 | # }) 78 | 79 | 80 | 81 | #Fossil wrapper function 82 | set.seed(1) 83 | minages <- runif(250, 0, 65) 84 | set.seed(1) 85 | lat <- runif(250, min = -26, max = -11) 86 | set.seed(1) 87 | lng <- runif(250, min = 42, max = 51) 88 | set.seed(1) 89 | age <- runif(250, 0.1, 65) 90 | 91 | exmpl <- data.frame(accepted_name = sample(letters, size = 250, replace = TRUE), 92 | decimalLongitude = lng, 93 | decimalLatitude = lat, 94 | min_ma = minages, 95 | max_ma = minages + age) 96 | 97 | 98 | test_that("fossil wrapper cleaning works", { 99 | skip("message") 100 | expect_is(clean_fossils(exmpl), "spatialvalid") 101 | expect_equal(sum(clean_fossils(exmpl)$.summary), 250) 102 | }) 103 | 104 | # test_that("CleanCoordinatesFOS work", { 105 | # expect_equal(sum(CleanCoordinatesFOS(exmpl)$summary), 249) 106 | # }) 107 | 108 | #Write Pyrate output 109 | 110 | test.str1 <- "test.pdf" 111 | 112 | test_that("WritePyRate interal functions work", { 113 | 
skip("message") 114 | expect_is(CoordinateCleaner:::.NoExtension(test.str1), "character") 115 | expect_equal(CoordinateCleaner:::.NoExtension(test.str1), "test") 116 | }) 117 | -------------------------------------------------------------------------------- /vignettes/Cleaning_GBIF_data_with_CoordinateCleaner_files/header-attrs-2.21/header-attrs.js: -------------------------------------------------------------------------------- 1 | // Pandoc 2.9 adds attributes on both header and div. We remove the former (to 2 | // be compatible with the behavior of Pandoc < 2.8). 3 | document.addEventListener('DOMContentLoaded', function(e) { 4 | var hs = document.querySelectorAll("div.section[class*='level'] > :first-child"); 5 | var i, h, a; 6 | for (i = 0; i < hs.length; i++) { 7 | h = hs[i]; 8 | if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6 9 | a = h.attributes; 10 | while (a.length > 0) h.removeAttribute(a[0].name); 11 | } 12 | }); 13 | -------------------------------------------------------------------------------- /vignettes/Using_custom_gazetteers.R: -------------------------------------------------------------------------------- 1 | ## ----options, echo = FALSE---------------------------------------------------- 2 | knitr::opts_chunk$set(eval = FALSE) 3 | 4 | ## ----------------------------------------------------------------------------- 5 | # library(CoordinateCleaner) 6 | # library(dplyr) 7 | # library(ggplot2) 8 | # library(rgbif) 9 | # library(viridis) 10 | # library(terra) 11 | # 12 | # #download data from GBIF 13 | # dat <- rgbif::occ_search(scientificName = "Avicennia", limit = 1000, 14 | # hasCoordinate = T) 15 | # 16 | # dat <- dat$data 17 | # 18 | # dat <- dat %>% 19 | # dplyr::select(species = name, decimalLongitude = decimalLongitude, 20 | # decimalLatitude = decimalLatitude, countryCode) 21 | # 22 | # # run with default gazetteer 23 | # outl <- cc_sea(dat, value = "flagged") 24 | # ## OGR data source with driver: ESRI Shapefile 
25 | # ## Source: "C:\Users\az64mycy\AppData\Local\Temp\Rtmp4SRhHV", layer: "ne_110m_land" 26 | # ## with 127 features 27 | # ## It has 3 fields 28 | # 29 | # plo <- data.frame(dat, outlier = as.factor(!outl)) 30 | # 31 | # #plot results 32 | # ggplot() + 33 | # borders(fill = "grey60") + 34 | # geom_point(data = plo, 35 | # aes(x = decimalLongitude, y = decimalLatitude, col = outlier)) + 36 | # scale_color_viridis(discrete = T, name = "Flagged outlier") + 37 | # coord_fixed() + 38 | # theme_bw() + 39 | # theme(legend.position = "bottom") 40 | 41 | ## ----------------------------------------------------------------------------- 42 | # # The buffered custom gazetteer 43 | # data("buffland") 44 | # buffland <- terra::vect(buffland) 45 | # plot(buffland) 46 | 47 | ## ----------------------------------------------------------------------------- 48 | # 49 | # # run with custom gazetteer 50 | # outl <- cc_sea(dat, value = "flagged", ref = buffland) 51 | # 52 | # plo <- data.frame(dat, outlier = as.factor(!outl)) 53 | # 54 | # #plot results 55 | # ggplot()+ 56 | # borders(fill = "grey60")+ 57 | # geom_point(data = plo, 58 | # aes(x = decimalLongitude, y = decimalLatitude, col = outlier))+ 59 | # scale_color_viridis(discrete = T, name = "Flagged outlier")+ 60 | # coord_fixed()+ 61 | # theme_bw()+ 62 | # theme(legend.position = "bottom") 63 | 64 | -------------------------------------------------------------------------------- /vignettes/Using_custom_gazetteers.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Using customized gazetteers" 3 | output: rmarkdown::html_vignette 4 | vignette: > 5 | %\VignetteIndexEntry{Using customized gazetteers} 6 | %\VignetteEngine{knitr::rmarkdown} 7 | %\VignetteSuggests{rgbif} 8 | \usepackage[utf8]{inputenc} 9 | --- 10 | 11 | ```{r options, echo = FALSE} 12 | knitr::opts_chunk$set(eval = FALSE) 13 | ``` 14 | 15 | CoordinateCleaner identifies potentially erroneous geographic records 
with 16 | coordinates assigned to the sea, country centroids, country capitals, urban 17 | areas, institutions, the GBIF headquarters and countries based on the comparison 18 | with geographic gazetteers (i.e. reference databases). All of these functions 19 | include default reference databases compiled from various sources. These default 20 | references have been selected suitable for regional to global analyses. They 21 | will also work for smaller scale analyses, but in some cases different references 22 | might be desirable and available. This could be for instance centroids of small 23 | scale political units, a different set of urban areas, or a different coastline 24 | when working with coastal species. To account for this, each *CoordinateCleaner* 25 | function using a gazetteer has a `ref` argument to specify custom gazetteers. 26 | 27 | We will use the case of coastlines and a coastal species to demonstrate the 28 | application of custom gazetteers. The purpose of `cc_sea` is to flag records in 29 | the sea, since these often represent erroneous and undesired records for 30 | terrestrial organisms. The standard gazetteer for this function is fetched from 31 | naturalearthdata.com at a 1:50m scale. However, often coordinates available from 32 | public databases are only precise at the scale of kilometres, which might lead 33 | to an overly critical flagging of coordinates close to the coastline, which is a 34 | problem especially for coastal or intertidal species. We illustrate the issue 35 | for the mangrove tree genus *Avicennia*. 
36 | 37 | 38 | ```{r} 39 | library(CoordinateCleaner) 40 | library(dplyr) 41 | library(ggplot2) 42 | library(rgbif) 43 | library(viridis) 44 | library(terra) 45 | 46 | #download data from GBIF 47 | dat <- rgbif::occ_search(scientificName = "Avicennia", limit = 1000, 48 | hasCoordinate = T) 49 | 50 | dat <- dat$data 51 | 52 | dat <- dat %>% 53 | dplyr::select(species = name, decimalLongitude = decimalLongitude, 54 | decimalLatitude = decimalLatitude, countryCode) 55 | 56 | # run with default gazetteer 57 | outl <- cc_sea(dat, value = "flagged") 58 | ## OGR data source with driver: ESRI Shapefile 59 | ## Source: "C:\Users\az64mycy\AppData\Local\Temp\Rtmp4SRhHV", layer: "ne_110m_land" 60 | ## with 127 features 61 | ## It has 3 fields 62 | 63 | plo <- data.frame(dat, outlier = as.factor(!outl)) 64 | 65 | #plot results 66 | ggplot() + 67 | borders(fill = "grey60") + 68 | geom_point(data = plo, 69 | aes(x = decimalLongitude, y = decimalLatitude, col = outlier)) + 70 | scale_color_viridis(discrete = T, name = "Flagged outlier") + 71 | coord_fixed() + 72 | theme_bw() + 73 | theme(legend.position = "bottom") 74 | ``` 75 | 76 | ![plot of chunk cusgaz1](cusgaz-cusgaz1-1.png) 77 | 78 | A large number of the coastal records gets flagged, which in this case is undesirable, because it is not a function of the records being wrong, but rather of the precision of the coordinates and the resolution of the reference. To avoid this problem you can use a buffered reference, which avoids flagging records close to the coast line and only flags records from the open ocean. *CoordinateCleaner* comes with a one degree buffered reference (`buffland`). In case a narrower or distance true buffer is necessary, you can provide any SpatVector similar in structure to `buffland` via the `ref` argument. 
79 | 80 | 81 | ```{r} 82 | # The buffered custom gazetteer 83 | data("buffland") 84 | buffland <- terra::vect(buffland) 85 | plot(buffland) 86 | ``` 87 | 88 | ![plot of chunk cusgaz2](cusgaz-cusgaz2-1.png) 89 | 90 | ```{r} 91 | 92 | # run with custom gazetteer 93 | outl <- cc_sea(dat, value = "flagged", ref = buffland) 94 | 95 | plo <- data.frame(dat, outlier = as.factor(!outl)) 96 | 97 | #plot results 98 | ggplot()+ 99 | borders(fill = "grey60")+ 100 | geom_point(data = plo, 101 | aes(x = decimalLongitude, y = decimalLatitude, col = outlier))+ 102 | scale_color_viridis(discrete = T, name = "Flagged outlier")+ 103 | coord_fixed()+ 104 | theme_bw()+ 105 | theme(legend.position = "bottom") 106 | ``` 107 | 108 | ![plot of chunk cusgaz2](cusgaz-cusgaz2-2.png) 109 | 110 | -------------------------------------------------------------------------------- /vignettes/Using_custom_gazetteers.Rmd.orig: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Using customized gazetteers" 3 | output: rmarkdown::html_vignette 4 | vignette: > 5 | %\VignetteIndexEntry{Using customized gazetteers} 6 | %\VignetteEngine{knitr::rmarkdown} 7 | %\VignetteSuggests{rgbif} 8 | \usepackage[utf8]{inputenc} 9 | --- 10 | 11 | ```{r clgbif1, echo = F, eval = T} 12 | knitr::opts_chunk$set(fig.width = 7, fig.path = "cusgaz-") 13 | ``` 14 | 15 | 16 | CoordinateCleaner identifies potentially erroneous geographic records with coordinates assigned to the sea, country centroids, country capitals, urban areas, institutions, the GBIF headquarters and countries based on the comparison with geographic gazetteers (i.e. reference databases). All of these functions include default reference databases compiled from various sources. These default references have been selected suitable for regional to global analyses. They will also work for smaller scale analyses, but in some cases different references might be desirable and available. 
This could be for instance centroids of small scale political units, a different set of urban areas, or a different coastline when working with coastal species. To account for this, each *CoordinateCleaner* function using a gazetteer has a `ref` argument to specify custom gazetteers. 17 | 18 | We will use the case of coastlines and a coastal species to demonstrate the application of custom gazetteers. The purpose of `cc_sea` is to flag records in the sea, since these often represent erroneous and undesired records for terrestrial organisms. The standard gazetteer for this function is fetched from naturalearthdata.com at a 1:50m scale. However, often coordinates available from public databases are only precise at the scale of kilometres, which might lead to an overly critical flagging of coordinates close to the coastline, which is a problem especially for coastal or intertidal species. We illustrate the issue for the mangrove tree genus *Avicennia*. 19 | 20 | ```{r cusgaz1, warning = F, message = F, collapse = T, fig.width=8, fig.height=6} 21 | library(CoordinateCleaner) 22 | library(dplyr) 23 | library(ggplot2) 24 | library(rgbif) 25 | library(sp) 26 | library(viridis) 27 | 28 | 29 | #download data from GBIF 30 | dat <- rgbif::occ_search(scientificName = "Avicennia", limit = 1000, 31 | hasCoordinate = T) 32 | 33 | dat <- dat$data 34 | 35 | dat <- dat %>% 36 | dplyr::select(species = name, decimalLongitude = decimalLongitude, 37 | decimalLatitude = decimalLatitude, countryCode) 38 | 39 | # run with default gazetteer 40 | outl <- cc_sea(dat, value = "flagged") 41 | 42 | plo <- data.frame(dat, outlier = as.factor(!outl)) 43 | 44 | #plot results 45 | ggplot()+ 46 | borders(fill = "grey60")+ 47 | geom_point(data = plo, 48 | aes(x = decimalLongitude, y = decimalLatitude, col = outlier))+ 49 | scale_color_viridis(discrete = T, name = "Flagged outlier")+ 50 | coord_fixed()+ 51 | theme_bw()+ 52 | theme(legend.position = "bottom") 53 | 54 | ``` 55 | 56 | A large number 
of the coastal records gets flagged, which in this case is undesirable, because it is not a function of the records being wrong, but rather of the precision of the coordinates and the resolution of the reference. To avoid this problem you can use a buffered reference, which avoids flagging records close to the coast line and only flags records from the open ocean. *CoordinateCleaner* comes with a one degree buffered reference (`buffland`). In case a narrower or distance true buffer is necessary, you can provide any SpatVector similar in structure to `buffland` via the `ref` argument. 57 | 58 | ```{r cusgaz2, warning = F, message = F, collapse = T, fig.width=8, fig.height=6} 59 | # The buffered custom gazetteer 60 | data("buffland") 61 | plot(buffland) 62 | 63 | # run with custom gazetteer 64 | outl <- cc_sea(dat, value = "flagged", ref = buffland) 65 | 66 | plo <- data.frame(dat, outlier = as.factor(!outl)) 67 | 68 | #plot results 69 | ggplot()+ 70 | borders(fill = "grey60")+ 71 | geom_point(data = plo, 72 | aes(x = decimalLongitude, y = decimalLatitude, col = outlier))+ 73 | scale_color_viridis(discrete = T, name = "Flagged outlier")+ 74 | coord_fixed()+ 75 | theme_bw()+ 76 | theme(legend.position = "bottom") 77 | ``` 78 | 79 | -------------------------------------------------------------------------------- /vignettes/cusgaz-cusgaz1-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/CoordinateCleaner/9195fb64154dd20a921a412b5e4d24cc18d3803c/vignettes/cusgaz-cusgaz1-1.png -------------------------------------------------------------------------------- /vignettes/cusgaz-cusgaz2-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/CoordinateCleaner/9195fb64154dd20a921a412b5e4d24cc18d3803c/vignettes/cusgaz-cusgaz2-1.png -------------------------------------------------------------------------------- 
/vignettes/cusgaz-cusgaz2-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/CoordinateCleaner/9195fb64154dd20a921a412b5e4d24cc18d3803c/vignettes/cusgaz-cusgaz2-2.png -------------------------------------------------------------------------------- /vignettes/gbif-clgbif11-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/CoordinateCleaner/9195fb64154dd20a921a412b5e4d24cc18d3803c/vignettes/gbif-clgbif11-1.png -------------------------------------------------------------------------------- /vignettes/gbif-clgbif16-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/CoordinateCleaner/9195fb64154dd20a921a412b5e4d24cc18d3803c/vignettes/gbif-clgbif16-1.png -------------------------------------------------------------------------------- /vignettes/gbif-clgbif17-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/CoordinateCleaner/9195fb64154dd20a921a412b5e4d24cc18d3803c/vignettes/gbif-clgbif17-1.png -------------------------------------------------------------------------------- /vignettes/gbif-clgbif18-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/CoordinateCleaner/9195fb64154dd20a921a412b5e4d24cc18d3803c/vignettes/gbif-clgbif18-1.png -------------------------------------------------------------------------------- /vignettes/gbif-clgbif19-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/CoordinateCleaner/9195fb64154dd20a921a412b5e4d24cc18d3803c/vignettes/gbif-clgbif19-1.png -------------------------------------------------------------------------------- /vignettes/gbif-clgbif5-1.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/CoordinateCleaner/9195fb64154dd20a921a412b5e4d24cc18d3803c/vignettes/gbif-clgbif5-1.png -------------------------------------------------------------------------------- /vignettes/gbif-clgbif6-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/CoordinateCleaner/9195fb64154dd20a921a412b5e4d24cc18d3803c/vignettes/gbif-clgbif6-1.png -------------------------------------------------------------------------------- /vignettes/pbdb-unnamed-chunk-15-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/CoordinateCleaner/9195fb64154dd20a921a412b5e4d24cc18d3803c/vignettes/pbdb-unnamed-chunk-15-1.png -------------------------------------------------------------------------------- /vignettes/pbdb-unnamed-chunk-16-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/CoordinateCleaner/9195fb64154dd20a921a412b5e4d24cc18d3803c/vignettes/pbdb-unnamed-chunk-16-1.png -------------------------------------------------------------------------------- /vignettes/pbdb-unnamed-chunk-17-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/CoordinateCleaner/9195fb64154dd20a921a412b5e4d24cc18d3803c/vignettes/pbdb-unnamed-chunk-17-1.png -------------------------------------------------------------------------------- /vignettes/pbdb-unnamed-chunk-24-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/CoordinateCleaner/9195fb64154dd20a921a412b5e4d24cc18d3803c/vignettes/pbdb-unnamed-chunk-24-1.png -------------------------------------------------------------------------------- 
/vignettes/pbdb-unnamed-chunk-24-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/CoordinateCleaner/9195fb64154dd20a921a412b5e4d24cc18d3803c/vignettes/pbdb-unnamed-chunk-24-2.png -------------------------------------------------------------------------------- /vignettes/pbdb-unnamed-chunk-25-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/CoordinateCleaner/9195fb64154dd20a921a412b5e4d24cc18d3803c/vignettes/pbdb-unnamed-chunk-25-1.png -------------------------------------------------------------------------------- /vignettes/pbdb-unnamed-chunk-25-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/CoordinateCleaner/9195fb64154dd20a921a412b5e4d24cc18d3803c/vignettes/pbdb-unnamed-chunk-25-2.png -------------------------------------------------------------------------------- /vignettes/pbdb-unnamed-chunk-26-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/CoordinateCleaner/9195fb64154dd20a921a412b5e4d24cc18d3803c/vignettes/pbdb-unnamed-chunk-26-1.png -------------------------------------------------------------------------------- /vignettes/pbdb-unnamed-chunk-26-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/CoordinateCleaner/9195fb64154dd20a921a412b5e4d24cc18d3803c/vignettes/pbdb-unnamed-chunk-26-2.png -------------------------------------------------------------------------------- /vignettes/pbdb-unnamed-chunk-27-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/CoordinateCleaner/9195fb64154dd20a921a412b5e4d24cc18d3803c/vignettes/pbdb-unnamed-chunk-27-1.png 
-------------------------------------------------------------------------------- /vignettes/pbdb-unnamed-chunk-27-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/CoordinateCleaner/9195fb64154dd20a921a412b5e4d24cc18d3803c/vignettes/pbdb-unnamed-chunk-27-2.png -------------------------------------------------------------------------------- /vignettes/pbdb-unnamed-chunk-33-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/CoordinateCleaner/9195fb64154dd20a921a412b5e4d24cc18d3803c/vignettes/pbdb-unnamed-chunk-33-1.png -------------------------------------------------------------------------------- /vignettes/pbdb-unnamed-chunk-33-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/CoordinateCleaner/9195fb64154dd20a921a412b5e4d24cc18d3803c/vignettes/pbdb-unnamed-chunk-33-2.png -------------------------------------------------------------------------------- /vignettes/pbdb-unnamed-chunk-35-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/CoordinateCleaner/9195fb64154dd20a921a412b5e4d24cc18d3803c/vignettes/pbdb-unnamed-chunk-35-1.png -------------------------------------------------------------------------------- /vignettes/pbdb-unnamed-chunk-35-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/CoordinateCleaner/9195fb64154dd20a921a412b5e4d24cc18d3803c/vignettes/pbdb-unnamed-chunk-35-2.png -------------------------------------------------------------------------------- /vignettes/pbdb-unnamed-chunk-7-1.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ropensci/CoordinateCleaner/9195fb64154dd20a921a412b5e4d24cc18d3803c/vignettes/pbdb-unnamed-chunk-7-1.png --------------------------------------------------------------------------------