├── .Rbuildignore ├── .github ├── .gitignore ├── CODEOWNERS ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md └── workflows │ ├── R-CMD-check-hard.yaml │ ├── R-CMD-check.yaml │ ├── lock.yaml │ ├── pkgdown.yaml │ ├── pr-commands.yaml │ └── test-coverage.yaml ├── .gitignore ├── DESCRIPTION ├── LICENSE ├── LICENSE.md ├── NAMESPACE ├── NEWS.md ├── R ├── autoplot.R ├── buffer.R ├── checks.R ├── compat-vctrs-helpers.R ├── compat-vctrs.R ├── cpp11.R ├── data.R ├── labels.R ├── misc.R ├── spatial_block_cv.R ├── spatial_clustering_cv.R ├── spatial_nndm_cv.R ├── spatial_vfold_cv.R ├── spatialsample-package.R ├── sysdata.rda ├── zzz-compat-vctrs-spatial_block_cv.R ├── zzz-compat-vctrs-spatial_buffer_vfold_cv.R ├── zzz-compat-vctrs-spatial_clustering_cv.R ├── zzz-compat-vctrs-spatial_leave_location_out_cv.R ├── zzz-compat-vctrs-spatial_nndm_cv.R └── zzz.R ├── README.Rmd ├── README.md ├── _pkgdown.yml ├── codecov.yml ├── cran-comments.md ├── data-raw └── boston_canopy.R ├── data └── boston_canopy.R ├── inst ├── CITATION ├── generate_vctrs.R └── vctrs_template.R ├── man ├── autoplot.spatial_rset.Rd ├── boston_canopy.Rd ├── buffer_indices.Rd ├── check_v.Rd ├── figures │ ├── README-2022-06-12_boston-anim-.gif │ ├── README-2022-06-12_boston_static-1.png │ └── logo.png ├── reexports.Rd ├── spatial_block_cv.Rd ├── spatial_clustering_cv.Rd ├── spatial_nndm_cv.Rd ├── spatial_vfold.Rd └── spatialsample-package.Rd ├── revdep ├── .gitignore ├── README.md ├── cran.md ├── email.yml ├── failures.md └── problems.md ├── spatialsample.Rproj ├── src ├── .gitignore ├── code.cpp └── cpp11.cpp ├── tests ├── testthat.R └── testthat │ ├── _snaps │ ├── autoplot │ │ ├── block-plots-with-grid.svg │ │ ├── block-plots.svg │ │ ├── block-split-plots.svg │ │ ├── buffered-llo-set-plot.svg │ │ ├── buffered-llo-split-plot.svg │ │ ├── buffered-rsample-plot.svg │ │ ├── buffered-rset-plot.svg │ │ ├── buffered-vfold-plot.svg │ │ ├── buffered-vfold-split.svg │ │ ├── cluster-plots.svg │ │ ├── cluster-split-plots.svg │ │ ├── expand-bbox.svg │ │ ├── repeated-block-cv.svg │ │ ├── repeated-llo.svg │ │ ├── repeated-vfold.svg │ │ └── snake-flips-rows-the-right-way.svg │ ├── buffer.md │ ├── compat-vctrs.md │ ├── misc.md │ ├── spatial_block_cv.md │ ├── spatial_clustering_cv.md │ ├── spatial_nndm_cv.md │ └── spatial_vfold_cv.md │ ├── helper-rset.R │ ├── test-autoplot.R │ ├── test-buffer.R │ ├── test-compat-dplyr.R │ ├── test-compat-vctrs.R │ ├── test-misc.R │ ├── test-spatial_block_cv.R │ ├── test-spatial_clustering_cv.R │ ├── test-spatial_nndm_cv.R │ └── test-spatial_vfold_cv.R └── vignettes ├── .gitignore ├── articles └── buffering.Rmd └── spatialsample.Rmd /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^spatialsample\.Rproj$ 2 | ^\.Rproj\.user$ 3 | ^README\.Rmd$ 4 | ^LICENSE\.md$ 5 | ^codecov\.yml$ 6 | ^\.github$ 7 | ^_pkgdown\.yml$ 8 | ^docs$ 9 | ^pkgdown$ 10 | ^cran-comments\.md$ 11 | ^CRAN-RELEASE$ 12 | ^doc$ 13 | ^Meta$ 14 | ^data-raw$ 15 | ^vignettes/articles$ 16 | ^CRAN-SUBMISSION$ 17 | ^revdep$ 18 | -------------------------------------------------------------------------------- /.github/.gitignore: -------------------------------------------------------------------------------- 1 | *.html 2 | -------------------------------------------------------------------------------- /.github/CODEOWNERS: -------------------------------------------------------------------------------- 1 | # CODEOWNERS for spatialsample 2 | # https://www.tidyverse.org/development/understudies 3 | .github/CODEOWNERS @topepo 
@juliasilge 4 | -------------------------------------------------------------------------------- /.github/CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | We as members, contributors, and leaders pledge to make participation in our 6 | community a harassment-free experience for everyone, regardless of age, body 7 | size, visible or invisible disability, ethnicity, sex characteristics, gender 8 | identity and expression, level of experience, education, socio-economic status, 9 | nationality, personal appearance, race, caste, color, religion, or sexual 10 | identity and orientation. 11 | 12 | We pledge to act and interact in ways that contribute to an open, welcoming, 13 | diverse, inclusive, and healthy community. 14 | 15 | ## Our Standards 16 | 17 | Examples of behavior that contributes to a positive environment for our 18 | community include: 19 | 20 | * Demonstrating empathy and kindness toward other people 21 | * Being respectful of differing opinions, viewpoints, and experiences 22 | * Giving and gracefully accepting constructive feedback 23 | * Accepting responsibility and apologizing to those affected by our mistakes, 24 | and learning from the experience 25 | * Focusing on what is best not just for us as individuals, but for the overall 26 | community 27 | 28 | Examples of unacceptable behavior include: 29 | 30 | * The use of sexualized language or imagery, and sexual attention or advances of 31 | any kind 32 | * Trolling, insulting or derogatory comments, and personal or political attacks 33 | * Public or private harassment 34 | * Publishing others' private information, such as a physical or email address, 35 | without their explicit permission 36 | * Other conduct which could reasonably be considered inappropriate in a 37 | professional setting 38 | 39 | ## Enforcement Responsibilities 40 | 41 | Community leaders are responsible for clarifying and enforcing our standards of 42 | acceptable behavior and will take appropriate and fair corrective action in 43 | response to any behavior that they deem inappropriate, threatening, offensive, 44 | or harmful. 45 | 46 | Community leaders have the right and responsibility to remove, edit, or reject 47 | comments, commits, code, wiki edits, issues, and other contributions that are 48 | not aligned to this Code of Conduct, and will communicate reasons for moderation 49 | decisions when appropriate. 50 | 51 | ## Scope 52 | 53 | This Code of Conduct applies within all community spaces, and also applies when 54 | an individual is officially representing the community in public spaces. 55 | Examples of representing our community include using an official e-mail address, 56 | posting via an official social media account, or acting as an appointed 57 | representative at an online or offline event. 58 | 59 | ## Enforcement 60 | 61 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 62 | reported to the community leaders responsible for enforcement at codeofconduct@posit.co. 63 | All complaints will be reviewed and investigated promptly and fairly. 64 | 65 | All community leaders are obligated to respect the privacy and security of the 66 | reporter of any incident. 67 | 68 | ## Enforcement Guidelines 69 | 70 | Community leaders will follow these Community Impact Guidelines in determining 71 | the consequences for any action they deem in violation of this Code of Conduct: 72 | 73 | ### 1. 
Correction 74 | 75 | **Community Impact**: Use of inappropriate language or other behavior deemed 76 | unprofessional or unwelcome in the community. 77 | 78 | **Consequence**: A private, written warning from community leaders, providing 79 | clarity around the nature of the violation and an explanation of why the 80 | behavior was inappropriate. A public apology may be requested. 81 | 82 | ### 2. Warning 83 | 84 | **Community Impact**: A violation through a single incident or series of 85 | actions. 86 | 87 | **Consequence**: A warning with consequences for continued behavior. No 88 | interaction with the people involved, including unsolicited interaction with 89 | those enforcing the Code of Conduct, for a specified period of time. This 90 | includes avoiding interactions in community spaces as well as external channels 91 | like social media. Violating these terms may lead to a temporary or permanent 92 | ban. 93 | 94 | ### 3. Temporary Ban 95 | 96 | **Community Impact**: A serious violation of community standards, including 97 | sustained inappropriate behavior. 98 | 99 | **Consequence**: A temporary ban from any sort of interaction or public 100 | communication with the community for a specified period of time. No public or 101 | private interaction with the people involved, including unsolicited interaction 102 | with those enforcing the Code of Conduct, is allowed during this period. 103 | Violating these terms may lead to a permanent ban. 104 | 105 | ### 4. Permanent Ban 106 | 107 | **Community Impact**: Demonstrating a pattern of violation of community 108 | standards, including sustained inappropriate behavior, harassment of an 109 | individual, or aggression toward or disparagement of classes of individuals. 110 | 111 | **Consequence**: A permanent ban from any sort of public interaction within the 112 | community. 113 | 114 | ## Attribution 115 | 116 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], 117 | version 2.1, available at 118 | . 119 | 120 | Community Impact Guidelines were inspired by 121 | [Mozilla's code of conduct enforcement ladder][https://github.com/mozilla/inclusion]. 122 | 123 | For answers to common questions about this code of conduct, see the FAQ at 124 | . Translations are available at . 125 | 126 | [homepage]: https://www.contributor-covenant.org 127 | -------------------------------------------------------------------------------- /.github/CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to tidymodels 2 | 3 | For more detailed information about contributing to tidymodels packages, see our [**development contributing guide**](https://www.tidymodels.org/contribute/). 4 | 5 | ## Documentation 6 | 7 | Typos or grammatical errors in documentation may be edited directly using the GitHub web interface, as long as the changes are made in the _source_ file. 8 | 9 | * YES ✅: you edit a roxygen comment in an `.R` file in the `R/` directory. 10 | * NO 🚫: you edit an `.Rd` file in the `man/` directory. 11 | 12 | We use [roxygen2](https://cran.r-project.org/package=roxygen2), with [Markdown syntax](https://cran.r-project.org/web/packages/roxygen2/vignettes/rd-formatting.html), for documentation. 13 | 14 | ## Code 15 | 16 | Before you submit 🎯 a pull request on a tidymodels package, always file an issue and confirm the tidymodels team agrees with your idea and is happy with your basic proposal. 
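A rough sketch of the fork-and-branch workflow recommended in the bullets below, using the usethis helpers this guide points to (the repo slug is this package; the branch name is just an illustrative placeholder):

```r
# Fork and clone the package you want to contribute to
usethis::create_from_github("tidymodels/spatialsample", fork = TRUE)

# Start a branch for your pull request
usethis::pr_init("my-feature-branch")

# ... make changes, add testthat tests, add a NEWS.md bullet ...

# Check locally, then push the branch and open the pull request
devtools::check()
usethis::pr_push()
```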
17 | 18 | The [tidymodels packages](https://www.tidymodels.org/packages/) work together. Each package contains its own unit tests, while integration tests and other tests using all the packages are contained in [extratests](https://github.com/tidymodels/extratests). 19 | 20 | * For pull requests, we recommend that you [create a fork of this repo](https://usethis.r-lib.org/articles/articles/pr-functions.html) with `usethis::create_from_github()`, and then initiate a new branch with `usethis::pr_init()`. 21 | * Look at the build status before and after making changes. The `README` contains badges for any continuous integration services used by the package. 22 | * New code should follow the tidyverse [style guide](http://style.tidyverse.org). You can use the [styler](https://CRAN.R-project.org/package=styler) package to apply these styles, but please don't restyle code that has nothing to do with your PR. 23 | * For user-facing changes, add a bullet to the top of `NEWS.md` below the current development version header describing the changes made followed by your GitHub username, and links to relevant issue(s)/PR(s). 24 | * We use [testthat](https://cran.r-project.org/package=testthat). Contributions with test cases included are easier to accept. 25 | * If your contribution spans the use of more than one package, consider building [extratests](https://github.com/tidymodels/extratests) with your changes to check for breakages and/or adding new tests there. Let us know in your PR if you ran these extra tests. 26 | 27 | ### Code of Conduct 28 | 29 | This project is released with a [Contributor Code of Conduct](https://contributor-covenant.org/version/2/0/CODE_OF_CONDUCT.html). By contributing to this project, you agree to abide by its terms. 30 | -------------------------------------------------------------------------------- /.github/workflows/R-CMD-check-hard.yaml: -------------------------------------------------------------------------------- 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples 2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help 3 | # 4 | # NOTE: This workflow only directly installs "hard" dependencies, i.e. Depends, 5 | # Imports, and LinkingTo dependencies. Notably, Suggests dependencies are never 6 | # installed, with the exception of testthat, knitr, and rmarkdown. The cache is 7 | # never used to avoid accidentally restoring a cache containing a suggested 8 | # dependency. 
9 | on: 10 | push: 11 | branches: [main] 12 | pull_request: 13 | branches: [main] 14 | 15 | name: R-CMD-check-hard 16 | 17 | jobs: 18 | R-CMD-check: 19 | runs-on: ${{ matrix.config.os }} 20 | 21 | name: ${{ matrix.config.os }} (${{ matrix.config.r }}) 22 | 23 | strategy: 24 | fail-fast: false 25 | matrix: 26 | config: 27 | - {os: ubuntu-latest, r: 'release'} 28 | 29 | env: 30 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 31 | R_KEEP_PKG_SOURCE: yes 32 | 33 | steps: 34 | - uses: actions/checkout@v3 35 | 36 | - uses: r-lib/actions/setup-pandoc@v2 37 | 38 | - uses: r-lib/actions/setup-r@v2 39 | with: 40 | r-version: ${{ matrix.config.r }} 41 | http-user-agent: ${{ matrix.config.http-user-agent }} 42 | use-public-rspm: true 43 | 44 | - uses: r-lib/actions/setup-r-dependencies@v2 45 | with: 46 | dependencies: '"hard"' 47 | cache: false 48 | extra-packages: | 49 | any::rcmdcheck 50 | any::testthat 51 | any::knitr 52 | any::rmarkdown 53 | needs: check 54 | 55 | # We need PROJ >= 7.0.0 in order to take advantage of the CDN 56 | # 57 | # Older Linux builds have older PROJ versions, and as such we need to 58 | # install the newer versions from ppa:ubuntugis 59 | # 60 | # Unfortunately, that means waiting until we're testing on 22.04 61 | # (which has 8.2.1-1 in `universe`) 62 | # to get rid of this step 63 | - name: Install Linux dependencies 64 | if: runner.os == 'Linux' 65 | run: | 66 | sudo add-apt-repository ppa:ubuntugis/ubuntugis-unstable 67 | sudo apt-get update 68 | sudo apt-get install libspatialite-dev libgeotiff-dev libudunits2-dev libgdal-dev libgeos-dev libproj-dev 69 | Rscript -e 'install.packages("sf", repos = "https://cloud.r-project.org")' 70 | 71 | - uses: r-lib/actions/check-r-package@v2 72 | with: 73 | upload-snapshots: true 74 | -------------------------------------------------------------------------------- /.github/workflows/R-CMD-check.yaml: -------------------------------------------------------------------------------- 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples 2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help 3 | # 4 | # NOTE: This workflow is overkill for most R packages and 5 | # check-standard.yaml is likely a better choice. 6 | # usethis::use_github_action("check-standard") will install it. 
7 | on: 8 | push: 9 | branches: [main, master] 10 | pull_request: 11 | branches: [main, master] 12 | 13 | name: R-CMD-check 14 | 15 | jobs: 16 | R-CMD-check: 17 | runs-on: ${{ matrix.config.os }} 18 | 19 | name: ${{ matrix.config.os }} (${{ matrix.config.r }}) 20 | 21 | strategy: 22 | fail-fast: false 23 | matrix: 24 | config: 25 | - {os: macos-latest, r: 'release'} 26 | 27 | - {os: windows-latest, r: 'release'} 28 | # use 4.1 to check with rtools40's older compiler 29 | - {os: windows-latest, r: '4.1'} 30 | 31 | - {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'} 32 | - {os: ubuntu-latest, r: 'release'} 33 | - {os: ubuntu-latest, r: 'oldrel-1'} 34 | - {os: ubuntu-latest, r: 'oldrel-2'} 35 | - {os: ubuntu-latest, r: 'oldrel-3'} 36 | - {os: ubuntu-latest, r: 'oldrel-4'} 37 | 38 | env: 39 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 40 | R_KEEP_PKG_SOURCE: yes 41 | 42 | steps: 43 | - uses: actions/checkout@v3 44 | 45 | - uses: r-lib/actions/setup-pandoc@v2 46 | 47 | - uses: r-lib/actions/setup-r@v2 48 | with: 49 | r-version: ${{ matrix.config.r }} 50 | http-user-agent: ${{ matrix.config.http-user-agent }} 51 | use-public-rspm: true 52 | 53 | - uses: r-lib/actions/setup-r-dependencies@v2 54 | with: 55 | extra-packages: any::rcmdcheck 56 | needs: check 57 | 58 | # We need PROJ >= 7.0.0 in order to take advantage of the CDN 59 | # 60 | # Older Linux builds have older PROJ versions, and as such we need to 61 | # install the newer versions from ppa:ubuntugis 62 | # 63 | # Unfortunately, that means waiting until we're testing on 22.04 64 | # (which has 8.2.1-1 in `universe`) 65 | # to get rid of this step 66 | - name: Install Linux dependencies 67 | if: runner.os == 'Linux' 68 | run: | 69 | sudo add-apt-repository ppa:ubuntugis/ubuntugis-unstable 70 | sudo apt-get update 71 | sudo apt-get install libspatialite-dev libgeotiff-dev libudunits2-dev libgdal-dev libgeos-dev libproj-dev 72 | Rscript -e 'install.packages("sf", repos = "https://cloud.r-project.org")' 73 | 74 | - uses: r-lib/actions/check-r-package@v2 75 | with: 76 | upload-snapshots: true 77 | -------------------------------------------------------------------------------- /.github/workflows/lock.yaml: -------------------------------------------------------------------------------- 1 | name: 'Lock Threads' 2 | 3 | on: 4 | schedule: 5 | - cron: '0 0 * * *' 6 | 7 | jobs: 8 | lock: 9 | runs-on: ubuntu-latest 10 | steps: 11 | - uses: dessant/lock-threads@v2 12 | with: 13 | github-token: ${{ github.token }} 14 | issue-lock-inactive-days: '14' 15 | # issue-exclude-labels: '' 16 | # issue-lock-labels: 'outdated' 17 | issue-lock-comment: > 18 | This issue has been automatically locked. If you believe you have 19 | found a related problem, please file a new issue (with a reprex: 20 | ) and link to this issue. 21 | issue-lock-reason: '' 22 | pr-lock-inactive-days: '14' 23 | # pr-exclude-labels: 'wip' 24 | pr-lock-labels: '' 25 | pr-lock-comment: > 26 | This pull request has been automatically locked. If you believe you 27 | have found a related problem, please file a new issue (with a reprex: 28 | ) and link to this issue. 29 | pr-lock-reason: '' 30 | # process-only: 'issues' 31 | -------------------------------------------------------------------------------- /.github/workflows/pkgdown.yaml: -------------------------------------------------------------------------------- 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples 2 | # Need help debugging build failures? 
Start at https://github.com/r-lib/actions#where-to-find-help 3 | on: 4 | push: 5 | branches: [main, master] 6 | pull_request: 7 | branches: [main, master] 8 | release: 9 | types: [published] 10 | workflow_dispatch: 11 | 12 | name: pkgdown 13 | 14 | jobs: 15 | pkgdown: 16 | runs-on: ubuntu-latest 17 | # Only restrict concurrency for non-PR jobs 18 | concurrency: 19 | group: pkgdown-${{ github.event_name != 'pull_request' || github.run_id }} 20 | env: 21 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 22 | steps: 23 | - uses: actions/checkout@v3 24 | 25 | - uses: r-lib/actions/setup-pandoc@v2 26 | 27 | - uses: r-lib/actions/setup-r@v2 28 | with: 29 | use-public-rspm: true 30 | 31 | - uses: r-lib/actions/setup-r-dependencies@v2 32 | with: 33 | extra-packages: any::pkgdown, local::. 34 | needs: website 35 | 36 | - name: Build site 37 | run: pkgdown::build_site_github_pages(new_process = FALSE, install = FALSE) 38 | shell: Rscript {0} 39 | 40 | - name: Deploy to GitHub pages 🚀 41 | if: github.event_name != 'pull_request' 42 | uses: JamesIves/github-pages-deploy-action@v4.4.1 43 | with: 44 | clean: false 45 | branch: gh-pages 46 | folder: docs 47 | -------------------------------------------------------------------------------- /.github/workflows/pr-commands.yaml: -------------------------------------------------------------------------------- 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples 2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help 3 | on: 4 | issue_comment: 5 | types: [created] 6 | 7 | name: Commands 8 | 9 | jobs: 10 | document: 11 | if: ${{ github.event.issue.pull_request && (github.event.comment.author_association == 'MEMBER' || github.event.comment.author_association == 'OWNER') && startsWith(github.event.comment.body, '/document') }} 12 | name: document 13 | runs-on: ubuntu-latest 14 | env: 15 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 16 | steps: 17 | - uses: actions/checkout@v3 18 | 19 | - uses: r-lib/actions/pr-fetch@v2 20 | with: 21 | repo-token: ${{ secrets.GITHUB_TOKEN }} 22 | 23 | - uses: r-lib/actions/setup-r@v2 24 | with: 25 | use-public-rspm: true 26 | 27 | - uses: r-lib/actions/setup-r-dependencies@v2 28 | with: 29 | extra-packages: any::roxygen2 30 | needs: pr-document 31 | 32 | - name: Document 33 | run: roxygen2::roxygenise() 34 | shell: Rscript {0} 35 | 36 | - name: commit 37 | run: | 38 | git config --local user.name "$GITHUB_ACTOR" 39 | git config --local user.email "$GITHUB_ACTOR@users.noreply.github.com" 40 | git add man/\* NAMESPACE 41 | git commit -m 'Document' 42 | 43 | - uses: r-lib/actions/pr-push@v2 44 | with: 45 | repo-token: ${{ secrets.GITHUB_TOKEN }} 46 | 47 | style: 48 | if: ${{ github.event.issue.pull_request && (github.event.comment.author_association == 'MEMBER' || github.event.comment.author_association == 'OWNER') && startsWith(github.event.comment.body, '/style') }} 49 | name: style 50 | runs-on: ubuntu-latest 51 | env: 52 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 53 | steps: 54 | - uses: actions/checkout@v3 55 | 56 | - uses: r-lib/actions/pr-fetch@v2 57 | with: 58 | repo-token: ${{ secrets.GITHUB_TOKEN }} 59 | 60 | - uses: r-lib/actions/setup-r@v2 61 | 62 | - name: Install dependencies 63 | run: install.packages("styler") 64 | shell: Rscript {0} 65 | 66 | - name: Style 67 | run: styler::style_pkg() 68 | shell: Rscript {0} 69 | 70 | - name: commit 71 | run: | 72 | git config --local user.name "$GITHUB_ACTOR" 73 | git config --local user.email 
"$GITHUB_ACTOR@users.noreply.github.com" 74 | git add \*.R 75 | git commit -m 'Style' 76 | 77 | - uses: r-lib/actions/pr-push@v2 78 | with: 79 | repo-token: ${{ secrets.GITHUB_TOKEN }} 80 | -------------------------------------------------------------------------------- /.github/workflows/test-coverage.yaml: -------------------------------------------------------------------------------- 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples 2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help 3 | on: 4 | push: 5 | branches: [main, master] 6 | pull_request: 7 | branches: [main, master] 8 | 9 | name: test-coverage 10 | 11 | jobs: 12 | test-coverage: 13 | runs-on: ubuntu-latest 14 | env: 15 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 16 | 17 | steps: 18 | - uses: actions/checkout@v3 19 | 20 | - uses: r-lib/actions/setup-r@v2 21 | with: 22 | use-public-rspm: true 23 | 24 | - uses: r-lib/actions/setup-r-dependencies@v2 25 | with: 26 | extra-packages: any::covr 27 | needs: coverage 28 | 29 | - name: Test coverage 30 | run: | 31 | covr::codecov( 32 | quiet = FALSE, 33 | clean = FALSE, 34 | install_path = file.path(Sys.getenv("RUNNER_TEMP"), "package") 35 | ) 36 | shell: Rscript {0} 37 | 38 | - name: Show testthat output 39 | if: always() 40 | run: | 41 | ## -------------------------------------------------------------------- 42 | find ${{ runner.temp }}/package -name 'testthat.Rout*' -exec cat '{}' \; || true 43 | shell: bash 44 | 45 | - name: Upload test results 46 | if: failure() 47 | uses: actions/upload-artifact@v3 48 | with: 49 | name: coverage-test-failures 50 | path: ${{ runner.temp }}/package 51 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .Rproj.user 2 | .Rhistory 3 | .RData 4 | .Ruserdata 5 | inst/doc 6 | docs 7 | pkgdown 8 | /doc/ 9 | /Meta/ 10 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: spatialsample 2 | Title: Spatial Resampling Infrastructure 3 | Version: 0.6.0.9000 4 | Authors@R: c( 5 | person("Michael", "Mahoney", , "mike.mahoney.218@gmail.com", role = c("aut", "cre"), 6 | comment = c(ORCID = "0000-0003-2402-304X")), 7 | person("Julia", "Silge", , "julia.silge@posit.co", role = "aut", 8 | comment = c(ORCID = "0000-0002-3671-836X")), 9 | person(given = "Posit Software, PBC", role = c("cph", "fnd")) 10 | ) 11 | Description: Functions and classes for spatial resampling to use with the 12 | 'rsample' package, such as spatial cross-validation (Brenning, 2012) 13 | . The scope of 'rsample' and 14 | 'spatialsample' is to provide the basic building blocks for creating 15 | and analyzing resamples of a spatial data set, but neither package 16 | includes functions for modeling or computing statistics. The resampled 17 | spatial data sets created by 'spatialsample' do not contain much 18 | overhead in memory. 
19 | License: MIT + file LICENSE 20 | URL: https://github.com/tidymodels/spatialsample, 21 | https://spatialsample.tidymodels.org 22 | BugReports: https://github.com/tidymodels/spatialsample/issues 23 | Depends: 24 | R (>= 3.6) 25 | Imports: 26 | dplyr (>= 1.0.0), 27 | ggplot2, 28 | glue, 29 | purrr, 30 | rlang (>= 1.0.0), 31 | rsample (>= 1.1.1), 32 | sf (>= 1.0-9), 33 | stats, 34 | tibble, 35 | tidyselect, 36 | units, 37 | vctrs (>= 0.3.6) 38 | Suggests: 39 | covr, 40 | gifski, 41 | knitr, 42 | lwgeom, 43 | modeldata, 44 | rmarkdown, 45 | testthat (>= 3.0.0), 46 | tidyr, 47 | vdiffr, 48 | whisker, 49 | withr, 50 | yardstick 51 | LinkingTo: 52 | cpp11 53 | VignetteBuilder: 54 | knitr 55 | Config/Needs/website: 56 | tidyverse/tidytemplate 57 | Config/testthat/edition: 3 58 | Config/testthat/parallel: true 59 | Encoding: UTF-8 60 | LazyData: true 61 | Roxygen: list(markdown = TRUE) 62 | RoxygenNote: 7.3.2 63 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | YEAR: 2023 2 | COPYRIGHT HOLDER: spatialsample authors 3 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | Copyright (c) 2023 spatialsample authors 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | S3method(autoplot,spatial_block_cv) 4 | S3method(autoplot,spatial_rset) 5 | S3method(autoplot,spatial_rsplit) 6 | S3method(pretty,spatial_block_cv) 7 | S3method(pretty,spatial_buffer_vfold_cv) 8 | S3method(pretty,spatial_clustering_cv) 9 | S3method(pretty,spatial_leave_location_out_cv) 10 | S3method(print,spatial_block_cv) 11 | S3method(print,spatial_buffer_vfold_cv) 12 | S3method(print,spatial_clustering_cv) 13 | S3method(print,spatial_leave_location_out_cv) 14 | S3method(vec_cast,data.frame.spatial_block_cv) 15 | S3method(vec_cast,data.frame.spatial_buffer_vfold_cv) 16 | S3method(vec_cast,data.frame.spatial_clustering_cv) 17 | S3method(vec_cast,data.frame.spatial_leave_location_out_cv) 18 | S3method(vec_cast,data.frame.spatial_nndm_cv) 19 | S3method(vec_cast,spatial_block_cv.data.frame) 20 | S3method(vec_cast,spatial_block_cv.spatial_block_cv) 21 | S3method(vec_cast,spatial_block_cv.tbl_df) 22 | S3method(vec_cast,spatial_buffer_vfold_cv.data.frame) 23 | S3method(vec_cast,spatial_buffer_vfold_cv.spatial_buffer_vfold_cv) 24 | S3method(vec_cast,spatial_buffer_vfold_cv.tbl_df) 25 | S3method(vec_cast,spatial_clustering_cv.data.frame) 26 | S3method(vec_cast,spatial_clustering_cv.spatial_clustering_cv) 27 | S3method(vec_cast,spatial_clustering_cv.tbl_df) 28 | S3method(vec_cast,spatial_leave_location_out_cv.data.frame) 29 | S3method(vec_cast,spatial_leave_location_out_cv.spatial_leave_location_out_cv) 30 | S3method(vec_cast,spatial_leave_location_out_cv.tbl_df) 31 | S3method(vec_cast,spatial_nndm_cv.data.frame) 32 | S3method(vec_cast,spatial_nndm_cv.spatial_nndm_cv) 33 | S3method(vec_cast,spatial_nndm_cv.tbl_df) 34 | S3method(vec_cast,tbl_df.spatial_block_cv) 35 | S3method(vec_cast,tbl_df.spatial_buffer_vfold_cv) 36 | S3method(vec_cast,tbl_df.spatial_clustering_cv) 37 | S3method(vec_cast,tbl_df.spatial_leave_location_out_cv) 38 | S3method(vec_cast,tbl_df.spatial_nndm_cv) 39 | S3method(vec_ptype2,data.frame.spatial_block_cv) 40 | S3method(vec_ptype2,data.frame.spatial_buffer_vfold_cv) 41 | S3method(vec_ptype2,data.frame.spatial_clustering_cv) 42 | S3method(vec_ptype2,data.frame.spatial_leave_location_out_cv) 43 | S3method(vec_ptype2,data.frame.spatial_nndm_cv) 44 | S3method(vec_ptype2,spatial_block_cv.data.frame) 45 | S3method(vec_ptype2,spatial_block_cv.spatial_block_cv) 46 | S3method(vec_ptype2,spatial_block_cv.tbl_df) 47 | S3method(vec_ptype2,spatial_buffer_vfold_cv.data.frame) 48 | S3method(vec_ptype2,spatial_buffer_vfold_cv.spatial_buffer_vfold_cv) 49 | S3method(vec_ptype2,spatial_buffer_vfold_cv.tbl_df) 50 | S3method(vec_ptype2,spatial_clustering_cv.data.frame) 51 | S3method(vec_ptype2,spatial_clustering_cv.spatial_clustering_cv) 52 | S3method(vec_ptype2,spatial_clustering_cv.tbl_df) 53 | S3method(vec_ptype2,spatial_leave_location_out_cv.data.frame) 54 | S3method(vec_ptype2,spatial_leave_location_out_cv.spatial_leave_location_out_cv) 55 | S3method(vec_ptype2,spatial_leave_location_out_cv.tbl_df) 56 | S3method(vec_ptype2,spatial_nndm_cv.data.frame) 57 | S3method(vec_ptype2,spatial_nndm_cv.spatial_nndm_cv) 58 | S3method(vec_ptype2,spatial_nndm_cv.tbl_df) 59 | S3method(vec_ptype2,tbl_df.spatial_block_cv) 60 | S3method(vec_ptype2,tbl_df.spatial_buffer_vfold_cv) 61 | S3method(vec_ptype2,tbl_df.spatial_clustering_cv) 62 | 
S3method(vec_ptype2,tbl_df.spatial_leave_location_out_cv) 63 | S3method(vec_ptype2,tbl_df.spatial_nndm_cv) 64 | S3method(vec_restore,spatial_block_cv) 65 | S3method(vec_restore,spatial_buffer_vfold_cv) 66 | S3method(vec_restore,spatial_clustering_cv) 67 | S3method(vec_restore,spatial_leave_location_out_cv) 68 | S3method(vec_restore,spatial_nndm_cv) 69 | export(analysis) 70 | export(assessment) 71 | export(autoplot) 72 | export(get_rsplit) 73 | export(spatial_block_cv) 74 | export(spatial_buffer_vfold_cv) 75 | export(spatial_clustering_cv) 76 | export(spatial_leave_location_out_cv) 77 | export(spatial_nndm_cv) 78 | import(sf) 79 | import(vctrs) 80 | importFrom(dplyr,dplyr_reconstruct) 81 | importFrom(ggplot2,autoplot) 82 | importFrom(purrr,map) 83 | importFrom(rlang,is_empty) 84 | importFrom(rsample,analysis) 85 | importFrom(rsample,assessment) 86 | importFrom(rsample,complement) 87 | importFrom(rsample,get_rsplit) 88 | importFrom(rsample,make_splits) 89 | importFrom(rsample,new_rset) 90 | importFrom(stats,as.dist) 91 | importFrom(stats,cutree) 92 | importFrom(stats,hclust) 93 | importFrom(stats,kmeans) 94 | useDynLib(spatialsample, .registration = TRUE) 95 | -------------------------------------------------------------------------------- /NEWS.md: -------------------------------------------------------------------------------- 1 | # spatialsample (development version) 2 | 3 | # spatialsample 0.6.0 4 | 5 | * Fixed bug where passing a polygon to `spatial_nndm_cv()` forced leave-one-out 6 | CV, rather than the intended sampling of prediction points from the polygon. 7 | 8 | # spatialsample 0.5.1 9 | 10 | * `spatial_block_cv()` now adds an `expand_bbox` attribute to the resulting rset 11 | for compatibility with `rsample::reshuffle_rset()`. 12 | 13 | * `autoplot.spatial_block_cv()` now plots the proper grid (using the new 14 | `expand_bbox` attribute). 15 | 16 | # spatialsample 0.5.0 17 | 18 | * `spatial_block_cv()` gains an argument, `expand_bbox`, which represents the 19 | proportion a bounding box should be expanded by (each corner of the bounding 20 | box is expanded by `bbox_corner_value * expand_bbox`). 21 | * **This is a breaking change** for data in planar coordinate reference 22 | systems. Set to 0 to obtain previous behaviors. 23 | * Data in geographic coordinates was already having its bounding box expanded 24 | by the default 0.00001. 25 | * This makes it so that regularly spaced data is less likely to fall precisely 26 | along grid lines (and therefore fall into two assessment sets) and so that 27 | geographic data is more likely to fall within the constructed grid. 28 | * Thanks to Nikos on StackOverflow for reporting this behavior: 29 | https://stackoverflow.com/q/77374348/9625040 30 | 31 | * `spatial_block_cv()` will now throw an error if observations are in multiple 32 | assessment folds (caused by observations, or observation centroids, falling 33 | precisely along grid polygon boundaries). 34 | 35 | * In `spatial_nndm_cv()`, passing a single polygon (or multipolygon) to the 36 | `prediction_sites` argument will result in prediction sites being sampled from 37 | that polygon, rather than from its bounding box. 38 | 39 | * `get_rsplit()` is now re-exported from the rsample package. This provides a 40 | more natural, pipe-able interface for accessing individual splits; 41 | `get_rsplit(rset, 1)` is identical to `rset$splits[[1]]`.
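As a minimal sketch of that interface (using the package's built-in `boston_canopy` data; `v = 5` is just an example value):

```r
library(spatialsample)

folds <- spatial_block_cv(boston_canopy, v = 5)

# These two ways of pulling out the first split are equivalent:
split_one <- get_rsplit(folds, 1)
split_one <- folds$splits[[1]]

# get_rsplit() also reads naturally in a pipe (base pipe requires R >= 4.1):
folds |> get_rsplit(1) |> analysis()
```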
42 | 43 | # spatialsample 0.4.0 44 | 45 | * `spatial_nndm_cv()` is a new function for nearest neighbor distance matching 46 | cross-validation, as described in Milà et al. 2022 47 | (doi: 10.1111/2041-210X.13851). NNDM was first implemented in CAST 48 | (https://cran.r-project.org/package=CAST). 49 | 50 | # spatialsample 0.3.0 51 | 52 | ## Breaking changes 53 | 54 | * `spatial_clustering_cv()` no longer accepts non-sf objects. Use 55 | `rsample::clustering_cv()` for these instead (#126). 56 | 57 | * `spatial_clustering_cv()` now uses edge-to-edge distances, like the rest of 58 | the package, rather than centroids (#126). 59 | 60 | ## New features 61 | 62 | * All functions now have a `repeats` argument, defaulting to 1, allowing for 63 | repeated cross-validation (#122, #125, #126). 64 | 65 | * `spatial_clustering_cv()` now has a `distance_function` argument, set by 66 | default to `as.dist(sf::st_distance(x))` (#126). 67 | 68 | ## Minor improvements and fixes 69 | 70 | * Outputs from `spatial_buffer_vfold_cv()` should now have the correct `radius` and `buffer` attributes (#110). 71 | 72 | * `spatial_buffer_vfold_cv()` now has the correct `id` values when using repeats (#116). 73 | 74 | * `spatial_buffer_vfold_cv()` now throws an error when `repeats > 1 && v >= nrow(data)` (#116). 75 | 76 | * The minimum `sf` version required is now `>= 1.0-9`, so that unit objects can be passed to `cellsize` in `spatial_block_cv()` (#113; #124). 77 | 78 | * `autoplot()` now handles repeated cross-validation properly (#123). 79 | 80 | # spatialsample 0.2.1 81 | 82 | * Mike Mahoney is taking over as package maintainer, as Julia Silge (who remains 83 | a package author) moves to focus on ModelOps work. 84 | 85 | * Functions will now return rsplits without `out_id`, like most rsample 86 | functions, whenever `buffer` is `NULL`. 87 | 88 | * `spatial_block_cv()`, `spatial_buffer_vfold_cv()`, and buffering now support 89 | using sf or sfc objects with a missing CRS. The assumption is that data in an 90 | NA CRS is projected, with all distance values in the same unit as the 91 | projection. Trying to use alternative units will fail. Set a CRS if these 92 | assumptions aren't correct. 93 | 94 | * `spatial_buffer_vfold_cv()` and buffering no longer support tibble or 95 | data.frame inputs (they now require sf or sfc objects). It was not easy to 96 | use these to begin with, but should have always caused an error: use 97 | `rsample::vfold_cv()` instead or transform your data into an sf object. 98 | 99 | * `spatial_buffer_vfold_cv()` has had some attribute changes to match `rsample`: 100 | * `strata` attribute is now the name of the column used for stratification, 101 | or not set if there was no stratification. 102 | * `pool` and `breaks` have been added as attributes. 103 | * `radius` and `buffer` are now set to 0 if they were passed as `NULL`. 104 | 105 | # spatialsample 0.2.0 106 | 107 | ## New features 108 | 109 | * `spatial_buffer_vfold_cv()` is a new function which wraps 110 | `rsample::vfold_cv()`, allowing users to add inclusion radii and exclusion 111 | buffers to their vfold resamples. This is the supported way to perform 112 | spatially buffered leave-one-out cross-validation (set `v` to `nrow(data)`). 113 | 114 | * `spatial_leave_location_out_cv()` is a new function which wraps 115 | `rsample::group_vfold_cv()`, allowing users to add inclusion radii and 116 | exclusion buffers to their vfold resamples. 117 | 118 | * `spatial_block_cv()` is a new function for performing spatial block 119 | cross-validation.
It currently supports randomly assigning blocks to folds. 120 | 121 | * `spatial_clustering_cv()` gains an argument, `cluster_function`, which 122 | specifies what type of clustering to perform. `cluster_function = "kmeans"`, 123 | the default, uses `stats::kmeans()` for k-means clustering, while 124 | `cluster_function = "hclust"` uses `stats::hclust()` for hierarchical 125 | clustering. Users can also provide their own clustering function. 126 | 127 | * `spatial_clustering_cv()` now supports `sf` objects! Coordinates are inferred 128 | automatically when using `sf` objects, and anything passed to `coords` will 129 | be ignored with a warning. Clusters made using `sf` objects will take 130 | coordinate reference systems into account (using `sf::st_distance()`), 131 | unlike those made using data frames. 132 | 133 | * All resampling functions now support spatial buffering using two arguments. 134 | `radius` lets you specify an _inclusion_ radius for your test set, where any 135 | data within `radius` of the original assessment set will be added to the 136 | assessment set. `buffer` specifies an _exclusion_ buffer around the test set, 137 | where any data within `buffer` of the assessment set (after `radius` is 138 | applied) will be excluded from both sets. 139 | 140 | * `autoplot()` now has a method for spatial resamples built from `sf` objects. 141 | It works both on `rset` objects and on `rsplit` objects, and has a special 142 | method for outputs from `spatial_block_cv()`. 143 | 144 | * `boston_canopy` is a new dataset with data on tree canopy change over time in 145 | Boston, Massachusetts, USA. It uses a projected coordinate reference system 146 | and US customary units; see `?boston_canopy` for instructions on how to 147 | install these into your PROJ installation if needed. 148 | 149 | ## Documentation 150 | 151 | * The "Getting Started" vignette has been revised to demonstrate the new 152 | features and clustering methods. 153 | 154 | * A new vignette has been added walking through the spatial buffering process. 155 | 156 | ## Dependency changes 157 | 158 | * R versions before 3.4 are no longer supported. 159 | 160 | * `glue`, `sf`, and `units` have been added to Imports. 161 | 162 | * `ggplot2` has been moved to Imports. It had been in Suggests. 163 | 164 | * `covr`, `gifski`, `lwgeom`, and `vdiffr` are now in Suggests. 165 | 166 | * `rlang` now has a minimum version of 1.0.0 (was previously unversioned). 167 | 168 | # spatialsample 0.1.0 169 | 170 | * Added a `NEWS.md` file to track changes to the package. 171 | -------------------------------------------------------------------------------- /R/autoplot.R: -------------------------------------------------------------------------------- 1 | #' Create a ggplot for spatial resamples. 2 | #' 3 | #' This method provides a good visualization method for spatial resampling. 4 | #' 5 | #' @details 6 | #' The plot method for `spatial_rset` displays which fold each observation 7 | #' is assigned to. Note that if data is assigned to multiple folds 8 | #' (which is common if resamples were created with a non-zero `radius`) only 9 | #' the "last" fold for each observation will appear on the plot. 10 | #' Consider adding `ggplot2::facet_wrap(~ fold)` to visualize all members of 11 | #' each fold separately. 12 | #' Alternatively, consider plotting each split using the `spatial_rsplit` method 13 | #' (for example, via `lapply(object$splits, autoplot)`). 14 | #' 15 | #' @param object A `spatial_rset` object or a `spatial_rsplit` object. 
16 | #' Note that only resamples made from 17 | #' `sf` objects create `spatial_rset` and `spatial_rsplit` objects; 18 | #' this function will not work for 19 | #' resamples made with non-spatial tibbles or data.frames. 20 | #' @param ... Options passed to [ggplot2::geom_sf()]. 21 | #' @param alpha Opacity, passed to [ggplot2::geom_sf()]. 22 | #' Values of alpha range from 0 to 1, with lower values corresponding to more 23 | #' transparent colors. 24 | #' 25 | #' @return A ggplot object with each fold assigned a color, made using 26 | #' [ggplot2::geom_sf()]. 27 | #' 28 | #' @examples 29 | #' 30 | #' boston_block <- spatial_block_cv(boston_canopy, v = 2) 31 | #' autoplot(boston_block) 32 | #' autoplot(boston_block$splits[[1]]) 33 | #' 34 | #' @rdname autoplot.spatial_rset 35 | # registered in zzz.R 36 | #' @export 37 | autoplot.spatial_rset <- function(object, ..., alpha = 0.6) { 38 | bool_id_columns <- grepl("^id", names(object)) 39 | # Not sure how this would ever fire, but just in case: 40 | if (sum(bool_id_columns) > 2) { 41 | rlang::abort( 42 | "Cannot automatically plot rsets with more than two 'id' columns." 43 | ) 44 | } 45 | # These are named to not interfere with normal column names 46 | .fold. <- .facet. <- NULL 47 | 48 | object <- if (sum(bool_id_columns) == 1) { 49 | purrr::map2( 50 | object$splits, 51 | object$id, 52 | ~ cbind(assessment(.x), .fold. = .y) 53 | ) 54 | } else { 55 | purrr::pmap( 56 | object[grepl("splits", names(object)) | bool_id_columns], 57 | ~ cbind(assessment(..1), .facet. = ..2, .fold. = ..3) 58 | ) 59 | } 60 | object <- dplyr::bind_rows(object) 61 | 62 | p <- ggplot2::ggplot( 63 | data = object, 64 | mapping = ggplot2::aes(color = .fold., fill = .fold.) 65 | ) 66 | p <- p + ggplot2::geom_sf(..., alpha = alpha) 67 | p <- p + ggplot2::guides( 68 | colour = ggplot2::guide_legend("Fold"), 69 | fill = ggplot2::guide_legend("Fold") 70 | ) 71 | 72 | if (sum(bool_id_columns) == 2) { 73 | p <- p + ggplot2::facet_wrap(ggplot2::vars(.facet.)) 74 | } 75 | 76 | p + ggplot2::coord_sf() 77 | } 78 | 79 | #' @export 80 | autoplot.spatial_rsplit <- function(object, ..., alpha = 0.6) { 81 | # .class. is named to not interfere with normal column names 82 | .class. <- NULL 83 | 84 | ins <- object$in_id 85 | outs <- if (identical(object$out_id, NA)) { 86 | rsample::complement(object) 87 | } else { 88 | object$out_id 89 | } 90 | object <- object$data 91 | object$.class. <- NA 92 | object$.class.[ins] <- "Analysis" 93 | object$.class.[outs] <- "Assessment" 94 | object$.class.[is.na(object$.class.)] <- "Buffer" 95 | 96 | p <- ggplot2::ggplot( 97 | data = object, 98 | mapping = ggplot2::aes(color = .class., fill = .class.) 99 | ) 100 | p <- p + ggplot2::guides( 101 | colour = ggplot2::guide_legend("Class"), 102 | fill = ggplot2::guide_legend("Class") 103 | ) 104 | p <- p + ggplot2::geom_sf(..., alpha = alpha) 105 | p + ggplot2::coord_sf() 106 | } 107 | 108 | #' @rdname autoplot.spatial_rset 109 | #' @param show_grid When plotting [spatial_block_cv] objects, should the grid 110 | #' itself be drawn on top of the data? Set to FALSE to remove the grid. 
111 | #' @export 112 | autoplot.spatial_block_cv <- function(object, show_grid = TRUE, ..., alpha = 0.6) { 113 | p <- autoplot.spatial_rset(object, ..., alpha = alpha) 114 | 115 | if (!show_grid) { 116 | return(p) 117 | } 118 | 119 | data <- object$splits[[1]]$data 120 | 121 | plot_data <- data 122 | plot_data <- sf::st_bbox(data) 123 | plot_data <- expand_grid(plot_data, attr(object, "expand_bbox")) 124 | plot_data <- sf::st_as_sfc(plot_data) 125 | 126 | grid_args <- list(x = plot_data) 127 | grid_args$cellsize <- attr(object, "cellsize", TRUE) 128 | grid_args$offset <- attr(object, "offset", TRUE) 129 | grid_args$n <- attr(object, "n", TRUE) 130 | grid_args$crs <- attr(object, "crs", TRUE) 131 | grid_args$what <- attr(object, "what", TRUE) 132 | grid_args$square <- attr(object, "square", TRUE) 133 | grid_args$flat_topped <- attr(object, "flat_topped", TRUE) 134 | grid_blocks <- do.call(sf::st_make_grid, grid_args) 135 | 136 | if (attr(object, "relevant_only", TRUE)) { 137 | centroids <- sf::st_centroid(sf::st_geometry(data)) 138 | grid_blocks <- filter_grid_blocks(grid_blocks, centroids) 139 | } 140 | 141 | # Always prints with "Coordinate system already present. Adding new coordinate system, which will replace the existing one." 142 | # So this silences that 143 | suppressMessages(p + ggplot2::geom_sf(data = grid_blocks, fill = NA)) 144 | } 145 | -------------------------------------------------------------------------------- /R/buffer.R: -------------------------------------------------------------------------------- 1 | #' Apply an inclusion radius and exclusion buffer to indices 2 | #' 3 | #' @param data An object of class `sf` or `sfc`. 4 | #' @param indices List of indices in each fold generated by `split_unnamed()`. 5 | #' @param radius Numeric: points within this distance of the initially-selected 6 | #' test points will be assigned to the assessment set. If `NULL`, no radius is 7 | #' applied. 8 | #' @param buffer Numeric: points within this distance of any point in the 9 | #' test set (after `radius` is applied) will be assigned to neither the analysis 10 | #' or assessment set. If `NULL`, no buffer is applied. 11 | #' 12 | #' @keywords internal 13 | buffer_indices <- function(data, indices, radius, buffer, call = rlang::caller_env()) { 14 | standard_checks(data, "Buffering", call) 15 | 16 | n <- nrow(data) 17 | # calling st_distance is a _huge_ performance hit, especially for big data, 18 | # so we make a point of only doing it once. 19 | # 20 | # This winds up requiring all sorts of weird handler code, 21 | # namely `row_ids_within_dist` and `which_within_dist`, in order to 22 | # only calculate this matrix 23 | # 24 | # Using st_is_within_dist is not faster. Using st_intersects is not faster. 25 | # I keep trying both of these, and have left this comment in vain hope it 26 | # convinces me to stop. 
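  # A rough sketch of the flow this enables (illustrative only;
  # `row_ids_within_dist()` is defined later in this file):
  #
  #   distmat  <- sf::st_distance(data)                          # computed once
  #   indices  <- row_ids_within_dist(distmat, indices, radius)  # grow assessment sets
  #   buffered <- row_ids_within_dist(distmat, indices, buffer)  # rows excluded from both sets
  #
  # so every fold reuses the same matrix and no per-fold distance calls are needed.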
27 | distmat <- sf::st_distance(data) 28 | 29 | # only run radius checks if radius is not NULL (to prevent NAs from >) 30 | run_radius <- !is.null(radius) 31 | if (run_radius && units::set_units(radius, NULL) > 0) { 32 | # In case `radius` has no units, assume it's in the same units as `data` 33 | if (!identical(sf::st_crs(data), sf::NA_crs_)) units(radius) <- units(distmat) 34 | indices <- row_ids_within_dist(distmat, indices, radius) 35 | } 36 | 37 | # `buffer_indices` are _always_ needed 38 | # so re-code a NULL buffer as a 0, which will buffer nothing 39 | if (is.null(buffer)) buffer <- 0L 40 | # In case `buffer` has no units, assume it's in the same units as `data` 41 | if (!identical(sf::st_crs(data), sf::NA_crs_)) units(buffer) <- units(distmat) 42 | buffer_indices <- row_ids_within_dist(distmat, indices, buffer) 43 | 44 | purrr::map2(indices, buffer_indices, buffered_complement, n = n) 45 | } 46 | 47 | buffered_complement <- function(ind, buff_ind, n) { 48 | list( 49 | analysis = setdiff(1:n, c(ind, buff_ind)), 50 | assessment = unique(ind) 51 | ) 52 | } 53 | 54 | row_ids_within_dist <- function(distmat, indices, dist) { 55 | if (units::set_units(dist, NULL) > 0) { 56 | # c++ won't implicitly cast, so do it in R 57 | mode(distmat) <- "numeric" 58 | dist <- as.numeric(dist) 59 | purrr::map( 60 | # indices is the output of split_unnamed 61 | indices, 62 | # which_within_dist returns a vector of row IDs in sequential order 63 | # 64 | # In order to visualize (eventually) which observations were originally 65 | # chosen for the test set and which were inside `radius`, 66 | # we want the new indices to be appended to the end of the original indices, 67 | # not sorted in 68 | # 69 | # So here we append the new indices to the old and de-duplicate them 70 | ~ unique(c(.x, which_within_dist(distmat, as.numeric(.x), dist))) 71 | ) 72 | } else { 73 | # initialize to integer(0) in case buffer is <= 0: 74 | lapply(seq_along(indices), function(x) integer(0)) 75 | } 76 | } 77 | -------------------------------------------------------------------------------- /R/checks.R: -------------------------------------------------------------------------------- 1 | check_sf <- function(data, calling_function, call = rlang::caller_env()) { 2 | if (!is_sf(data)) { 3 | rlang::abort( 4 | c( 5 | glue::glue("{calling_function} currently only supports `sf` objects."), 6 | i = "Try converting `data` to an `sf` object via `sf::st_as_sf()`." 7 | ), 8 | call = call 9 | ) 10 | } 11 | } 12 | 13 | check_s2 <- function(data, calling_function, call = rlang::caller_env()) { 14 | if (is_longlat(data) && !sf::sf_use_s2()) { 15 | rlang::abort( 16 | c( 17 | glue::glue("{calling_function} can only process geographic coordinates when using the s2 geometry library."), 18 | "i" = "Reproject your data into a projected coordinate reference system using `sf::st_transform()`.", 19 | "i" = "Or install the `s2` package and enable it using `sf::sf_use_s2(TRUE)`." 
20 | ), 21 | call = call 22 | ) 23 | } 24 | } 25 | 26 | check_na_crs <- function(data, calling_function, call = rlang::caller_env()) { 27 | if (sf::st_crs(data) == sf::NA_crs_) { 28 | rlang::warn( 29 | c( 30 | glue::glue("{calling_function} expects your data to have an appropriate coordinate reference system (CRS)."), 31 | i = "If possible, try setting a CRS using `sf::st_set_crs()`.", 32 | i = glue::glue("Otherwise, {tolower(calling_function)} will assume your data is in projected coordinates.") 33 | ), 34 | call = call 35 | ) 36 | } 37 | } 38 | 39 | standard_checks <- function(data, calling_function, call = rlang::caller_env()) { 40 | check_sf(data, calling_function, call) 41 | check_na_crs(data, calling_function, call) 42 | check_s2(data, calling_function, call) 43 | } 44 | 45 | #' Check that "v" is sensible 46 | #' 47 | #' @param v The number of partitions for the resampling. Set to `NULL` or `Inf` 48 | #' for the maximum sensible value (for leave-one-X-out cross-validation). 49 | #' @keywords internal 50 | check_v <- function(v, 51 | max_v, 52 | objects, 53 | allow_max_v = TRUE, 54 | call = rlang::caller_env()) { 55 | if (is.null(v)) v <- Inf 56 | 57 | if (!rlang::is_integerish(v) || length(v) != 1 || v < 1) { 58 | rlang::abort("`v` must be a single positive integer.", call = call) 59 | } 60 | 61 | if (is.infinite(v)) { 62 | if (!allow_max_v) { 63 | rlang::abort( 64 | "`v` cannot be `NULL` or `Inf` for this function", 65 | call = call 66 | ) 67 | } 68 | v <- max_v 69 | } 70 | 71 | if (v > max_v) { 72 | if (!allow_max_v) { 73 | rlang::abort( 74 | c( 75 | glue::glue( 76 | "The number of {objects} is less than `v = {v}` ({max_v})" 77 | ), 78 | i = glue::glue("Set `v` to a smaller value than {max_v}") 79 | ), 80 | call = call 81 | ) 82 | } 83 | 84 | rlang::warn( 85 | c( 86 | glue::glue("Fewer than {v} {objects} available for sampling"), 87 | i = glue::glue("Setting `v` to {max_v}") 88 | ), 89 | call = call 90 | ) 91 | 92 | v <- max_v 93 | } 94 | v 95 | } 96 | -------------------------------------------------------------------------------- /R/compat-vctrs-helpers.R: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | 3 | # Keep this list up to date with known rset subclasses for testing. 4 | # Delay assignment because we are creating this directly in the R script 5 | # and not all of the required helpers might have been sourced yet. 
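# A quick illustration of that behavior (sketch only; `expensive_setup()` is a
# made-up placeholder, not a real function):
#
#   delayedAssign("lazy_value", expensive_setup())
#   # nothing runs yet; expensive_setup() is only evaluated the first time
#   # `lazy_value` is accessed
#
# so `rset_subclasses` below is only built on first access, after everything
# else in the package has been loaded.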
6 | test_data <- function() { 7 | x <- boston_canopy 8 | x$idx <- rep(c("a", "b"), length.out = nrow(x)) 9 | x 10 | } 11 | 12 | delayedAssign("rset_subclasses", { 13 | if (rlang::is_installed("withr")) { 14 | withr::with_seed( 15 | 123, 16 | list( 17 | spatial_block_cv = spatial_block_cv(test_data()), 18 | spatial_clustering_cv = spatial_clustering_cv(test_data()), 19 | spatial_buffer_vfold_cv = spatial_buffer_vfold_cv(test_data(), radius = 1, buffer = 1), 20 | spatial_leave_location_out_cv = spatial_leave_location_out_cv(test_data(), idx), 21 | spatial_nndm_cv = spatial_nndm_cv(test_data()[1:500, ], test_data()[501:682, ]) 22 | ) 23 | ) 24 | } else { 25 | NULL 26 | } 27 | }) 28 | -------------------------------------------------------------------------------- /R/compat-vctrs.R: -------------------------------------------------------------------------------- 1 | # Mimicking rsample `compat-vctrs.R` 2 | # https://github.com/tidymodels/rsample/blob/master/R/compat-vctrs.R 3 | 4 | # ------------------------------------------------------------------------------ 5 | 6 | stop_incompatible_cast_rset <- function(x, to, ..., x_arg, to_arg) { 7 | details <- "Can't cast to an rset because attributes are likely incompatible." 8 | vctrs::stop_incompatible_cast(x, to, x_arg = x_arg, to_arg = to_arg, details = details) 9 | } 10 | 11 | stop_never_called <- function(fn) { 12 | rlang::abort(paste0("Internal error: `", fn, "()` should never be called.")) 13 | } 14 | -------------------------------------------------------------------------------- /R/cpp11.R: -------------------------------------------------------------------------------- 1 | # Generated by cpp11: do not edit by hand 2 | 3 | which_within_dist <- function(distmat, idx, dist) { 4 | .Call(`_spatialsample_which_within_dist`, distmat, idx, dist) 5 | } 6 | -------------------------------------------------------------------------------- /R/data.R: -------------------------------------------------------------------------------- 1 | #' Boston tree canopy and heat index data. 2 | #' 3 | #' A dataset containing data on tree canopy coverage and change for the city of 4 | #' Boston, Massachusetts from 2014-2019, 5 | #' as well as temperature and heat index data for July 2019. Data is aggregated 6 | #' to a grid of regular 25 hectare hexagons, clipped to city boundaries. 7 | #' This data is made available under the Public Domain Dedication and License 8 | #' v1.0 whose full text can be found at: 9 | #' \url{https://opendatacommons.org/licenses/pddl/1-0/}. 10 | #' 11 | #' Note that this dataset is in the EPSG:2249 12 | #' (NAD83 / Massachusetts Mainland (ftUS)) coordinate reference system (CRS), 13 | #' which may not be installed by default on your computer. Before working with 14 | #' `boston_canopy`, run: 15 | #' 16 | #' - `sf::sf_proj_network(TRUE)` to install the CRS itself 17 | #' - [sf::sf_add_proj_units()] to add US customary units to your units 18 | #' database 19 | #' 20 | #' These steps only need to be taken once per computer (or per PROJ installation). 21 | #' 22 | #' @format A data frame (of class `sf`, `tbl_df`, `tbl`, and `data.frame`) 23 | #' containing 682 records of 22 variables: 24 | #' \describe{ 25 | #' \item{grid_id}{Unique identifier for each hexagon. 
Letters represent the hexagon's X position in the grid (ordered West to East), while numbers represent the Y position (ordered North to South).} 26 | #' \item{land_area}{Area excluding water bodies} 27 | #' \item{canopy_gain}{Area of canopy gain between the two years} 28 | #' \item{canopy_loss}{Area of canopy loss between the two years} 29 | #' \item{canopy_no_change}{Area of no canopy change between the two years} 30 | #' \item{canopy_area_2014}{2014 total canopy area (baseline)} 31 | #' \item{canopy_area_2019}{2019 total canopy area} 32 | #' \item{change_canopy_area}{The change in area of tree canopy between the two years} 33 | #' \item{change_canopy_percentage}{Relative change calculation used in economics is the gain or loss of tree canopy relative to the earlier time period: (2019 Canopy-2014 Canopy)/(2014 Canopy)} 34 | #' \item{canopy_percentage_2014}{2014 canopy percentage} 35 | #' \item{canopy_percentage_2019}{2019 canopy percentage} 36 | #' \item{change_canopy_absolute}{Absolute change. Magnitude of change in percent tree canopy from 2014 to 2019 (% 2019 Canopy - % 2014 Canopy)} 37 | #' \item{mean_temp_morning}{Mean temperature for July 2019 from 6am - 7am} 38 | #' \item{mean_temp_evening}{Mean temperature for July 2019 from 7pm - 8pm} 39 | #' \item{mean_temp}{Mean temperature for July 2019 from 6am - 7am, 3pm - 4pm, and 7pm - 8pm (combined)} 40 | #' \item{mean_heat_index_morning}{Mean heat index for July 2019 from 6am - 7am} 41 | #' \item{mean_heat_index_evening}{Mean heat index for July 2019 from 7pm - 8pm} 42 | #' \item{mean_heat_index}{Mean heat index for July 2019 from 6am - 7am, 3pm - 4pm, and 7pm - 8pm (combined)} 43 | #' \item{geometry}{Geometry of each hexagon, encoded using EPSG:2249 as a coordinate reference system (NAD83 / Massachusetts Mainland (ftUS)). Note that the linear units of this CRS are in US feet.} 44 | #' } 45 | #' 46 | #' @source Canopy data is from \url{https://data.boston.gov/dataset/hex-tree-canopy-change-metrics}. 47 | #' Heat data is from \url{https://data.boston.gov/dataset/hex-mean-heat-index}. 48 | #' Most field definitions are from \url{https://data.boston.gov/dataset/canopy-change-assessment-data-dictionary}. 49 | "boston_canopy" 50 | -------------------------------------------------------------------------------- /R/labels.R: -------------------------------------------------------------------------------- 1 | #' @export 2 | pretty.spatial_clustering_cv <- function(x, ...) { 3 | details <- attributes(x) 4 | res <- paste0(details$v, "-fold spatial cross-validation") 5 | res 6 | } 7 | 8 | #' @export 9 | print.spatial_clustering_cv <- function(x, ...) { 10 | cat("# ", pretty(x), "\n") 11 | class(x) <- class(x)[!(class(x) %in% c( 12 | "spatial_clustering_cv", 13 | "spatial_rset", 14 | "rset" 15 | ))] 16 | print(x, ...) 17 | } 18 | 19 | #' @export 20 | pretty.spatial_block_cv <- function(x, ...) { 21 | details <- attributes(x) 22 | res <- paste0(details$v, "-fold spatial block cross-validation") 23 | res 24 | } 25 | 26 | #' @export 27 | print.spatial_block_cv <- function(x, ...) { 28 | cat("# ", pretty(x), "\n") 29 | class(x) <- class(x)[!(class(x) %in% c( 30 | "spatial_block_cv", 31 | "spatial_rset", 32 | "rset" 33 | ))] 34 | print(x, ...) 35 | } 36 | 37 | #' @export 38 | pretty.spatial_leave_location_out_cv <- function(x, ...) { 39 | details <- attributes(x) 40 | res <- paste0(details$v, "-fold spatial leave-location-out cross-validation") 41 | res 42 | } 43 | 44 | #' @export 45 | print.spatial_leave_location_out_cv <- function(x, ...) 
{ 46 | cat("# ", pretty(x), "\n") 47 | class(x) <- class(x)[!(class(x) %in% c( 48 | "spatial_leave_location_out_cv", 49 | "spatial_rset", 50 | "rset" 51 | ))] 52 | print(x, ...) 53 | } 54 | 55 | #' @export 56 | pretty.spatial_buffer_vfold_cv <- function(x, ...) { 57 | details <- attributes(x) 58 | res <- paste0(details$v, "-fold spatial cross-validation") 59 | res 60 | } 61 | 62 | #' @export 63 | print.spatial_buffer_vfold_cv <- function(x, ...) { 64 | cat("# ", pretty(x), "\n") 65 | class(x) <- class(x)[!(class(x) %in% c( 66 | "spatial_buffer_vfold_cv", 67 | "spatial_rset", 68 | "rset" 69 | ))] 70 | print(x, ...) 71 | } 72 | -------------------------------------------------------------------------------- /R/misc.R: -------------------------------------------------------------------------------- 1 | ## Keep synced with rsample 2 | 3 | names0 <- function(num, prefix = "x") { 4 | if (num == 0L) { 5 | return(character()) 6 | } 7 | ind <- format(1:num) 8 | ind <- gsub(" ", "0", ind) 9 | paste0(prefix, ind) 10 | } 11 | 12 | ## Get the indices of the analysis set from the assessment set 13 | default_complement <- function(ind, n) { 14 | list( 15 | analysis = setdiff(1:n, ind), 16 | assessment = unique(ind) 17 | ) 18 | } 19 | 20 | ## Split, but no names 21 | split_unnamed <- function(x, f) { 22 | out <- split(x, f) 23 | unname(out) 24 | } 25 | 26 | ### Functions below are spatialsample-specific 27 | 28 | ## This will remove the assessment indices from an rsplit object 29 | rm_out <- function(x, buffer = NULL) { 30 | if (is.null(buffer)) x$out_id <- NA 31 | x 32 | } 33 | 34 | # Check sparse geometry binary predicate for empty elements 35 | # See ?sf::sgbp for more information on the data structure 36 | sgbp_is_not_empty <- function(x) !identical(x, integer(0)) 37 | 38 | is_sf <- function(x) { 39 | inherits(x, "sf") || inherits(x, "sfc") 40 | } 41 | 42 | is_longlat <- function(x) { 43 | !(sf::st_crs(x) == sf::NA_crs_) && sf::st_is_longlat(x) 44 | } 45 | -------------------------------------------------------------------------------- /R/spatial_clustering_cv.R: -------------------------------------------------------------------------------- 1 | #' Spatial Clustering Cross-Validation 2 | #' 3 | #' Spatial clustering cross-validation splits the data into V groups of 4 | #' disjointed sets by clustering points based on their spatial coordinates. 5 | #' A resample of the analysis data consists of V-1 of the folds/clusters 6 | #' while the assessment set contains the final fold/cluster. 7 | #' 8 | #' @section Changes in spatialsample 0.3.0: 9 | #' As of spatialsample version 0.3.0, this function no longer accepts non-`sf` 10 | #' objects as arguments to `data`. In order to perform clustering with 11 | #' non-spatial data, consider using [rsample::clustering_cv()]. 12 | #' 13 | #' Also as of version 0.3.0, this function now calculates edge-to-edge distance 14 | #' for non-point geometries, in line with the rest of the package. Earlier 15 | #' versions relied upon between-centroid distances. 16 | #' 17 | #' @details 18 | #' Clusters are created based on the distances between observations 19 | #' if `data` is an `sf` object. Each cluster is used as a fold for 20 | #' cross-validation. Depending on how the data are distributed spatially, there 21 | #' may not be an equal number of observations in each fold. 
22 | #' 23 | #' You can optionally provide a custom function to `distance_function.` The 24 | #' function should take an `sf` object and return a [stats::dist()] object with 25 | #' distances between data points. 26 | #' 27 | #' You can optionally provide a custom function to `cluster_function`. The 28 | #' function must take three arguments: 29 | #' - `dists`, a [stats::dist()] object with distances between data points 30 | #' - `v`, a length-1 numeric for the number of folds to create 31 | #' - `...`, to pass any additional named arguments to your function 32 | #' 33 | #' The function should return a vector of cluster assignments of length 34 | #' `nrow(data)`, with each element of the vector corresponding to the matching 35 | #' row of the data frame. 36 | #' 37 | #' @param data An `sf` object (often from [sf::read_sf()] 38 | #' or [sf::st_as_sf()]) to split into folds. 39 | #' @inheritParams buffer_indices 40 | #' @inheritParams rsample::clustering_cv 41 | #' @param distance_function Which function should be used for distance 42 | #' calculations? Defaults to [sf::st_distance()], with the output matrix 43 | #' converted to a [stats::dist()] object. You can also provide your own 44 | #' function; see Details. 45 | #' @param v The number of partitions of the data set. 46 | #' @param cluster_function Which function should be used for clustering? 47 | #' Options are either `"kmeans"` (to use [stats::kmeans()]) 48 | #' or `"hclust"` (to use [stats::hclust()]). You can also provide your own 49 | #' function; see `Details`. 50 | #' @param ... Extra arguments passed on to [stats::kmeans()] or 51 | #' [stats::hclust()]. 52 | #' 53 | #' @return A tibble with classes `spatial_clustering_cv`, `spatial_rset`, 54 | #' `rset`, `tbl_df`, `tbl`, and `data.frame`. 55 | #' The results include a column for the data split objects and 56 | #' an identification variable `id`. 57 | #' Resamples created from non-`sf` objects will not have the 58 | #' `spatial_rset` class. 59 | #' 60 | #' @references 61 | #' 62 | #' A. Brenning, "Spatial cross-validation and bootstrap for the assessment of 63 | #' prediction rules in remote sensing: The R package sperrorest," 2012 IEEE 64 | #' International Geoscience and Remote Sensing Symposium, Munich, 2012, 65 | #' pp. 5372-5375, doi: 10.1109/IGARSS.2012.6352393. 
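# Illustration (a hedged editorial sketch, not part of this file): the
# `cluster_function` contract documented above in practice. Any function that
# takes (dists, v, ...) and returns one cluster id per row of the data can be
# supplied. The helper name `pam_clusters` and the use of `cluster::pam()` are
# assumptions for illustration only, not part of spatialsample.
pam_clusters <- function(dists, v, ...) {
  # `dists` arrives as a stats::dist object, `v` as the requested fold count
  cluster::pam(dists, k = v, ...)$clustering
}

set.seed(123)
spatial_clustering_cv(boston_canopy, v = 5, cluster_function = pam_clusters)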
66 | #' 67 | #' @examplesIf rlang::is_installed("modeldata") 68 | #' data(Smithsonian, package = "modeldata") 69 | #' 70 | #' smithsonian_sf <- sf::st_as_sf( 71 | #' Smithsonian, 72 | #' coords = c("longitude", "latitude"), 73 | #' # Set CRS to WGS84 74 | #' crs = 4326 75 | #' ) 76 | #' 77 | #' # When providing sf objects, coords are inferred automatically 78 | #' spatial_clustering_cv(smithsonian_sf, v = 5) 79 | #' 80 | #' # Can use hclust instead: 81 | #' spatial_clustering_cv(smithsonian_sf, v = 5, cluster_function = "hclust") 82 | #' 83 | #' @rdname spatial_clustering_cv 84 | #' @export 85 | spatial_clustering_cv <- function(data, 86 | v = 10, 87 | cluster_function = c("kmeans", "hclust"), 88 | radius = NULL, 89 | buffer = NULL, 90 | ..., 91 | repeats = 1, 92 | distance_function = function(x) as.dist(sf::st_distance(x))) { 93 | if (!rlang::is_function(cluster_function)) { 94 | cluster_function <- rlang::arg_match(cluster_function) 95 | } 96 | 97 | standard_checks(data, "`spatial_clustering_cv()`") 98 | 99 | n <- nrow(data) 100 | v <- check_v( 101 | v, 102 | n, 103 | "data points", 104 | allow_max_v = FALSE 105 | ) 106 | 107 | cv_att <- list( 108 | v = v, 109 | repeats = repeats, 110 | radius = radius, 111 | buffer = buffer, 112 | cluster_function = cluster_function, 113 | distance_function = distance_function 114 | ) 115 | 116 | rset <- rsample::clustering_cv( 117 | data = data, 118 | vars = names(data), 119 | v = v, 120 | repeats = {{ repeats }}, 121 | distance_function = distance_function, 122 | cluster_function = cluster_function, 123 | ... 124 | ) 125 | 126 | posthoc_buffer_rset( 127 | data = data, 128 | rset = rset, 129 | rsplit_class = c("spatial_clustering_split", "spatial_rsplit"), 130 | rset_class = c("spatial_clustering_cv", "spatial_rset", "rset"), 131 | radius = radius, 132 | buffer = buffer, 133 | n = n, 134 | v = v, 135 | cv_att = cv_att 136 | ) 137 | } 138 | -------------------------------------------------------------------------------- /R/spatialsample-package.R: -------------------------------------------------------------------------------- 1 | #' @keywords internal 2 | "_PACKAGE" 3 | 4 | ## usethis namespace: start 5 | #' @importFrom rsample complement 6 | #' @importFrom rsample new_rset 7 | #' @importFrom stats as.dist 8 | #' @importFrom stats cutree 9 | #' @importFrom stats hclust 10 | #' @importFrom stats kmeans 11 | #' @importFrom rsample make_splits 12 | #' @importFrom purrr map 13 | #' @importFrom rlang is_empty 14 | #' @importFrom dplyr dplyr_reconstruct 15 | #' @useDynLib spatialsample, .registration = TRUE 16 | ## usethis namespace: end 17 | NULL 18 | 19 | #' @importFrom rsample analysis 20 | #' @export 21 | rsample::analysis 22 | 23 | #' @importFrom rsample assessment 24 | #' @export 25 | rsample::assessment 26 | 27 | #' @importFrom rsample get_rsplit 28 | #' @export 29 | rsample::get_rsplit 30 | 31 | #' @importFrom ggplot2 autoplot 32 | #' @export 33 | ggplot2::autoplot 34 | 35 | #' @import vctrs 36 | NULL 37 | 38 | #' @import sf 39 | NULL 40 | -------------------------------------------------------------------------------- /R/sysdata.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tidymodels/spatialsample/ded16914d80c5bb118d8a61ffd93e07be5aad93e/R/sysdata.rda -------------------------------------------------------------------------------- /R/zzz-compat-vctrs-spatial_block_cv.R: -------------------------------------------------------------------------------- 1 | # This file was 
generated, do not edit by hand 2 | # Please edit inst/generate_vctrs.R instead 3 | # ------------------------------------------------------------------------------ 4 | # spatial_block_cv 5 | 6 | # `vec_restore()` 7 | # 8 | # Called at the end of `vec_slice()` and `vec_ptype()` after all slicing has 9 | # been done on the proxy object. 10 | # 11 | # If no changes have been made to the row/column structure of rset specific 12 | # columns, then we can return an rset subclass. Otherwise, the resulting 13 | # object is no longer guaranteed to return a valid rset, and we have to 14 | # fallback to a bare tibble. 15 | # 16 | # It is very important that the result of `vec_ptype()` is a bare tibble. 17 | # This ensures that the `vec_ptype2..()` methods 18 | # never get called. `vec_ptype()` is able to return a bare tibble because it 19 | # essentially takes a 0-row slice of the rset, and then calls `vec_restore()`. 20 | # Because the row structure has been modified, we return a bare tibble from 21 | # `vec_restore.()`. 22 | # 23 | # Currently `vec_restore()` uses inheritance in vctrs, but I don't expect this 24 | # to be the case in the future. For that reason, I use explicit methods for 25 | # each individual rset subclass, rather than implementing `vec_restore.rset()`. 26 | #' @export 27 | vec_restore.spatial_block_cv <- function(x, to, ...) { 28 | rsample::rset_reconstruct(x, to) 29 | } 30 | 31 | # `vec_ptype2()` 32 | # 33 | # The purpose of `vec_ptype2()` methods is generally to determine the type 34 | # of the output in operations like `vec_c()` or `vec_rbind()`. However, this 35 | # implementation does not use any custom `vec_ptype2()` methods at all. This is 36 | # explained below. 37 | # 38 | # `vec_ptype2()` internally works by calling `vec_ptype()` on both `x` and `y`, 39 | # and then looking up the common type of those two ptypes. Generally, the 40 | # ptype of a vector `x` returned from `vec_ptype()` has the same class as `x`. 41 | # However, for rsample objects it makes more sense for the ptype of an rset to 42 | # be a bare tibble. The reason for this is because a ptype of a data frame is 43 | # generally a 0-row slice. However for rsample rsets this doesn't make sense 44 | # (you can't have 0 rows in a 10-fold cv rset), so instead we return a bare 45 | # tibble as the ptype. 46 | # So when `vec_ptype2()` is called on an rset, that rset is downgraded to a 47 | # bare tibble _before_ the search for a `vec_ptype2()` method occurs. This 48 | # means that it will never find a ptype2 method such as 49 | # `vec_ptype2.bootstraps.tbl_df()`, because the will become a 50 | # tbl_df first. This means that we rely entirely on the `tbl_df` ptype2 methods, 51 | # which are already implemented in vctrs to return another tbl_df. 52 | # 53 | # The implications of this are that whenever a rset subclass is combined with 54 | # another rset subclass or bare tibble, the resulting common type is always 55 | # another bare tibble. So if you `vec_c(, )` the result 56 | # will always be a tibble, never a bootstraps object. This makes sense because 57 | # you might be adding rows, which would invalidate the structure of the 58 | # bootstraps object. 
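# Illustration (a hedged sketch, not package code): the practical consequence
# of the scheme described above. Assumes spatialsample, vctrs, and tibble are
# installed and that boston_canopy's CRS is available (see ?boston_canopy).
library(spatialsample)

set.seed(123)
folds <- spatial_block_cv(boston_canopy, v = 5)

# Finding a common type with a bare tibble downgrades the rset, so the result
# of row-binding is a plain tibble rather than a spatial_block_cv object.
class(vctrs::vec_rbind(folds, tibble::as_tibble(folds)))

# Slicing that leaves the row structure intact goes through vec_restore(),
# which can hand back the original subclass via rsample::rset_reconstruct().
class(vctrs::vec_slice(folds, seq_len(nrow(folds))))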
59 | 60 | #' @export 61 | vec_ptype2.spatial_block_cv.spatial_block_cv <- function(x, y, ..., x_arg = "", y_arg = "") { 62 | stop_never_called("vec_ptype2.spatial_block_cv.spatial_block_cv") 63 | } 64 | #' @export 65 | vec_ptype2.spatial_block_cv.tbl_df <- function(x, y, ..., x_arg = "", y_arg = "") { 66 | stop_never_called("vec_ptype2.spatial_block_cv.tbl_df") 67 | } 68 | #' @export 69 | vec_ptype2.tbl_df.spatial_block_cv <- function(x, y, ..., x_arg = "", y_arg = "") { 70 | stop_never_called("vec_ptype2.tbl_df.spatial_block_cv") 71 | } 72 | #' @export 73 | vec_ptype2.spatial_block_cv.data.frame <- function(x, y, ..., x_arg = "", y_arg = "") { 74 | stop_never_called("vec_ptype2.spatial_block_cv.data.frame") 75 | } 76 | #' @export 77 | vec_ptype2.data.frame.spatial_block_cv <- function(x, y, ..., x_arg = "", y_arg = "") { 78 | stop_never_called("vec_ptype2.data.frame.spatial_block_cv") 79 | } 80 | 81 | # `vec_cast()` 82 | # 83 | # The `vec_cast()` methods for rset objects really only serve 1 purpose. They 84 | # cast an rset subclass to a tibble or data frame. The cast to tibble is most 85 | # useful. Most of the operations in vctrs work by finding a common type 86 | # with `vec_ptype2()`, and then casting all of the inputs to that common type. 87 | # Because `vec_ptype2()` returns a bare tibble anytime a rset-subclass is 88 | # involved, we will always be casting the rset subclass to a tibble. 89 | # The cast method uses `vctrs::tib_cast()`, which always returns a bare tibble 90 | # with all of the data in `x` cast to the type of `to`. 91 | 92 | #' @export 93 | vec_cast.spatial_block_cv.spatial_block_cv <- function(x, to, ..., x_arg = "", to_arg = "") { 94 | stop_incompatible_cast_rset(x, to, x_arg = x_arg, to_arg = to_arg) 95 | } 96 | #' @export 97 | vec_cast.spatial_block_cv.tbl_df <- function(x, to, ..., x_arg = "", to_arg = "") { 98 | stop_incompatible_cast_rset(x, to, x_arg = x_arg, to_arg = to_arg) 99 | } 100 | #' @export 101 | vec_cast.tbl_df.spatial_block_cv <- function(x, to, ..., x_arg = "", to_arg = "") { 102 | tib_cast(x, to, ..., x_arg = x_arg, to_arg = to_arg) 103 | } 104 | #' @export 105 | vec_cast.spatial_block_cv.data.frame <- function(x, to, ..., x_arg = "", to_arg = "") { 106 | stop_incompatible_cast_rset(x, to, x_arg = x_arg, to_arg = to_arg) 107 | } 108 | #' @export 109 | vec_cast.data.frame.spatial_block_cv <- function(x, to, ..., x_arg = "", to_arg = "") { 110 | df_cast(x, to, ..., x_arg = x_arg, to_arg = to_arg) 111 | } 112 | -------------------------------------------------------------------------------- /R/zzz-compat-vctrs-spatial_buffer_vfold_cv.R: -------------------------------------------------------------------------------- 1 | # This file was generated, do not edit by hand 2 | # Please edit inst/generate_vctrs.R instead 3 | # ------------------------------------------------------------------------------ 4 | # spatial_buffer_vfold_cv 5 | 6 | # `vec_restore()` 7 | # 8 | # Called at the end of `vec_slice()` and `vec_ptype()` after all slicing has 9 | # been done on the proxy object. 10 | # 11 | # If no changes have been made to the row/column structure of rset specific 12 | # columns, then we can return an rset subclass. Otherwise, the resulting 13 | # object is no longer guaranteed to return a valid rset, and we have to 14 | # fallback to a bare tibble. 15 | # 16 | # It is very important that the result of `vec_ptype()` is a bare tibble. 17 | # This ensures that the `vec_ptype2..()` methods 18 | # never get called. 
`vec_ptype()` is able to return a bare tibble because it 19 | # essentially takes a 0-row slice of the rset, and then calls `vec_restore()`. 20 | # Because the row structure has been modified, we return a bare tibble from 21 | # `vec_restore.()`. 22 | # 23 | # Currently `vec_restore()` uses inheritance in vctrs, but I don't expect this 24 | # to be the case in the future. For that reason, I use explicit methods for 25 | # each individual rset subclass, rather than implementing `vec_restore.rset()`. 26 | #' @export 27 | vec_restore.spatial_buffer_vfold_cv <- function(x, to, ...) { 28 | rsample::rset_reconstruct(x, to) 29 | } 30 | 31 | # `vec_ptype2()` 32 | # 33 | # The purpose of `vec_ptype2()` methods is generally to determine the type 34 | # of the output in operations like `vec_c()` or `vec_rbind()`. However, this 35 | # implementation does not use any custom `vec_ptype2()` methods at all. This is 36 | # explained below. 37 | # 38 | # `vec_ptype2()` internally works by calling `vec_ptype()` on both `x` and `y`, 39 | # and then looking up the common type of those two ptypes. Generally, the 40 | # ptype of a vector `x` returned from `vec_ptype()` has the same class as `x`. 41 | # However, for rsample objects it makes more sense for the ptype of an rset to 42 | # be a bare tibble. The reason for this is because a ptype of a data frame is 43 | # generally a 0-row slice. However for rsample rsets this doesn't make sense 44 | # (you can't have 0 rows in a 10-fold cv rset), so instead we return a bare 45 | # tibble as the ptype. 46 | # So when `vec_ptype2()` is called on an rset, that rset is downgraded to a 47 | # bare tibble _before_ the search for a `vec_ptype2()` method occurs. This 48 | # means that it will never find a ptype2 method such as 49 | # `vec_ptype2.bootstraps.tbl_df()`, because the will become a 50 | # tbl_df first. This means that we rely entirely on the `tbl_df` ptype2 methods, 51 | # which are already implemented in vctrs to return another tbl_df. 52 | # 53 | # The implications of this are that whenever a rset subclass is combined with 54 | # another rset subclass or bare tibble, the resulting common type is always 55 | # another bare tibble. So if you `vec_c(, )` the result 56 | # will always be a tibble, never a bootstraps object. This makes sense because 57 | # you might be adding rows, which would invalidate the structure of the 58 | # bootstraps object. 59 | 60 | #' @export 61 | vec_ptype2.spatial_buffer_vfold_cv.spatial_buffer_vfold_cv <- function(x, y, ..., x_arg = "", y_arg = "") { 62 | stop_never_called("vec_ptype2.spatial_buffer_vfold_cv.spatial_buffer_vfold_cv") 63 | } 64 | #' @export 65 | vec_ptype2.spatial_buffer_vfold_cv.tbl_df <- function(x, y, ..., x_arg = "", y_arg = "") { 66 | stop_never_called("vec_ptype2.spatial_buffer_vfold_cv.tbl_df") 67 | } 68 | #' @export 69 | vec_ptype2.tbl_df.spatial_buffer_vfold_cv <- function(x, y, ..., x_arg = "", y_arg = "") { 70 | stop_never_called("vec_ptype2.tbl_df.spatial_buffer_vfold_cv") 71 | } 72 | #' @export 73 | vec_ptype2.spatial_buffer_vfold_cv.data.frame <- function(x, y, ..., x_arg = "", y_arg = "") { 74 | stop_never_called("vec_ptype2.spatial_buffer_vfold_cv.data.frame") 75 | } 76 | #' @export 77 | vec_ptype2.data.frame.spatial_buffer_vfold_cv <- function(x, y, ..., x_arg = "", y_arg = "") { 78 | stop_never_called("vec_ptype2.data.frame.spatial_buffer_vfold_cv") 79 | } 80 | 81 | # `vec_cast()` 82 | # 83 | # The `vec_cast()` methods for rset objects really only serve 1 purpose. 
They 84 | # cast an rset subclass to a tibble or data frame. The cast to tibble is most 85 | # useful. Most of the operations in vctrs work by finding a common type 86 | # with `vec_ptype2()`, and then casting all of the inputs to that common type. 87 | # Because `vec_ptype2()` returns a bare tibble anytime a rset-subclass is 88 | # involved, we will always be casting the rset subclass to a tibble. 89 | # The cast method uses `vctrs::tib_cast()`, which always returns a bare tibble 90 | # with all of the data in `x` cast to the type of `to`. 91 | 92 | #' @export 93 | vec_cast.spatial_buffer_vfold_cv.spatial_buffer_vfold_cv <- function(x, to, ..., x_arg = "", to_arg = "") { 94 | stop_incompatible_cast_rset(x, to, x_arg = x_arg, to_arg = to_arg) 95 | } 96 | #' @export 97 | vec_cast.spatial_buffer_vfold_cv.tbl_df <- function(x, to, ..., x_arg = "", to_arg = "") { 98 | stop_incompatible_cast_rset(x, to, x_arg = x_arg, to_arg = to_arg) 99 | } 100 | #' @export 101 | vec_cast.tbl_df.spatial_buffer_vfold_cv <- function(x, to, ..., x_arg = "", to_arg = "") { 102 | tib_cast(x, to, ..., x_arg = x_arg, to_arg = to_arg) 103 | } 104 | #' @export 105 | vec_cast.spatial_buffer_vfold_cv.data.frame <- function(x, to, ..., x_arg = "", to_arg = "") { 106 | stop_incompatible_cast_rset(x, to, x_arg = x_arg, to_arg = to_arg) 107 | } 108 | #' @export 109 | vec_cast.data.frame.spatial_buffer_vfold_cv <- function(x, to, ..., x_arg = "", to_arg = "") { 110 | df_cast(x, to, ..., x_arg = x_arg, to_arg = to_arg) 111 | } 112 | -------------------------------------------------------------------------------- /R/zzz-compat-vctrs-spatial_clustering_cv.R: -------------------------------------------------------------------------------- 1 | # This file was generated, do not edit by hand 2 | # Please edit inst/generate_vctrs.R instead 3 | # ------------------------------------------------------------------------------ 4 | # spatial_clustering_cv 5 | 6 | # `vec_restore()` 7 | # 8 | # Called at the end of `vec_slice()` and `vec_ptype()` after all slicing has 9 | # been done on the proxy object. 10 | # 11 | # If no changes have been made to the row/column structure of rset specific 12 | # columns, then we can return an rset subclass. Otherwise, the resulting 13 | # object is no longer guaranteed to return a valid rset, and we have to 14 | # fallback to a bare tibble. 15 | # 16 | # It is very important that the result of `vec_ptype()` is a bare tibble. 17 | # This ensures that the `vec_ptype2..()` methods 18 | # never get called. `vec_ptype()` is able to return a bare tibble because it 19 | # essentially takes a 0-row slice of the rset, and then calls `vec_restore()`. 20 | # Because the row structure has been modified, we return a bare tibble from 21 | # `vec_restore.()`. 22 | # 23 | # Currently `vec_restore()` uses inheritance in vctrs, but I don't expect this 24 | # to be the case in the future. For that reason, I use explicit methods for 25 | # each individual rset subclass, rather than implementing `vec_restore.rset()`. 26 | #' @export 27 | vec_restore.spatial_clustering_cv <- function(x, to, ...) { 28 | rsample::rset_reconstruct(x, to) 29 | } 30 | 31 | # `vec_ptype2()` 32 | # 33 | # The purpose of `vec_ptype2()` methods is generally to determine the type 34 | # of the output in operations like `vec_c()` or `vec_rbind()`. However, this 35 | # implementation does not use any custom `vec_ptype2()` methods at all. This is 36 | # explained below. 
37 | # 38 | # `vec_ptype2()` internally works by calling `vec_ptype()` on both `x` and `y`, 39 | # and then looking up the common type of those two ptypes. Generally, the 40 | # ptype of a vector `x` returned from `vec_ptype()` has the same class as `x`. 41 | # However, for rsample objects it makes more sense for the ptype of an rset to 42 | # be a bare tibble. The reason for this is because a ptype of a data frame is 43 | # generally a 0-row slice. However for rsample rsets this doesn't make sense 44 | # (you can't have 0 rows in a 10-fold cv rset), so instead we return a bare 45 | # tibble as the ptype. 46 | # So when `vec_ptype2()` is called on an rset, that rset is downgraded to a 47 | # bare tibble _before_ the search for a `vec_ptype2()` method occurs. This 48 | # means that it will never find a ptype2 method such as 49 | # `vec_ptype2.bootstraps.tbl_df()`, because the will become a 50 | # tbl_df first. This means that we rely entirely on the `tbl_df` ptype2 methods, 51 | # which are already implemented in vctrs to return another tbl_df. 52 | # 53 | # The implications of this are that whenever a rset subclass is combined with 54 | # another rset subclass or bare tibble, the resulting common type is always 55 | # another bare tibble. So if you `vec_c(, )` the result 56 | # will always be a tibble, never a bootstraps object. This makes sense because 57 | # you might be adding rows, which would invalidate the structure of the 58 | # bootstraps object. 59 | 60 | #' @export 61 | vec_ptype2.spatial_clustering_cv.spatial_clustering_cv <- function(x, y, ..., x_arg = "", y_arg = "") { 62 | stop_never_called("vec_ptype2.spatial_clustering_cv.spatial_clustering_cv") 63 | } 64 | #' @export 65 | vec_ptype2.spatial_clustering_cv.tbl_df <- function(x, y, ..., x_arg = "", y_arg = "") { 66 | stop_never_called("vec_ptype2.spatial_clustering_cv.tbl_df") 67 | } 68 | #' @export 69 | vec_ptype2.tbl_df.spatial_clustering_cv <- function(x, y, ..., x_arg = "", y_arg = "") { 70 | stop_never_called("vec_ptype2.tbl_df.spatial_clustering_cv") 71 | } 72 | #' @export 73 | vec_ptype2.spatial_clustering_cv.data.frame <- function(x, y, ..., x_arg = "", y_arg = "") { 74 | stop_never_called("vec_ptype2.spatial_clustering_cv.data.frame") 75 | } 76 | #' @export 77 | vec_ptype2.data.frame.spatial_clustering_cv <- function(x, y, ..., x_arg = "", y_arg = "") { 78 | stop_never_called("vec_ptype2.data.frame.spatial_clustering_cv") 79 | } 80 | 81 | # `vec_cast()` 82 | # 83 | # The `vec_cast()` methods for rset objects really only serve 1 purpose. They 84 | # cast an rset subclass to a tibble or data frame. The cast to tibble is most 85 | # useful. Most of the operations in vctrs work by finding a common type 86 | # with `vec_ptype2()`, and then casting all of the inputs to that common type. 87 | # Because `vec_ptype2()` returns a bare tibble anytime a rset-subclass is 88 | # involved, we will always be casting the rset subclass to a tibble. 89 | # The cast method uses `vctrs::tib_cast()`, which always returns a bare tibble 90 | # with all of the data in `x` cast to the type of `to`. 
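# Illustration (a hedged sketch, not package code): both cast directions for a
# spatial_clustering_cv object, matching the methods defined below. Assumes
# spatialsample, dplyr, vctrs, and tibble are installed.
library(spatialsample)

set.seed(123)
folds <- spatial_clustering_cv(boston_canopy, v = 5)

# Operations that need a common type fall back to a bare tibble:
class(dplyr::bind_rows(folds, folds))

# Casting a bare tibble back to the rset subclass is refused, because the
# attributes of a valid rset cannot be reconstructed from it:
try(vctrs::vec_cast(tibble::as_tibble(folds), to = folds))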
91 | 92 | #' @export 93 | vec_cast.spatial_clustering_cv.spatial_clustering_cv <- function(x, to, ..., x_arg = "", to_arg = "") { 94 | stop_incompatible_cast_rset(x, to, x_arg = x_arg, to_arg = to_arg) 95 | } 96 | #' @export 97 | vec_cast.spatial_clustering_cv.tbl_df <- function(x, to, ..., x_arg = "", to_arg = "") { 98 | stop_incompatible_cast_rset(x, to, x_arg = x_arg, to_arg = to_arg) 99 | } 100 | #' @export 101 | vec_cast.tbl_df.spatial_clustering_cv <- function(x, to, ..., x_arg = "", to_arg = "") { 102 | tib_cast(x, to, ..., x_arg = x_arg, to_arg = to_arg) 103 | } 104 | #' @export 105 | vec_cast.spatial_clustering_cv.data.frame <- function(x, to, ..., x_arg = "", to_arg = "") { 106 | stop_incompatible_cast_rset(x, to, x_arg = x_arg, to_arg = to_arg) 107 | } 108 | #' @export 109 | vec_cast.data.frame.spatial_clustering_cv <- function(x, to, ..., x_arg = "", to_arg = "") { 110 | df_cast(x, to, ..., x_arg = x_arg, to_arg = to_arg) 111 | } 112 | -------------------------------------------------------------------------------- /R/zzz-compat-vctrs-spatial_leave_location_out_cv.R: -------------------------------------------------------------------------------- 1 | # This file was generated, do not edit by hand 2 | # Please edit inst/generate_vctrs.R instead 3 | # ------------------------------------------------------------------------------ 4 | # spatial_leave_location_out_cv 5 | 6 | # `vec_restore()` 7 | # 8 | # Called at the end of `vec_slice()` and `vec_ptype()` after all slicing has 9 | # been done on the proxy object. 10 | # 11 | # If no changes have been made to the row/column structure of rset specific 12 | # columns, then we can return an rset subclass. Otherwise, the resulting 13 | # object is no longer guaranteed to return a valid rset, and we have to 14 | # fallback to a bare tibble. 15 | # 16 | # It is very important that the result of `vec_ptype()` is a bare tibble. 17 | # This ensures that the `vec_ptype2..()` methods 18 | # never get called. `vec_ptype()` is able to return a bare tibble because it 19 | # essentially takes a 0-row slice of the rset, and then calls `vec_restore()`. 20 | # Because the row structure has been modified, we return a bare tibble from 21 | # `vec_restore.()`. 22 | # 23 | # Currently `vec_restore()` uses inheritance in vctrs, but I don't expect this 24 | # to be the case in the future. For that reason, I use explicit methods for 25 | # each individual rset subclass, rather than implementing `vec_restore.rset()`. 26 | #' @export 27 | vec_restore.spatial_leave_location_out_cv <- function(x, to, ...) { 28 | rsample::rset_reconstruct(x, to) 29 | } 30 | 31 | # `vec_ptype2()` 32 | # 33 | # The purpose of `vec_ptype2()` methods is generally to determine the type 34 | # of the output in operations like `vec_c()` or `vec_rbind()`. However, this 35 | # implementation does not use any custom `vec_ptype2()` methods at all. This is 36 | # explained below. 37 | # 38 | # `vec_ptype2()` internally works by calling `vec_ptype()` on both `x` and `y`, 39 | # and then looking up the common type of those two ptypes. Generally, the 40 | # ptype of a vector `x` returned from `vec_ptype()` has the same class as `x`. 41 | # However, for rsample objects it makes more sense for the ptype of an rset to 42 | # be a bare tibble. The reason for this is because a ptype of a data frame is 43 | # generally a 0-row slice. However for rsample rsets this doesn't make sense 44 | # (you can't have 0 rows in a 10-fold cv rset), so instead we return a bare 45 | # tibble as the ptype. 
46 | # So when `vec_ptype2()` is called on an rset, that rset is downgraded to a 47 | # bare tibble _before_ the search for a `vec_ptype2()` method occurs. This 48 | # means that it will never find a ptype2 method such as 49 | # `vec_ptype2.bootstraps.tbl_df()`, because the will become a 50 | # tbl_df first. This means that we rely entirely on the `tbl_df` ptype2 methods, 51 | # which are already implemented in vctrs to return another tbl_df. 52 | # 53 | # The implications of this are that whenever a rset subclass is combined with 54 | # another rset subclass or bare tibble, the resulting common type is always 55 | # another bare tibble. So if you `vec_c(, )` the result 56 | # will always be a tibble, never a bootstraps object. This makes sense because 57 | # you might be adding rows, which would invalidate the structure of the 58 | # bootstraps object. 59 | 60 | #' @export 61 | vec_ptype2.spatial_leave_location_out_cv.spatial_leave_location_out_cv <- function(x, y, ..., x_arg = "", y_arg = "") { 62 | stop_never_called("vec_ptype2.spatial_leave_location_out_cv.spatial_leave_location_out_cv") 63 | } 64 | #' @export 65 | vec_ptype2.spatial_leave_location_out_cv.tbl_df <- function(x, y, ..., x_arg = "", y_arg = "") { 66 | stop_never_called("vec_ptype2.spatial_leave_location_out_cv.tbl_df") 67 | } 68 | #' @export 69 | vec_ptype2.tbl_df.spatial_leave_location_out_cv <- function(x, y, ..., x_arg = "", y_arg = "") { 70 | stop_never_called("vec_ptype2.tbl_df.spatial_leave_location_out_cv") 71 | } 72 | #' @export 73 | vec_ptype2.spatial_leave_location_out_cv.data.frame <- function(x, y, ..., x_arg = "", y_arg = "") { 74 | stop_never_called("vec_ptype2.spatial_leave_location_out_cv.data.frame") 75 | } 76 | #' @export 77 | vec_ptype2.data.frame.spatial_leave_location_out_cv <- function(x, y, ..., x_arg = "", y_arg = "") { 78 | stop_never_called("vec_ptype2.data.frame.spatial_leave_location_out_cv") 79 | } 80 | 81 | # `vec_cast()` 82 | # 83 | # The `vec_cast()` methods for rset objects really only serve 1 purpose. They 84 | # cast an rset subclass to a tibble or data frame. The cast to tibble is most 85 | # useful. Most of the operations in vctrs work by finding a common type 86 | # with `vec_ptype2()`, and then casting all of the inputs to that common type. 87 | # Because `vec_ptype2()` returns a bare tibble anytime a rset-subclass is 88 | # involved, we will always be casting the rset subclass to a tibble. 89 | # The cast method uses `vctrs::tib_cast()`, which always returns a bare tibble 90 | # with all of the data in `x` cast to the type of `to`. 
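# Illustration (a hedged sketch, not package code): the kind of object these
# methods operate on. The `region` column is invented purely for illustration;
# in real use it would be a meaningful location identifier, as in the
# tests/testthat helper shown earlier in this repository.
library(spatialsample)

boston <- boston_canopy
boston$region <- rep(c("north", "south", "east", "west"), length.out = nrow(boston))

llo <- spatial_leave_location_out_cv(boston, region)
class(llo)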
91 | 92 | #' @export 93 | vec_cast.spatial_leave_location_out_cv.spatial_leave_location_out_cv <- function(x, to, ..., x_arg = "", to_arg = "") { 94 | stop_incompatible_cast_rset(x, to, x_arg = x_arg, to_arg = to_arg) 95 | } 96 | #' @export 97 | vec_cast.spatial_leave_location_out_cv.tbl_df <- function(x, to, ..., x_arg = "", to_arg = "") { 98 | stop_incompatible_cast_rset(x, to, x_arg = x_arg, to_arg = to_arg) 99 | } 100 | #' @export 101 | vec_cast.tbl_df.spatial_leave_location_out_cv <- function(x, to, ..., x_arg = "", to_arg = "") { 102 | tib_cast(x, to, ..., x_arg = x_arg, to_arg = to_arg) 103 | } 104 | #' @export 105 | vec_cast.spatial_leave_location_out_cv.data.frame <- function(x, to, ..., x_arg = "", to_arg = "") { 106 | stop_incompatible_cast_rset(x, to, x_arg = x_arg, to_arg = to_arg) 107 | } 108 | #' @export 109 | vec_cast.data.frame.spatial_leave_location_out_cv <- function(x, to, ..., x_arg = "", to_arg = "") { 110 | df_cast(x, to, ..., x_arg = x_arg, to_arg = to_arg) 111 | } 112 | -------------------------------------------------------------------------------- /R/zzz-compat-vctrs-spatial_nndm_cv.R: -------------------------------------------------------------------------------- 1 | # This file was generated, do not edit by hand 2 | # Please edit inst/generate_vctrs.R instead 3 | # ------------------------------------------------------------------------------ 4 | # spatial_nndm_cv 5 | 6 | # `vec_restore()` 7 | # 8 | # Called at the end of `vec_slice()` and `vec_ptype()` after all slicing has 9 | # been done on the proxy object. 10 | # 11 | # If no changes have been made to the row/column structure of rset specific 12 | # columns, then we can return an rset subclass. Otherwise, the resulting 13 | # object is no longer guaranteed to return a valid rset, and we have to 14 | # fallback to a bare tibble. 15 | # 16 | # It is very important that the result of `vec_ptype()` is a bare tibble. 17 | # This ensures that the `vec_ptype2..()` methods 18 | # never get called. `vec_ptype()` is able to return a bare tibble because it 19 | # essentially takes a 0-row slice of the rset, and then calls `vec_restore()`. 20 | # Because the row structure has been modified, we return a bare tibble from 21 | # `vec_restore.()`. 22 | # 23 | # Currently `vec_restore()` uses inheritance in vctrs, but I don't expect this 24 | # to be the case in the future. For that reason, I use explicit methods for 25 | # each individual rset subclass, rather than implementing `vec_restore.rset()`. 26 | #' @export 27 | vec_restore.spatial_nndm_cv <- function(x, to, ...) { 28 | rsample::rset_reconstruct(x, to) 29 | } 30 | 31 | # `vec_ptype2()` 32 | # 33 | # The purpose of `vec_ptype2()` methods is generally to determine the type 34 | # of the output in operations like `vec_c()` or `vec_rbind()`. However, this 35 | # implementation does not use any custom `vec_ptype2()` methods at all. This is 36 | # explained below. 37 | # 38 | # `vec_ptype2()` internally works by calling `vec_ptype()` on both `x` and `y`, 39 | # and then looking up the common type of those two ptypes. Generally, the 40 | # ptype of a vector `x` returned from `vec_ptype()` has the same class as `x`. 41 | # However, for rsample objects it makes more sense for the ptype of an rset to 42 | # be a bare tibble. The reason for this is because a ptype of a data frame is 43 | # generally a 0-row slice. However for rsample rsets this doesn't make sense 44 | # (you can't have 0 rows in a 10-fold cv rset), so instead we return a bare 45 | # tibble as the ptype. 
46 | # So when `vec_ptype2()` is called on an rset, that rset is downgraded to a 47 | # bare tibble _before_ the search for a `vec_ptype2()` method occurs. This 48 | # means that it will never find a ptype2 method such as 49 | # `vec_ptype2.bootstraps.tbl_df()`, because the will become a 50 | # tbl_df first. This means that we rely entirely on the `tbl_df` ptype2 methods, 51 | # which are already implemented in vctrs to return another tbl_df. 52 | # 53 | # The implications of this are that whenever a rset subclass is combined with 54 | # another rset subclass or bare tibble, the resulting common type is always 55 | # another bare tibble. So if you `vec_c(, )` the result 56 | # will always be a tibble, never a bootstraps object. This makes sense because 57 | # you might be adding rows, which would invalidate the structure of the 58 | # bootstraps object. 59 | 60 | #' @export 61 | vec_ptype2.spatial_nndm_cv.spatial_nndm_cv <- function(x, y, ..., x_arg = "", y_arg = "") { 62 | stop_never_called("vec_ptype2.spatial_nndm_cv.spatial_nndm_cv") 63 | } 64 | #' @export 65 | vec_ptype2.spatial_nndm_cv.tbl_df <- function(x, y, ..., x_arg = "", y_arg = "") { 66 | stop_never_called("vec_ptype2.spatial_nndm_cv.tbl_df") 67 | } 68 | #' @export 69 | vec_ptype2.tbl_df.spatial_nndm_cv <- function(x, y, ..., x_arg = "", y_arg = "") { 70 | stop_never_called("vec_ptype2.tbl_df.spatial_nndm_cv") 71 | } 72 | #' @export 73 | vec_ptype2.spatial_nndm_cv.data.frame <- function(x, y, ..., x_arg = "", y_arg = "") { 74 | stop_never_called("vec_ptype2.spatial_nndm_cv.data.frame") 75 | } 76 | #' @export 77 | vec_ptype2.data.frame.spatial_nndm_cv <- function(x, y, ..., x_arg = "", y_arg = "") { 78 | stop_never_called("vec_ptype2.data.frame.spatial_nndm_cv") 79 | } 80 | 81 | # `vec_cast()` 82 | # 83 | # The `vec_cast()` methods for rset objects really only serve 1 purpose. They 84 | # cast an rset subclass to a tibble or data frame. The cast to tibble is most 85 | # useful. Most of the operations in vctrs work by finding a common type 86 | # with `vec_ptype2()`, and then casting all of the inputs to that common type. 87 | # Because `vec_ptype2()` returns a bare tibble anytime a rset-subclass is 88 | # involved, we will always be casting the rset subclass to a tibble. 89 | # The cast method uses `vctrs::tib_cast()`, which always returns a bare tibble 90 | # with all of the data in `x` cast to the type of `to`. 
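# Illustration (a hedged sketch, not package code): building the kind of
# spatial_nndm_cv object these methods apply to, mirroring the split used in
# tests/testthat/helper-rset.R (the first 500 hexagons as observed data, the
# remaining rows standing in for prediction sites).
library(spatialsample)

nndm <- spatial_nndm_cv(boston_canopy[1:500, ], boston_canopy[501:682, ])
class(nndm)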
91 | 92 | #' @export 93 | vec_cast.spatial_nndm_cv.spatial_nndm_cv <- function(x, to, ..., x_arg = "", to_arg = "") { 94 | stop_incompatible_cast_rset(x, to, x_arg = x_arg, to_arg = to_arg) 95 | } 96 | #' @export 97 | vec_cast.spatial_nndm_cv.tbl_df <- function(x, to, ..., x_arg = "", to_arg = "") { 98 | stop_incompatible_cast_rset(x, to, x_arg = x_arg, to_arg = to_arg) 99 | } 100 | #' @export 101 | vec_cast.tbl_df.spatial_nndm_cv <- function(x, to, ..., x_arg = "", to_arg = "") { 102 | tib_cast(x, to, ..., x_arg = x_arg, to_arg = to_arg) 103 | } 104 | #' @export 105 | vec_cast.spatial_nndm_cv.data.frame <- function(x, to, ..., x_arg = "", to_arg = "") { 106 | stop_incompatible_cast_rset(x, to, x_arg = x_arg, to_arg = to_arg) 107 | } 108 | #' @export 109 | vec_cast.data.frame.spatial_nndm_cv <- function(x, to, ..., x_arg = "", to_arg = "") { 110 | df_cast(x, to, ..., x_arg = x_arg, to_arg = to_arg) 111 | } 112 | -------------------------------------------------------------------------------- /R/zzz.R: -------------------------------------------------------------------------------- 1 | .onLoad <- function(libname, pkgname) { 2 | vctrs::s3_register("ggplot2::autoplot", "spatial_rset") 3 | } 4 | -------------------------------------------------------------------------------- /README.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | output: github_document 3 | --- 4 | 5 | 6 | 7 | ```{r, include = FALSE} 8 | knitr::opts_chunk$set( 9 | collapse = TRUE, 10 | comment = "#>", 11 | fig.path = "man/figures/README-", 12 | out.width = "100%" 13 | ) 14 | library(ggplot2) 15 | theme_set(theme_minimal()) 16 | ``` 17 | 18 | # spatialsample A hand-drawn map with orange roads, blue rivers, green trees, and brown mountains on a beige background 19 | 20 | 21 | [![R-CMD-check](https://github.com/tidymodels/spatialsample/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/tidymodels/spatialsample/actions/workflows/R-CMD-check.yaml) 22 | [![CRAN status](https://www.r-pkg.org/badges/version/spatialsample)](https://CRAN.R-project.org/package=spatialsample) 23 | [![Codecov test coverage](https://codecov.io/gh/tidymodels/spatialsample/branch/main/graph/badge.svg)](https://app.codecov.io/gh/tidymodels/spatialsample?branch=main) 24 | [![Lifecycle: experimental](https://img.shields.io/badge/lifecycle-experimental-orange.svg)](https://lifecycle.r-lib.org/articles/stages.html) 25 | 26 | 27 | ## Introduction 28 | 29 | The goal of spatialsample is to provide functions and classes for spatial resampling to use with [rsample](https://rsample.tidymodels.org/), including: 30 | 31 | - [spatial clustering cross-validation](https://doi.org/10.1109/IGARSS.2012.6352393) 32 | - [spatial block cross-validation](https://doi.org/10.1111/ecog.02881) 33 | - [spatially buffered cross-validation](https://doi.org/10.1111/geb.12161) 34 | - [leave-location-out cross-validation](https://doi.org/10.1016/j.envsoft.2017.12.001) 35 | 36 | Like [rsample](https://rsample.tidymodels.org/), spatialsample provides building blocks for creating and analyzing resamples of a spatial data set but does not include code for modeling or computing statistics. The resampled data sets created by spatialsample are efficient and do not have much memory overhead. 
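The memory-overhead claim above can be checked informally. The sketch below is illustrative only and assumes the lobstr package is installed (it is not a spatialsample dependency); exact sizes will vary by machine.

``` r
library(spatialsample)

set.seed(1234)
folds <- spatial_clustering_cv(boston_canopy, v = 5)

lobstr::obj_size(boston_canopy)
# The rset is only modestly larger than the data itself: each split stores
# integer indices and shares one copy of the underlying data frame.
lobstr::obj_size(folds)
```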
37 | 38 | ## Installation 39 | 40 | You can install the released version of spatialsample from [CRAN](https://CRAN.R-project.org) with: 41 | 42 | ``` r 43 | install.packages("spatialsample") 44 | ``` 45 | 46 | And the development version from [GitHub](https://github.com/) with: 47 | 48 | ``` r 49 | # install.packages("pak") 50 | pak::pak("tidymodels/spatialsample") 51 | ``` 52 | ## Example 53 | 54 | The most straightforward spatial resampling strategy is `spatial_clustering_cv()`, which uses k-means clustering to identify cross-validation folds: 55 | 56 | ```{r} 57 | library(spatialsample) 58 | 59 | set.seed(1234) 60 | folds <- spatial_clustering_cv(boston_canopy, v = 5) 61 | 62 | folds 63 | ``` 64 | 65 | In this example, the `boston_canopy` data on tree cover in Boston, MA is resampled with `v = 5`; notice that the resulting partitions do not contain an equal number of observations. 66 | 67 | In addition to resampling algorithms, spatialsample provides methods to visualize resamples using [ggplot2](https://ggplot2.tidyverse.org/) through the `autoplot()` function: 68 | 69 | ```{r 2022-06-12_boston_static, fig.width=7, fig.height=5} 70 | autoplot(folds) 71 | ``` 72 | 73 | We can use the same function to visualize each fold separately: 74 | 75 | ```{r 2022-06-12_boston-anim, animation.hook="gifski", fig.width=7, fig.height=5} 76 | library(purrr) 77 | 78 | walk(folds$splits, function(x) print(autoplot(x))) 79 | ``` 80 | 81 | So far, we've only scratched the surface of the functionality spatialsample provides. For more information, check out the [Getting Started](https://spatialsample.tidymodels.org/articles/spatialsample.html) documentation! 82 | 83 | ## Contributing 84 | 85 | This project is released with a [Contributor Code of Conduct](https://contributor-covenant.org/version/2/1/CODE_OF_CONDUCT.html). By contributing to this project, you agree to abide by its terms. 86 | 87 | - For questions and discussions about tidymodels packages, modeling, and machine learning, please [post on RStudio Community](https://forum.posit.co/new-topic?category_id=15&tags=tidymodels,question). 88 | 89 | - If you think you have encountered a bug, please [submit an issue](https://github.com/tidymodels/rules/issues). 90 | 91 | - Either way, learn how to create and share a [reprex](https://reprex.tidyverse.org/articles/articles/learn-reprex.html) (a minimal, reproducible example), to clearly communicate about your code. 92 | 93 | - Check out further details on [contributing guidelines for tidymodels packages](https://www.tidymodels.org/contribute/) and [how to get help](https://www.tidymodels.org/help/). 
94 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | # spatialsample A hand-drawn map with orange roads, blue rivers, green trees, and brown mountains on a beige background 5 | 6 | 7 | 8 | [![R-CMD-check](https://github.com/tidymodels/spatialsample/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/tidymodels/spatialsample/actions/workflows/R-CMD-check.yaml) 9 | [![CRAN 10 | status](https://www.r-pkg.org/badges/version/spatialsample)](https://CRAN.R-project.org/package=spatialsample) 11 | [![Codecov test 12 | coverage](https://codecov.io/gh/tidymodels/spatialsample/branch/main/graph/badge.svg)](https://app.codecov.io/gh/tidymodels/spatialsample?branch=main) 13 | [![Lifecycle: 14 | experimental](https://img.shields.io/badge/lifecycle-experimental-orange.svg)](https://lifecycle.r-lib.org/articles/stages.html) 15 | 16 | 17 | ## Introduction 18 | 19 | The goal of spatialsample is to provide functions and classes for 20 | spatial resampling to use with 21 | [rsample](https://rsample.tidymodels.org/), including: 22 | 23 | - [spatial clustering 24 | cross-validation](https://doi.org/10.1109/IGARSS.2012.6352393) 25 | - [spatial block cross-validation](https://doi.org/10.1111/ecog.02881) 26 | - [spatially buffered 27 | cross-validation](https://doi.org/10.1111/geb.12161) 28 | - [leave-location-out 29 | cross-validation](https://doi.org/10.1016/j.envsoft.2017.12.001) 30 | 31 | Like [rsample](https://rsample.tidymodels.org/), spatialsample provides 32 | building blocks for creating and analyzing resamples of a spatial data 33 | set but does not include code for modeling or computing statistics. The 34 | resampled data sets created by spatialsample are efficient and do not 35 | have much memory overhead. 36 | 37 | ## Installation 38 | 39 | You can install the released version of spatialsample from 40 | [CRAN](https://CRAN.R-project.org) with: 41 | 42 | ``` r 43 | install.packages("spatialsample") 44 | ``` 45 | 46 | And the development version from [GitHub](https://github.com/) with: 47 | 48 | ``` r 49 | # install.packages("pak") 50 | pak::pak("tidymodels/spatialsample") 51 | ``` 52 | 53 | ## Example 54 | 55 | The most straightforward spatial resampling strategy is 56 | `spatial_clustering_cv()`, which uses k-means clustering to identify 57 | cross-validation folds: 58 | 59 | ``` r 60 | library(spatialsample) 61 | 62 | set.seed(1234) 63 | folds <- spatial_clustering_cv(boston_canopy, v = 5) 64 | 65 | folds 66 | #> # 5-fold spatial cross-validation 67 | #> # A tibble: 5 × 2 68 | #> splits id 69 | #> 70 | #> 1 Fold1 71 | #> 2 Fold2 72 | #> 3 Fold3 73 | #> 4 Fold4 74 | #> 5 Fold5 75 | ``` 76 | 77 | In this example, the `boston_canopy` data on tree cover in Boston, MA is 78 | resampled with `v = 5`; notice that the resulting partitions do not 79 | contain an equal number of observations. 80 | 81 | In addition to resampling algorithms, spatialsample provides methods to 82 | visualize resamples using [ggplot2](https://ggplot2.tidyverse.org/) 83 | through the `autoplot()` function: 84 | 85 | ``` r 86 | autoplot(folds) 87 | ``` 88 | 89 | 90 | 91 | We can use the same function to visualize each fold separately: 92 | 93 | ``` r 94 | library(purrr) 95 | 96 | walk(folds$splits, function(x) print(autoplot(x))) 97 | ``` 98 | 99 | 100 | 101 | So far, we’ve only scratched the surface of the functionality 102 | spatialsample provides. 
For more information, check out the [Getting 103 | Started](https://spatialsample.tidymodels.org/articles/spatialsample.html) 104 | documentation! 105 | 106 | ## Contributing 107 | 108 | This project is released with a [Contributor Code of 109 | Conduct](https://contributor-covenant.org/version/2/1/CODE_OF_CONDUCT.html). 110 | By contributing to this project, you agree to abide by its terms. 111 | 112 | - For questions and discussions about tidymodels packages, modeling, and 113 | machine learning, please [post on RStudio 114 | Community](https://forum.posit.co/new-topic?category_id=15&tags=tidymodels,question). 115 | 116 | - If you think you have encountered a bug, please [submit an 117 | issue](https://github.com/tidymodels/rules/issues). 118 | 119 | - Either way, learn how to create and share a 120 | [reprex](https://reprex.tidyverse.org/articles/articles/learn-reprex.html) 121 | (a minimal, reproducible example), to clearly communicate about your 122 | code. 123 | 124 | - Check out further details on [contributing guidelines for tidymodels 125 | packages](https://www.tidymodels.org/contribute/) and [how to get 126 | help](https://www.tidymodels.org/help/). 127 | -------------------------------------------------------------------------------- /_pkgdown.yml: -------------------------------------------------------------------------------- 1 | url: https://spatialsample.tidymodels.org 2 | 3 | template: 4 | package: tidytemplate 5 | bootstrap: 5 6 | bslib: 7 | danger: "#CA225E" 8 | primary: "#CA225E" 9 | includes: 10 | in_header: | 11 | 12 | 13 | authors: 14 | Michael Mahoney: 15 | href: "https://www.mm218.dev/" 16 | 17 | development: 18 | mode: auto 19 | 20 | figures: 21 | fig.width: 8 22 | fig.height: ~ 23 | 24 | reference: 25 | - title: Resampling Methods 26 | contents: 27 | - spatial_clustering_cv 28 | - spatial_block_cv 29 | - spatial_buffer_vfold_cv 30 | - spatial_leave_location_out_cv 31 | - spatial_nndm_cv 32 | - title: Utilities 33 | contents: 34 | - reexports 35 | - autoplot.spatial_rset 36 | - title: Data 37 | contents: 38 | - boston_canopy 39 | -------------------------------------------------------------------------------- /codecov.yml: -------------------------------------------------------------------------------- 1 | comment: false 2 | 3 | coverage: 4 | status: 5 | project: 6 | default: 7 | target: auto 8 | threshold: 1% 9 | informational: true 10 | patch: 11 | default: 12 | target: auto 13 | threshold: 1% 14 | informational: true 15 | -------------------------------------------------------------------------------- /cran-comments.md: -------------------------------------------------------------------------------- 1 | ## Release Summary 2 | 3 | This is the 8th CRAN release of spatialsample. This release fixes a bug in 4 | how `spatial_nndm_cv()` samples within a single polygon. 5 | 6 | ## R CMD check results 7 | 8 | 0 errors | 0 warnings | 0 notes 9 | 10 | ## revdepcheck results 11 | 12 | We checked 2 reverse dependencies, comparing R CMD check results across CRAN and dev versions of this package. 
13 | 14 | * We saw 0 new problems 15 | * We failed to check 0 packages 16 | 17 | -------------------------------------------------------------------------------- /data-raw/boston_canopy.R: -------------------------------------------------------------------------------- 1 | ## code to prepare `boston_canopy` dataset goes here 2 | working_dir <- file.path(tempdir(), "boston_canopy") 3 | if (!dir.exists(working_dir)) dir.create(working_dir) 4 | 5 | download.file( 6 | "https://bostonopendata-boston.opendata.arcgis.com/datasets/boston::hex-tree-canopy-change-metrics.zip?outSR=%7B%22latestWkid%22%3A2249%2C%22wkid%22%3A102686%7D", 7 | file.path(working_dir, "canopy_metrics.zip") 8 | ) 9 | 10 | unzip( 11 | file.path(working_dir, "canopy_metrics.zip"), 12 | exdir = working_dir 13 | ) 14 | 15 | download.file( 16 | "https://bostonopendata-boston.opendata.arcgis.com/datasets/boston::hex-mean-heat-index.zip?outSR=%7B%22latestWkid%22%3A2249%2C%22wkid%22%3A102686%7D", 17 | file.path(working_dir, "heat_metrics.zip") 18 | ) 19 | 20 | unzip( 21 | file.path(working_dir, "heat_metrics.zip"), 22 | exdir = working_dir 23 | ) 24 | 25 | boston_canopy <- sf::read_sf( 26 | file.path( 27 | working_dir, 28 | "Canopy_Change_Assessment%3A_Tree_Canopy_Change_Metrics.shp" 29 | ) 30 | ) 31 | 32 | canopy_metrics <- c( 33 | "grid_id" = "GRID_ID", 34 | "land_area" = "LandArea", 35 | "canopy_gain" = "Gain", 36 | "canopy_loss" = "Loss", 37 | "canopy_no_change" = "No_Change", 38 | "canopy_area_2014" = "TreeCanopy", 39 | "canopy_area_2019" = "TreeCano_1", 40 | "change_canopy_area" = "Change_Are", 41 | "change_canopy_percentage" = "Change_Per", 42 | "canopy_percentage_2014" = "TreeCano_2", 43 | "canopy_percentage_2019" = "TreeCano_3", 44 | "change_canopy_absolute" = "Change_P_1", 45 | "geometry" = "geometry" 46 | ) 47 | 48 | boston_canopy <- boston_canopy[canopy_metrics] 49 | names(boston_canopy) <- names(canopy_metrics) 50 | 51 | heat <- sf::read_sf( 52 | file.path( 53 | working_dir, 54 | "Canopy_Change_Assessment%3A_Heat_Metrics.shp" 55 | ) 56 | ) 57 | 58 | heat_metrics <- c( 59 | "mean_temp_morning" = "Mean_am_T_", 60 | "mean_temp_evening" = "Mean_ev_T_", 61 | "mean_temp" = "Mean_p2_T_", 62 | "mean_heat_index_morning" = "Mean_am_HI", 63 | "mean_heat_index_evening" = "Mean_ev_HI", 64 | "mean_heat_index" = "Mean_p2_HI", 65 | "geometry" = "geometry" 66 | ) 67 | 68 | heat <- heat[heat_metrics] 69 | names(heat) <- names(heat_metrics) 70 | 71 | boston_canopy <- sf::st_join(boston_canopy, heat, sf::st_within, left = FALSE) 72 | boston_canopy <- dplyr::relocate(boston_canopy, geometry, .after = everything()) 73 | 74 | usethis::use_data(boston_canopy, overwrite = TRUE, internal = TRUE) 75 | unlink(working_dir, TRUE) 76 | -------------------------------------------------------------------------------- /data/boston_canopy.R: -------------------------------------------------------------------------------- 1 | delayedAssign("boston_canopy", local({ 2 | requireNamespace("sf", quietly = TRUE) 3 | spatialsample:::boston_canopy 4 | })) 5 | -------------------------------------------------------------------------------- /inst/CITATION: -------------------------------------------------------------------------------- 1 | bibentry( 2 | bibtype = "Misc", 3 | title="Assessing the performance of spatial cross-validation approaches for models of spatially structured data", 4 | author="Michael J Mahoney and Lucas K Johnson and Julia Silge and Hannah Frick and Max Kuhn and Colin M Beier", 5 | year="2023", 6 | eprint="2303.07334", 7 | 
archivePrefix="arXiv", 8 | primaryClass="stat.CO", 9 | doi = "10.48550/arXiv.2303.07334", 10 | url = "https://arxiv.org/abs/2303.07334", 11 | textVersion = "Mahoney M. J., Johnson, L. K., Silge, J., Frick, H., Kuhn, M., and Beier C. M. (2023). Assessing the performance of spatial cross-validation approaches for models of spatially structured data. arXiv. https://doi.org/10.48550/arXiv.2303.07334", 12 | mheader = "To cite spatialsample in publications please use:") 13 | -------------------------------------------------------------------------------- /inst/generate_vctrs.R: -------------------------------------------------------------------------------- 1 | # This file generates helpers for compatibility with vctrs 2 | # and is not a part of the rsample package proper 3 | 4 | devtools::load_all() 5 | read_utf8 <- function(x) base::readLines(x, encoding = "UTF-8", warn = FALSE) 6 | 7 | template <- read_utf8("inst/vctrs_template.R") 8 | 9 | for (name in names(rset_subclasses)) { 10 | generated_template <- whisker::whisker.render(template) 11 | generated_template <- c( 12 | "# This file was generated, do not edit by hand", 13 | "# Please edit inst/generate_vctrs.R instead", 14 | generated_template 15 | ) 16 | writeLines( 17 | generated_template, 18 | file.path("R", paste0("zzz-compat-vctrs-", name, ".R")) 19 | ) 20 | } 21 | -------------------------------------------------------------------------------- /inst/vctrs_template.R: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # {{{name}}} 3 | 4 | # `vec_restore()` 5 | # 6 | # Called at the end of `vec_slice()` and `vec_ptype()` after all slicing has 7 | # been done on the proxy object. 8 | # 9 | # If no changes have been made to the row/column structure of rset specific 10 | # columns, then we can return an rset subclass. Otherwise, the resulting 11 | # object is no longer guaranteed to return a valid rset, and we have to 12 | # fallback to a bare tibble. 13 | # 14 | # It is very important that the result of `vec_ptype()` is a bare tibble. 15 | # This ensures that the `vec_ptype2..()` methods 16 | # never get called. `vec_ptype()` is able to return a bare tibble because it 17 | # essentially takes a 0-row slice of the rset, and then calls `vec_restore()`. 18 | # Because the row structure has been modified, we return a bare tibble from 19 | # `vec_restore.()`. 20 | # 21 | # Currently `vec_restore()` uses inheritance in vctrs, but I don't expect this 22 | # to be the case in the future. For that reason, I use explicit methods for 23 | # each individual rset subclass, rather than implementing `vec_restore.rset()`. 24 | #' @export 25 | vec_restore.{{{name}}} <- function(x, to, ...) { 26 | rsample::rset_reconstruct(x, to) 27 | } 28 | 29 | # `vec_ptype2()` 30 | # 31 | # The purpose of `vec_ptype2()` methods is generally to determine the type 32 | # of the output in operations like `vec_c()` or `vec_rbind()`. However, this 33 | # implementation does not use any custom `vec_ptype2()` methods at all. This is 34 | # explained below. 35 | # 36 | # `vec_ptype2()` internally works by calling `vec_ptype()` on both `x` and `y`, 37 | # and then looking up the common type of those two ptypes. Generally, the 38 | # ptype of a vector `x` returned from `vec_ptype()` has the same class as `x`. 39 | # However, for rsample objects it makes more sense for the ptype of an rset to 40 | # be a bare tibble. 
The reason for this is that a ptype of a data frame is 41 | # generally a 0-row slice. However, for rsample rsets this doesn't make sense 42 | # (you can't have 0 rows in a 10-fold cv rset), so instead we return a bare 43 | # tibble as the ptype. 44 | # So when `vec_ptype2()` is called on an rset, that rset is downgraded to a 45 | # bare tibble _before_ the search for a `vec_ptype2()` method occurs. This 46 | # means that it will never find a ptype2 method such as 47 | # `vec_ptype2.bootstraps.tbl_df()`, because the bootstraps object will become a 48 | # tbl_df first. This means that we rely entirely on the `tbl_df` ptype2 methods, 49 | # which are already implemented in vctrs to return another tbl_df. 50 | # 51 | # The implications of this are that whenever an rset subclass is combined with 52 | # another rset subclass or bare tibble, the resulting common type is always 53 | # another bare tibble. So if you `vec_c(<bootstraps>, <bootstraps>)` the result 54 | # will always be a tibble, never a bootstraps object. This makes sense because 55 | # you might be adding rows, which would invalidate the structure of the 56 | # bootstraps object. 57 | 58 | #' @export 59 | vec_ptype2.{{{name}}}.{{{name}}} <- function(x, y, ..., x_arg = "", y_arg = "") { 60 | stop_never_called("vec_ptype2.{{{name}}}.{{{name}}}") 61 | } 62 | #' @export 63 | vec_ptype2.{{{name}}}.tbl_df <- function(x, y, ..., x_arg = "", y_arg = "") { 64 | stop_never_called("vec_ptype2.{{{name}}}.tbl_df") 65 | } 66 | #' @export 67 | vec_ptype2.tbl_df.{{{name}}} <- function(x, y, ..., x_arg = "", y_arg = "") { 68 | stop_never_called("vec_ptype2.tbl_df.{{{name}}}") 69 | } 70 | #' @export 71 | vec_ptype2.{{{name}}}.data.frame <- function(x, y, ..., x_arg = "", y_arg = "") { 72 | stop_never_called("vec_ptype2.{{{name}}}.data.frame") 73 | } 74 | #' @export 75 | vec_ptype2.data.frame.{{{name}}} <- function(x, y, ..., x_arg = "", y_arg = "") { 76 | stop_never_called("vec_ptype2.data.frame.{{{name}}}") 77 | } 78 | 79 | # `vec_cast()` 80 | # 81 | # The `vec_cast()` methods for rset objects really only serve one purpose. They 82 | # cast an rset subclass to a tibble or data frame. The cast to tibble is most 83 | # useful. Most of the operations in vctrs work by finding a common type 84 | # with `vec_ptype2()`, and then casting all of the inputs to that common type. 85 | # Because `vec_ptype2()` returns a bare tibble any time an rset subclass is 86 | # involved, we will always be casting the rset subclass to a tibble. 87 | # The cast method uses `vctrs::tib_cast()`, which always returns a bare tibble 88 | # with all of the data in `x` cast to the type of `to`.
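# As a rough illustration of the net effect of the methods described above (a
# sketch; `folds` here is a hypothetical rset created with `spatial_block_cv()`,
# and the printed output is shown informally):
#
#   folds <- spatialsample::spatial_block_cv(boston_canopy, v = 5)
#   class(folds)
#   #> "spatial_block_cv" "spatial_rset" "rset" "tbl_df" "tbl" "data.frame"
#   class(vctrs::vec_rbind(folds, folds))
#   #> "tbl_df" "tbl" "data.frame"
#
# Combining two rsets changes the row structure, so the result is no longer
# guaranteed to be a valid rset and vctrs falls back to a bare tibble.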
89 | 90 | #' @export 91 | vec_cast.{{{name}}}.{{{name}}} <- function(x, to, ..., x_arg = "", to_arg = "") { 92 | stop_incompatible_cast_rset(x, to, x_arg = x_arg, to_arg = to_arg) 93 | } 94 | #' @export 95 | vec_cast.{{{name}}}.tbl_df <- function(x, to, ..., x_arg = "", to_arg = "") { 96 | stop_incompatible_cast_rset(x, to, x_arg = x_arg, to_arg = to_arg) 97 | } 98 | #' @export 99 | vec_cast.tbl_df.{{{name}}} <- function(x, to, ..., x_arg = "", to_arg = "") { 100 | tib_cast(x, to, ..., x_arg = x_arg, to_arg = to_arg) 101 | } 102 | #' @export 103 | vec_cast.{{{name}}}.data.frame <- function(x, to, ..., x_arg = "", to_arg = "") { 104 | stop_incompatible_cast_rset(x, to, x_arg = x_arg, to_arg = to_arg) 105 | } 106 | #' @export 107 | vec_cast.data.frame.{{{name}}} <- function(x, to, ..., x_arg = "", to_arg = "") { 108 | df_cast(x, to, ..., x_arg = x_arg, to_arg = to_arg) 109 | } 110 | -------------------------------------------------------------------------------- /man/autoplot.spatial_rset.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/autoplot.R 3 | \name{autoplot.spatial_rset} 4 | \alias{autoplot.spatial_rset} 5 | \alias{autoplot.spatial_block_cv} 6 | \title{Create a ggplot for spatial resamples.} 7 | \usage{ 8 | \method{autoplot}{spatial_rset}(object, ..., alpha = 0.6) 9 | 10 | \method{autoplot}{spatial_block_cv}(object, show_grid = TRUE, ..., alpha = 0.6) 11 | } 12 | \arguments{ 13 | \item{object}{A \code{spatial_rset} object or a \code{spatial_rsplit} object. 14 | Note that only resamples made from 15 | \code{sf} objects create \code{spatial_rset} and \code{spatial_rsplit} objects; 16 | this function will not work for 17 | resamples made with non-spatial tibbles or data.frames.} 18 | 19 | \item{...}{Options passed to \code{\link[ggplot2:ggsf]{ggplot2::geom_sf()}}.} 20 | 21 | \item{alpha}{Opacity, passed to \code{\link[ggplot2:ggsf]{ggplot2::geom_sf()}}. 22 | Values of alpha range from 0 to 1, with lower values corresponding to more 23 | transparent colors.} 24 | 25 | \item{show_grid}{When plotting \link{spatial_block_cv} objects, should the grid 26 | itself be drawn on top of the data? Set to FALSE to remove the grid.} 27 | } 28 | \value{ 29 | A ggplot object with each fold assigned a color, made using 30 | \code{\link[ggplot2:ggsf]{ggplot2::geom_sf()}}. 31 | } 32 | \description{ 33 | This method provides a good visualization method for spatial resampling. 34 | } 35 | \details{ 36 | The plot method for \code{spatial_rset} displays which fold each observation 37 | is assigned to. Note that if data is assigned to multiple folds 38 | (which is common if resamples were created with a non-zero \code{radius}) only 39 | the "last" fold for each observation will appear on the plot. 40 | Consider adding \code{ggplot2::facet_wrap(~ fold)} to visualize all members of 41 | each fold separately. 42 | Alternatively, consider plotting each split using the \code{spatial_rsplit} method 43 | (for example, via \code{lapply(object$splits, autoplot)}). 
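For example, a minimal sketch of the faceting approach suggested above (this
assumes, as in that suggestion, that the fold identifier is available to
\code{ggplot2::facet_wrap()} as \code{fold}):

\preformatted{boston_block <- spatial_block_cv(boston_canopy, v = 2)
autoplot(boston_block) + ggplot2::facet_wrap(~ fold)
}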
44 | } 45 | \examples{ 46 | 47 | boston_block <- spatial_block_cv(boston_canopy, v = 2) 48 | autoplot(boston_block) 49 | autoplot(boston_block$splits[[1]]) 50 | 51 | } 52 | -------------------------------------------------------------------------------- /man/boston_canopy.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data.R 3 | \docType{data} 4 | \name{boston_canopy} 5 | \alias{boston_canopy} 6 | \title{Boston tree canopy and heat index data.} 7 | \format{ 8 | A data frame (of class \code{sf}, \code{tbl_df}, \code{tbl}, and \code{data.frame}) 9 | containing 682 records of 22 variables: 10 | \describe{ 11 | \item{grid_id}{Unique identifier for each hexagon. Letters represent the hexagon's X position in the grid (ordered West to East), while numbers represent the Y position (ordered North to South).} 12 | \item{land_area}{Area excluding water bodies} 13 | \item{canopy_gain}{Area of canopy gain between the two years} 14 | \item{canopy_loss}{Area of canopy loss between the two years} 15 | \item{canopy_no_change}{Area of no canopy change between the two years} 16 | \item{canopy_area_2014}{2014 total canopy area (baseline)} 17 | \item{canopy_area_2019}{2019 total canopy area} 18 | \item{change_canopy_area}{The change in area of tree canopy between the two years} 19 | \item{change_canopy_percentage}{Relative change calculation used in economics is the gain or loss of tree canopy relative to the earlier time period: (2019 Canopy-2014 Canopy)/(2014 Canopy)} 20 | \item{canopy_percentage_2014}{2014 canopy percentage} 21 | \item{canopy_percentage_2019}{2019 canopy percentage} 22 | \item{change_canopy_absolute}{Absolute change. Magnitude of change in percent tree canopy from 2014 to 2019 (\% 2019 Canopy - \% 2014 Canopy)} 23 | \item{mean_temp_morning}{Mean temperature for July 2019 from 6am - 7am} 24 | \item{mean_temp_evening}{Mean temperature for July 2019 from 7pm - 8pm} 25 | \item{mean_temp}{Mean temperature for July 2019 from 6am - 7am, 3pm - 4pm, and 7pm - 8pm (combined)} 26 | \item{mean_heat_index_morning}{Mean heat index for July 2019 from 6am - 7am} 27 | \item{mean_heat_index_evening}{Mean heat index for July 2019 from 7pm - 8pm} 28 | \item{mean_heat_index}{Mean heat index for July 2019 from 6am - 7am, 3pm - 4pm, and 7pm - 8pm (combined)} 29 | \item{geometry}{Geometry of each hexagon, encoded using EPSG:2249 as a coordinate reference system (NAD83 / Massachusetts Mainland (ftUS)). Note that the linear units of this CRS are in US feet.} 30 | } 31 | } 32 | \source{ 33 | Canopy data is from \url{https://data.boston.gov/dataset/hex-tree-canopy-change-metrics}. 34 | Heat data is from \url{https://data.boston.gov/dataset/hex-mean-heat-index}. 35 | Most field definitions are from \url{https://data.boston.gov/dataset/canopy-change-assessment-data-dictionary}. 36 | } 37 | \usage{ 38 | boston_canopy 39 | } 40 | \description{ 41 | A dataset containing data on tree canopy coverage and change for the city of 42 | Boston, Massachusetts from 2014-2019, 43 | as well as temperature and heat index data for July 2019. Data is aggregated 44 | to a grid of regular 25 hectare hexagons, clipped to city boundaries. 45 | This data is made available under the Public Domain Dedication and License 46 | v1.0 whose full text can be found at: 47 | \url{https://opendatacommons.org/licenses/pddl/1-0/}. 
48 | } 49 | \details{ 50 | Note that this dataset is in the EPSG:2249 51 | (NAD83 / Massachusetts Mainland (ftUS)) coordinate reference system (CRS), 52 | which may not be installed by default on your computer. Before working with 53 | \code{boston_canopy}, run: 54 | \itemize{ 55 | \item \code{sf::sf_proj_network(TRUE)} to install the CRS itself 56 | \item \code{\link[sf:sf_project]{sf::sf_add_proj_units()}} to add US customary units to your units 57 | database 58 | } 59 | 60 | These steps only need to be taken once per computer (or per PROJ installation). 61 | } 62 | \keyword{datasets} 63 | -------------------------------------------------------------------------------- /man/buffer_indices.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/buffer.R 3 | \name{buffer_indices} 4 | \alias{buffer_indices} 5 | \title{Apply an inclusion radius and exclusion buffer to indices} 6 | \usage{ 7 | buffer_indices(data, indices, radius, buffer, call = rlang::caller_env()) 8 | } 9 | \arguments{ 10 | \item{data}{An object of class \code{sf} or \code{sfc}.} 11 | 12 | \item{indices}{List of indices in each fold generated by \code{split_unnamed()}.} 13 | 14 | \item{radius}{Numeric: points within this distance of the initially-selected 15 | test points will be assigned to the assessment set. If \code{NULL}, no radius is 16 | applied.} 17 | 18 | \item{buffer}{Numeric: points within this distance of any point in the 19 | test set (after \code{radius} is applied) will be assigned to neither the analysis 20 | nor the assessment set. If \code{NULL}, no buffer is applied.} 21 | } 22 | \description{ 23 | Apply an inclusion radius and exclusion buffer to indices 24 | } 25 | \keyword{internal} 26 | -------------------------------------------------------------------------------- /man/check_v.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/checks.R 3 | \name{check_v} 4 | \alias{check_v} 5 | \title{Check that "v" is sensible} 6 | \usage{ 7 | check_v(v, max_v, objects, allow_max_v = TRUE, call = rlang::caller_env()) 8 | } 9 | \arguments{ 10 | \item{v}{The number of partitions for the resampling.
Set to \code{NULL} or \code{Inf} 11 | for the maximum sensible value (for leave-one-X-out cross-validation).} 12 | } 13 | \description{ 14 | Check that "v" is sensible 15 | } 16 | \keyword{internal} 17 | -------------------------------------------------------------------------------- /man/figures/README-2022-06-12_boston-anim-.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tidymodels/spatialsample/ded16914d80c5bb118d8a61ffd93e07be5aad93e/man/figures/README-2022-06-12_boston-anim-.gif -------------------------------------------------------------------------------- /man/figures/README-2022-06-12_boston_static-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tidymodels/spatialsample/ded16914d80c5bb118d8a61ffd93e07be5aad93e/man/figures/README-2022-06-12_boston_static-1.png -------------------------------------------------------------------------------- /man/figures/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tidymodels/spatialsample/ded16914d80c5bb118d8a61ffd93e07be5aad93e/man/figures/logo.png -------------------------------------------------------------------------------- /man/reexports.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/spatialsample-package.R 3 | \docType{import} 4 | \name{reexports} 5 | \alias{reexports} 6 | \alias{analysis} 7 | \alias{assessment} 8 | \alias{get_rsplit} 9 | \alias{autoplot} 10 | \title{Objects exported from other packages} 11 | \keyword{internal} 12 | \description{ 13 | These objects are imported from other packages. Follow the links 14 | below to see their documentation. 15 | 16 | \describe{ 17 | \item{ggplot2}{\code{\link[ggplot2]{autoplot}}} 18 | 19 | \item{rsample}{\code{\link[rsample:as.data.frame.rsplit]{analysis}}, \code{\link[rsample:as.data.frame.rsplit]{assessment}}, \code{\link[rsample]{get_rsplit}}} 20 | }} 21 | 22 | -------------------------------------------------------------------------------- /man/spatial_block_cv.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/spatial_block_cv.R 3 | \name{spatial_block_cv} 4 | \alias{spatial_block_cv} 5 | \title{Spatial block cross-validation} 6 | \usage{ 7 | spatial_block_cv( 8 | data, 9 | method = c("random", "snake", "continuous"), 10 | v = 10, 11 | relevant_only = TRUE, 12 | radius = NULL, 13 | buffer = NULL, 14 | ..., 15 | repeats = 1, 16 | expand_bbox = 1e-05 17 | ) 18 | } 19 | \arguments{ 20 | \item{data}{An object of class \code{sf} or \code{sfc}.} 21 | 22 | \item{method}{The method used to sample blocks for cross validation folds. 23 | Currently supports \code{"random"}, which randomly assigns blocks to folds, 24 | \code{"snake"}, which labels the first row of blocks from left to right, 25 | then the next from right to left, and repeats from there, 26 | and \code{"continuous"}, which labels each row from left 27 | to right, moving from the bottom row up.} 28 | 29 | \item{v}{The number of partitions for the resampling. 
29 | Set to \code{NULL} or \code{Inf} 30 | for the maximum sensible value (for leave-one-X-out cross-validation).} 31 | 32 | \item{relevant_only}{For systematic sampling, should only blocks containing 33 | data be included in fold labeling?} 34 | 35 | \item{radius}{Numeric: points within this distance of the initially-selected 36 | test points will be assigned to the assessment set. If \code{NULL}, no radius is 37 | applied.} 38 | 39 | \item{buffer}{Numeric: points within this distance of any point in the 40 | test set (after \code{radius} is applied) will be assigned to neither the analysis 41 | nor the assessment set. If \code{NULL}, no buffer is applied.} 42 | 43 | \item{...}{Arguments passed to \code{\link[sf:st_make_grid]{sf::st_make_grid()}}.} 44 | 45 | \item{repeats}{The number of times to repeat the V-fold partitioning.} 46 | 47 | \item{expand_bbox}{A numeric of length 1, representing a proportion to expand 48 | the bounding box of \code{data} by before building a grid. Without this expansion, 49 | grids built from data in geographic coordinates may exclude observations and 50 | grids built from regularly spaced data might have observations fall exactly 51 | on the boundary between folds, duplicating them. In spatialsample < 0.5.0, 52 | this was 0.00001 for data in a geographic CRS and 0 for data in a planar CRS. 53 | In spatialsample >= 0.5.0, this is 0.00001 for all data.} 54 | } 55 | \value{ 56 | A tibble with classes \code{spatial_block_cv}, \code{spatial_rset}, \code{rset}, 57 | \code{tbl_df}, \code{tbl}, and \code{data.frame}. The results include a column for the 58 | data split objects and an identification variable \code{id}. 59 | } 60 | \description{ 61 | Block cross-validation splits the area of your data into a number of 62 | grid cells, or "blocks", and then assigns all data into folds based on the 63 | blocks their centroids fall into. 64 | } 65 | \details{ 66 | The grid blocks can be controlled by passing arguments to 67 | \code{\link[sf:st_make_grid]{sf::st_make_grid()}} via \code{...}. Some particularly useful arguments include: 68 | \itemize{ 69 | \item \code{cellsize}: Target cellsize, expressed as the "diameter" (shortest 70 | straight-line distance between opposing sides; two times the apothem) 71 | of each block, in map units. 72 | \item \code{n}: The number of grid blocks in the x and y direction (columns, rows). 73 | \item \code{square}: A logical value indicating whether to create square (\code{TRUE}) or 74 | hexagonal (\code{FALSE}) cells. 75 | } 76 | 77 | If both \code{cellsize} and \code{n} are provided, then the number of blocks requested 78 | by \code{n} of sizes specified by \code{cellsize} will be returned, likely not 79 | lining up with the bounding box of \code{data}. If only \code{cellsize} 80 | is provided, this function will return as many blocks of size 81 | \code{cellsize} as fit inside the bounding box of \code{data}. If only \code{n} is provided, 82 | then \code{cellsize} will be automatically adjusted to create the requested 83 | number of cells. 84 | } 85 | \examples{ 86 | 87 | spatial_block_cv(boston_canopy, v = 3) 88 | 89 | } 90 | \references{ 91 | D. R. Roberts, V. Bahn, S. Ciuti, M. S. Boyce, J. Elith, G. Guillera-Arroita, 92 | S. Hauenstein, J. J. Lahoz-Monfort, B. Schröder, W. Thuiller, D. I. Warton, 93 | B. A. Wintle, F. Hartig, and C. F. Dormann. "Cross-validation strategies for 94 | data with temporal, spatial, hierarchical, or phylogenetic structure," 2016, 95 | Ecography 40(8), pp. 913-929, doi: 10.1111/ecog.02881.
96 | } 97 | -------------------------------------------------------------------------------- /man/spatial_clustering_cv.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/spatial_clustering_cv.R 3 | \name{spatial_clustering_cv} 4 | \alias{spatial_clustering_cv} 5 | \title{Spatial Clustering Cross-Validation} 6 | \usage{ 7 | spatial_clustering_cv( 8 | data, 9 | v = 10, 10 | cluster_function = c("kmeans", "hclust"), 11 | radius = NULL, 12 | buffer = NULL, 13 | ..., 14 | repeats = 1, 15 | distance_function = function(x) as.dist(sf::st_distance(x)) 16 | ) 17 | } 18 | \arguments{ 19 | \item{data}{An \code{sf} object (often from \code{\link[sf:st_read]{sf::read_sf()}} 20 | or \code{\link[sf:st_as_sf]{sf::st_as_sf()}}) to split into folds.} 21 | 22 | \item{v}{The number of partitions of the data set.} 23 | 24 | \item{cluster_function}{Which function should be used for clustering? 25 | Options are either \code{"kmeans"} (to use \code{\link[stats:kmeans]{stats::kmeans()}}) 26 | or \code{"hclust"} (to use \code{\link[stats:hclust]{stats::hclust()}}). You can also provide your own 27 | function; see \code{Details}.} 28 | 29 | \item{radius}{Numeric: points within this distance of the initially-selected 30 | test points will be assigned to the assessment set. If \code{NULL}, no radius is 31 | applied.} 32 | 33 | \item{buffer}{Numeric: points within this distance of any point in the 34 | test set (after \code{radius} is applied) will be assigned to neither the analysis 35 | nor the assessment set. If \code{NULL}, no buffer is applied.} 36 | 37 | \item{...}{Extra arguments passed on to \code{\link[stats:kmeans]{stats::kmeans()}} or 38 | \code{\link[stats:hclust]{stats::hclust()}}.} 39 | 40 | \item{repeats}{The number of times to repeat the clustered partitioning.} 41 | 42 | \item{distance_function}{Which function should be used for distance 43 | calculations? Defaults to \code{\link[sf:geos_measures]{sf::st_distance()}}, with the output matrix 44 | converted to a \code{\link[stats:dist]{stats::dist()}} object. You can also provide your own 45 | function; see Details.} 46 | } 47 | \value{ 48 | A tibble with classes \code{spatial_clustering_cv}, \code{spatial_rset}, 49 | \code{rset}, \code{tbl_df}, \code{tbl}, and \code{data.frame}. 50 | The results include a column for the data split objects and 51 | an identification variable \code{id}. 52 | Resamples created from non-\code{sf} objects will not have the 53 | \code{spatial_rset} class. 54 | } 55 | \description{ 56 | Spatial clustering cross-validation splits the data into V groups of 57 | disjoint sets by clustering points based on their spatial coordinates. 58 | A resample of the analysis data consists of V-1 of the folds/clusters 59 | while the assessment set contains the final fold/cluster. 60 | } 61 | \details{ 62 | Clusters are created based on the distances between observations 63 | if \code{data} is an \code{sf} object. Each cluster is used as a fold for 64 | cross-validation. Depending on how the data are distributed spatially, there 65 | may not be an equal number of observations in each fold. 66 | 67 | You can optionally provide a custom function to \code{distance_function}. The 68 | function should take an \code{sf} object and return a \code{\link[stats:dist]{stats::dist()}} object with 69 | distances between data points. 70 | 71 | You can optionally provide a custom function to \code{cluster_function}.
The 72 | function must take three arguments: 73 | \itemize{ 74 | \item \code{dists}, a \code{\link[stats:dist]{stats::dist()}} object with distances between data points 75 | \item \code{v}, a length-1 numeric for the number of folds to create 76 | \item \code{...}, to pass any additional named arguments to your function 77 | } 78 | 79 | The function should return a vector of cluster assignments of length 80 | \code{nrow(data)}, with each element of the vector corresponding to the matching 81 | row of the data frame. 82 | } 83 | \section{Changes in spatialsample 0.3.0}{ 84 | 85 | As of spatialsample version 0.3.0, this function no longer accepts non-\code{sf} 86 | objects as arguments to \code{data}. In order to perform clustering with 87 | non-spatial data, consider using \code{\link[rsample:clustering_cv]{rsample::clustering_cv()}}. 88 | 89 | Also as of version 0.3.0, this function now calculates edge-to-edge distance 90 | for non-point geometries, in line with the rest of the package. Earlier 91 | versions relied upon between-centroid distances. 92 | } 93 | 94 | \examples{ 95 | \dontshow{if (rlang::is_installed("modeldata")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} 96 | data(Smithsonian, package = "modeldata") 97 | 98 | smithsonian_sf <- sf::st_as_sf( 99 | Smithsonian, 100 | coords = c("longitude", "latitude"), 101 | # Set CRS to WGS84 102 | crs = 4326 103 | ) 104 | 105 | # When providing sf objects, coords are inferred automatically 106 | spatial_clustering_cv(smithsonian_sf, v = 5) 107 | 108 | # Can use hclust instead: 109 | spatial_clustering_cv(smithsonian_sf, v = 5, cluster_function = "hclust") 110 | \dontshow{\}) # examplesIf} 111 | } 112 | \references{ 113 | A. Brenning, "Spatial cross-validation and bootstrap for the assessment of 114 | prediction rules in remote sensing: The R package sperrorest," 2012 IEEE 115 | International Geoscience and Remote Sensing Symposium, Munich, 2012, 116 | pp. 5372-5375, doi: 10.1109/IGARSS.2012.6352393. 117 | } 118 | -------------------------------------------------------------------------------- /man/spatial_nndm_cv.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/spatial_nndm_cv.R 3 | \name{spatial_nndm_cv} 4 | \alias{spatial_nndm_cv} 5 | \title{Nearest neighbor distance matching (NNDM) cross-validation} 6 | \usage{ 7 | spatial_nndm_cv( 8 | data, 9 | prediction_sites, 10 | ..., 11 | autocorrelation_range = NULL, 12 | prediction_sample_size = 1000, 13 | min_analysis_proportion = 0.5 14 | ) 15 | } 16 | \arguments{ 17 | \item{data}{An object of class \code{sf} or \code{sfc}.} 18 | 19 | \item{prediction_sites}{An \code{sf} or \code{sfc} object describing the areas to be 20 | predicted. If \code{prediction_sites} are all points, then those points are 21 | treated as the intended prediction points when calculating target nearest 22 | neighbor distances. If \code{prediction_sites} is a single (multi-)polygon, then 23 | points are sampled from within the boundaries of that polygon. Otherwise, 24 | if \code{prediction_sites} is of length > 1 and not made up of points, 25 | then points are sampled from within the bounding box of \code{prediction_sites} 26 | and used as the intended prediction points.} 27 | 28 | \item{...}{Additional arguments passed to \code{\link[sf:st_sample]{sf::st_sample()}}. 
Note that the 29 | number of points to sample is controlled by \code{prediction_sample_size}; trying 30 | to pass \code{size} via \code{...} will cause an error.} 31 | 32 | \item{autocorrelation_range}{A numeric of length 1 representing the landscape 33 | autocorrelation range ("phi" in the terminology of Milà et al. (2022)). If 34 | \code{NULL}, the default, the autocorrelation range is assumed to be the distance 35 | between the opposite corners of the bounding box of \code{prediction_sites}.} 36 | 37 | \item{prediction_sample_size}{A numeric of length 1: the number of points to 38 | sample when \code{prediction_sites} is not only composed of points. Note that this 39 | argument is passed to \code{size} in \code{\link[sf:st_sample]{sf::st_sample()}}, meaning that no elements 40 | of \code{...} can be named \code{size}.} 41 | 42 | \item{min_analysis_proportion}{The minimum proportion of \code{data} that must 43 | remain after removing points to match nearest neighbor distances. This 44 | function will stop removing data from analysis sets once only 45 | \code{min_analysis_proportion} of the original data remains in analysis sets, even 46 | if the nearest neighbor distances between analysis and assessment sets are 47 | still lower than those between training and prediction locations.} 48 | } 49 | \value{ 50 | A tibble with classes \code{spatial_nndm_cv}, \code{spatial_rset}, \code{rset}, 51 | \code{tbl_df}, \code{tbl}, and \code{data.frame}. The results include a column for the 52 | data split objects and an identification variable \code{id}. 53 | } 54 | \description{ 55 | NNDM is a variant of leave-one-out cross-validation which assigns each 56 | observation to a single assessment fold, and then attempts to remove data 57 | from each analysis fold until the nearest neighbor distance distribution 58 | between assessment and analysis folds matches the nearest neighbor distance 59 | distribution between training data and the locations a model will be used to 60 | predict. 61 | Proposed by Milà et al. (2022), this method aims to provide accurate 62 | estimates of how well models will perform in the locations they will actually 63 | be predicting. This method was originally implemented in the CAST package. 64 | } 65 | \details{ 66 | Note that, as a form of leave-one-out cross-validation, this method can be 67 | rather slow for larger data (and fitting models to these resamples will be 68 | even slower). 69 | } 70 | \examples{ 71 | \dontshow{if (rlang::is_installed("modeldata")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} 72 | data(ames, package = "modeldata") 73 | ames_sf <- sf::st_as_sf(ames, coords = c("Longitude", "Latitude"), crs = 4326) 74 | 75 | # Using a small subset of the data, to make the example run faster: 76 | spatial_nndm_cv(ames_sf[1:100, ], ames_sf[2001:2100, ]) 77 | \dontshow{\}) # examplesIf} 78 | } 79 | \references{ 80 | C. Milà, J. Mateu, E. Pebesma, and H. Meyer. 2022. "Nearest Neighbour 81 | Distance Matching Leave-One-Out Cross-Validation for map validation." Methods 82 | in Ecology and Evolution 2022:13, pp 1304– 1316. 83 | doi: 10.1111/2041-210X.13851. 84 | 85 | H. Meyer and E. Pebesma. 2022. "Machine learning-based global maps of 86 | ecological variables and the challenge of assessing them." 87 | Nature Communications 13, pp 2208. doi: 10.1038/s41467-022-29838-9. 
88 | } 89 | -------------------------------------------------------------------------------- /man/spatial_vfold.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/spatial_vfold_cv.R 3 | \name{spatial_buffer_vfold_cv} 4 | \alias{spatial_buffer_vfold_cv} 5 | \alias{spatial_leave_location_out_cv} 6 | \title{V-Fold Cross-Validation with Buffering} 7 | \usage{ 8 | spatial_buffer_vfold_cv( 9 | data, 10 | radius, 11 | buffer, 12 | v = 10, 13 | repeats = 1, 14 | strata = NULL, 15 | breaks = 4, 16 | pool = 0.1, 17 | ... 18 | ) 19 | 20 | spatial_leave_location_out_cv( 21 | data, 22 | group, 23 | v = NULL, 24 | radius = NULL, 25 | buffer = NULL, 26 | ..., 27 | repeats = 1 28 | ) 29 | } 30 | \arguments{ 31 | \item{data}{A data frame.} 32 | 33 | \item{radius}{Numeric: points within this distance of the initially-selected 34 | test points will be assigned to the assessment set. If \code{NULL}, no radius is 35 | applied.} 36 | 37 | \item{buffer}{Numeric: points within this distance of any point in the 38 | test set (after \code{radius} is applied) will be assigned to neither the analysis 39 | nor the assessment set. If \code{NULL}, no buffer is applied.} 40 | 41 | \item{v}{The number of partitions for the resampling. Set to \code{NULL} or \code{Inf} 42 | for the maximum sensible value (for leave-one-X-out cross-validation).} 43 | 44 | \item{repeats}{The number of times to repeat the V-fold partitioning.} 45 | 46 | \item{strata}{A variable in \code{data} (single character or name) used to conduct 47 | stratified sampling. When not \code{NULL}, each resample is created within the 48 | stratification variable. Numeric \code{strata} are binned into quartiles.} 49 | 50 | \item{breaks}{A single number giving the number of bins desired to stratify a 51 | numeric stratification variable.} 52 | 53 | \item{pool}{A proportion of data used to determine if a particular group is 54 | too small and should be pooled into another group. We do not recommend 55 | decreasing this argument below its default of 0.1 because of the dangers 56 | of stratifying groups that are too small.} 57 | 58 | \item{...}{These dots are for future extensions and must be empty.} 59 | 60 | \item{group}{A variable in \code{data} (single character or name) used to create 61 | folds. For leave-location-out CV, this should be a variable containing 62 | the locations to group observations by, for leave-time-out CV the 63 | time blocks to group by, and for leave-location-and-time-out the 64 | spatiotemporal blocks to group by.} 65 | } 66 | \description{ 67 | V-fold cross-validation (also known as k-fold cross-validation) randomly 68 | splits the data into V groups of roughly equal size (called "folds"). 69 | A resample of the analysis data consists of V-1 of the folds while the 70 | assessment set contains the final fold. 71 | These functions extend \code{\link[rsample:vfold_cv]{rsample::vfold_cv()}} and \code{\link[rsample:group_vfold_cv]{rsample::group_vfold_cv()}} 72 | to also apply an inclusion radius and exclusion buffer to the assessment set, 73 | ensuring that your analysis data is spatially separated from the assessment 74 | set. 75 | In basic V-fold cross-validation (i.e. no repeats), the number of resamples 76 | is equal to V.
77 | } 78 | \details{ 79 | When \code{radius} and \code{buffer} are both \code{NULL}, \code{spatial_buffer_vfold_cv} 80 | is equivalent to \code{\link[rsample:vfold_cv]{rsample::vfold_cv()}} and \code{spatial_leave_location_out_cv} 81 | is equivalent to \code{\link[rsample:group_vfold_cv]{rsample::group_vfold_cv()}}. 82 | } 83 | \examples{ 84 | \dontshow{if (sf::sf_use_s2() && rlang::is_installed("modeldata")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} 85 | 86 | data(Smithsonian, package = "modeldata") 87 | Smithsonian_sf <- sf::st_as_sf( 88 | Smithsonian, 89 | coords = c("longitude", "latitude"), 90 | crs = 4326 91 | ) 92 | 93 | spatial_buffer_vfold_cv( 94 | Smithsonian_sf, 95 | buffer = 500, 96 | radius = NULL 97 | ) 98 | 99 | data(ames, package = "modeldata") 100 | ames_sf <- sf::st_as_sf(ames, coords = c("Longitude", "Latitude"), crs = 4326) 101 | ames_neighborhoods <- spatial_leave_location_out_cv(ames_sf, Neighborhood) 102 | \dontshow{\}) # examplesIf} 103 | } 104 | \references{ 105 | K. Le Rest, D. Pinaud, P. Monestiez, J. Chadoeuf, and C. Bretagnolle. 2014. 106 | "Spatial leave-one-out cross-validation for variable selection in the 107 | presence of spatial autocorrelation," Global Ecology and Biogeography 23, 108 | pp. 811-820, doi: 10.1111/geb.12161. 109 | 110 | H. Meyer, C. Reudenbach, T. Hengl, M. Katurji, and T. Nauss. 2018. 111 | "Improving performance of spatio-temporal machine learning models using 112 | forward feature selection and target-oriented validation," 113 | Environmental Modelling & Software 101, pp. 1-9, 114 | doi: 10.1016/j.envsoft.2017.12.001. 115 | } 116 | -------------------------------------------------------------------------------- /man/spatialsample-package.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/spatialsample-package.R 3 | \docType{package} 4 | \name{spatialsample-package} 5 | \alias{spatialsample} 6 | \alias{spatialsample-package} 7 | \title{spatialsample: Spatial Resampling Infrastructure} 8 | \description{ 9 | \if{html}{\figure{logo.png}{options: style='float: right' alt='logo' width='120'}} 10 | 11 | Functions and classes for spatial resampling to use with the 'rsample' package, such as spatial cross-validation (Brenning, 2012) \doi{10.1109/IGARSS.2012.6352393}. The scope of 'rsample' and 'spatialsample' is to provide the basic building blocks for creating and analyzing resamples of a spatial data set, but neither package includes functions for modeling or computing statistics. The resampled spatial data sets created by 'spatialsample' do not contain much overhead in memory. 
12 | } 13 | \seealso{ 14 | Useful links: 15 | \itemize{ 16 | \item \url{https://github.com/tidymodels/spatialsample} 17 | \item \url{https://spatialsample.tidymodels.org} 18 | \item Report bugs at \url{https://github.com/tidymodels/spatialsample/issues} 19 | } 20 | 21 | } 22 | \author{ 23 | \strong{Maintainer}: Michael Mahoney \email{mike.mahoney.218@gmail.com} (\href{https://orcid.org/0000-0003-2402-304X}{ORCID}) 24 | 25 | Authors: 26 | \itemize{ 27 | \item Julia Silge \email{julia.silge@posit.co} (\href{https://orcid.org/0000-0002-3671-836X}{ORCID}) 28 | } 29 | 30 | Other contributors: 31 | \itemize{ 32 | \item Posit Software, PBC [copyright holder, funder] 33 | } 34 | 35 | } 36 | \keyword{internal} 37 | -------------------------------------------------------------------------------- /revdep/.gitignore: -------------------------------------------------------------------------------- 1 | checks 2 | library 3 | checks.noindex 4 | library.noindex 5 | cloud.noindex 6 | data.sqlite 7 | *.html 8 | -------------------------------------------------------------------------------- /revdep/README.md: -------------------------------------------------------------------------------- 1 | # Platform 2 | 3 | |field |value | 4 | |:--------|:------------------------------------------| 5 | |version |R version 4.4.1 (2024-06-14) | 6 | |os |macOS Sonoma 14.6.1 | 7 | |system |aarch64, darwin20 | 8 | |ui |X11 | 9 | |language |(EN) | 10 | |collate |en_US.UTF-8 | 11 | |ctype |en_US.UTF-8 | 12 | |tz |America/New_York | 13 | |date |2024-10-02 | 14 | |pandoc |3.2.1 @ /opt/homebrew/bin/ (via rmarkdown) | 15 | 16 | # Dependencies 17 | 18 | |package |old |new |Δ | 19 | |:-------------|:-----|:----------|:--| 20 | |spatialsample |0.5.1 |0.5.1.9000 |* | 21 | 22 | # Revdeps 23 | 24 | -------------------------------------------------------------------------------- /revdep/cran.md: -------------------------------------------------------------------------------- 1 | ## revdepcheck results 2 | 3 | We checked 2 reverse dependencies, comparing R CMD check results across CRAN and dev versions of this package. 4 | 5 | * We saw 0 new problems 6 | * We failed to check 0 packages 7 | 8 | -------------------------------------------------------------------------------- /revdep/email.yml: -------------------------------------------------------------------------------- 1 | release_date: ??? 2 | rel_release_date: ??? 3 | my_news_url: ??? 4 | release_version: ??? 5 | release_details: ??? 6 | -------------------------------------------------------------------------------- /revdep/failures.md: -------------------------------------------------------------------------------- 1 | *Wow, no problems at all. :)* -------------------------------------------------------------------------------- /revdep/problems.md: -------------------------------------------------------------------------------- 1 | *Wow, no problems at all. 
:)* -------------------------------------------------------------------------------- /spatialsample.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: No 4 | SaveWorkspace: No 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | 15 | AutoAppendNewline: Yes 16 | StripTrailingWhitespace: Yes 17 | LineEndingConversion: Posix 18 | 19 | BuildType: Package 20 | PackageUseDevtools: Yes 21 | PackageInstallArgs: --no-multiarch --with-keep.source 22 | PackageRoxygenize: rd,collate,namespace 23 | -------------------------------------------------------------------------------- /src/.gitignore: -------------------------------------------------------------------------------- 1 | *.o 2 | *.so 3 | *.dll 4 | -------------------------------------------------------------------------------- /src/code.cpp: -------------------------------------------------------------------------------- 1 | #include <cpp11.hpp> 2 | #include <vector> 3 | using namespace cpp11; 4 | 5 | [[cpp11::register]] 6 | cpp11::writable::integers which_within_dist(doubles_matrix<> distmat, doubles idx, double dist) { 7 | 8 | int n_idx = idx.size(); 9 | int n_matrix = distmat.ncol(); 10 | std::vector<bool> comparisons(n_matrix); 11 | int cur_row; 12 | 13 | for (int i = 0; i < n_idx; i++) { 14 | cur_row = idx[i] - 1; 15 | for (int j = 0; j < n_matrix; j++) { 16 | if (distmat(cur_row, j) <= dist) { 17 | comparisons[j] = true; 18 | } 19 | } 20 | } 21 | 22 | auto n_pos = std::count(comparisons.begin(), comparisons.end(), true); 23 | std::vector<int> out(n_pos); 24 | int cur_idx = 0; 25 | for (int i = 0; i < n_matrix; i++) { 26 | if (comparisons[i]) { 27 | out[cur_idx] = i + 1; 28 | ++cur_idx; 29 | } 30 | } 31 | 32 | return out; 33 | 34 | } 35 | -------------------------------------------------------------------------------- /src/cpp11.cpp: -------------------------------------------------------------------------------- 1 | // Generated by cpp11: do not edit by hand 2 | // clang-format off 3 | 4 | 5 | #include "cpp11/declarations.hpp" 6 | #include <R_ext/Visibility.h> 7 | 8 | // code.cpp 9 | cpp11::writable::integers which_within_dist(doubles_matrix<> distmat, doubles idx, double dist); 10 | extern "C" SEXP _spatialsample_which_within_dist(SEXP distmat, SEXP idx, SEXP dist) { 11 | BEGIN_CPP11 12 | return cpp11::as_sexp(which_within_dist(cpp11::as_cpp<cpp11::decay_t<doubles_matrix<>>>(distmat), cpp11::as_cpp<cpp11::decay_t<doubles>>(idx), cpp11::as_cpp<cpp11::decay_t<double>>(dist))); 13 | END_CPP11 14 | } 15 | 16 | extern "C" { 17 | static const R_CallMethodDef CallEntries[] = { 18 | {"_spatialsample_which_within_dist", (DL_FUNC) &_spatialsample_which_within_dist, 3}, 19 | {NULL, NULL, 0} 20 | }; 21 | } 22 | 23 | extern "C" attribute_visible void R_init_spatialsample(DllInfo* dll){ 24 | R_registerRoutines(dll, NULL, CallEntries, NULL, NULL); 25 | R_useDynamicSymbols(dll, FALSE); 26 | R_forceSymbols(dll, TRUE); 27 | } 28 | -------------------------------------------------------------------------------- /tests/testthat.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(spatialsample) 3 | 4 | sf::sf_extSoftVersion() 5 | 6 | test_check("spatialsample") 7 | -------------------------------------------------------------------------------- /tests/testthat/_snaps/buffer.md: -------------------------------------------------------------------------------- 1 | # bad args 2 | 3 | Code 4 | buffer_indices(ames_sf) 5 |
Condition 6 | Error: 7 | ! Buffering can only process geographic coordinates when using the s2 geometry library. 8 | i Reproject your data into a projected coordinate reference system using `sf::st_transform()`. 9 | i Or install the `s2` package and enable it using `sf::sf_use_s2(TRUE)`. 10 | 11 | --- 12 | 13 | Code 14 | spatial_clustering_cv(ames_sf, buffer = 0.01) 15 | Output 16 | # 10-fold spatial cross-validation 17 | # A tibble: 10 x 2 18 | splits id 19 | 20 | 1 Fold01 21 | 2 Fold02 22 | 3 Fold03 23 | 4 Fold04 24 | 5 Fold05 25 | 6 Fold06 26 | 7 Fold07 27 | 8 Fold08 28 | 9 Fold09 29 | 10 Fold10 30 | 31 | # using buffers 32 | 33 | Code 34 | spatial_clustering_cv(ames_sf, v = 2, radius = 500, buffer = 500) 35 | Output 36 | # 2-fold spatial cross-validation 37 | # A tibble: 2 x 2 38 | splits id 39 | 40 | 1 Fold1 41 | 2 Fold2 42 | 43 | --- 44 | 45 | Code 46 | spatial_block_cv(boston_canopy, v = 2, method = "snake", radius = 500, buffer = 500) 47 | Output 48 | # 2-fold spatial block cross-validation 49 | # A tibble: 2 x 2 50 | splits id 51 | 52 | 1 Fold1 53 | 2 Fold2 54 | 55 | --- 56 | 57 | Code 58 | spatial_buffer_vfold_cv(boston_canopy, v = 682, radius = 500, buffer = 500) 59 | Output 60 | # 682-fold spatial cross-validation 61 | # A tibble: 682 x 2 62 | splits id 63 | 64 | 1 Fold001 65 | 2 Fold002 66 | 3 Fold003 67 | 4 Fold004 68 | 5 Fold005 69 | 6 Fold006 70 | 7 Fold007 71 | 8 Fold008 72 | 9 Fold009 73 | 10 Fold010 74 | # i 672 more rows 75 | 76 | --- 77 | 78 | Code 79 | spatial_leave_location_out_cv(ames_sf, Neighborhood, v = 682, radius = 500, 80 | buffer = 500) 81 | Condition 82 | Warning in `spatial_leave_location_out_cv()`: 83 | Fewer than 682 locations available for sampling 84 | i Setting `v` to 28 85 | Output 86 | # 28-fold spatial leave-location-out cross-validation 87 | # A tibble: 28 x 2 88 | splits id 89 | 90 | 1 Resample01 91 | 2 Resample02 92 | 3 Resample03 93 | 4 Resample04 94 | 5 Resample05 95 | 6 Resample06 96 | 7 Resample07 97 | 8 Resample08 98 | 9 Resample09 99 | 10 Resample10 100 | # i 18 more rows 101 | 102 | --- 103 | 104 | Code 105 | spatial_block_cv(ames_sf, v = 2, method = "random", radius = 500, buffer = 500) 106 | Output 107 | # 2-fold spatial block cross-validation 108 | # A tibble: 2 x 2 109 | splits id 110 | 111 | 1 Fold1 112 | 2 Fold2 113 | 114 | -------------------------------------------------------------------------------- /tests/testthat/_snaps/compat-vctrs.md: -------------------------------------------------------------------------------- 1 | # vec_cbind() returns a bare tibble 2 | 3 | Code 4 | expect_identical(vec_cbind(x, x), vec_cbind(tbl, tbl)) 5 | Message 6 | New names: 7 | * `splits` -> `splits...1` 8 | * `id` -> `id...2` 9 | * `splits` -> `splits...3` 10 | * `id` -> `id...4` 11 | New names: 12 | * `splits` -> `splits...1` 13 | * `id` -> `id...2` 14 | * `splits` -> `splits...3` 15 | * `id` -> `id...4` 16 | 17 | --- 18 | 19 | Code 20 | expect_identical(vec_cbind(x, tbl), vec_cbind(tbl, tbl)) 21 | Message 22 | New names: 23 | * `splits` -> `splits...1` 24 | * `id` -> `id...2` 25 | * `splits` -> `splits...3` 26 | * `id` -> `id...4` 27 | New names: 28 | * `splits` -> `splits...1` 29 | * `id` -> `id...2` 30 | * `splits` -> `splits...3` 31 | * `id` -> `id...4` 32 | 33 | --- 34 | 35 | Code 36 | expect_s3_class_bare_tibble(vec_cbind(x, x)) 37 | Message 38 | New names: 39 | * `splits` -> `splits...1` 40 | * `id` -> `id...2` 41 | * `splits` -> `splits...3` 42 | * `id` -> `id...4` 43 | 44 | --- 45 | 46 | Code 47 | expect_identical(vec_cbind(x, x), 
vec_cbind(tbl, tbl)) 48 | Message 49 | New names: 50 | * `splits` -> `splits...1` 51 | * `id` -> `id...2` 52 | * `splits` -> `splits...3` 53 | * `id` -> `id...4` 54 | New names: 55 | * `splits` -> `splits...1` 56 | * `id` -> `id...2` 57 | * `splits` -> `splits...3` 58 | * `id` -> `id...4` 59 | 60 | --- 61 | 62 | Code 63 | expect_identical(vec_cbind(x, tbl), vec_cbind(tbl, tbl)) 64 | Message 65 | New names: 66 | * `splits` -> `splits...1` 67 | * `id` -> `id...2` 68 | * `splits` -> `splits...3` 69 | * `id` -> `id...4` 70 | New names: 71 | * `splits` -> `splits...1` 72 | * `id` -> `id...2` 73 | * `splits` -> `splits...3` 74 | * `id` -> `id...4` 75 | 76 | --- 77 | 78 | Code 79 | expect_s3_class_bare_tibble(vec_cbind(x, x)) 80 | Message 81 | New names: 82 | * `splits` -> `splits...1` 83 | * `id` -> `id...2` 84 | * `splits` -> `splits...3` 85 | * `id` -> `id...4` 86 | 87 | --- 88 | 89 | Code 90 | expect_identical(vec_cbind(x, x), vec_cbind(tbl, tbl)) 91 | Message 92 | New names: 93 | * `splits` -> `splits...1` 94 | * `id` -> `id...2` 95 | * `splits` -> `splits...3` 96 | * `id` -> `id...4` 97 | New names: 98 | * `splits` -> `splits...1` 99 | * `id` -> `id...2` 100 | * `splits` -> `splits...3` 101 | * `id` -> `id...4` 102 | 103 | --- 104 | 105 | Code 106 | expect_identical(vec_cbind(x, tbl), vec_cbind(tbl, tbl)) 107 | Message 108 | New names: 109 | * `splits` -> `splits...1` 110 | * `id` -> `id...2` 111 | * `splits` -> `splits...3` 112 | * `id` -> `id...4` 113 | New names: 114 | * `splits` -> `splits...1` 115 | * `id` -> `id...2` 116 | * `splits` -> `splits...3` 117 | * `id` -> `id...4` 118 | 119 | --- 120 | 121 | Code 122 | expect_s3_class_bare_tibble(vec_cbind(x, x)) 123 | Message 124 | New names: 125 | * `splits` -> `splits...1` 126 | * `id` -> `id...2` 127 | * `splits` -> `splits...3` 128 | * `id` -> `id...4` 129 | 130 | --- 131 | 132 | Code 133 | expect_identical(vec_cbind(x, x), vec_cbind(tbl, tbl)) 134 | Message 135 | New names: 136 | * `splits` -> `splits...1` 137 | * `id` -> `id...2` 138 | * `splits` -> `splits...3` 139 | * `id` -> `id...4` 140 | New names: 141 | * `splits` -> `splits...1` 142 | * `id` -> `id...2` 143 | * `splits` -> `splits...3` 144 | * `id` -> `id...4` 145 | 146 | --- 147 | 148 | Code 149 | expect_identical(vec_cbind(x, tbl), vec_cbind(tbl, tbl)) 150 | Message 151 | New names: 152 | * `splits` -> `splits...1` 153 | * `id` -> `id...2` 154 | * `splits` -> `splits...3` 155 | * `id` -> `id...4` 156 | New names: 157 | * `splits` -> `splits...1` 158 | * `id` -> `id...2` 159 | * `splits` -> `splits...3` 160 | * `id` -> `id...4` 161 | 162 | --- 163 | 164 | Code 165 | expect_s3_class_bare_tibble(vec_cbind(x, x)) 166 | Message 167 | New names: 168 | * `splits` -> `splits...1` 169 | * `id` -> `id...2` 170 | * `splits` -> `splits...3` 171 | * `id` -> `id...4` 172 | 173 | --- 174 | 175 | Code 176 | expect_identical(vec_cbind(x, x), vec_cbind(tbl, tbl)) 177 | Message 178 | New names: 179 | * `splits` -> `splits...1` 180 | * `id` -> `id...2` 181 | * `splits` -> `splits...3` 182 | * `id` -> `id...4` 183 | New names: 184 | * `splits` -> `splits...1` 185 | * `id` -> `id...2` 186 | * `splits` -> `splits...3` 187 | * `id` -> `id...4` 188 | 189 | --- 190 | 191 | Code 192 | expect_identical(vec_cbind(x, tbl), vec_cbind(tbl, tbl)) 193 | Message 194 | New names: 195 | * `splits` -> `splits...1` 196 | * `id` -> `id...2` 197 | * `splits` -> `splits...3` 198 | * `id` -> `id...4` 199 | New names: 200 | * `splits` -> `splits...1` 201 | * `id` -> `id...2` 202 | * `splits` -> `splits...3` 203 | * 
`id` -> `id...4` 204 | 205 | --- 206 | 207 | Code 208 | expect_s3_class_bare_tibble(vec_cbind(x, x)) 209 | Message 210 | New names: 211 | * `splits` -> `splits...1` 212 | * `id` -> `id...2` 213 | * `splits` -> `splits...3` 214 | * `id` -> `id...4` 215 | 216 | -------------------------------------------------------------------------------- /tests/testthat/_snaps/misc.md: -------------------------------------------------------------------------------- 1 | # check_v errors appropriately 2 | 3 | Code 4 | check_v(-1) 5 | Condition 6 | Error: 7 | ! `v` must be a single positive integer. 8 | 9 | --- 10 | 11 | Code 12 | check_v(c(5, 10)) 13 | Condition 14 | Error: 15 | ! `v` must be a single positive integer. 16 | 17 | --- 18 | 19 | Code 20 | check_v("a") 21 | Condition 22 | Error: 23 | ! `v` must be a single positive integer. 24 | 25 | --- 26 | 27 | Code 28 | check_v(10, 5, "rows", FALSE) 29 | Condition 30 | Error: 31 | ! The number of rows is less than `v = 10` (5) 32 | i Set `v` to a smaller value than 5 33 | 34 | # check_v updates v appropriately 35 | 36 | Code 37 | new_v <- check_v(10, 5, "rows") 38 | Condition 39 | Warning: 40 | Fewer than 10 rows available for sampling 41 | i Setting `v` to 5 42 | 43 | # check_v handles NULL and Inf appropriately 44 | 45 | Code 46 | check_v(c(Inf, 1)) 47 | Condition 48 | Error: 49 | ! `v` must be a single positive integer. 50 | 51 | --- 52 | 53 | Code 54 | check_v(Inf, 5, "rows", FALSE) 55 | Condition 56 | Error: 57 | ! `v` cannot be `NULL` or `Inf` for this function 58 | 59 | --- 60 | 61 | Code 62 | check_v(NULL, 5, "rows", FALSE) 63 | Condition 64 | Error: 65 | ! `v` cannot be `NULL` or `Inf` for this function 66 | 67 | -------------------------------------------------------------------------------- /tests/testthat/_snaps/spatial_clustering_cv.md: -------------------------------------------------------------------------------- 1 | # bad args 2 | 3 | Code 4 | spatial_clustering_cv(Smithsonian) 5 | Condition 6 | Error in `spatial_clustering_cv()`: 7 | ! `spatial_clustering_cv()` currently only supports `sf` objects. 8 | i Try converting `data` to an `sf` object via `sf::st_as_sf()`. 9 | 10 | --- 11 | 12 | Code 13 | spatial_clustering_cv(Smithsonian_sf, v = "a") 14 | Condition 15 | Error in `spatial_clustering_cv()`: 16 | ! `v` must be a single positive integer. 17 | 18 | --- 19 | 20 | Code 21 | spatial_clustering_cv(Smithsonian_sf, v = c(5, 10)) 22 | Condition 23 | Error in `spatial_clustering_cv()`: 24 | ! `v` must be a single positive integer. 25 | 26 | --- 27 | 28 | Code 29 | spatial_clustering_cv(Smithsonian_sf, v = 100) 30 | Condition 31 | Error in `spatial_clustering_cv()`: 32 | ! 
The number of data points is less than `v = 100` (20) 33 | i Set `v` to a smaller value than 20 34 | 35 | # using sf 36 | 37 | Code 38 | spatial_clustering_cv(Smithsonian_sf) 39 | Output 40 | # 10-fold spatial cross-validation 41 | # A tibble: 10 x 2 42 | splits id 43 | 44 | 1 Fold01 45 | 2 Fold02 46 | 3 Fold03 47 | 4 Fold04 48 | 5 Fold05 49 | 6 Fold06 50 | 7 Fold07 51 | 8 Fold08 52 | 9 Fold09 53 | 10 Fold10 54 | 55 | # printing 56 | 57 | # 2-fold spatial cross-validation 58 | # A tibble: 2 x 2 59 | splits id 60 | 61 | 1 Fold1 62 | 2 Fold2 63 | 64 | -------------------------------------------------------------------------------- /tests/testthat/_snaps/spatial_nndm_cv.md: -------------------------------------------------------------------------------- 1 | # bad args 2 | 3 | Code 4 | spatial_nndm_cv(Smithsonian_sf[1:15, ], Smithsonian[16:20, ]) 5 | Condition 6 | Error in `spatial_nndm_cv()`: 7 | ! `spatial_nndm_cv()` currently only supports `sf` objects. 8 | i Try converting `prediction_sites` to an `sf` object via `sf::st_as_sf()`. 9 | 10 | --- 11 | 12 | Code 13 | spatial_nndm_cv(Smithsonian[1:15, ], Smithsonian_sf[16:20, ]) 14 | Condition 15 | Error in `spatial_nndm_cv()`: 16 | ! `spatial_nndm_cv()` currently only supports `sf` objects. 17 | i Try converting `data` to an `sf` object via `sf::st_as_sf()`. 18 | 19 | # normal usage 20 | 21 | Code 22 | spatial_nndm_cv(Smithsonian_sf[1:15, ], Smithsonian_sf[16:20, ]) 23 | Output 24 | # A tibble: 15 x 2 25 | splits id 26 | 27 | 1 Fold01 28 | 2 Fold02 29 | 3 Fold03 30 | 4 Fold04 31 | 5 Fold05 32 | 6 Fold06 33 | 7 Fold07 34 | 8 Fold08 35 | 9 Fold09 36 | 10 Fold10 37 | 11 Fold11 38 | 12 Fold12 39 | 13 Fold13 40 | 14 Fold14 41 | 15 Fold15 42 | 43 | # can pass a single polygon to sample within 44 | 45 | Code 46 | spatial_nndm_cv(Smithsonian_sf, example_poly) 47 | Output 48 | # A tibble: 20 x 2 49 | splits id 50 | 51 | 1 Fold01 52 | 2 Fold02 53 | 3 Fold03 54 | 4 Fold04 55 | 5 Fold05 56 | 6 Fold06 57 | 7 Fold07 58 | 8 Fold08 59 | 9 Fold09 60 | 10 Fold10 61 | 11 Fold11 62 | 12 Fold12 63 | 13 Fold13 64 | 14 Fold14 65 | 15 Fold15 66 | 16 Fold16 67 | 17 Fold17 68 | 18 Fold18 69 | 19 Fold19 70 | 20 Fold20 71 | 72 | # printing 73 | 74 | # A tibble: 15 x 2 75 | splits id 76 | 77 | 1 Fold01 78 | 2 Fold02 79 | 3 Fold03 80 | 4 Fold04 81 | 5 Fold05 82 | 6 Fold06 83 | 7 Fold07 84 | 8 Fold08 85 | 9 Fold09 86 | 10 Fold10 87 | 11 Fold11 88 | 12 Fold12 89 | 13 Fold13 90 | 14 Fold14 91 | 15 Fold15 92 | 93 | -------------------------------------------------------------------------------- /tests/testthat/_snaps/spatial_vfold_cv.md: -------------------------------------------------------------------------------- 1 | # erroring when no S2 2 | 3 | Code 4 | spatial_buffer_vfold_cv(ames_sf, buffer = 500, radius = NULL) 5 | Condition 6 | Error in `spatial_buffer_vfold_cv()`: 7 | ! `spatial_buffer_vfold_cv()` can only process geographic coordinates when using the s2 geometry library. 8 | i Reproject your data into a projected coordinate reference system using `sf::st_transform()`. 9 | i Or install the `s2` package and enable it using `sf::sf_use_s2(TRUE)`. 10 | 11 | --- 12 | 13 | Code 14 | suppressMessages(spatial_leave_location_out_cv(ames_sf, Neighborhood, buffer = 500)) 15 | Condition 16 | Error in `spatial_leave_location_out_cv()`: 17 | ! Buffering can only process geographic coordinates when using the s2 geometry library. 18 | i Reproject your data into a projected coordinate reference system using `sf::st_transform()`. 
19 | i Or install the `s2` package and enable it using `sf::sf_use_s2(TRUE)`. 20 | 21 | # spatial_buffer_vfold_cv 22 | 23 | Code 24 | rs1 25 | Output 26 | # 2-fold spatial cross-validation 27 | # A tibble: 4 x 3 28 | splits id id2 29 | 30 | 1 Repeat1 Fold1 31 | 2 Repeat1 Fold2 32 | 3 Repeat2 Fold1 33 | 4 Repeat2 Fold2 34 | 35 | # spatial_leave_location_out_cv 36 | 37 | Code 38 | rs1 39 | Output 40 | # 2-fold spatial leave-location-out cross-validation 41 | # A tibble: 4 x 3 42 | splits id id2 43 | 44 | 1 Repeat1 Resample1 45 | 2 Repeat1 Resample2 46 | 3 Repeat2 Resample1 47 | 4 Repeat2 Resample2 48 | 49 | # bad args 50 | 51 | Code 52 | spatial_buffer_vfold_cv(ames_sf, radius = NULL) 53 | Condition 54 | Error in `spatial_buffer_vfold_cv()`: 55 | ! `spatial_buffer_vfold_cv()` requires both `radius` and `buffer` be provided. 56 | i Use `NULL` for resampling without one of `radius` or `buffer`, like `radius = NULL, buffer = 5000`. 57 | 58 | --- 59 | 60 | Code 61 | spatial_buffer_vfold_cv(ames_sf, buffer = 500) 62 | Condition 63 | Error in `spatial_buffer_vfold_cv()`: 64 | ! `spatial_buffer_vfold_cv()` requires both `radius` and `buffer` be provided. 65 | i Use `NULL` for resampling without one of `radius` or `buffer`, like `radius = NULL, buffer = 5000`. 66 | 67 | --- 68 | 69 | Code 70 | spatial_buffer_vfold_cv(ames_sf) 71 | Condition 72 | Error in `spatial_buffer_vfold_cv()`: 73 | ! `spatial_buffer_vfold_cv()` requires both `radius` and `buffer` be provided. 74 | i Use `NULL` for resampling without one of `radius` or `buffer`, like `radius = NULL, buffer = 5000`. 75 | i Or use `rsample::vfold_cv() to use a non-spatial V-fold. 76 | 77 | --- 78 | 79 | Code 80 | spatial_leave_location_out_cv(ames) 81 | Condition 82 | Error in `rsample::group_vfold_cv()`: 83 | ! `group` should be a single character value for the column that will be used for splitting. 84 | 85 | --- 86 | 87 | Code 88 | spatial_leave_location_out_cv(ames, Neighborhood, buffer = 500) 89 | Condition 90 | Error in `spatial_leave_location_out_cv()`: 91 | ! Buffering currently only supports `sf` objects. 92 | i Try converting `data` to an `sf` object via `sf::st_as_sf()`. 93 | 94 | --- 95 | 96 | Code 97 | spatial_leave_location_out_cv(ames_sf, v = c(5, 10)) 98 | Condition 99 | Error in `rsample::group_vfold_cv()`: 100 | ! `group` should be a single character value for the column that will be used for splitting. 101 | 102 | --- 103 | 104 | Code 105 | spatial_buffer_vfold_cv(ames_sf, v = c(5, 10), buffer = NULL, radius = NULL) 106 | Condition 107 | Error in `spatial_buffer_vfold_cv()`: 108 | ! `v` must be a single positive integer. 
109 | 110 | --- 111 | 112 | Code 113 | spatial_leave_location_out_cv(ames_sf, Neighborhood, v = 60) 114 | Condition 115 | Warning in `spatial_leave_location_out_cv()`: 116 | Fewer than 60 locations available for sampling 117 | i Setting `v` to 28 118 | Output 119 | # 28-fold spatial leave-location-out cross-validation 120 | # A tibble: 28 x 2 121 | splits id 122 | 123 | 1 Resample01 124 | 2 Resample02 125 | 3 Resample03 126 | 4 Resample04 127 | 5 Resample05 128 | 6 Resample06 129 | 7 Resample07 130 | 8 Resample08 131 | 9 Resample09 132 | 10 Resample10 133 | # i 18 more rows 134 | 135 | --- 136 | 137 | Code 138 | spatial_buffer_vfold_cv(boston_canopy, v = 683, buffer = NULL, radius = NULL) 139 | Condition 140 | Warning in `spatial_buffer_vfold_cv()`: 141 | Fewer than 683 rows available for sampling 142 | i Setting `v` to 682 143 | Output 144 | # 682-fold spatial cross-validation 145 | # A tibble: 682 x 2 146 | splits id 147 | 148 | 1 Fold001 149 | 2 Fold002 150 | 3 Fold003 151 | 4 Fold004 152 | 5 Fold005 153 | 6 Fold006 154 | 7 Fold007 155 | 8 Fold008 156 | 9 Fold009 157 | 10 Fold010 158 | # i 672 more rows 159 | 160 | --- 161 | 162 | Repeated cross-validation doesn't make sense when performing leave-one-out cross-validation. 163 | i Set `v` to a lower value. 164 | i Or set `repeats = 1`. 165 | 166 | --- 167 | 168 | Repeated resampling when `v` is 28 would create identical resamples 169 | 170 | # printing 171 | 172 | # 10-fold spatial block cross-validation 173 | # A tibble: 10 x 2 174 | splits id 175 | 176 | 1 Fold01 177 | 2 Fold02 178 | 3 Fold03 179 | 4 Fold04 180 | 5 Fold05 181 | 6 Fold06 182 | 7 Fold07 183 | 8 Fold08 184 | 9 Fold09 185 | 10 Fold10 186 | 187 | -------------------------------------------------------------------------------- /tests/testthat/helper-rset.R: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | 3 | dim_rset <- function(x, ...) { 4 | dims <- purrr::map(x$splits, dim) 5 | dims <- do.call("rbind", dims) 6 | dims <- tibble::as_tibble(dims) 7 | id_cols <- grep("^id", colnames(x), value = TRUE) 8 | for (i in seq_along(id_cols)) { 9 | dims[id_cols[i]] <- getElement(x, id_cols[i]) 10 | } 11 | dims 12 | } 13 | 14 | # ------------------------------------------------------------------------------ 15 | 16 | tib_upcast <- function(x) { 17 | size <- df_size(x) 18 | 19 | # Strip all attributes except names to construct 20 | # a bare list to build the tibble back up from. 
21 | attributes(x) <- list(names = names(x)) 22 | 23 | tibble::new_tibble(x, nrow = size) 24 | } 25 | 26 | df_size <- function(x) { 27 | if (!is.list(x)) { 28 | rlang::abort("Cannot get the df size of a non-list.") 29 | } 30 | 31 | if (length(x) == 0L) { 32 | return(0L) 33 | } 34 | 35 | col <- x[[1L]] 36 | 37 | vec_size(col) 38 | } 39 | 40 | # ------------------------------------------------------------------------------ 41 | 42 | expect_s3_class_rset <- function(x) { 43 | expect_s3_class(x, "rset") 44 | } 45 | 46 | expect_s3_class_bare_tibble <- function(x) { 47 | expect_s3_class(x, c("tbl_df", "tbl", "data.frame"), exact = TRUE) 48 | } 49 | -------------------------------------------------------------------------------- /tests/testthat/test-autoplot.R: -------------------------------------------------------------------------------- 1 | skip_if_not_installed("modeldata") 2 | skip_if_not_installed("vdiffr") 3 | 4 | data(ames, package = "modeldata") 5 | 6 | test_that("autoplot is stable", { 7 | skip_if_not(sf::sf_use_s2()) 8 | 9 | ames_sf <- sf::st_as_sf(ames, coords = c("Longitude", "Latitude"), crs = 4326) 10 | set.seed(123) 11 | ames_cluster <- spatial_clustering_cv(ames_sf) 12 | 13 | p <- autoplot(ames_cluster) 14 | vdiffr::expect_doppelganger("cluster plots", p) 15 | 16 | p <- autoplot(ames_cluster$splits[[1]]) 17 | vdiffr::expect_doppelganger("cluster split plots", p) 18 | 19 | skip_if_not(sf::sf_use_s2()) 20 | set.seed(123) 21 | ames_block <- spatial_block_cv(ames_sf) 22 | 23 | p <- autoplot(ames_block, show_grid = FALSE) 24 | vdiffr::expect_doppelganger("block plots", p) 25 | 26 | p <- autoplot(ames_block) 27 | vdiffr::expect_doppelganger("block plots with grid", p) 28 | 29 | p <- autoplot(ames_block$splits[[1]]) 30 | vdiffr::expect_doppelganger("block split plots", p) 31 | 32 | skip_if_not_installed("curl") 33 | skip_if_offline() 34 | sf::sf_proj_network(enable = TRUE) 35 | 36 | set.seed(123) 37 | boston_buffer <- spatial_block_cv(boston_canopy, buffer = 5000, radius = NULL) 38 | 39 | p <- autoplot(boston_buffer) 40 | vdiffr::expect_doppelganger("buffered rset plot", p) 41 | 42 | p <- autoplot(boston_buffer$splits[[1]]) 43 | vdiffr::expect_doppelganger("buffered rsample plot", p) 44 | 45 | set.seed(123) 46 | boston_vfold_buffer <- spatial_buffer_vfold_cv( 47 | boston_canopy, 48 | v = 10, 49 | buffer = 5000, 50 | radius = NULL 51 | ) 52 | 53 | p <- autoplot(boston_vfold_buffer) 54 | vdiffr::expect_doppelganger("buffered vfold plot", p) 55 | 56 | set.seed(123) 57 | boston_vfold_buffer <- spatial_buffer_vfold_cv( 58 | boston_canopy, 59 | v = 682, 60 | radius = 1, 61 | buffer = 5000 62 | ) 63 | 64 | # chose the fourth split purely because it looks cool 65 | p <- autoplot(boston_vfold_buffer$splits[[4]]) 66 | vdiffr::expect_doppelganger("buffered vfold split", p) 67 | 68 | set.seed(123) 69 | ames_neighborhoods <- spatial_leave_location_out_cv(ames_sf, Neighborhood) 70 | 71 | p <- autoplot(ames_neighborhoods) 72 | vdiffr::expect_doppelganger("buffered LLO set plot", p) 73 | 74 | p <- autoplot(ames_neighborhoods$splits[[1]]) 75 | vdiffr::expect_doppelganger("buffered LLO split plot", p) 76 | 77 | # Not setting seed because this _should_ be deterministic 78 | boston_snake <- spatial_block_cv( 79 | boston_canopy, 80 | v = 10, 81 | method = "snake", 82 | relevant_only = FALSE, 83 | n = c(10, 23) 84 | ) 85 | p <- autoplot(boston_snake) 86 | vdiffr::expect_doppelganger("snake flips rows the right way", p) 87 | 88 | set.seed(123) 89 | repeat_block <- spatial_block_cv( 90 | boston_canopy, 91 | v 
= 10, 92 | method = "random", 93 | repeats = 2 94 | ) 95 | vdiffr::expect_doppelganger( 96 | "repeated block CV", 97 | autoplot(repeat_block) 98 | ) 99 | 100 | set.seed(123) 101 | repeat_vfold <- spatial_buffer_vfold_cv( 102 | boston_canopy, 103 | radius = 1, 104 | buffer = 4000, 105 | repeats = 2 106 | ) 107 | vdiffr::expect_doppelganger( 108 | "repeated vfold", 109 | autoplot(repeat_vfold) 110 | ) 111 | 112 | set.seed(123) 113 | repeat_llo <- spatial_leave_location_out_cv( 114 | ames_sf, 115 | Neighborhood, 116 | repeats = 2, 117 | v = 10 118 | ) 119 | vdiffr::expect_doppelganger( 120 | "repeated LLO", 121 | autoplot(repeat_llo) 122 | ) 123 | }) 124 | 125 | test_that("autoplot respects expand_bbox", { 126 | vdiffr::expect_doppelganger( 127 | "expand_bbox", 128 | autoplot( 129 | spatial_block_cv(boston_canopy, expand_bbox = 0.5, v = 4) 130 | ) 131 | ) 132 | }) 133 | -------------------------------------------------------------------------------- /tests/testthat/test-buffer.R: -------------------------------------------------------------------------------- 1 | chosen_points <- sf::st_as_sf( 2 | data.frame(x = c(0, 1, 3), y = 1), 3 | coords = c("x", "y"), 4 | # Any projected CRS should be fine here 5 | crs = 2249 6 | ) 7 | 8 | test_that("buffering selects the expected points", { 9 | skip_if_not_installed("curl") 10 | skip_if_offline() 11 | sf::sf_proj_network(enable = TRUE) 12 | 13 | skip_if_not(sf::sf_use_s2()) 14 | 15 | # These points fall along a number line: point 1 is 1 away from point 2, 16 | # point 3 is 2 away from point 2 17 | 18 | # Using a projected CRS (so no geographic weirdness), that means buffering 19 | # should be conceptually straightforward: points X units away should be 20 | # "caught" by any radius or buffer (or the two combined) >= X 21 | 22 | # No buffer or radius is identical to NULL: 23 | expect_identical( 24 | buffer_indices( 25 | data = chosen_points, 26 | indices = list(2), 27 | radius = 0, 28 | buffer = 0 29 | ), 30 | buffer_indices( 31 | data = chosen_points, 32 | indices = list(2), 33 | radius = NULL, 34 | buffer = NULL 35 | ) 36 | ) 37 | 38 | expect_identical( 39 | buffer_indices( 40 | data = chosen_points, 41 | indices = list(2), 42 | radius = 0, 43 | buffer = 0 44 | ), 45 | buffer_indices( 46 | data = chosen_points, 47 | indices = list(2), 48 | radius = 0, 49 | buffer = NULL 50 | ) 51 | ) 52 | 53 | expect_identical( 54 | buffer_indices( 55 | data = chosen_points, 56 | indices = list(2), 57 | radius = 0, 58 | buffer = 0 59 | ), 60 | buffer_indices( 61 | data = chosen_points, 62 | indices = list(2), 63 | radius = NULL, 64 | buffer = 0 65 | ) 66 | ) 67 | 68 | # No buffer or radius: only the selected point (2) should be in test: 69 | expect_identical( 70 | buffer_indices( 71 | data = chosen_points, 72 | indices = list(2), 73 | radius = 0, 74 | buffer = 0 75 | ), 76 | list( 77 | list( 78 | analysis = c(1L, 3L), 79 | assessment = 2 80 | ) 81 | ) 82 | ) 83 | 84 | # 1 radius 0 buffer: the point at 1 should be in test: 85 | expect_identical( 86 | buffer_indices( 87 | data = chosen_points, 88 | indices = list(2), 89 | radius = 1, 90 | buffer = 0 91 | ), 92 | list( 93 | list( 94 | analysis = c(3L), 95 | assessment = c(2, 1) 96 | ) 97 | ) 98 | ) 99 | 100 | # 0 radius 1 buffer: the point at 1 should be nowhere: 101 | expect_identical( 102 | buffer_indices( 103 | data = chosen_points, 104 | indices = list(2), 105 | radius = 0, 106 | buffer = 1 107 | ), 108 | list( 109 | list( 110 | analysis = c(3L), 111 | assessment = c(2) 112 | ) 113 | ) 114 | ) 115 | 116 | # 1 radius 2 
buffer: the point at 3 should be nowhere: 117 | expect_identical( 118 | buffer_indices( 119 | data = chosen_points, 120 | indices = list(2), 121 | radius = 1, 122 | buffer = 2 123 | ), 124 | list( 125 | list( 126 | analysis = integer(), 127 | assessment = c(2, 1) 128 | ) 129 | ) 130 | ) 131 | 132 | # 0 radius 2 buffer: the point at 3 should be nowhere: 133 | expect_identical( 134 | buffer_indices( 135 | data = chosen_points, 136 | indices = list(2), 137 | radius = 0, 138 | buffer = 2 139 | ), 140 | list( 141 | list( 142 | analysis = integer(), 143 | assessment = c(2) 144 | ) 145 | ) 146 | ) 147 | 148 | # >1 radius 1 buffer: the point at 3 should be in test: 149 | expect_identical( 150 | buffer_indices( 151 | data = chosen_points, 152 | indices = list(2), 153 | radius = 1.8, 154 | buffer = 1 155 | ), 156 | list( 157 | list( 158 | analysis = c(3L), 159 | assessment = c(2, 1) 160 | ) 161 | ) 162 | ) 163 | }) 164 | 165 | skip_if_not_installed("modeldata") 166 | data("ames", package = "modeldata") 167 | 168 | test_that("bad args", { 169 | ames_sf <- sf::st_as_sf( 170 | ames, 171 | coords = c("Longitude", "Latitude") 172 | ) 173 | ames_sf <- sf::st_set_crs( 174 | ames_sf, 175 | 4326 176 | ) 177 | s2_status <- sf::sf_use_s2() 178 | sf::sf_use_s2(FALSE) 179 | expect_snapshot( 180 | buffer_indices(ames_sf), 181 | error = TRUE 182 | ) 183 | sf::sf_use_s2(s2_status) 184 | 185 | # The default RNG changed in 3.6.0 186 | skip_if_not(getRversion() >= numeric_version("3.6.0")) 187 | 188 | skip_if_not(sf::sf_use_s2()) 189 | 190 | set.seed(123) 191 | expect_snapshot( 192 | spatial_clustering_cv(ames_sf, buffer = 0.01) 193 | ) 194 | }) 195 | 196 | ames_sf <- sf::st_as_sf( 197 | ames, 198 | coords = c("Longitude", "Latitude"), 199 | crs = 4326 200 | ) 201 | 202 | test_that("using buffers", { 203 | skip_if_not(sf::sf_use_s2()) 204 | skip_if_not_installed("curl") 205 | skip_if_offline() 206 | sf::sf_proj_network(enable = TRUE) 207 | 208 | set.seed(11) 209 | rs1 <- spatial_clustering_cv( 210 | ames_sf, 211 | v = 2 212 | ) 213 | set.seed(11) 214 | rs2 <- spatial_clustering_cv( 215 | ames_sf, 216 | v = 2, 217 | radius = 0, 218 | buffer = 0 219 | ) 220 | 221 | # These should be the only changes between 0 and NULL: 222 | attr(rs2, "radius") <- NULL 223 | attr(rs2, "buffer") <- NULL 224 | attr(rs2, "distance_function") <- attr(rs1, "distance_function") 225 | attr(rs2, "fingerprint") <- attr(rs1, "fingerprint") 226 | rs2$splits <- map(rs2$splits, rm_out) 227 | 228 | expect_identical(rs1, rs2) 229 | 230 | set.seed(11) 231 | expect_snapshot( 232 | spatial_clustering_cv( 233 | ames_sf, 234 | v = 2, 235 | radius = 500, 236 | buffer = 500 237 | ) 238 | ) 239 | 240 | set.seed(11) 241 | expect_snapshot( 242 | spatial_block_cv( 243 | boston_canopy, 244 | v = 2, 245 | method = "snake", 246 | radius = 500, 247 | buffer = 500 248 | ) 249 | ) 250 | 251 | 252 | # The default RNG changed in 3.6.0 253 | skip_if_not(getRversion() >= numeric_version("3.6.0")) 254 | 255 | set.seed(11) 256 | expect_snapshot( 257 | spatial_buffer_vfold_cv( 258 | boston_canopy, 259 | v = 682, 260 | radius = 500, 261 | buffer = 500 262 | ) 263 | ) 264 | 265 | set.seed(11) 266 | expect_snapshot( 267 | spatial_leave_location_out_cv( 268 | ames_sf, 269 | Neighborhood, 270 | v = 682, 271 | radius = 500, 272 | buffer = 500 273 | ) 274 | ) 275 | 276 | set.seed(11) 277 | expect_snapshot( 278 | spatial_block_cv( 279 | ames_sf, 280 | v = 2, 281 | method = "random", 282 | radius = 500, 283 | buffer = 500 284 | ) 285 | ) 286 | }) 287 | 288 | test_that("buffers 
respect units", { 289 | skip_if_not(sf::sf_use_s2()) 290 | skip_if_not_installed("curl") 291 | skip_if_offline() 292 | sf::sf_proj_network(enable = TRUE) 293 | 294 | set.seed(123) 295 | rs1 <- spatial_block_cv( 296 | boston_canopy, 297 | v = 2, 298 | method = "snake", 299 | radius = 500, 300 | buffer = 500 301 | ) 302 | set.seed(123) 303 | rs2 <- spatial_block_cv( 304 | boston_canopy, 305 | v = 2, 306 | method = "snake", 307 | radius = units::as_units(500, "ft"), 308 | buffer = units::as_units(500, "ft") 309 | ) 310 | attr(rs2, "radius") <- 500 311 | attr(rs2, "buffer") <- 500 312 | expect_identical(rs1, rs2) 313 | 314 | set.seed(123) 315 | rs1 <- spatial_block_cv( 316 | ames_sf, 317 | v = 2, 318 | method = "snake", 319 | radius = 100, 320 | buffer = 100 321 | ) 322 | set.seed(123) 323 | rs2 <- spatial_block_cv( 324 | ames_sf, 325 | v = 2, 326 | method = "snake", 327 | radius = units::as_units(100, "m"), 328 | buffer = units::as_units(100, "m") 329 | ) 330 | attr(rs2, "radius") <- 100 331 | attr(rs2, "buffer") <- 100 332 | expect_identical(rs1, rs2) 333 | }) 334 | -------------------------------------------------------------------------------- /tests/testthat/test-compat-vctrs.R: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # vec_restore() 3 | 4 | test_that("vec_restore() returns an rset subclass if `x` retains rset structure", { 5 | for (x in rset_subclasses) { 6 | expect_identical(vec_restore(x, x), x) 7 | expect_s3_class_rset(vec_restore(x, x)) 8 | } 9 | }) 10 | 11 | test_that("vec_restore() returns bare tibble if `x` loses rset structure", { 12 | for (x in rset_subclasses) { 13 | col <- x[1] 14 | row <- x[0, ] 15 | 16 | expect_s3_class_bare_tibble(vec_restore(col, x)) 17 | expect_s3_class_bare_tibble(vec_restore(row, x)) 18 | } 19 | }) 20 | 21 | test_that("vec_restore() retains extra attributes of `to` when not falling back", { 22 | for (x in rset_subclasses) { 23 | to <- x 24 | attr(to, "foo") <- "bar" 25 | 26 | x_tbl <- x[1] 27 | 28 | expect_identical(attr(vec_restore(x, to), "foo"), "bar") 29 | expect_identical(attr(vec_restore(x_tbl, to), "foo"), NULL) 30 | 31 | expect_s3_class_rset(vec_restore(x, to)) 32 | expect_s3_class_bare_tibble(vec_restore(x_tbl, to)) 33 | } 34 | }) 35 | 36 | # ------------------------------------------------------------------------------ 37 | # vec_ptype2() 38 | 39 | test_that("vec_ptype2() is working", { 40 | for (x in rset_subclasses) { 41 | tbl <- tibble::tibble(x = 1) 42 | df <- data.frame(x = 1) 43 | 44 | # rset-rset 45 | expect_identical(vec_ptype2(x, x), vec_ptype2(tib_upcast(x), tib_upcast(x))) 46 | 47 | # rset-tbl_df 48 | expect_identical(vec_ptype2(x, tbl), vec_ptype2(tib_upcast(x), tbl)) 49 | expect_identical(vec_ptype2(tbl, x), vec_ptype2(tbl, tib_upcast(x))) 50 | 51 | # rset-df 52 | expect_identical(vec_ptype2(x, df), vec_ptype2(tib_upcast(x), df)) 53 | expect_identical(vec_ptype2(df, x), vec_ptype2(df, tib_upcast(x))) 54 | } 55 | }) 56 | 57 | # ------------------------------------------------------------------------------ 58 | # vec_cast() 59 | 60 | test_that("vec_cast() is working", { 61 | for (x in rset_subclasses) { 62 | tbl <- tib_upcast(x) 63 | df <- as.data.frame(tbl) 64 | 65 | # rset-rset 66 | expect_error(vec_cast(x, x), class = "vctrs_error_incompatible_type") 67 | 68 | # rset-tbl_df 69 | expect_identical(vec_cast(x, tbl), tbl) 70 | expect_error(vec_cast(tbl, x), class = "vctrs_error_incompatible_type") 71 | 72 
| # rset-df 73 | expect_identical(vec_cast(x, df), df) 74 | expect_error(vec_cast(df, x), class = "vctrs_error_incompatible_type") 75 | } 76 | }) 77 | 78 | # ------------------------------------------------------------------------------ 79 | # vctrs methods 80 | 81 | test_that("vec_ptype() returns a bare tibble", { 82 | for (x in rset_subclasses) { 83 | expect_identical(vec_ptype(x), vec_ptype(tib_upcast(x))) 84 | expect_s3_class_bare_tibble(vec_ptype(x)) 85 | } 86 | }) 87 | 88 | test_that("vec_slice() generally returns a bare tibble", { 89 | for (x in rset_subclasses) { 90 | expect_identical(vec_slice(x, 0), vec_slice(tib_upcast(x), 0)) 91 | expect_s3_class_bare_tibble(vec_slice(x, 0)) 92 | } 93 | }) 94 | 95 | test_that("vec_slice() can return an rset if all rows are selected", { 96 | for (x in rset_subclasses) { 97 | expect_identical(vec_slice(x, TRUE), x) 98 | expect_s3_class_rset(vec_slice(x, TRUE)) 99 | } 100 | }) 101 | 102 | test_that("vec_c() returns a bare tibble", { 103 | for (x in rset_subclasses) { 104 | tbl <- tib_upcast(x) 105 | 106 | expect_identical(vec_c(x), vec_c(tbl)) 107 | expect_identical(vec_c(x, x), vec_c(tbl, tbl)) 108 | expect_identical(vec_c(x, tbl), vec_c(tbl, tbl)) 109 | 110 | expect_s3_class_bare_tibble(vec_c(x)) 111 | expect_s3_class_bare_tibble(vec_c(x, x)) 112 | } 113 | }) 114 | 115 | test_that("vec_rbind() returns a bare tibble", { 116 | for (x in rset_subclasses) { 117 | tbl <- tib_upcast(x) 118 | 119 | expect_identical(vec_rbind(x), vec_rbind(tbl)) 120 | expect_identical(vec_rbind(x, x), vec_rbind(tbl, tbl)) 121 | expect_identical(vec_rbind(x, tbl), vec_rbind(tbl, tbl)) 122 | 123 | expect_s3_class_bare_tibble(vec_rbind(x)) 124 | expect_s3_class_bare_tibble(vec_rbind(x, x)) 125 | } 126 | }) 127 | 128 | test_that("vec_cbind() returns a bare tibble", { 129 | for (x in rset_subclasses) { 130 | tbl <- tib_upcast(x) 131 | 132 | expect_identical(vec_cbind(x), vec_cbind(tbl)) 133 | expect_snapshot(expect_identical(vec_cbind(x, x), vec_cbind(tbl, tbl))) 134 | expect_snapshot(expect_identical(vec_cbind(x, tbl), vec_cbind(tbl, tbl))) 135 | 136 | expect_s3_class_bare_tibble(vec_cbind(x)) 137 | expect_snapshot(expect_s3_class_bare_tibble(vec_cbind(x, x))) 138 | } 139 | }) 140 | -------------------------------------------------------------------------------- /tests/testthat/test-misc.R: -------------------------------------------------------------------------------- 1 | test_that("check_v errors appropriately", { 2 | expect_snapshot( 3 | check_v(-1), 4 | error = TRUE 5 | ) 6 | expect_snapshot( 7 | check_v(c(5, 10)), 8 | error = TRUE 9 | ) 10 | expect_snapshot( 11 | check_v("a"), 12 | error = TRUE 13 | ) 14 | expect_snapshot( 15 | check_v(10, 5, "rows", FALSE), 16 | error = TRUE 17 | ) 18 | }) 19 | 20 | test_that("check_v updates v appropriately", { 21 | expect_snapshot( 22 | new_v <- check_v(10, 5, "rows") 23 | ) 24 | 25 | expect_identical( 26 | new_v, 27 | 5 28 | ) 29 | }) 30 | 31 | test_that("check_v handles NULL and Inf appropriately", { 32 | expect_snapshot( 33 | check_v(c(Inf, 1)), 34 | error = TRUE 35 | ) 36 | 37 | expect_snapshot( 38 | check_v(Inf, 5, "rows", FALSE), 39 | error = TRUE 40 | ) 41 | 42 | expect_snapshot( 43 | check_v(NULL, 5, "rows", FALSE), 44 | error = TRUE 45 | ) 46 | 47 | expect_identical( 48 | check_v(NULL, 5, "rows"), 49 | 5 50 | ) 51 | 52 | expect_identical( 53 | check_v(Inf, 5, "rows"), 54 | 5 55 | ) 56 | }) 57 | 58 | test_that("reverse_splits is working", { 59 | skip_if_not(rlang::is_installed("withr")) 60 | 61 | for (x in rset_subclasses) 
{ 62 | set.seed(123) 63 | rev_x <- rsample::reverse_splits(x) 64 | expect_identical(analysis(x$splits[[1]]), assessment(rev_x$splits[[1]])) 65 | expect_identical(assessment(x$splits[[1]]), analysis(rev_x$splits[[1]])) 66 | expect_identical(class(x), class(rev_x)) 67 | expect_identical(class(x$splits[[1]]), class(rev_x$splits[[1]])) 68 | } 69 | }) 70 | 71 | test_that("reshuffle_rset is working", { 72 | skip_if_not(rlang::is_installed("withr")) 73 | 74 | # Reshuffling with the same seed, in the same order, 75 | # should recreate the same objects 76 | out <- withr::with_seed( 77 | 123, 78 | lapply( 79 | rset_subclasses, 80 | function(x) suppressWarnings(rsample::reshuffle_rset(x)) 81 | ) 82 | ) 83 | 84 | for (i in seq_along(rset_subclasses)) { 85 | expect_identical( 86 | out[[i]], 87 | rset_subclasses[[i]] 88 | ) 89 | } 90 | }) 91 | -------------------------------------------------------------------------------- /tests/testthat/test-spatial_clustering_cv.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(rsample) 3 | library(purrr) 4 | skip_if_not_installed("modeldata") 5 | 6 | data("Smithsonian", package = "modeldata") 7 | Smithsonian_sf <- sf::st_as_sf( 8 | Smithsonian, 9 | coords = c("longitude", "latitude"), 10 | crs = 4326 11 | ) 12 | 13 | test_that("repeats", { 14 | skip_if_not(sf::sf_use_s2()) 15 | set.seed(11) 16 | rs1 <- spatial_clustering_cv( 17 | Smithsonian_sf, 18 | v = 2, 19 | repeats = 2 20 | ) 21 | sizes1 <- dim_rset(rs1) 22 | 23 | expect_true(all(sizes1$analysis + sizes1$assessment == 20)) 24 | same_data <- map_lgl( 25 | rs1$splits, 26 | function(x) { 27 | isTRUE(all.equal(x$data, Smithsonian_sf)) 28 | } 29 | ) 30 | expect_true(all(same_data)) 31 | 32 | good_holdout <- map_lgl( 33 | rs1$splits, 34 | function(x) { 35 | length(intersect(x$in_ind, x$out_id)) == 0 36 | } 37 | ) 38 | expect_true(all(good_holdout)) 39 | }) 40 | 41 | test_that("using hclust", { 42 | skip_if_not(sf::sf_use_s2()) 43 | set.seed(11) 44 | rs1 <- spatial_clustering_cv( 45 | Smithsonian_sf, 46 | v = 2, 47 | cluster_function = "hclust" 48 | ) 49 | sizes1 <- dim_rset(rs1) 50 | 51 | expect_true(all(sizes1$analysis + sizes1$assessment == 20)) 52 | same_data <- map_lgl( 53 | rs1$splits, 54 | function(x) { 55 | isTRUE(all.equal(x$data, Smithsonian_sf)) 56 | } 57 | ) 58 | expect_true(all(same_data)) 59 | 60 | good_holdout <- map_lgl( 61 | rs1$splits, 62 | function(x) { 63 | length(intersect(x$in_ind, x$out_id)) == 0 64 | } 65 | ) 66 | expect_true(all(good_holdout)) 67 | }) 68 | 69 | 70 | test_that("bad args", { 71 | expect_snapshot( 72 | spatial_clustering_cv(Smithsonian), 73 | error = TRUE 74 | ) 75 | expect_snapshot( 76 | spatial_clustering_cv( 77 | Smithsonian_sf, 78 | v = "a" 79 | ), 80 | error = TRUE 81 | ) 82 | expect_snapshot( 83 | spatial_clustering_cv( 84 | Smithsonian_sf, 85 | v = c(5, 10) 86 | ), 87 | error = TRUE 88 | ) 89 | expect_snapshot( 90 | spatial_clustering_cv( 91 | Smithsonian_sf, 92 | v = 100 93 | ), 94 | error = TRUE 95 | ) 96 | }) 97 | 98 | test_that("can pass the dots to kmeans", { 99 | skip_if_not(sf::sf_use_s2()) 100 | expect_error( 101 | spatial_clustering_cv( 102 | Smithsonian_sf, 103 | v = 2, 104 | algorithm = "MacQueen" 105 | ), 106 | NA 107 | ) 108 | }) 109 | 110 | test_that("using sf", { 111 | skip_if_not(sf::sf_use_s2()) 112 | set.seed(11) 113 | rs1 <- spatial_clustering_cv( 114 | Smithsonian_sf, 115 | v = 2 116 | ) 117 | sizes1 <- dim_rset(rs1) 118 | 119 | set.seed(11) 120 | rs2 <- spatial_clustering_cv( 121 | 
Smithsonian_sf, 122 | v = 2, 123 | cluster_function = "kmeans" 124 | ) 125 | expect_identical(rs1, rs2) 126 | 127 | expect_true(all(sizes1$analysis + sizes1$assessment == 20)) 128 | same_data <- map_lgl( 129 | rs1$splits, 130 | function(x) { 131 | isTRUE(all.equal(x$data, Smithsonian_sf)) 132 | } 133 | ) 134 | expect_true(all(same_data)) 135 | 136 | good_holdout <- map_lgl( 137 | rs1$splits, 138 | function(x) { 139 | length(intersect(x$in_ind, x$out_id)) == 0 140 | } 141 | ) 142 | expect_true(all(good_holdout)) 143 | 144 | # This tests to ensure that _other_ warnings don't fire on _most_ platforms 145 | # The default RNG changed in 3.6.0 (skips oldrel-4) 146 | skip_if_not(getRversion() >= numeric_version("3.6.0")) 147 | # Older builds without s2 give additional warnings, 148 | # as running sf::st_centroid pre-s2 gives inaccurate results 149 | # for geographic CRS (skips windows-3.6) 150 | skip_if_not(sf::sf_use_s2()) 151 | set.seed(123) 152 | expect_snapshot( 153 | spatial_clustering_cv(Smithsonian_sf) 154 | ) 155 | }) 156 | 157 | test_that("using custom functions", { 158 | skip_if_not(sf::sf_use_s2()) 159 | custom_cluster <- function(dists, v, ...) { 160 | clusters <- kmeans(dists, centers = v, ...) 161 | letters[clusters$cluster] 162 | } 163 | 164 | set.seed(11) 165 | rs1 <- spatial_clustering_cv( 166 | Smithsonian_sf, 167 | v = 2 168 | ) 169 | set.seed(11) 170 | rs2 <- spatial_clustering_cv( 171 | Smithsonian_sf, 172 | v = 2, 173 | cluster_function = custom_cluster 174 | ) 175 | expect_identical(rs1$splits, rs2$splits) 176 | 177 | expect_error( 178 | spatial_clustering_cv( 179 | Smithsonian_sf, 180 | v = 2, 181 | cluster_function = custom_cluster, 182 | algorithm = "MacQueen" 183 | ), 184 | NA 185 | ) 186 | }) 187 | 188 | test_that("polygons are only assigned one fold", { 189 | skip_if_not(sf::sf_use_s2()) 190 | set.seed(11) 191 | 192 | rs1 <- spatial_clustering_cv(boston_canopy, cluster_function = "hclust") 193 | rs2 <- spatial_clustering_cv(boston_canopy, cluster_function = "kmeans") 194 | 195 | expect_identical( 196 | sum(map_int(rs1$splits, function(x) nrow(assessment(x)))), 197 | nrow(boston_canopy) 198 | ) 199 | 200 | expect_identical( 201 | sum(map_int(rs2$splits, function(x) nrow(assessment(x)))), 202 | nrow(boston_canopy) 203 | ) 204 | 205 | good_holdout <- map_lgl( 206 | c( 207 | rs1$splits, 208 | rs2$splits 209 | ), 210 | function(x) { 211 | length(intersect(x$in_ind, x$out_id)) == 0 212 | } 213 | ) 214 | expect_true(all(good_holdout)) 215 | }) 216 | 217 | test_that("printing", { 218 | skip_if_not(sf::sf_use_s2()) 219 | # The default RNG changed in 3.6.0 220 | skip_if_not(getRversion() >= numeric_version("3.6.0")) 221 | set.seed(123) 222 | expect_snapshot_output( 223 | spatial_clustering_cv( 224 | Smithsonian_sf, 225 | v = 2 226 | ) 227 | ) 228 | }) 229 | 230 | test_that("rsplit labels", { 231 | skip_if_not(sf::sf_use_s2()) 232 | rs <- spatial_clustering_cv(Smithsonian_sf, v = 2) 233 | all_labs <- dplyr::bind_rows(purrr::map(rs$splits, labels)) 234 | original_id <- rs[, grepl("^id", names(rs))] 235 | expect_equal(all_labs, original_id) 236 | }) 237 | -------------------------------------------------------------------------------- /tests/testthat/test-spatial_nndm_cv.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(rsample) 3 | library(purrr) 4 | skip_if_not_installed("modeldata") 5 | 6 | data("Smithsonian", package = "modeldata") 7 | Smithsonian_sf <- sf::st_as_sf( 8 | Smithsonian, 9 | coords = 
c("longitude", "latitude"), 10 | crs = 4326 11 | ) 12 | 13 | test_that("bad args", { 14 | 15 | expect_snapshot( 16 | spatial_nndm_cv(Smithsonian_sf[1:15, ], Smithsonian[16:20, ]), 17 | error = TRUE 18 | ) 19 | 20 | expect_snapshot( 21 | spatial_nndm_cv(Smithsonian[1:15, ], Smithsonian_sf[16:20, ]), 22 | error = TRUE 23 | ) 24 | }) 25 | 26 | test_that("can pass the dots to st_sample", { 27 | skip_if_not(sf::sf_use_s2()) 28 | expect_no_error( 29 | spatial_nndm_cv( 30 | Smithsonian_sf[1:15, ], 31 | Smithsonian_sf[16:20, ], 32 | type = "regular" 33 | ) 34 | ) 35 | }) 36 | 37 | test_that("normal usage", { 38 | skip_if_not(sf::sf_use_s2()) 39 | set.seed(11) 40 | rs1 <- spatial_nndm_cv( 41 | Smithsonian_sf[1:15, ], 42 | Smithsonian_sf[16:20, ] 43 | ) 44 | sizes1 <- dim_rset(rs1) 45 | 46 | expect_true(all(sizes1$assessment == 1)) 47 | same_data <- map_lgl( 48 | rs1$splits, 49 | function(x) { 50 | isTRUE(all.equal(x$data, Smithsonian_sf[1:15, ])) 51 | } 52 | ) 53 | expect_true(all(same_data)) 54 | 55 | good_holdout <- map_lgl( 56 | rs1$splits, 57 | function(x) { 58 | length(intersect(x$in_ind, x$out_id)) == 0 59 | } 60 | ) 61 | expect_true(all(good_holdout)) 62 | 63 | # This tests to ensure that _other_ warnings don't fire on _most_ platforms 64 | # The default RNG changed in 3.6.0 (skips oldrel-4) 65 | skip_if_not(getRversion() >= numeric_version("3.6.0")) 66 | # Older builds without s2 give additional warnings, 67 | # as running sf::st_centroid pre-s2 gives inaccurate results 68 | # for geographic CRS (skips windows-3.6) 69 | skip_if_not(sf::sf_use_s2()) 70 | set.seed(123) 71 | expect_snapshot( 72 | spatial_nndm_cv(Smithsonian_sf[1:15, ], Smithsonian_sf[16:20, ]) 73 | ) 74 | }) 75 | 76 | test_that("can pass a single polygon to sample within", { 77 | library(sf) 78 | skip_if_not(sf::sf_use_s2()) 79 | 80 | example_poly <- sf::st_as_sfc( 81 | list( 82 | sf::st_point(c(-77.03, 40)), 83 | sf::st_point(c(-76, 40.5)), 84 | sf::st_point(c(-76.5, 39.5)) 85 | ) 86 | ) 87 | example_poly <- sf::st_set_crs(example_poly, sf::st_crs(Smithsonian_sf)) 88 | example_poly <- sf::st_union(example_poly) 89 | example_poly <- sf::st_cast(example_poly, "POLYGON") 90 | 91 | expect_snapshot( 92 | spatial_nndm_cv( 93 | Smithsonian_sf, 94 | example_poly 95 | ) 96 | ) 97 | }) 98 | 99 | 100 | 101 | test_that("printing", { 102 | skip_if_not(sf::sf_use_s2()) 103 | # The default RNG changed in 3.6.0 104 | skip_if_not(getRversion() >= numeric_version("3.6.0")) 105 | set.seed(123) 106 | expect_snapshot_output( 107 | spatial_nndm_cv(Smithsonian_sf[1:15, ], Smithsonian_sf[16:20, ]) 108 | ) 109 | }) 110 | 111 | test_that("rsplit labels", { 112 | skip_if_not(sf::sf_use_s2()) 113 | rs <- spatial_nndm_cv(Smithsonian_sf[1:15, ], Smithsonian_sf[16:20, ]) 114 | all_labs <- dplyr::bind_rows(purrr::map(rs$splits, labels)) 115 | original_id <- rs[, grepl("^id", names(rs))] 116 | expect_equal(all_labs, original_id) 117 | }) 118 | 119 | test_that("passing a polygon works correctly", { 120 | skip_if_not(sf::sf_use_s2()) 121 | ames_sf <- sf::st_as_sf( 122 | modeldata::ames, 123 | coords = c("Longitude", "Latitude"), 124 | crs = 4326 125 | ) 126 | ch <- sf::st_as_sfc(sf::st_bbox(ames_sf)) 127 | 128 | withr::with_seed( 129 | 123, 130 | pts <- sf::st_sample(ch, 1000) 131 | ) 132 | nndm_1 <- spatial_nndm_cv(ames_sf[1:100, ], pts) 133 | attr(nndm_1, "prediction_sites") <- ch 134 | withr::with_seed( 135 | 123, 136 | nndm_2 <- spatial_nndm_cv(ames_sf[1:100, ], ch) 137 | ) 138 | expect_identical(nndm_1, nndm_2) 139 | }) 
-------------------------------------------------------------------------------- /tests/testthat/test-spatial_vfold_cv.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(rsample) 3 | library(purrr) 4 | 5 | skip_if_not_installed("modeldata") 6 | 7 | data(ames, package = "modeldata") 8 | ames_sf <- sf::st_as_sf(ames, coords = c("Longitude", "Latitude"), crs = 4326) 9 | 10 | test_that("erroring when no S2", { 11 | s2_store <- sf::sf_use_s2() 12 | sf::sf_use_s2(FALSE) 13 | expect_snapshot( 14 | spatial_buffer_vfold_cv(ames_sf, buffer = 500, radius = NULL), 15 | error = TRUE 16 | ) 17 | expect_snapshot( 18 | suppressMessages(spatial_leave_location_out_cv(ames_sf, Neighborhood, buffer = 500)), 19 | error = TRUE 20 | ) 21 | sf::sf_use_s2(s2_store) 22 | }) 23 | 24 | test_that("spatial_buffer_vfold_cv", { 25 | skip_if_not(sf::sf_use_s2()) 26 | set.seed(11) 27 | rs1 <- spatial_buffer_vfold_cv(ames_sf, radius = NULL, buffer = NULL) 28 | sizes1 <- dim_rset(rs1) 29 | 30 | set.seed(11) 31 | rs2 <- rsample::vfold_cv(ames_sf) 32 | expect_identical( 33 | purrr::map(rs1$splits, purrr::pluck, "in_id"), 34 | purrr::map(rs2$splits, purrr::pluck, "in_id") 35 | ) 36 | 37 | expect_true(all(sizes1$analysis + sizes1$assessment == nrow(ames))) 38 | same_data <- map_lgl( 39 | rs1$splits, 40 | function(x) { 41 | isTRUE(all.equal(x$data, ames_sf)) 42 | } 43 | ) 44 | expect_true(all(same_data)) 45 | 46 | good_holdout <- map_lgl( 47 | rs1$splits, 48 | function(x) { 49 | length(intersect(x$in_ind, x$out_id)) == 0 50 | } 51 | ) 52 | expect_true(all(good_holdout)) 53 | 54 | set.seed(123) 55 | rs1 <- spatial_buffer_vfold_cv( 56 | ames_sf, 57 | v = 2, 58 | radius = NULL, 59 | buffer = NULL, 60 | repeats = 2 61 | ) 62 | expect_identical( 63 | names(rs1), 64 | c("splits", "id", "id2") 65 | ) 66 | expect_snapshot(rs1) 67 | same_data <- map_lgl( 68 | rs1$splits, 69 | function(x) { 70 | isTRUE(all.equal(x$data, ames_sf)) 71 | } 72 | ) 73 | expect_true(all(same_data)) 74 | 75 | good_holdout <- map_lgl( 76 | rs1$splits, 77 | function(x) { 78 | length(intersect(x$in_ind, x$out_id)) == 0 79 | } 80 | ) 81 | expect_true(all(good_holdout)) 82 | }) 83 | 84 | test_that("spatial_leave_location_out_cv", { 85 | skip_if_not(sf::sf_use_s2()) 86 | set.seed(11) 87 | rs1 <- spatial_leave_location_out_cv(ames_sf, Neighborhood) 88 | sizes1 <- dim_rset(rs1) 89 | 90 | set.seed(11) 91 | rs2 <- rsample::group_vfold_cv( 92 | ames_sf, 93 | tidyselect::eval_select("Neighborhood", ames_sf) 94 | ) 95 | expect_identical( 96 | purrr::map(rs1$splits, purrr::pluck, "in_id"), 97 | purrr::map(rs2$splits, purrr::pluck, "in_id") 98 | ) 99 | 100 | expect_true(all(sizes1$analysis + sizes1$assessment == nrow(ames))) 101 | same_data <- map_lgl( 102 | rs1$splits, 103 | function(x) { 104 | isTRUE(all.equal(x$data, ames_sf)) 105 | } 106 | ) 107 | expect_true(all(same_data)) 108 | 109 | good_holdout <- map_lgl( 110 | rs1$splits, 111 | function(x) { 112 | length(intersect(x$in_ind, x$out_id)) == 0 113 | } 114 | ) 115 | expect_true(all(good_holdout)) 116 | 117 | set.seed(123) 118 | rs1 <- spatial_leave_location_out_cv( 119 | ames_sf, 120 | Neighborhood, 121 | v = 2, 122 | repeats = 2 123 | ) 124 | same_data <- map_lgl( 125 | rs1$splits, 126 | function(x) { 127 | isTRUE(all.equal(x$data, ames_sf)) 128 | } 129 | ) 130 | expect_true(all(same_data)) 131 | 132 | good_holdout <- map_lgl( 133 | rs1$splits, 134 | function(x) { 135 | length(intersect(x$in_ind, x$out_id)) == 0 136 | } 137 | ) 138 | 
expect_true(all(good_holdout)) 139 | 140 | expect_identical( 141 | names(rs1), 142 | c("splits", "id", "id2") 143 | ) 144 | skip_if_not(getRversion() >= numeric_version("3.6.0")) 145 | expect_snapshot(rs1) 146 | }) 147 | 148 | test_that("bad args", { 149 | skip_if_not(sf::sf_use_s2()) 150 | set.seed(123) 151 | 152 | set.seed(123) 153 | expect_snapshot( 154 | spatial_buffer_vfold_cv(ames_sf, radius = NULL), 155 | error = TRUE 156 | ) 157 | 158 | set.seed(123) 159 | expect_snapshot( 160 | spatial_buffer_vfold_cv(ames_sf, buffer = 500), 161 | error = TRUE 162 | ) 163 | 164 | set.seed(123) 165 | expect_snapshot( 166 | spatial_buffer_vfold_cv(ames_sf), 167 | error = TRUE 168 | ) 169 | 170 | expect_snapshot( 171 | spatial_leave_location_out_cv(ames), 172 | error = TRUE 173 | ) 174 | 175 | expect_snapshot( 176 | spatial_leave_location_out_cv(ames, Neighborhood, buffer = 500), 177 | error = TRUE 178 | ) 179 | 180 | set.seed(123) 181 | expect_snapshot( 182 | spatial_leave_location_out_cv(ames_sf, v = c(5, 10)), 183 | error = TRUE 184 | ) 185 | 186 | set.seed(123) 187 | expect_snapshot( 188 | spatial_buffer_vfold_cv(ames_sf, v = c(5, 10), buffer = NULL, radius = NULL), 189 | error = TRUE 190 | ) 191 | 192 | skip_if_not(getRversion() >= numeric_version("3.6.0")) 193 | 194 | set.seed(123) 195 | expect_snapshot( 196 | spatial_leave_location_out_cv(ames_sf, Neighborhood, v = 60) 197 | ) 198 | 199 | set.seed(123) 200 | expect_snapshot( 201 | spatial_buffer_vfold_cv(boston_canopy, v = 683, buffer = NULL, radius = NULL) 202 | ) 203 | 204 | set.seed(123) 205 | expect_snapshot_error( 206 | spatial_buffer_vfold_cv( 207 | boston_canopy, 208 | v = 682, 209 | buffer = NULL, 210 | radius = NULL, 211 | repeats = 2 212 | ) 213 | ) 214 | 215 | set.seed(123) 216 | expect_snapshot_error( 217 | spatial_leave_location_out_cv( 218 | ames_sf, 219 | Neighborhood, 220 | repeats = 2 221 | ) 222 | ) 223 | }) 224 | 225 | test_that("printing", { 226 | skip_if_not(sf::sf_use_s2()) 227 | # The default RNG changed in 3.6.0 228 | skip_if_not(getRversion() >= numeric_version("3.6.0")) 229 | set.seed(123) 230 | expect_snapshot_output( 231 | spatial_block_cv(ames_sf) 232 | ) 233 | }) 234 | 235 | test_that("rsplit labels", { 236 | skip_if_not(sf::sf_use_s2()) 237 | set.seed(123) 238 | rs <- spatial_buffer_vfold_cv(ames_sf, v = 2, buffer = NULL, radius = NULL) 239 | all_labs <- dplyr::bind_rows(purrr::map(rs$splits, labels)) 240 | original_id <- rs[, grepl("^id", names(rs))] 241 | expect_equal(all_labs, original_id) 242 | 243 | set.seed(123) 244 | rs <- spatial_leave_location_out_cv(ames_sf, Neighborhood, v = 2) 245 | all_labs <- dplyr::bind_rows(purrr::map(rs$splits, labels)) 246 | original_id <- rs[, grepl("^id", names(rs))] 247 | expect_equal(all_labs, original_id) 248 | }) 249 | -------------------------------------------------------------------------------- /vignettes/.gitignore: -------------------------------------------------------------------------------- 1 | *.html 2 | *.R 3 | -------------------------------------------------------------------------------- /vignettes/articles/buffering.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Buffering" 3 | --- 4 | 5 | ```{r, include = FALSE} 6 | knitr::opts_chunk$set( 7 | collapse = TRUE, 8 | comment = "#>", 9 | fig.width = 8, 10 | fig.height = 5.75, 11 | eval = requireNamespace("gifski", quietly = TRUE) && requireNamespace("knitr", quietly = TRUE) 12 | ) 13 | library(ggplot2) 14 | theme_set(theme_minimal()) 15 | ``` 16 | 17 | 
The goal of spatialsample is to provide functions and classes for spatial resampling to use with [rsample](https://rsample.tidymodels.org/). Keeping the data used to train a spatial model (what we call a training or analysis set) separate from the data used to evaluate that model (what we call a testing or assessment set) provides a more realistic view of how well it will perform when extrapolating to new locations. 18 | 19 | When resampling spatial data, we often want to introduce some distance between analysis and assessment data; one of the most common methods for introducing this distance is to "buffer" the assessment set, removing all points within a given distance from the analysis set to enforce a minimum space between data sets. This vignette walks through how to buffer your assessment folds with spatialsample, as well as some considerations about how those buffers are calculated. 20 | 21 | To begin, let's load spatialsample: 22 | 23 | ```{r setup} 24 | library(spatialsample) 25 | ``` 26 | 27 | ## Exclusion buffers 28 | 29 | By default, most spatial cross-validation methods in spatialsample don't automatically create buffer zones. Take for instance `spatial_block_cv()`, which creates a number of "blocks" in a grid and assigns data to folds based on the block its centroid falls in: 30 | 31 | ```{r boston-block} 32 | set.seed(123) 33 | blocks <- spatial_block_cv(boston_canopy, v = 5) 34 | 35 | autoplot(blocks) 36 | ``` 37 | 38 | If we look at the individual folds, we can see that the assessment data directly borders the analysis data for each given fold: 39 | 40 | ```{r boston-block-gif, animation.hook="gifski"} 41 | purrr::walk(blocks$splits, function(x) print(autoplot(x))) 42 | ``` 43 | 44 | This is a downside of standard blocking cross-validation approaches; while it does introduce some spatial separation between the analysis and assessment sets for data at the middle of the block, data towards the edges may not be separated at all. 45 | 46 | Applying an exclusion buffer around each assessment fold lets us change that. To create these exclusion buffers while using any cross-validation function in spatialsample, we can use a standardized `buffer` argument: 47 | 48 | ```{r} 49 | set.seed(123) 50 | blocks <- spatial_block_cv(boston_canopy, v = 5, buffer = 1500) 51 | ``` 52 | 53 | Now when we plot the folds separately, we can see that a strip of data around each assessment block has been assigned to neither the analysis nor the assessment fold. Instead, it's been removed entirely in order to provide some distance between the two sets: 54 | 55 | ```{r boston-block-buffer-gif, animation.hook="gifski"} 56 | purrr::walk(blocks$splits, function(x) print(autoplot(x))) 57 | ``` 58 | 59 | By default, `buffer` is assumed to be in the same units as your data, as determined by the data's coordinate reference system. To apply buffers in other units, use the `units` package to explicitly specify what units your buffer is in. 60 | 61 | For instance, `boston_canopy` uses units of US feet for distance. To specify a buffer in meters instead, we can use: 62 | 63 | ```{r boston-block-buffer-m-gif, animation.hook="gifski"} 64 | set.seed(123) 65 | blocks <- spatial_block_cv( 66 | boston_canopy, 67 | v = 5, 68 | buffer = units::as_units(1500, "m") 69 | ) 70 | 71 | purrr::walk(blocks$splits, function(x) print(autoplot(x))) 72 | ``` 73 | 74 | Note that, when you're using non-point data, the distance between observations is calculated as the shortest distance between any points in two observations.
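To make "shortest distance" concrete, here is a minimal sketch using two hypothetical unit squares that share an edge. These squares (and the chunk below) are invented purely for illustration and are not part of `boston_canopy` or the spatialsample API:

```{r edge-to-edge-distance, eval = FALSE}
# Two made-up unit squares that share an edge: measured edge-to-edge they
# are 0 apart, even though their centroids are 1 unit apart.
square_a <- sf::st_polygon(list(rbind(c(0, 0), c(1, 0), c(1, 1), c(0, 1), c(0, 0))))
square_b <- sf::st_polygon(list(rbind(c(1, 0), c(2, 0), c(2, 1), c(1, 1), c(1, 0))))
squares <- sf::st_sfc(square_a, square_b)

sf::st_distance(squares)                  # off-diagonal entries are 0
sf::st_distance(sf::st_centroid(squares)) # off-diagonal entries are 1
```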
For instance, buffers on polygon data will exclude data based on the edge-to-edge distance between observations, rather than centroid to centroid. 75 | 76 | One special case, however, is when `buffer` is set to 0. In this case, spatialsample won't apply a buffer at all. While polygons that share an edge are within 0 distance of each other, when calculated from edge-to-edge, we think that setting `buffer = 0` would intuitively apply zero (that is, _no_) buffer. If you want to be sure to only capture adjacent polygons in a buffer, set `buffer` to a tiny, non-zero value: 77 | 78 | ```{r boston-block-buffer-eps-gif, animation.hook="gifski"} 79 | set.seed(123) 80 | blocks <- spatial_block_cv( 81 | boston_canopy, 82 | v = 5, 83 | buffer = 2e-200 84 | ) 85 | 86 | purrr::walk(blocks$splits, function(x) print(autoplot(x))) 87 | ``` 88 | 89 | ## Inclusion radii 90 | 91 | In addition to exclusion buffers, spatialsample also provides a way to add an _inclusion_ buffer (or as we call it, an "inclusion radius") around your assessment set. Simply set the `radius` argument in any spatial cross-validation function to your desired distance, and any data within that inclusion radius will be added to the assessment set: 92 | 93 | ```{r boston-block-radius-eps-gif, animation.hook="gifski"} 94 | set.seed(123) 95 | blocks <- spatial_block_cv( 96 | boston_canopy, 97 | v = 5, 98 | radius = 2e-200 99 | ) 100 | 101 | purrr::walk(blocks$splits, function(x) print(autoplot(x))) 102 | ``` 103 | 104 | This argument is handled the same way as `buffer`, with the same caveats: 105 | 106 | - Unless units are specified explicitly, `radius` is assumed to be in the same units as your data's coordinate reference system. 107 | - Distances are calculated between the closest parts of observations. 108 | - Values of zero do not apply a radius. 109 | 110 | Both `radius` and `buffer` can be specified at the same time. This makes it possible to implement, for instance, leave-one-disc-out cross-validation using spatialsample: 111 | 112 | ```{r boston-block-disc-visible, eval = FALSE} 113 | set.seed(123) 114 | blocks <- spatial_buffer_vfold_cv( 115 | boston_canopy, 116 | v = nrow(boston_canopy), 117 | radius = 1500, 118 | buffer = 1500 119 | ) 120 | purrr::walk(blocks$splits, function(x) print(autoplot(x))) 121 | ``` 122 | 123 | ```{r boston-block-disc-executed, echo = FALSE, animation.hook="gifski"} 124 | set.seed(123) 125 | blocks <- spatial_buffer_vfold_cv( 126 | boston_canopy, 127 | v = nrow(boston_canopy), 128 | radius = 1500, 129 | buffer = 1500 130 | ) 131 | 132 | # Without filtering, this creates an 11 minute long GIF 133 | # which is possibly not the best use of disc space 134 | # 135 | # ...no matter how badly I want to ship an 11 minute GIF in a CRAN package 136 | purrr::walk(head(blocks, 60)$splits, function(x) print(autoplot(x))) 137 | ``` 138 | 139 | When both `radius` and `buffer` are specified, spatialsample first applies the inclusion radius to the original randomly-selected assessment set, adding any data within the radius to the assessment set. Next, the exclusion buffer is applied to all the points in the new (post-radius) assessment set, removing any data within the buffer from the analysis set. 140 | 141 | Note that this means that `buffer` is _not_ simply applying a "doughnut" around the circular "radius", but is buffering each test point separately. 
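If it helps to see that ordering spelled out, here is a rough sketch of the two steps in code. This is a conceptual illustration only, not the routine spatialsample actually uses internally; `expand_and_buffer()` and `seed_idx` are invented names, and the real implementation handles units, zero values, and other edge cases differently:

```{r radius-buffer-sketch, eval = FALSE}
# Conceptual sketch only -- not spatialsample's internals. `data` is an sf
# data frame and `seed_idx` the originally-selected assessment rows for one
# fold (both are hypothetical names used for illustration).
expand_and_buffer <- function(data, seed_idx, radius, buffer) {
  # Step 1: inclusion radius -- rows within `radius` of any originally
  # selected assessment row join the assessment set.
  near_seed <- sf::st_is_within_distance(data, data[seed_idx, ], dist = radius)
  assessment <- union(seed_idx, which(lengths(near_seed) > 0))

  # Step 2: exclusion buffer -- rows within `buffer` of any point in the
  # *expanded* assessment set are dropped from the analysis set entirely.
  near_test <- sf::st_is_within_distance(data, data[assessment, ], dist = buffer)
  excluded <- setdiff(which(lengths(near_test) > 0), assessment)

  list(
    analysis = setdiff(seq_len(nrow(data)), c(assessment, excluded)),
    assessment = assessment
  )
}
```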
See for instance the non-uniform buffer region that happens when there's a gap in the data: 142 | 143 | ```{r boston-both} 144 | autoplot(blocks$splits[[12]]) 145 | ``` 146 | 147 | This leaves more data in your analysis set for fitting the model, while still ensuring your assessment data is spatially removed. 148 | -------------------------------------------------------------------------------- /vignettes/spatialsample.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Using spatial resamples for analysis" 3 | output: rmarkdown::html_vignette 4 | vignette: > 5 | %\VignetteIndexEntry{Using spatial resamples for analysis} 6 | %\VignetteEngine{knitr::rmarkdown} 7 | %\VignetteEncoding{UTF-8} 8 | --- 9 | 10 | ```{r, include = FALSE} 11 | knitr::opts_chunk$set( 12 | collapse = TRUE, 13 | comment = "#>", 14 | fig.width = 8, 15 | fig.height = 5.75, 16 | eval = requireNamespace("gifski", quietly = TRUE) && requireNamespace("modeldata", quietly = TRUE) && sf::sf_use_s2() 17 | ) 18 | ``` 19 | 20 | ```{r, include = FALSE} 21 | library(ggplot2) 22 | theme_set(theme_minimal()) 23 | ``` 24 | 25 | 26 | The resampled objects created by spatialsample can be used in many of the same ways that those created by [rsample](https://rsample.tidymodels.org/) can, from making comparisons to evaluating models. These objects can be used together with other parts of the [tidymodels](https://www.tidymodels.org/) framework, but let's walk through a more basic example using linear modeling of housing data from Ames, IA. 27 | 28 | ```{r} 29 | data("ames", package = "modeldata") 30 | ``` 31 | 32 | The Ames housing data is a normal [tibble](https://tibble.tidyverse.org/). While many of the functions in spatialsample support standard tibbles, several require that our data be an [sf](https://r-spatial.github.io/sf/) object to properly handle spatial distance calculations. We can transform our Ames data into an sf object using the `sf::st_as_sf()` function: 33 | 34 | ```{r} 35 | ames_sf <- sf::st_as_sf( 36 | ames, 37 | # "coords" is in x/y order -- so longitude goes first! 38 | coords = c("Longitude", "Latitude"), 39 | # Set our coordinate reference system to EPSG:4326, 40 | # the standard WGS84 geodetic coordinate reference system 41 | crs = 4326 42 | ) 43 | ``` 44 | 45 | For this vignette, we'll model the sale prices of the houses in the Ames data set. Let's say that the sale price of these houses depends on the year they were built, their living area (size), and the type of house they are (duplex vs. townhouse vs. single family), along with perhaps interactions between type and house size. 46 | 47 | ```{r, eval = FALSE} 48 | log10(Sale_Price) ~ Year_Built + Gr_Liv_Area + Bldg_Type 49 | ``` 50 | 51 | This relationship may exhibit spatial autocorrelation across the city of Ames, and we can use any of the several different methods provided by spatialsample to try and investigate it. 52 | 53 | For instance, we could create `v = 15` spatial cross-validation folds with `spatial_clustering_cv()`, which [uses k-means clustering in order to divide the data into folds](https://doi.org/10.1109/IGARSS.2012.6352393). 
We can then visualize those folds using the `autoplot()` function from spatialsample: 54 | 55 | ```{r} 56 | library(spatialsample) 57 | 58 | set.seed(123) 59 | cluster_folds <- spatial_clustering_cv(ames_sf, v = 15) 60 | 61 | autoplot(cluster_folds) 62 | ``` 63 | 64 | Our `cluster_folds` object is an `rset` object that contains many resamples or `rsplit` objects in the `splits` column. The resulting partitions do not necessarily contain an equal number of observations: 65 | 66 | ```{r} 67 | cluster_folds 68 | ``` 69 | 70 | But while spatial clustering is _a_ method for spatial cross-validation using spatialsample, it is not the only method available. The other methods are broadly similar, breaking the data into a number of (not necessarily even) folds based on spatial arrangement. 71 | 72 | For instance, the `spatial_block_cv()` function will perform [spatial blocking](https://doi.org/10.1111/ecog.02881) with your data: 73 | 74 | ```{r} 75 | set.seed(123) 76 | block_folds <- spatial_block_cv(ames_sf, v = 15) 77 | 78 | autoplot(block_folds) 79 | ``` 80 | 81 | If you already have a sense of what locations in your data are likely to be closely related, you can also use the `spatial_leave_location_out_cv()` function to perform [leave-location-out cross-validation](https://doi.org/10.1016/j.envsoft.2017.12.001). For instance, we can split the Ames data into folds based on neighborhoods using this function: 82 | 83 | ```{r} 84 | set.seed(123) 85 | location_folds <- 86 | spatial_leave_location_out_cv( 87 | ames_sf, 88 | group = Neighborhood, 89 | v = 15 90 | ) 91 | 92 | autoplot(location_folds) 93 | ``` 94 | 95 | We've now got a lot of different resamples floating around! We're going to fit the same models to each of them, in the same way, using the same code. In order to make that task a little easier, let's add a new column named `type` to signal what type of resample each fold is from, and then combine them into a new data frame: 96 | 97 | ```{r} 98 | cluster_folds$type <- "cluster" 99 | block_folds$type <- "block" 100 | location_folds$type <- "location" 101 | 102 | resamples <- 103 | dplyr::bind_rows( 104 | cluster_folds, 105 | block_folds, 106 | location_folds 107 | ) 108 | ``` 109 | 110 | Now let's write a function that will, for each resample: 111 | 112 | - obtain the _analysis_ set for model fitting 113 | - fit a linear model with an interaction term 114 | - predict the _assessment_ set and return both the true and predicted price, on the log scale 115 | 116 | 117 | ```{r} 118 | # `splits` will be the `rsplit` object 119 | compute_preds <- function(splits) { 120 | # fit the model to the analysis set 121 | mod <- lm(log10(Sale_Price) ~ Year_Built + Bldg_Type * log10(Gr_Liv_Area), 122 | data = analysis(splits) 123 | ) 124 | # identify the assessment set 125 | holdout <- assessment(splits) 126 | # return the assessment set, with true and predicted price 127 | tibble::tibble( 128 | geometry = holdout$geometry, 129 | Sale_Price = log10(holdout$Sale_Price), 130 | .pred = predict(mod, holdout) 131 | ) 132 | } 133 | ``` 134 | 135 | 136 | We can apply this function to just one of the `splits`. 137 | 138 | ```{r} 139 | compute_preds(cluster_folds$splits[[7]]) 140 | ``` 141 | 142 | 143 | Or we can apply this function to all of the `splits`, using `purrr::map()`.
144 | 145 | ```{r} 146 | library(purrr) 147 | library(dplyr) 148 | 149 | cv_res <- resamples %>% 150 | mutate(.preds = map(splits, compute_preds)) 151 | ``` 152 | 153 | We can `unnest()` these results and [use yardstick to compute any regression metrics appropriate to this modeling analysis](https://yardstick.tidymodels.org/reference/index.html#section-regression-metrics), such as `yardstick::rmse()`: 154 | 155 | ```{r} 156 | library(tidyr) 157 | library(yardstick) 158 | 159 | cv_rmse <- cv_res %>% 160 | unnest(.preds) %>% 161 | group_by(id, type) %>% 162 | rmse(Sale_Price, .pred) 163 | 164 | cv_rmse 165 | ``` 166 | 167 | It looks like the RMSE may vary across the city, so we can join the metrics back up to our results and plot them. 168 | 169 | ```{r fig.height=12} 170 | library(ggplot2) 171 | 172 | cv_res %>% 173 | unnest(.preds) %>% 174 | left_join(cv_rmse, by = c("id", "type")) %>% 175 | ggplot(aes(color = .estimate)) + 176 | geom_sf(aes(geometry = geometry), alpha = 0.5) + 177 | labs(color = "RMSE") + 178 | scale_color_viridis_c() + 179 | facet_wrap(vars(type), ncol = 1) 180 | ``` 181 | 182 | As you can see, the results you get are highly dependent on how you resample your data! It's important to use the right method for your data (and, for methods like spatial blocking and buffered cross-validation, the right distances). 183 | --------------------------------------------------------------------------------