├── .Rbuildignore ├── .Rprofile ├── .editorconfig ├── .github └── workflows │ ├── dev-cmd-check.yml │ ├── pkgdown.yml │ └── r-cmd-check.yml ├── .gitignore ├── .lintr ├── DESCRIPTION ├── NAMESPACE ├── NEWS.md ├── R ├── DataBackendRaster.R ├── DataBackendVector.R ├── LearnerClassifSpatial.R ├── LearnerRegrSpatial.R ├── TaskClassifST.R ├── TaskClassif_leipzig.R ├── TaskRegrST.R ├── as_task_classif_st.R ├── as_task_regr_st.R ├── as_task_unsupervised.R ├── data.R ├── helper.R ├── predict_spatial.R └── zzz.R ├── README.Rmd ├── README.md ├── attic ├── benchmark.R ├── benchmark.Rmd ├── benchmark.Rmd.orig ├── benchmark.html ├── plot-benchmark-1.png ├── plot-benchmark-large-1.png └── plot-benchmark-small-1.png ├── data └── leipzig.rda ├── inst ├── WORDLIST └── extdata │ ├── leipzig_points.gpkg │ └── leipzig_raster.tif ├── man-roxygen ├── param-chunksize.R ├── param-data.R ├── param-primary-key.R ├── param-quiet.R ├── param-response-is-factor.R ├── param-response.R ├── param-task.R ├── param_backend.R ├── param_coordinate_names.R ├── param_coords_as_features.R ├── param_crs.R ├── param_extra_args.R ├── param_id.R ├── param_label.R ├── param_positive.R └── param_target.R ├── man ├── DataBackendRaster.Rd ├── DataBackendVector.Rd ├── TaskClassifST.Rd ├── TaskRegrST.Rd ├── as_data_backend.Rd ├── as_task_classif_st.Rd ├── as_task_regr_st.Rd ├── block_size.Rd ├── factor_layer.Rd ├── figures │ ├── land_cover.png │ ├── logo.png │ └── sentinel.png ├── generate_stack.Rd ├── leipzig.Rd ├── mask_stack.Rd ├── mlr3spatial-package.Rd ├── numeric_layer.Rd ├── predict_spatial.Rd ├── sample_stack.Rd └── write_raster.Rd ├── mlr3spatial.Rproj ├── pkgdown ├── _pkgdown.yml ├── apple-touch-icon-120x120.png ├── apple-touch-icon-152x152.png ├── apple-touch-icon-180x180.png ├── apple-touch-icon-60x60.png ├── apple-touch-icon-76x76.png ├── apple-touch-icon.png ├── favicon-16x16.png ├── favicon-32x32.png ├── favicon.ico └── favicon │ ├── apple-touch-icon-120x120.png │ ├── apple-touch-icon-152x152.png │ 
├── apple-touch-icon-180x180.png │ ├── apple-touch-icon-60x60.png │ ├── apple-touch-icon-76x76.png │ ├── apple-touch-icon.png │ ├── favicon-16x16.png │ ├── favicon-32x32.png │ └── favicon.ico ├── tests ├── testthat.R └── testthat │ ├── helper_expectations.R │ ├── helper_future.R │ ├── helper_learner.R │ ├── setup.R │ ├── teardown.R │ ├── test_DataBackendRaster.R │ ├── test_DataBackendVector.R │ ├── test_LearnerClassifSpatial.R │ ├── test_LearnerRegrSpatial.R │ ├── test_TaskClassifST.R │ ├── test_TaskRegrST.R │ ├── test_as_task_classif_st.R │ ├── test_as_task_regr_st.R │ ├── test_as_task_unsupervised.R │ ├── test_bock_size.R │ ├── test_data.R │ └── test_predict_spatial.R └── vignettes ├── .gitignore ├── benchmark.Rmd ├── benchmark.Rmd.orig ├── plot-benchmark-1.png └── precompile.R /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^renv$ 2 | ^renv\.lock$ 3 | ^.*\.Rproj$ 4 | ^\.Rproj\.user$ 5 | ^\.pre-commit-config\.yaml$ 6 | ^\.lintr$ 7 | ^\.github$ 8 | ^\.ccache$ 9 | ^codemeta\.json$ 10 | pkgdown 11 | ^_pkgdown\.yml$ 12 | ^docs$ 13 | ^pkgdown$ 14 | attic/ 15 | target* 16 | ^.*\.xml$ 17 | ^man-roxygen$ 18 | cran-comments\.md 19 | ^CRAN-RELEASE$ 20 | ^CRAN-SUBMISSION$ 21 | .editorconfig 22 | ^README\.Rmd$ 23 | ^README\.html$ 24 | .vscode 25 | ^cran-comments\.md$ 26 | -------------------------------------------------------------------------------- /.Rprofile: -------------------------------------------------------------------------------- 1 | if (dir.exists("renv")) { 2 | source("renv/activate.R") 3 | options(renv.config.user.profile = TRUE) 4 | } 5 | 6 | options(styler.addins_style_transformer = "styler.mlr::mlr_style()") 7 | 8 | if (file.exists("~/.Rprofile")) { # file.exists() checks the file on disk; exists() would look for an R object of that name 9 | source("~/.Rprofile") 10 | } 11 | -------------------------------------------------------------------------------- /.editorconfig: -------------------------------------------------------------------------------- 1 | # See http://editorconfig.org 2 | root = 
true 3 | 4 | [*] 5 | charset = utf-8 6 | end_of_line = lf 7 | insert_final_newline = true 8 | indent_style = space 9 | trim_trailing_whitespace = true 10 | 11 | [*.{r,R,md,Rmd}] 12 | indent_size = 2 13 | 14 | [*.{c,h}] 15 | indent_size = 4 16 | 17 | [*.{cpp,hpp}] 18 | indent_size = 4 19 | 20 | [{NEWS.md,DESCRIPTION,LICENSE}] 21 | max_line_length = 80 22 | -------------------------------------------------------------------------------- /.github/workflows/dev-cmd-check.yml: -------------------------------------------------------------------------------- 1 | # dev cmd check workflow of the mlr3 ecosystem v0.1.0 2 | # https://github.com/mlr-org/actions 3 | on: 4 | workflow_dispatch: 5 | push: 6 | branches: 7 | - main 8 | pull_request: 9 | branches: 10 | - main 11 | 12 | name: dev-check 13 | 14 | jobs: 15 | check-package: 16 | runs-on: ${{ matrix.config.os }} 17 | 18 | name: ${{ matrix.config.dev-package }} 19 | 20 | env: 21 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 22 | 23 | strategy: 24 | fail-fast: false 25 | matrix: 26 | config: 27 | - {os: ubuntu-latest, r: 'release', dev-package: 'mlr-org/mlr3'} 28 | - {os: ubuntu-latest, r: 'release', dev-package: "mlr-org/paradox', 'mlr-org/mlr3learners"} 29 | 30 | steps: 31 | - uses: actions/checkout@v3 32 | 33 | - uses: r-lib/actions/setup-r@v2 34 | with: 35 | r-version: ${{ matrix.config.r }} 36 | 37 | - uses: r-lib/actions/setup-r-dependencies@v2 38 | with: 39 | extra-packages: any::rcmdcheck 40 | needs: check 41 | 42 | - name: Install dev versions 43 | run: pak::pkg_install(c('${{ matrix.config.dev-package }}')) 44 | shell: Rscript {0} 45 | 46 | - uses: r-lib/actions/check-r-package@v2 47 | -------------------------------------------------------------------------------- /.github/workflows/pkgdown.yml: -------------------------------------------------------------------------------- 1 | # pkgdown workflow of the mlr3 ecosystem v0.1.0 2 | # https://github.com/mlr-org/actions 3 | on: 4 | push: 5 | branches: 6 | - main 7 | 
pull_request: 8 | branches: 9 | - main 10 | release: 11 | types: 12 | - published 13 | workflow_dispatch: 14 | 15 | name: pkgdown 16 | 17 | jobs: 18 | pkgdown: 19 | runs-on: ubuntu-latest 20 | 21 | concurrency: 22 | group: pkgdown-${{ github.event_name != 'pull_request' || github.run_id }} 23 | env: 24 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 25 | steps: 26 | - uses: actions/checkout@v3 27 | 28 | - uses: r-lib/actions/setup-pandoc@v2 29 | 30 | - uses: r-lib/actions/setup-r@v2 31 | 32 | - uses: r-lib/actions/setup-r-dependencies@v2 33 | with: 34 | extra-packages: any::pkgdown, local::. 35 | needs: website 36 | 37 | - name: Install template 38 | run: pak::pkg_install("mlr-org/mlr3pkgdowntemplate") 39 | shell: Rscript {0} 40 | 41 | - name: Build site 42 | run: pkgdown::build_site_github_pages(new_process = FALSE, install = FALSE) 43 | shell: Rscript {0} 44 | 45 | - name: Deploy 46 | if: github.event_name != 'pull_request' 47 | uses: JamesIves/github-pages-deploy-action@v4.4.1 48 | with: 49 | clean: false 50 | branch: gh-pages 51 | folder: docs 52 | -------------------------------------------------------------------------------- /.github/workflows/r-cmd-check.yml: -------------------------------------------------------------------------------- 1 | # r cmd check workflow of the mlr3 ecosystem v0.1.0 2 | # https://github.com/mlr-org/actions 3 | on: 4 | workflow_dispatch: 5 | push: 6 | branches: 7 | - main 8 | pull_request: 9 | branches: 10 | - main 11 | 12 | name: r-cmd-check 13 | 14 | jobs: 15 | r-cmd-check: 16 | runs-on: ${{ matrix.config.os }} 17 | 18 | name: ${{ matrix.config.os }} (${{ matrix.config.r }}) 19 | 20 | env: 21 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 22 | 23 | strategy: 24 | fail-fast: false 25 | matrix: 26 | config: 27 | - {os: ubuntu-latest, r: 'devel'} 28 | - {os: ubuntu-latest, r: 'release'} 29 | - {os: ubuntu-latest, r: 'release', dev-package: 'mlr-org/mlr3@weights_reworked'} 30 | 31 | steps: 32 | - uses: actions/checkout@v3 33 | 34 | - uses: 
r-lib/actions/setup-r@v2 35 | with: 36 | r-version: ${{ matrix.config.r }} 37 | 38 | - uses: r-lib/actions/setup-r-dependencies@v2 39 | with: 40 | extra-packages: any::rcmdcheck 41 | needs: check 42 | 43 | - uses: r-lib/actions/check-r-package@v2 44 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # File created using '.gitignore Generator' for Visual Studio Code: https://bit.ly/vscode-gig 2 | 3 | # Created by https://www.toptal.com/developers/gitignore/api/visualstudiocode,macos,r 4 | # Edit at https://www.toptal.com/developers/gitignore?templates=visualstudiocode,macos,r 5 | 6 | ### macOS ### 7 | # General 8 | .DS_Store 9 | .AppleDouble 10 | .LSOverride 11 | 12 | # Icon must end with two \r 13 | Icon 14 | 15 | 16 | # Thumbnails 17 | ._* 18 | 19 | # Files that might appear in the root of a volume 20 | .DocumentRevisions-V100 21 | .fseventsd 22 | .Spotlight-V100 23 | .TemporaryItems 24 | .Trashes 25 | .VolumeIcon.icns 26 | .com.apple.timemachine.donotpresent 27 | 28 | # Directories potentially created on remote AFP share 29 | .AppleDB 30 | .AppleDesktop 31 | Network Trash Folder 32 | Temporary Items 33 | .apdisk 34 | 35 | ### R ### 36 | # History files 37 | .Rhistory 38 | .Rapp.history 39 | 40 | # Session Data files 41 | .RData 42 | 43 | # User-specific files 44 | .Ruserdata 45 | 46 | # Example code in package build process 47 | *-Ex.R 48 | 49 | # Output files from R CMD build 50 | /*.tar.gz 51 | 52 | # Output files from R CMD check 53 | /*.Rcheck/ 54 | 55 | # RStudio files 56 | .Rproj.user/ 57 | 58 | # produced vignettes 59 | vignettes/*.html 60 | vignettes/*.pdf 61 | 62 | # OAuth2 token, see https://github.com/hadley/httr/releases/tag/v0.3 63 | .httr-oauth 64 | 65 | # knitr and R markdown default cache directories 66 | *_cache/ 67 | /cache/ 68 | 69 | # Temporary files created by R markdown 70 | *.utf8.md 71 | *.knit.md 72 | 73 | # R 
Environment Variables 74 | .Renviron 75 | 76 | # pkgdown site 77 | docs/ 78 | 79 | # translation temp files 80 | po/*~ 81 | 82 | ### R.Bookdown Stack ### 83 | # R package: bookdown caching files 84 | /*_files/ 85 | 86 | ### VisualStudioCode ### 87 | .vscode/* 88 | !.vscode/settings.json 89 | !.vscode/tasks.json 90 | !.vscode/launch.json 91 | !.vscode/extensions.json 92 | *.code-workspace 93 | 94 | # Local History for Visual Studio Code 95 | .history/ 96 | 97 | ### VisualStudioCode Patch ### 98 | # Ignore all local history of files 99 | .history 100 | .ionide 101 | 102 | # End of https://www.toptal.com/developers/gitignore/api/visualstudiocode,macos,r 103 | 104 | # Custom rules (everything added below won't be overriden by 'Generate .gitignore File' if you use 'Update' option) 105 | 106 | *.xml 107 | inst/doc 108 | cran-comments\.md 109 | CRAN-RELEASE 110 | .vscode 111 | README\.html 112 | CRAN-SUBMISSION 113 | -------------------------------------------------------------------------------- /.lintr: -------------------------------------------------------------------------------- 1 | linters: with_defaults( 2 | # lintr defaults: https://github.com/jimhester/lintr#available-linters 3 | # the following setup changes/removes certain linters 4 | assignment_linter = NULL, # do not force using <- for assignments 5 | object_name_linter = object_name_linter(c("snake_case", "CamelCase")), # only allow snake case and camel case object names 6 | cyclocomp_linter = NULL, # do not check function complexity 7 | commented_code_linter = NULL, # allow code in comments 8 | line_length_linter = line_length_linter(200) 9 | ) 10 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: mlr3spatial 2 | Title: Support for Spatial Objects Within the 'mlr3' Ecosystem 3 | Version: 0.5.0.9000 4 | Date: 2024-03-09 5 | Authors@R: 6 | c( 7 | person("Marc", "Becker", 
, "marcbecker@posteo.de", role = c("aut", "cre"), 8 | comment = c(ORCID = "0000-0002-8115-0400")), 9 | person("Patrick", "Schratz", , "patrick.schratz@gmail.com", role = "aut", 10 | comment = c(ORCID = "0000-0003-0748-6624")) 11 | ) 12 | Description: Extends the 'mlr3' ML framework with methods for spatial 13 | objects. Data storage and prediction are supported for packages 14 | 'terra', 'raster' and 'stars'. 15 | License: LGPL-3 16 | URL: https://mlr3spatial.mlr-org.com, 17 | https://github.com/mlr-org/mlr3spatial 18 | BugReports: https://github.com/mlr-org/mlr3spatial/issues 19 | Depends: 20 | mlr3 (>= 0.14.0), 21 | R (>= 3.1.0) 22 | Imports: 23 | checkmate (>= 2.0.0), 24 | data.table (>= 1.14.0), 25 | lgr (>= 0.4.2), 26 | methods, 27 | mlr3misc (>= 0.11.0), 28 | R6 (>= 2.5.0), 29 | sf, 30 | terra (>= 1.6-3), 31 | utils 32 | Suggests: 33 | bench, 34 | future, 35 | future.callr, 36 | knitr, 37 | mlr3learners (>= 0.4.5), 38 | paradox, 39 | ranger, 40 | raster, 41 | rmarkdown, 42 | rpart, 43 | stars (>= 0.5-5), 44 | testthat (>= 3.0.0) 45 | VignetteBuilder: 46 | knitr 47 | Config/testthat/edition: 3 48 | Config/testthat/parallel: false 49 | Encoding: UTF-8 50 | LazyData: true 51 | Roxygen: list(markdown = TRUE) 52 | RoxygenNote: 7.3.1 53 | Collate: 54 | 'DataBackendRaster.R' 55 | 'DataBackendVector.R' 56 | 'LearnerClassifSpatial.R' 57 | 'LearnerRegrSpatial.R' 58 | 'TaskRegrST.R' 59 | 'TaskClassifST.R' 60 | 'TaskClassif_leipzig.R' 61 | 'as_task_classif_st.R' 62 | 'as_task_regr_st.R' 63 | 'as_task_unsupervised.R' 64 | 'data.R' 65 | 'helper.R' 66 | 'predict_spatial.R' 67 | 'zzz.R' 68 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | S3method(as_data_backend,RasterBrick) 4 | S3method(as_data_backend,RasterStack) 5 | S3method(as_data_backend,SpatRaster) 6 | S3method(as_data_backend,sf) 7 | 
S3method(as_data_backend,stars) 8 | S3method(as_task_classif_st,DataBackend) 9 | S3method(as_task_classif_st,TaskClassifST) 10 | S3method(as_task_classif_st,TaskRegrST) 11 | S3method(as_task_classif_st,data.frame) 12 | S3method(as_task_classif_st,sf) 13 | S3method(as_task_regr_st,DataBackend) 14 | S3method(as_task_regr_st,TaskClassifST) 15 | S3method(as_task_regr_st,TaskRegrST) 16 | S3method(as_task_regr_st,data.frame) 17 | S3method(as_task_regr_st,sf) 18 | S3method(as_task_unsupervised,RasterBrick) 19 | S3method(as_task_unsupervised,RasterStack) 20 | S3method(as_task_unsupervised,SpatRaster) 21 | S3method(as_task_unsupervised,sf) 22 | S3method(as_task_unsupervised,stars) 23 | export(DataBackendRaster) 24 | export(DataBackendVector) 25 | export(TaskClassifST) 26 | export(TaskRegrST) 27 | export(as_data_backend.RasterBrick) 28 | export(as_data_backend.RasterStack) 29 | export(as_data_backend.SpatRaster) 30 | export(as_data_backend.sf) 31 | export(as_data_backend.stars) 32 | export(as_task_classif_st) 33 | export(as_task_classif_st.DataBackend) 34 | export(as_task_classif_st.TaskClassifST) 35 | export(as_task_classif_st.TaskRegrST) 36 | export(as_task_classif_st.data.frame) 37 | export(as_task_classif_st.sf) 38 | export(as_task_regr_st) 39 | export(as_task_regr_st.DataBackend) 40 | export(as_task_regr_st.TaskClassifST) 41 | export(as_task_regr_st.TaskRegrST) 42 | export(as_task_regr_st.data.frame) 43 | export(as_task_regr_st.sf) 44 | export(block_size) 45 | export(factor_layer) 46 | export(generate_stack) 47 | export(mask_stack) 48 | export(numeric_layer) 49 | export(predict_spatial) 50 | export(sample_stack) 51 | export(write_raster) 52 | import(checkmate) 53 | import(data.table) 54 | import(mlr3) 55 | import(mlr3misc) 56 | import(sf) 57 | importFrom(R6,R6Class) 58 | importFrom(R6,is.R6) 59 | importFrom(methods,as) 60 | importFrom(stats,complete.cases) 61 | importFrom(terra,cats) 62 | importFrom(terra,head) 63 | importFrom(terra,intersect) 64 | 
importFrom(terra,ncell) 65 | importFrom(terra,ncol) 66 | importFrom(terra,nlyr) 67 | importFrom(terra,rast) 68 | importFrom(terra,readStart) 69 | importFrom(terra,readStop) 70 | importFrom(terra,readValues) 71 | importFrom(terra,rowColFromCell) 72 | importFrom(terra,sources) 73 | importFrom(terra,unique) 74 | importFrom(terra,writeRaster) 75 | importFrom(terra,writeStart) 76 | importFrom(terra,writeStop) 77 | importFrom(utils,data) 78 | importFrom(utils,getFromNamespace) 79 | importFrom(utils,tail) 80 | -------------------------------------------------------------------------------- /NEWS.md: -------------------------------------------------------------------------------- 1 | # mlr3spatial (development version) 2 | 3 | # mlr3spatial 0.5.0 4 | 5 | * compatibility: Work with new paradox version 1.0.0 6 | 7 | # mlr3spatial 0.4.1 8 | 9 | * refactor: Use the `terra::inMemory()` function instead of `@ptr`. 10 | 11 | # mlr3spatial 0.4.0 12 | 13 | * refactor: The data input of `spatial_predict()` accepts `SpatRaster`, `stars`, `sf`, `RasterStack` and `RasterBrick` objects now. 14 | A `mlr3::TaskUnsupervised` can still be passed but the argument name changed from `task` to `newdata`. 15 | * fix: The log showed a warning when the estimated values per chunk were a floating-point number. 16 | 17 | # mlr3spatial 0.3.1 18 | 19 | * chore: Remove `rgdal` dependency and require `raster` version 3.6-11. 20 | 21 | # mlr3spatial 0.3.0 22 | 23 | * feat: Add prediction on vector data to `spatial_predict()`. 24 | 25 | # mlr3spatial 0.2.1 26 | 27 | * fix: Add `"space"` and `"time"` column roles from mlr3spatiotempcv 28 | 29 | # mlr3spatial 0.2.0 30 | 31 | * BREAKING CHANGE: `TaskClassifST` and `TaskRegrST` are used to train a learner with spatial data. 32 | The new tasks unify the work with mlr3spatiotempcv. 33 | * BREAKING CHANGE: Raster objects cannot be used to create tasks for training anymore. 34 | * BREAKING CHANGE: `TaskUnsupervised` is used to predict on raster objects now. 
35 | The new task type is more convenient for data without a response. 36 | * feat: Add `as_task_regr_st()` and `as_task_classif_st()` from spatial objects. 37 | * feat: Add `as_task_unsupervised()` from raster objects. 38 | * feat: Task `leipzig` with land cover target. 39 | * feat: `data("leipzig")` loads an `sf` object with land cover in Leipzig. 40 | * feat: GeoTIFF and GeoPackage of Leipzig in `extdata` folder. 41 | * refactor: Vector data is handled with `DataBackendDataTable` now and `DataBackendVector` is removed. 42 | * BREAKING CHANGE: `DataBackendRaster` cannot be created from `RasterLayer` objects anymore. 43 | * fix: `spatial_predict()` returned an unnamed response. 44 | * fix: `spatial_predict()` wrote predictions to the wrong cell. 45 | * BREAKING CHANGE: Remove `demo_raster()`, `demo_stack_spatraster()`, `demo_stack_rasterbrick()` and `demo_rasterbrick()` functions. 46 | * feat: Prediction layer contains `NA` at raster cells with `NA` values in one or more feature layers. 47 | 48 | # mlr3spatial 0.1.2 49 | 50 | * refactor: Stars objects are directly converted to terra objects now. 51 | 52 | # mlr3spatial 0.1.1 53 | 54 | * fix: Compatibility to terra update. 55 | 56 | # mlr3spatial 0.1.0 57 | 58 | * First version of mlr3spatial. 59 | 60 | -------------------------------------------------------------------------------- /R/DataBackendRaster.R: -------------------------------------------------------------------------------- 1 | #' @title DataBackend for Raster Objects 2 | #' 3 | #' @description 4 | #' [mlr3::DataBackend] for [terra::SpatRaster] raster objects. 5 | #' 6 | #' @param rows `integer()`\cr 7 | #' Row indices. Row indices start with 1 in the upper left corner in the 8 | #' raster, increase from left to right and then from top to bottom. The last 9 | #' cell is in the bottom right corner and the row index equals the number of 10 | #' cells in the raster. 11 | #' @param cols `character()`\cr 12 | #' Column names. 
13 | #' 14 | #' @section Read mode: 15 | #' There are two different ways the reading of values is performed internally: 16 | #' * "Block mode" reads complete rows of the raster file and subsets the requested cells. 17 | #' This mode is faster than "cell mode" if the complete raster file is iterated over. 18 | #' 19 | #' * "Cell mode" reads individual cells. 20 | #' This is faster than "block mode" if only a few cells are sampled. 21 | #' 22 | #' "Block mode" is activated if `$data(rows)` is used with an increasing, gap-free integer sequence e.g. `200:300`. 23 | #' For any other selection of cells (e.g. `c(1, 3, 5)`), "cell mode" is used. 24 | #' 25 | #' @importFrom terra writeRaster writeStart writeStop rast cats sources intersect readStart readStop rowColFromCell readValues head unique ncell nlyr ncol 26 | #' @importFrom utils tail 27 | #' @importFrom methods as 28 | #' @export 29 | DataBackendRaster = R6Class("DataBackendRaster", 30 | inherit = DataBackend, cloneable = FALSE, 31 | public = list( 32 | 33 | #' @description 34 | #' Creates a new instance of this [R6][R6::R6Class] class. 35 | #' 36 | #' @template param-data 37 | #' 38 | initialize = function(data) { 39 | assert_class(data, "SpatRaster") 40 | 41 | # write in-memory layers to temporary GeoTIFF files; keep the source path of file-backed layers 42 | sources = map_chr(names(data), function(layer) { 43 | if (terra::inMemory(data[layer])) { 44 | filename = tempfile(fileext = ".tif") 45 | terra::writeRaster(data[layer], filename = filename) 46 | } else { 47 | filename = terra::sources(data[layer]) 48 | } 49 | filename 50 | }) 51 | 52 | # store the unique source files plus the metadata needed to rebuild the stack 53 | private$.data = unique(sources) # nolint 54 | private$.categories = terra::cats(data) 55 | private$.layer_names = names(data) 56 | private$.crs = terra::crs(data) 57 | 58 | self$data_formats = "data.table" 59 | }, 60 | 61 | #' @description 62 | #' Returns a slice of the raster in the specified format. 63 | #' Currently, the only supported format is `"data.table"`. 
64 | #' 65 | #' The rows must be addressed as a vector of cell indices, columns must be 66 | #' referred to via layer names. Queries for rows with no matching row id and 67 | #' queries for columns with no matching column name are silently ignored. 68 | #' 69 | #' Rows are guaranteed to be returned in the same order as `rows`, columns 70 | #' may be returned in an arbitrary order. Duplicated row ids result in 71 | #' duplicated rows, duplicated column names lead to an exception. 72 | #' 73 | #' @param data_format (`character(1)`)\cr 74 | #' Desired data format. Currently only `"data.table"` supported. 75 | data = function(rows, cols, data_format = "data.table") { 76 | stack = self$stack 77 | if (is.null(rows)) rows = numeric(0) 78 | rows = assert_integerish(rows, coerce = TRUE, null.ok = TRUE) 79 | assert_names(cols, type = "unique") 80 | assert_choice(data_format, self$data_formats) 81 | cols = intersect(cols, private$.layer_names) 82 | 83 | if (!length(cols)) { 84 | data.table() 85 | } else if (length(rows) && test_integer(rows, sorted = TRUE, unique = TRUE, len = max(rows[length(rows)] - rows[1] + 1, 0))) { 86 | # block read: rows form a sorted, gap-free run of cell ids (e.g. c(1:10)) 87 | terra::readStart(stack) 88 | on.exit(terra::readStop(stack)) 89 | # determine the raster rows that contain the first and the last requested cell 90 | cells = terra::rowColFromCell(stack, rows) 91 | row = cells[1, 1] 92 | nrows = cells[dim(cells)[1], 1] - cells[1, 1] + 1 93 | res = as.data.table(terra::readValues(stack, row = row, nrows = nrows, dataframe = TRUE)) 94 | # skip the cells before the first requested column, keep length(rows) cells and the requested layers 95 | res[cells[1, 2]:(cells[1, 2] + length(rows) - 1), cols, with = FALSE] 96 | } else { 97 | # cell read (e.g. c(1, 3, 5, 6, 10)) 98 | as.data.table(terra::extract(stack, rows))[, cols, with = FALSE] 99 | } 100 | }, 101 | 102 | #' @description 103 | #' Retrieve the first `n` rows. 104 | #' 105 | #' @param n (`integer(1)`)\cr 106 | #' Number of rows. 107 | #' 108 | #' @return [data.table::data.table()] of the first `n` rows. 
109 | head = function(n = 6L) { 110 | res = as.data.table(terra::head(self$stack, n)) 111 | if (length(private$.response)) set(res, j = private$.target_names, value = private$.response) # NOTE(review): .response / .target_names are not declared in this class's private list - confirm they are still needed 112 | res 113 | }, 114 | 115 | #' @description 116 | #' Returns a named list of vectors of distinct values for each column 117 | #' specified. If `na_rm` is `TRUE`, missing values are removed from the 118 | #' returned vectors of distinct values. Non-existing rows and columns are 119 | #' silently ignored. 120 | #' 121 | #' @param na_rm `logical(1)`\cr 122 | #' Whether to remove NAs or not. 123 | #' 124 | #' @return Named `list()` of distinct values. 125 | distinct = function(rows, cols, na_rm = TRUE) { 126 | cols = intersect(cols, private$.layer_names) 127 | rows = rows %??% seq_len(self$nrow) 128 | 129 | res = if (!length(cols)) { 130 | named_list() 131 | } else if (test_integer(rows, sorted = TRUE, unique = TRUE, len = self$nrow)) { 132 | # fast: every cell is requested, so query each layer directly 133 | stack = terra::subset(self$stack, cols) 134 | set_names(map(cols, function(layer) { 135 | if (terra::is.factor(stack[layer])) { 136 | terra::cats(stack[layer])[[1]][, 2] 137 | } else { 138 | terra::unique(stack[layer])[[1]] # use the current layer, not a hard-coded layer name 139 | } 140 | }), cols) 141 | } else { 142 | # slow: read the requested cells and deduplicate per column 143 | data = self$data(rows, cols) 144 | res = map(data, unique) 145 | map_if(res, is.factor, as.character) 146 | } 147 | 148 | if (na_rm) res = map(res, function(values) values[!is.na(values)]) 149 | res 150 | }, 151 | 152 | #' @description 153 | #' Returns the number of missing values per column in the specified slice 154 | #' of data. Non-existing rows and columns are silently ignored. 155 | #' 156 | #' @return Total of missing values per column (named `numeric()`). 
157 | missings = function(rows, cols) { 158 | cols = intersect(cols, private$.layer_names) 159 | 160 | if (!length(cols)) { 161 | numeric(0) 162 | } else if (test_integer(rows, sorted = TRUE, unique = TRUE, len = self$nrow)) { 163 | # fast: every cell is requested, so count NA cells per layer on the whole stack 164 | stack = self$stack 165 | res = terra::freq(stack, value = NA) 166 | res = set_names(res[, "count"], private$.layer_names)[cols] 167 | res 168 | } else { 169 | # slow: read the requested cells and count missing values per column 170 | data = self$data(rows, cols) 171 | map_int(data, count_missing) 172 | } 173 | }, 174 | 175 | #' @description 176 | #' Returns the coordinates of `rows`. 177 | #' If `rows` is missing, all coordinates are returned. 178 | #' 179 | #' @return [data.table::data.table()] of coordinates of `rows`. 180 | coordinates = function(rows) { 181 | if (missing(rows)) { 182 | as.data.table(terra::crds(self$stack, df = TRUE)) 183 | } else { 184 | as.data.table(terra::xyFromCell(self$stack, rows)) # xyFromCell() needs the raster as first argument 185 | } 186 | } 187 | ), 188 | 189 | active = list( 190 | #' @field rownames (`integer()`)\cr 191 | #' Returns vector of all distinct row identifiers, i.e. the contents of the primary key column. 192 | rownames = function(rhs) { 193 | assert_ro_binding(rhs) 194 | seq_len(terra::ncell(self$stack)) 195 | }, 196 | 197 | #' @field colnames (`character()`)\cr 198 | #' Returns vector of all column names. 199 | colnames = function(rhs) { 200 | assert_ro_binding(rhs) 201 | names(self$stack) 202 | }, 203 | 204 | #' @field nrow (`integer(1)`)\cr 205 | #' Number of rows (observations). 206 | nrow = function(rhs) { 207 | assert_ro_binding(rhs) 208 | terra::ncell(self$stack) 209 | }, 210 | 211 | #' @field ncol (`integer(1)`)\cr 212 | #' Number of columns (variables). 213 | ncol = function(rhs) { 214 | assert_ro_binding(rhs) 215 | terra::nlyr(self$stack) 216 | }, 217 | 218 | #' @field stack (`SpatRaster`)\cr 219 | #' Raster stack. 
220 | stack = function(rhs) { 221 | assert_ro_binding(rhs) 222 | stack = terra::rast(private$.data) # rebuild the raster from the stored source files on every access 223 | iwalk(private$.categories, function(category, n) { 224 | if (!is.null(category)) { 225 | terra::set.cats(stack, layer = n, value = category) # re-attach the stored categories of layer n 226 | } 227 | }) 228 | terra::set.names(stack, private$.layer_names) 229 | terra::crs(stack) = private$.crs 230 | stack 231 | } 232 | ), 233 | 234 | private = list( 235 | .calculate_hash = function() { 236 | mlr3misc::calculate_hash(self$stack) 237 | }, 238 | .categories = NULL, 239 | .layer_names = NULL, 240 | .crs = NULL 241 | ) 242 | ) 243 | 244 | #' @title Coerce to spatial DataBackend 245 | #' 246 | #' @description 247 | #' Wraps a [DataBackend] around spatial objects. 248 | #' Currently these S3 methods are only alternative ways for writing `DataBackendRaster$new()`. 249 | #' They do not support coercing from other backends yet. 250 | #' 251 | #' @template param-data 252 | #' @template param-primary-key 253 | #' @param ... (`any`)\cr 254 | #' Not used. 255 | #' 256 | #' @return [DataBackend]. 257 | #' @rdname as_data_backend 258 | #' 259 | #' @exportS3Method 260 | #' @export as_data_backend.stars 261 | as_data_backend.stars = function(data, primary_key = NULL, ...) { # nolint 262 | require_namespaces("stars") 263 | data = as(data, "SpatRaster") # coerce the stars object to a SpatRaster before wrapping it 264 | DataBackendRaster$new(data) 265 | } 266 | 267 | #' @export as_data_backend.SpatRaster 268 | #' @exportS3Method 269 | #' @rdname as_data_backend 270 | as_data_backend.SpatRaster = function(data, primary_key = NULL, ...) { # nolint 271 | DataBackendRaster$new(data) 272 | } 273 | 274 | #' @export as_data_backend.RasterBrick 275 | #' @exportS3Method 276 | #' @rdname as_data_backend 277 | as_data_backend.RasterBrick = function(data, primary_key = NULL, ...) 
{ # nolint 278 | data = terra::rast(data) 279 | DataBackendRaster$new(data) 280 | } 281 | 282 | #' @export as_data_backend.RasterStack 283 | #' @exportS3Method 284 | #' @rdname as_data_backend 285 | as_data_backend.RasterStack = function(data, primary_key = NULL, ...) { # nolint 286 | data = terra::rast(data) 287 | DataBackendRaster$new(data) 288 | } 289 | -------------------------------------------------------------------------------- /R/DataBackendVector.R: -------------------------------------------------------------------------------- 1 | #' @title DataBackend for Vector Objects 2 | #' 3 | #' @description 4 | #' [mlr3::DataBackend] for [sf::sf] vector objects. 5 | #' 6 | #' @export 7 | DataBackendVector = R6::R6Class("DataBackendVector", 8 | inherit = mlr3::DataBackendDataTable, 9 | cloneable = FALSE, 10 | public = list( 11 | 12 | #' @description 13 | #' Creates a new instance of this [R6][R6::R6Class] class. 14 | #' 15 | #' @param data (`sf`)\cr 16 | #' A vector object ([sf::sf]). 17 | #' @param primary_key (`character(1)` | `integer()`)\cr 18 | #' Name of the primary key column, or integer vector of row ids. 19 | initialize = function(data, primary_key) { 20 | assert_class(data, "sf") 21 | self$data_formats = "data.table" 22 | 23 | # keep the geometry column separately; it is exposed via the `sfc` field 24 | sf_column = attr(data, "sf_column") 25 | private$.sfc = data[[sf_column]] 26 | 27 | # drop the geometry and pass the remaining columns on as a data.table 28 | data[[sf_column]] = NULL 29 | attr(data, "sf_column") = NULL 30 | data = as.data.table(data) 31 | 32 | super$initialize(data, primary_key) 33 | } 34 | ), 35 | 36 | active = list( 37 | #' @field sfc ([sf::sfc])\cr 38 | #' Returns the sfc object. 39 | sfc = function(rhs) { 40 | assert_ro_binding(rhs) 41 | private$.sfc 42 | } 43 | ), 44 | 45 | private = list( 46 | .sfc = NULL 47 | ) 48 | ) 49 | 50 | #' @param keep_rownames (`logical(1)` | `character(1)`)\cr 51 | #' If `TRUE` or a single string, keeps the row names of `data` as a new column. 
52 | #' The column is named like the provided string, defaulting to `"..rownames"` for `keep_rownames == TRUE`. 53 | #' Note that the created column will be used as a regular feature by the task unless you manually change the column role. 54 | #' Also see [data.table::as.data.table()]. 55 | #' @rdname as_data_backend 56 | #' @exportS3Method 57 | #' @export as_data_backend.sf 58 | as_data_backend.sf = function(data, primary_key = NULL, keep_rownames = FALSE, ...) { # nolint 59 | assert_class(data, "sf") 60 | assert_data_frame(data, min.cols = 1L, col.names = "unique") 61 | 62 | if (!isFALSE(keep_rownames)) { 63 | if (isTRUE(keep_rownames)) { 64 | keep_rownames = "..rownames" 65 | } else { 66 | assert_string(keep_rownames) 67 | } 68 | data[[keep_rownames]] = rownames(data) 69 | } 70 | 71 | compact_seq = FALSE # switched to TRUE only when the row ids are generated below 72 | 73 | if (is.character(primary_key)) { 74 | assert_string(primary_key) 75 | assert_choice(primary_key, colnames(data)) 76 | assert_integer(data[[primary_key]], any.missing = FALSE, unique = TRUE) 77 | } else { 78 | if (is.null(primary_key)) { 79 | row_ids = seq_row(data) 80 | compact_seq = TRUE 81 | } else if (is.integer(primary_key)) { 82 | row_ids = assert_integer(primary_key, len = nrow(data), any.missing = FALSE, unique = TRUE) 83 | } else { 84 | stopf("Argument 'primary_key' must be NULL, a column name or a vector of ids") 85 | } 86 | 87 | primary_key = "..row_id" # row ids are stored in a new column that becomes the primary key 88 | data[[primary_key]] = row_ids 89 | } 90 | 91 | b = DataBackendVector$new(data, primary_key) 92 | b$compact_seq = compact_seq 93 | 94 | return(b) 95 | } 96 | -------------------------------------------------------------------------------- /R/LearnerClassifSpatial.R: -------------------------------------------------------------------------------- 1 | LearnerClassifSpatial = R6::R6Class("LearnerClassifSpatial", 2 | inherit = LearnerClassif, 3 | public = list( 4 | learner = NULL, 5 | 6 | initialize = function(learner) { 7 | self$learner = assert_learner(learner) 8 | super$initialize( 9 | id = 
"classif.ranger", # NOTE(review): id is hard-coded regardless of the wrapped learner - confirm intended 10 | param_set = learner$param_set, 11 | predict_types = learner$predict_types, 12 | feature_types = learner$feature_types, 13 | properties = union(learner$properties, "missings"), 14 | packages = learner$packages, 15 | man = "mlr3learners::mlr_learners_classif.spatial" # NOTE(review): verify that this help topic exists 16 | ) 17 | }, 18 | 19 | predict = function(task, row_ids = NULL) { 20 | data = task$data(rows = row_ids) 21 | ids = complete.cases(data[, task$feature_names, with = FALSE]) # TRUE for rows without missing feature values 22 | pred = self$learner$predict_newdata(data[ids]) # the wrapped learner only sees complete rows 23 | response = rep(NaN, nrow(data)) # rows with missing features keep NaN 24 | response[ids] = pred$data$response # NOTE(review): assigned into a numeric vector - presumably class responses end up as numeric codes; confirm 25 | pred$data$row_ids = seq_len(nrow(data)) 26 | pred$data$response = response 27 | pred$data$truth = rep(NaN, nrow(data)) 28 | pred 29 | } 30 | ) 31 | ) 32 | -------------------------------------------------------------------------------- /R/LearnerRegrSpatial.R: -------------------------------------------------------------------------------- 1 | LearnerRegrSpatial = R6::R6Class("LearnerRegrSpatial", 2 | inherit = LearnerRegr, 3 | public = list( 4 | learner = NULL, 5 | 6 | initialize = function(learner) { 7 | self$learner = assert_learner(learner) 8 | super$initialize( 9 | id = "regr.ranger", # NOTE(review): id is hard-coded regardless of the wrapped learner - confirm intended 10 | param_set = learner$param_set, 11 | predict_types = learner$predict_types, 12 | feature_types = learner$feature_types, 13 | properties = union(learner$properties, "missings"), 14 | packages = learner$packages, 15 | man = "mlr3learners::mlr_learners_regr.spatial" # NOTE(review): verify that this help topic exists 16 | ) 17 | }, 18 | 19 | predict = function(task, row_ids = NULL) { 20 | data = task$data(rows = row_ids) 21 | ids = complete.cases(data[, task$feature_names, with = FALSE]) # TRUE for rows without missing feature values 22 | pred = self$learner$predict_newdata(data[ids]) # the wrapped learner only sees complete rows 23 | response = rep(NaN, nrow(data)) # rows with missing features keep NaN 24 | response[ids] = pred$data$response 25 | pred$data$row_ids = seq_len(nrow(data)) 26 | pred$data$response = response 27 | pred$data$truth = rep(NaN, nrow(data)) 28 | pred 29 | } 30 | ) 31 | ) 32 | 
# --------------------------------------------------------------------------------
# /R/TaskClassifST.R:
# --------------------------------------------------------------------------------
#' @title Spatiotemporal Classification Task
#'
#' @description
#' This task specializes [TaskClassif] for spatiotemporal classification problems.
#'
#' A spatial example task is available via `tsk("ecuador")`.
#'
#' The coordinate reference system passed during initialization must match the one which was used during data creation, otherwise offsets of multiple meters may occur.
#' By default, coordinates are not used as features.
#' This can be changed by setting `coords_as_features = TRUE`.
#'
#' @template param_id
#' @template param_backend
#' @template param_target
#' @template param_positive
#' @template param_label
#' @template param_coords_as_features
#' @template param_crs
#' @template param_coordinate_names
#' @template param_extra_args
#'
#' @export
TaskClassifST = R6::R6Class("TaskClassifST",
  inherit = TaskClassif,
  public = list(

    #' @description
    #' Creates a new instance of this [R6][R6::R6Class] class.
    #' The function [as_task_classif_st()] provides an alternative way to construct classification tasks.
    initialize = function(id, backend, target, positive = NULL, label = NA_character_, coordinate_names, crs = NA_character_, coords_as_features = FALSE, extra_args = list()) {
      if (inherits(backend, "sf")) {
        stopf("Creating a task from an sf objects is not supported anymore. Use `as_task_classif_st()` to convert an sf objects into a task.")
      }

      super$initialize(id = id, backend = backend, target = target, label = label, positive = positive, extra_args = extra_args)
      self$crs = crs
      self$coordinate_names = coordinate_names

      # every coordinate column must be numeric; the first row is sufficient to check the type
      first_row = self$backend$head(1)
      for (coord in coordinate_names) {
        assert_numeric(first_row[[coord]], .var.name = coord)
      }

      # adjust classif task: register the column roles which are specific to spatiotemporal tasks
      self$task_type = "classif_st"
      missing_roles = setdiff(mlr_reflections$task_col_roles[["classif_st"]], names(private$.col_roles))
      private$.col_roles = insert_named(private$.col_roles, named_list(missing_roles, character(0)))

      # add coordinates as features
      self$coords_as_features = assert_flag(coords_as_features)
    },

    #' @description
    #' Returns coordinates of observations.
    #'
    #' @param row_ids (`integer()`)\cr
    #'   Vector of rows indices as subset of `task$row_ids`.
    #'
    #' @return [data.table::data.table()]
    coordinates = function(row_ids = NULL) {
      # default to all rows of the task
      rows = row_ids %??% self$row_ids
      self$backend$data(rows = rows, cols = self$coordinate_names)
    },

    #' @description
    #' Print the task.
    #'
    #' @param ... Arguments passed to the `$print()` method of the superclass.
    print = function(...) {
      super$print(...)
      catn("* Coordinates:")
      print(self$coordinates(), nrows = 10)
    }
  ),

  active = list(

    #' @field crs (`character(1)`)\cr
    #' Returns coordinate reference system of task.
    crs = function(rhs) {
      if (!missing(rhs)) {
        self$extra_args$crs = assert_string(rhs, na.ok = TRUE)
      } else {
        self$extra_args$crs
      }
    },

    #' @field coordinate_names (`character()`)\cr
    #' Returns coordinate names.
    coordinate_names = function(rhs) {
      if (!missing(rhs)) {
        self$extra_args$coordinate_names = assert_character(rhs, len = 2, all.missing = FALSE, any.missing = FALSE)
      } else {
        self$extra_args$coordinate_names
      }
    },

    #' @field coords_as_features (`logical(1)`)\cr
    #' If `TRUE`, coordinates are used as features.
    coords_as_features = function(rhs) {
      if (missing(rhs)) {
        return(self$extra_args$coords_as_features)
      }

      use_coords = assert_flag(rhs)
      self$extra_args$coords_as_features = use_coords

      coords = self$coordinate_names
      if (use_coords) {
        # keep the existing roles and add the coordinate role
        self$set_col_roles(coords, add_to = "coordinate")
      } else {
        # coordinate becomes the only role of the columns
        self$set_col_roles(coords, roles = "coordinate")
      }
    }
  )
)

# --------------------------------------------------------------------------------
# /R/TaskClassif_leipzig.R:
# --------------------------------------------------------------------------------
#' @title Leipzig Land Cover Task
#'
#' @name leipzig
#' @aliases mlr_tasks_leipzig
#'
#' @description
#' Point survey of land cover in Leipzig.
#' Includes Sentinel-2 spectral bands and NDVI.
#'
#' @source
#' Copernicus Sentinel Data (2021). Retrieved from Copernicus Open Access Hub and processed by European Space Agency.
#'
#' @docType data
#' @keywords data
#' @examples
#' if (requireNamespace("sf")) {
#'   library(sf)
#'   data("leipzig", package = "mlr3spatial")
#'   print(leipzig)
#' }
NULL

# Loader registered in the task dictionary: reads the bundled point survey and
# converts it into a spatiotemporal classification task.
load_task_leipzig = function(id = "leipzig") {
  vector = sf::read_sf(system.file("extdata", "leipzig_points.gpkg", package = "mlr3spatial"), stringsAsFactors = TRUE)
  task = as_task_classif_st(vector, id = id, target = "land_cover", label = "Leipzig Land Cover")
  # the backend hash string is kept unchanged so that hashes of existing objects stay stable
  task$backend$hash = "mlr3::mlr_tasks_leipzig"
  # the help topic `mlr_tasks_leipzig` is an alias declared in this package, not in mlr3
  task$man = "mlr3spatial::mlr_tasks_leipzig"
  task
}

# --------------------------------------------------------------------------------
# /R/TaskRegrST.R:
# --------------------------------------------------------------------------------
#' @title Spatiotemporal Regression Task
#'
#' @description
#' This task specializes [TaskRegr] for spatiotemporal regression problems.
#'
#' A spatial example task is available via `tsk("cookfarm_mlr3")`.
#'
#' The coordinate reference system passed during initialization must match the one which was used during data creation, otherwise offsets of multiple meters may occur.
#' By default, coordinates are not used as features.
#' This can be changed by setting `coords_as_features = TRUE`.
#'
#' @template param_id
#' @template param_backend
#' @template param_target
#' @template param_label
#' @template param_coords_as_features
#' @template param_crs
#' @template param_coordinate_names
#' @template param_extra_args
#'
#' @export
TaskRegrST = R6::R6Class("TaskRegrST",
  inherit = TaskRegr,
  public = list(

    #' @description
    #' Creates a new instance of this [R6][R6::R6Class] class.
    #' The function [as_task_regr_st()] provides an alternative way to construct regression tasks.
    initialize = function(id, backend, target, label = NA_character_, coordinate_names, crs = NA_character_, coords_as_features = FALSE, extra_args = list()) {
      if (inherits(backend, "sf")) {
        stopf("Creating a task from an sf objects is not supported anymore. Use `as_task_regr_st()` to convert an sf objects into a task.")
      }

      super$initialize(id = id, backend = backend, target = target, label = label, extra_args = extra_args)
      self$crs = crs
      self$coordinate_names = coordinate_names
      # every coordinate column must be numeric; checked on the first row of the backend
      walk(coordinate_names, function(x) assert_numeric(self$backend$head(1)[[x]], .var.name = x))

      # adjust regr task: register the column roles which are specific to spatiotemporal tasks
      self$task_type = "regr_st"
      new_col_roles = named_list(setdiff(mlr_reflections$task_col_roles[["regr_st"]], names(private$.col_roles)), character(0))
      private$.col_roles = insert_named(private$.col_roles, new_col_roles)

      # add coordinates as features
      self$coords_as_features = assert_flag(coords_as_features)
    },

    #' @description
    #' Returns coordinates of observations.
    #'
    #' @param row_ids (`integer()`)\cr
    #'   Vector of rows indices as subset of `task$row_ids`.
    #'
    #' @return [data.table::data.table()]
    coordinates = function(row_ids = NULL) {
      if (is.null(row_ids)) row_ids = self$row_ids
      self$backend$data(rows = row_ids, cols = self$coordinate_names)
    },

    #' @description
    #' Print the task.
    #'
    #' @param ... Arguments passed to the `$print()` method of the superclass.
    print = function(...) {
      super$print(...)
      catn("* Coordinates:")
      print(self$coordinates(), nrows = 10)
    }
  ),

  active = list(

    #' @field crs (`character(1)`)\cr
    #' Returns coordinate reference system of the task.
    crs = function(rhs) {
      if (missing(rhs)) {
        return(self$extra_args$crs)
      }
      self$extra_args$crs = assert_string(rhs, na.ok = TRUE)
    },

    #' @field coordinate_names (`character()`)\cr
    #' Returns coordinate names.
    coordinate_names = function(rhs) {
      if (missing(rhs)) {
        return(self$extra_args$coordinate_names)
      }
      self$extra_args$coordinate_names = assert_character(rhs, len = 2, all.missing = FALSE, any.missing = FALSE)
    },

    #' @field coords_as_features (`logical(1)`)\cr
    #' If `TRUE`, coordinates are used as features.
    coords_as_features = function(rhs) {
      if (missing(rhs)) {
        return(self$extra_args$coords_as_features)
      }

      self$extra_args$coords_as_features = assert_flag(rhs)
      if (rhs) {
        # keep the existing roles and add the coordinate role
        self$set_col_roles(self$coordinate_names, add_to = "coordinate")
      } else {
        # coordinate becomes the only role of the columns
        self$set_col_roles(self$coordinate_names, roles = "coordinate")
      }
    }
  )
)

# --------------------------------------------------------------------------------
# /R/as_task_classif_st.R:
# --------------------------------------------------------------------------------
#' @title Convert to a Spatiotemporal Classification Task
#'
#' @description
#' Convert object to a [TaskClassifST].
#' This is a S3 generic, specialized for at least the following objects:
#'
#' 1. [TaskClassifST]: Ensure the identity.
#' 2. [data.frame()] and [DataBackend]: Provides an alternative to the constructor of [TaskClassifST].
#' 3. [sf::sf]: Extracts spatial meta data before construction.
#' 4. [TaskRegrST]: Calls [convert_task()].
#'
#' @inheritParams mlr3::as_task_classif
#' @template param_coords_as_features
#' @template param_crs
#' @template param_coordinate_names
#'
#' @return [TaskClassifST]
#' @export
as_task_classif_st = function(x, ...) {
  UseMethod("as_task_classif_st")
}

#' @rdname as_task_classif_st
#' @export as_task_classif_st.TaskClassifST
#' @exportS3Method
as_task_classif_st.TaskClassifST = function(x, clone = FALSE, ...) { # nolint
  if (clone) x$clone() else x
}

#' @rdname as_task_classif_st
#' @export as_task_classif_st.data.frame
#' @exportS3Method
as_task_classif_st.data.frame = function(x, target, id = deparse(substitute(x)), positive = NULL, coordinate_names, crs = NA_character_, coords_as_features = FALSE, label = NA_character_, ...) {
  # infinite values are only reported, the task is created nevertheless
  ii = which(map_lgl(keep(x, is.double), anyInfinite))
  if (length(ii)) {
    warningf("Detected columns with unsupported Inf values in data: %s", str_collapse(names(ii)))
  }
  TaskClassifST$new(id = id, backend = x, target = target, positive = positive, coords_as_features = coords_as_features, crs = crs, coordinate_names = coordinate_names, label = label)
}

#' @rdname as_task_classif_st
#' @export as_task_classif_st.DataBackend
#' @exportS3Method
as_task_classif_st.DataBackend = function(x, target, id = deparse(substitute(x)), positive = NULL, coordinate_names, crs = NA_character_, coords_as_features = FALSE, label = NA_character_, ...) {
  # `crs` defaults to NA like in the data.frame method and in the constructor of the task
  TaskClassifST$new(id = id, backend = x, target = target, positive = positive, coords_as_features = coords_as_features, crs = crs, coordinate_names = coordinate_names, label = label)
}

#' @rdname as_task_classif_st
#' @export as_task_classif_st.sf
#' @exportS3Method
as_task_classif_st.sf = function(x, target = NULL, id = deparse(substitute(x)), positive = NULL, coords_as_features = FALSE, label = NA_character_, ...) {
  id = as.character(id)
  geometries = as.character(unique(sf::st_geometry_type(x)))
  if (!test_names(geometries, identical.to = "POINT")) {
    # stopf() interpolates the format string; plain stop() would only paste the arguments together
    stopf("Simple feature may not contain geometries of type '%s'", str_collapse(setdiff(geometries, "POINT")))
  }

  # extract spatial meta data
  crs = sf::st_crs(x)$wkt
  coordinates = as.data.frame(sf::st_coordinates(x))
  coordinate_names = colnames(coordinates)

  # convert sf to data.frame
  x[[attr(x, "sf_column")]] = NULL
  attr(x, "sf_column") = NULL
  x = as.data.frame(x)

  # add coordinates
  x = cbind(x, coordinates)

  as_task_classif_st(x, target = target, id = id, positive = positive, coords_as_features = coords_as_features, crs = crs, coordinate_names = coordinate_names, label = label)
}

#' @rdname as_task_classif_st
#' @export as_task_classif_st.TaskRegrST
#' @exportS3Method
as_task_classif_st.TaskRegrST = function(x, target = NULL, drop_original_target = FALSE, drop_levels = TRUE, ...) {
  # forward the user supplied options instead of silently falling back to the defaults
  convert_task(intask = x, target = target, new_type = "classif_st", drop_original_target = drop_original_target, drop_levels = drop_levels)
}

# --------------------------------------------------------------------------------
# /R/as_task_regr_st.R:
# --------------------------------------------------------------------------------
#' @title Convert to a Spatiotemporal Regression Task
#'
#' @description
#' Convert object to a [TaskRegrST].
#' This is a S3 generic, specialized for at least the following objects:
#'
#' 1. [TaskRegrST]: Ensure the identity.
#' 2. [data.frame()] and [DataBackend]: Provides an alternative to the constructor of [TaskRegrST].
#' 3. [sf::sf]: Extracts spatial meta data before construction.
#' 4. [TaskClassifST]: Calls [convert_task()].
#'
#' @inheritParams mlr3::as_task_regr
#' @template param_coords_as_features
#' @template param_crs
#' @template param_coordinate_names
#'
#' @return [TaskRegrST]
#' @export
as_task_regr_st = function(x, ...) {
  UseMethod("as_task_regr_st")
}

#' @rdname as_task_regr_st
#' @export as_task_regr_st.TaskRegrST
#' @exportS3Method
as_task_regr_st.TaskRegrST = function(x, clone = FALSE, ...) { # nolint
  if (clone) x$clone() else x
}

#' @rdname as_task_regr_st
#' @export as_task_regr_st.data.frame
#' @exportS3Method
as_task_regr_st.data.frame = function(x, target, id = deparse(substitute(x)), coordinate_names, crs = NA_character_, coords_as_features = FALSE, label = NA_character_, ...) {
  # infinite values are only reported, the task is created nevertheless
  ii = which(map_lgl(keep(x, is.double), anyInfinite))
  if (length(ii)) {
    warningf("Detected columns with unsupported Inf values in data: %s", str_collapse(names(ii)))
  }
  TaskRegrST$new(id = id, backend = x, target = target, coords_as_features = coords_as_features, crs = crs, coordinate_names = coordinate_names, label = label)
}

#' @rdname as_task_regr_st
#' @export as_task_regr_st.DataBackend
#' @exportS3Method
as_task_regr_st.DataBackend = function(x, target, id = deparse(substitute(x)), coordinate_names, crs = NA_character_, coords_as_features = FALSE, label = NA_character_, ...) {
  # `crs` defaults to NA like in the data.frame method and in the constructor of the task
  TaskRegrST$new(id = id, backend = x, target = target, coords_as_features = coords_as_features, crs = crs, coordinate_names = coordinate_names, label = label)
}

#' @rdname as_task_regr_st
#' @export as_task_regr_st.sf
#' @exportS3Method
as_task_regr_st.sf = function(x, target = NULL, id = deparse(substitute(x)), coords_as_features = FALSE, label = NA_character_, ...) {
  id = as.character(id)
  geometries = as.character(unique(sf::st_geometry_type(x)))
  if (!test_names(geometries, identical.to = "POINT")) {
    # stopf() interpolates the format string; plain stop() would only paste the arguments together
    stopf("Simple feature may not contain geometries of type '%s'", str_collapse(setdiff(geometries, "POINT")))
  }

  # extract spatial meta data
  crs = sf::st_crs(x)$wkt
  coordinates = as.data.frame(sf::st_coordinates(x))
  coordinate_names = colnames(coordinates)

  # convert sf to data.frame
  x[[attr(x, "sf_column")]] = NULL
  attr(x, "sf_column") = NULL
  x = as.data.frame(x)

  # add coordinates
  x = cbind(x, coordinates)

  as_task_regr_st(x, target = target, id = id, coords_as_features = coords_as_features, crs = crs, coordinate_names = coordinate_names, label = label)
}

#' @rdname as_task_regr_st
#' @export as_task_regr_st.TaskClassifST
#' @exportS3Method
as_task_regr_st.TaskClassifST = function(x, target = NULL, drop_original_target = FALSE, drop_levels = TRUE, ...) {
  # forward the user supplied options instead of silently falling back to the defaults
  convert_task(intask = x, target = target, new_type = "regr_st", drop_original_target = drop_original_target, drop_levels = drop_levels)
}

# --------------------------------------------------------------------------------
# /R/as_task_unsupervised.R:
# --------------------------------------------------------------------------------
# The methods below hand the spatial object to the task constructor as backend.

#' @export
as_task_unsupervised.RasterBrick = function(x, id = deparse(substitute(x)), label = NA_character_, ...) {
  TaskUnsupervised$new(id = id, backend = x, label = label)
}

#' @export
as_task_unsupervised.RasterStack = function(x, id = deparse(substitute(x)), label = NA_character_, ...) {
  TaskUnsupervised$new(id = id, backend = x, label = label)
}

#' @export
as_task_unsupervised.SpatRaster = function(x, id = deparse(substitute(x)), label = NA_character_, ...) {
  TaskUnsupervised$new(id = id, backend = x, label = label)
}

#' @export
as_task_unsupervised.stars = function(x, id = deparse(substitute(x)), label = NA_character_, ...) {
  TaskUnsupervised$new(id = id, backend = x, label = label)
}

#' @export
as_task_unsupervised.sf = function(x, id = deparse(substitute(x)), label = NA_character_, ...) {
  TaskUnsupervised$new(id = id, backend = x, label = label)
}

# --------------------------------------------------------------------------------
# /R/data.R:
# --------------------------------------------------------------------------------
#' @title Numeric Layer Generator
#'
#' @description
#' Generates a numeric layer when passed to [generate_stack()].
#'
#' @param id (`character(1)`)\cr
#' Layer id.
#' @param in_memory (`logical(1)`)\cr
#' If `FALSE` (default), layer is written to disk.
#'
#' @return Named `list()`
#'
#' @keywords internal
#' @export
numeric_layer = function(id, in_memory = FALSE) {
  assert_string(id)
  assert_flag(in_memory)

  list(id = id, type = "numeric", in_memory = in_memory)
}

#' @title Factor Layer Generator
#'
#' @description
#' Generates a factor layer when passed to [generate_stack()].
#'
#' @param id (`character(1)`)\cr
#' Layer id.
#' @param levels (`character()`)\cr
#' Factor levels.
#' @param in_memory (`logical(1)`)\cr
#' If `FALSE` (default), layer is written to disk.
#'
#' @return Named `list()`
#'
#' @keywords internal
#' @export
factor_layer = function(id, levels, in_memory = FALSE) {
  assert_string(id)
  assert_character(levels)
  assert_flag(in_memory)

  list(id = id, type = "factor", levels = levels, in_memory = in_memory)
}

#' @title Generate Raster Stack
#'
#' @description
#' Generates a raster stack.
#'
#' @param layers (List of [numeric_layer()] and [factor_layer()])\cr
#' List of layers.
#' @param layer_size (`numeric(1)`)\cr
#' Size of a single layer in megabytes.
#' @param dimension (`integer(1)`)\cr
#' Dimension of the squared layers.
#' @param multi_layer_file (`logical(1)`)\cr
#' If `TRUE`, raster is written to disk as a single multi-layer file.
#' Overwrites `in_memory` argument of `numeric_layer()` and `factor_layer()`.
#'
#' `layer_size` and `dimension` are mutually exclusive.
#'
#' @return [terra::SpatRaster]
#'
#' @keywords internal
#' @export
generate_stack = function(layers, layer_size = NULL, dimension = NULL, multi_layer_file = FALSE) {
  # exactly one of `layer_size` and `dimension` must be given
  if (!xor(is.null(layer_size), is.null(dimension))) {
    stop("Either `layer_size` or `dimension` must be provided")
  }
  assert_list(layers)
  assert_int(layer_size, null.ok = TRUE)
  assert_int(dimension, null.ok = TRUE)
  assert_flag(multi_layer_file)
  # derive the edge length from the layer size in megabytes, 8 bytes per cell
  dimension = dimension %??% floor(sqrt(layer_size * 1e+06 / 8))
  ids = map_chr(layers, "id")
  assert_character(ids, unique = TRUE)

  layers = map(layers, function(layer) {
    if (layer$type == "numeric") {
      # first half of the cells is drawn from N(0, 1), second half from N(1, 1)
      data = matrix(c(stats::rnorm(floor(dimension^2 / 2), 0, 1), stats::rnorm(ceiling(dimension^2 / 2), 1, 1)), nrow = dimension)
      ras = terra::rast(data)
    } else if (layer$type == "factor") {
      # integer codes in blocks of (roughly) equal size, labelled with the factor levels
      data = matrix(rep(seq_along(layer$levels), each = floor(dimension^2 / length(layer$levels)), length.out = dimension^2), nrow = dimension)
      ras = terra::rast(data)
      ras = terra::categories(ras, layer = 1, data.table(ID = seq_along(layer$levels), category = layer$levels))
    } else {
      # fail early instead of passing a NULL layer on to terra::rast()
      stopf("Unknown layer type '%s'", layer$type)
    }

    # single layers are only written to disk if no multi-layer file is requested
    if (!layer$in_memory && !multi_layer_file) {
      filename = tempfile(fileext = ".tif")
      terra::writeRaster(ras, filename)
      ras = terra::rast(filename)
    }
    ras
  })

  stack = terra::rast(layers)
  terra::crs(stack) = "EPSG:4326"
  stack = set_names(stack, ids)
  if (multi_layer_file) stack = terra::writeRaster(stack, filename = tempfile(fileext = ".tif"))
  stack
}

#' @title Sample Points in Raster Stack
#'
#' @description
#' Samples `n` points of a raster stack.
#'
#' @param stack ([terra::SpatRaster])\cr
#' Raster stack.
#' @param n (`integer(1)`)\cr
#' Number of points.
#'
#' @return [sf::sf]
#'
#' @keywords internal
#' @export
sample_stack = function(stack, n = 100) {
  # WORKAROUND: spatSample fails with categorical layers
  # -> sample point locations from the extent and extract the values afterwards
  extent = terra::ext(stack)
  layer_factor = names(stack)[terra::is.factor(stack)]
  points = terra::spatSample(extent, size = n, lonlat = FALSE, as.points = TRUE)
  data = terra::extract(stack, points)
  # drop the ID column added by terra::extract()
  data$ID = NULL
  points = terra::setValues(points, data)
  vector = sf::st_as_sf(points)
  vector = sf::st_set_crs(vector, "EPSG:4326")
  # categorical layers are converted to factor columns
  for (layer in layer_factor) {
    vector[layer] = factor(vector[[layer]])
  }
  vector
}

#' @title Mask Raster Stack
#'
#' @description
#' Masks stack to a circular area of interest.
#'
#' @param stack ([terra::SpatRaster])\cr
#' Raster stack.
#'
#' @return [terra::SpatRaster]
#'
#' @keywords internal
#' @export
mask_stack = function(stack) {
  # The mask is a circle around the center of the raster extent with a radius
  # of 80% of the half width.
  x_min = terra::xmin(stack)
  y_min = terra::ymin(stack)
  half_width = (terra::xmax(stack) - x_min) / 2
  # the vertical extent must be derived from ymax (not xmax), otherwise the
  # circle is misplaced for rasters whose x and y ranges differ
  half_height = (terra::ymax(stack) - y_min) / 2

  # center of the extent; equal to the half extents if the raster starts at the origin
  center = c(x_min + half_width, y_min + half_height)

  point = sf::st_as_sf(sf::st_as_sfc(list(sf::st_point(center))))
  polygon = sf::st_buffer(point, dist = half_width * 0.8)
  mask = terra::vect(polygon)
  terra::crs(mask) = "EPSG:4326"

  terra::mask(stack, mask)
}

# --------------------------------------------------------------------------------
# /R/helper.R:
# --------------------------------------------------------------------------------
#' @title Write a Raster in Chunks
#'
#' @description
#' Writes square raster to disk in chunks.
#' Internal helper function.
#'
#' @param data `[SpatRaster]`\cr
#' `SpatRaster` object.
#'
#' @keywords internal
#' @export
write_raster = function(data) {
  # create temp file
  filename = tempfile(fileext = ".tif")
  target_raster = terra::rast(data)
  # calculate block size; block_size() interprets the second argument as megabytes
  bs = block_size(target_raster, 100)
  # initialize target raster
  terra::writeStart(target_raster, filename = filename)
  # write values in chunks
  # `row` is the first cell of a block and `nrows` the number of cells in the block;
  # both are translated into row numbers with terra::rowFromCell()
  # NOTE(review): the row index starts at 1 for every block (`seq_len()`) and the column
  # index is built from `terra::nrow(data)`; this presumably only works for square
  # rasters that fit into a single block - confirm before using with larger data
  mlr3misc::pmap(list(bs$cells_seq, bs$cells_to_read), function(row, nrows) {
    terra::writeValues(target_raster,
      data[seq_len((terra::rowFromCell(target_raster, row)) + terra::rowFromCell(target_raster, nrows) - 1),
        seq_len(terra::nrow(data))], terra::rowFromCell(target_raster, row), terra::rowFromCell(target_raster, nrows))
  })
  terra::writeStop(target_raster)
  # return the raster backed by the written file
  terra::rast(filename)
}

#' @title Split Raster Into Chunks
#'
#' @description
#' Splits raster into chunks.
#'
#' @param raster ([terra::SpatRaster])\cr
#' Raster to be split into chunks.
#' @template param-chunksize
#'
#' @export
#' @keywords internal
block_size = function(raster, chunksize) {
  assert_class(raster, "SpatRaster")
  # convert the chunk size from megabytes to bytes
  chunk_bytes = assert_numeric(chunksize) * 1e+06

  n_col = terra::ncol(raster)
  n_cell = terra::ncell(raster)

  # size of one raster row over all layers in bytes; one cell takes 8 bytes of memory
  bytes_per_row = n_col * terra::nlyr(raster) * 8
  # number of rows which fit into one block, expressed as number of cells
  cells_per_block = ceiling(chunk_bytes / bytes_per_row) * n_col

  # first cell of each block
  block_starts = seq(1, n_cell, by = cells_per_block)
  n_blocks = length(block_starts)

  # all blocks have the same number of cells, except for the last block which takes the remainder
  block_lengths = rep(cells_per_block, n_blocks)
  block_lengths[n_blocks] = n_cell - block_starts[n_blocks] + 1

  list(cells_seq = block_starts, cells_to_read = block_lengths)
}

# option values which turn off the warnings about partial matching
allow_partial_matching = list(
  warnPartialMatchArgs = FALSE,
  warnPartialMatchAttr = FALSE,
  warnPartialMatchDollar = FALSE
)

# --------------------------------------------------------------------------------
# /R/predict_spatial.R:
# --------------------------------------------------------------------------------
#' @title Predict on Spatial Objects with mlr3 Learners
#'
#' @description
#' This function allows to directly predict mlr3 learners on various spatial objects.
#'
#' @param newdata ([terra::SpatRaster] | `stars::stars` | [sf::sf] | `raster::RasterStack` | `raster::RasterBrick`).
#' New data to predict on. All spatial data formats convertible by `as_data_backend()` are supported e.g. [terra::SpatRaster] or [sf::sf].
#' @param learner ([Learner]).
#' Learner with trained model.
#' @template param-chunksize
#' @param format (`character(1)`)\cr
#' Output class of the resulting object.
#' Accepted values are `"raster"`, `"stars"` and `"terra"` if the input is a raster.
#' Note that when choosing something else than `"terra"`, the spatial object is converted into the respective format which might cause overhead both in runtime and memory allocation.
#' For vector data only `"sf"` is supported.
#' @param filename (`character(1)`)\cr
#' Path where the spatial object should be written to.
#'
#' @return Spatial object of class given in argument `format`.
#' @examples
#' library(terra, exclude = "resample")
#'
#' # fit rpart on training points
#' task_train = tsk("leipzig")
#' learner = lrn("classif.rpart")
#' learner$train(task_train)
#'
#' # load raster
#' stack = rast(system.file("extdata", "leipzig_raster.tif", package = "mlr3spatial"))
#'
#' # predict land cover classes
#' pred = predict_spatial(stack, learner, chunksize = 1L)
#' @export
predict_spatial = function(newdata, learner, chunksize = 200L, format = "terra", filename = NULL) {
  # wrap the spatial object in an unsupervised task; the class of the backend decides
  # between the raster and the vector branch below
  task = as_task_unsupervised(newdata)
  assert_multi_class(task$backend, c("DataBackendRaster", "DataBackendVector"))
  assert_learner(learner)

  if (test_class(task$backend, "DataBackendRaster")) {
    # --- raster data: predict chunk-wise and write the predictions to a file ---
    assert_number(chunksize)
    assert_choice(format, c("terra", "raster", "stars"))
    # predictions are written to a temporary file if no filename is given
    filename = filename %??% tempfile(fileext = ".tif")
    assert_path_for_output(filename)

    stack = task$backend$stack
    start_time = proc.time()[3]
    # wrap the learner so that observations with missing values are predicted as NaN
    # NOTE(review): for any other task type `switch()` returns NULL and the function fails later
    learner = switch(learner$task_type,
      "classif" = LearnerClassifSpatial$new(learner),
      "regr" = LearnerRegrSpatial$new(learner))

    # calculate block size
    bs = block_size(stack, chunksize)

    # initialize target raster: single layer with extent, resolution and crs of the input
    target_raster = terra::rast(terra::ext(stack), resolution = terra::res(stack), crs = terra::crs(stack))
    terra::writeStart(target_raster, filename = filename, overwrite = TRUE, datatype = "FLT8S")

    lg$info("Start raster prediction")
    lg$info("Prediction is executed with a chunksize of %s Megabytes, %i chunk(s) in total, %i values per chunk",
      as.character(chunksize), length(bs$cells_seq), ceiling(terra::ncell(task$backend$stack) / length(bs$cells_seq)))

    # iterate over the blocks: first cell, number of cells and running number of the block
    mlr3misc::pmap(list(bs$cells_seq, bs$cells_to_read, seq_along(bs$cells_seq)), function(cells_seq, cells_to_read, n) {

      stack = task$backend$stack
      # the row ids of the task are the cell numbers of the raster
      pred = learner$predict(task, row_ids = cells_seq:((cells_seq + cells_to_read - 1)))
      # the number of cells is translated into a number of rows by looking up the row of the
      # cell with that index; presumably relies on blocks consisting of complete rows as
      # produced by block_size()
      terra::writeValues(x = target_raster, v = pred$response,
        start = terra::rowFromCell(stack, cells_seq), # start row number
        nrows = terra::rowFromCell(stack, cells_to_read)) # how many rows
      lg$info("Chunk %i of %i finished", n, length(bs$cells_seq))
    })

    terra::writeStop(target_raster)
    lg$info("Finished raster prediction in %i seconds", as.integer(proc.time()[3] - start_time))

    if (learner$task_type == "classif") {
      # label the integer codes of the prediction with the class levels of the training task
      levels = learner$learner$state$train_task$levels()[[learner$learner$state$train_task$target_names]]
      value = data.table(ID = seq_along(levels), categories = levels)
      target_raster = terra::categories(target_raster, value = value)
    }
    # the layer is named like the target of the training task
    target_raster = set_names(target_raster, learner$learner$state$train_task$target_names)

    # convert to the requested output class
    switch(format,
      "terra" = target_raster,
      "stars" = stars::st_as_stars(target_raster),
      "raster" = as(target_raster, "Raster")
    )
  } else {
    # --- vector data: predict all observations at once and return an sf object ---
    # NOTE(review): the second positional argument of `assert_string()` is presumably `na.ok`,
    # so this does not restrict `format` to "sf"; tightening the check would reject the
    # default `format = "terra"` - confirm the intended behavior
    assert_string(format, "sf")
    if (!is.null(filename)) assert_path_for_output(filename)
    pred = learner$predict(task)
    # combine the response with the geometries of the backend; columns are named like the target and "geometry"
    vector = set_names(sf::st_as_sf(data.frame(pred$response, task$backend$sfc)), c(learner$state$train_task$target_names, "geometry"))

    # the result is only written to disk if a filename is given
    if (!is.null(filename)) sf::st_write(vector, filename, quiet = TRUE)
    vector
  }
}

# --------------------------------------------------------------------------------
# /R/zzz.R:
--------------------------------------------------------------------------------
#' @import data.table
#' @import checkmate
#' @import mlr3misc
#' @import mlr3
#' @import sf
#' @importFrom R6 R6Class is.R6
#' @importFrom stats complete.cases
#' @importFrom utils getFromNamespace data
#'
#' @section Learn mlr3:
#' * Book on mlr3: \url{https://mlr3book.mlr-org.com}
#' * Use cases and examples gallery: \url{https://mlr3gallery.mlr-org.com}
#' * Cheat Sheets: \url{https://github.com/mlr-org/mlr3cheatsheets}
#'
#' @section mlr3 extensions:
#' * Preprocessing and machine learning pipelines: \CRANpkg{mlr3pipelines}
#' * Analysis of benchmark experiments: \CRANpkg{mlr3benchmark}
#' * More classification and regression tasks: \CRANpkg{mlr3data}
#' * Connector to [OpenML](https://www.openml.org): \CRANpkg{mlr3oml}
#' * Solid selection of good classification and regression learners: \CRANpkg{mlr3learners}
#' * Even more learners: \url{https://github.com/mlr-org/mlr3extralearners}
#' * Tuning of hyperparameters: \CRANpkg{mlr3tuning}
#' * Hyperband tuner: \CRANpkg{mlr3hyperband}
#' * Visualizations for many \pkg{mlr3} objects: \CRANpkg{mlr3viz}
#' * Survival analysis and probabilistic regression: \CRANpkg{mlr3proba}
#' * Cluster analysis: \CRANpkg{mlr3cluster}
#' * Feature selection filters: \CRANpkg{mlr3filters}
#' * Feature selection wrappers: \CRANpkg{mlr3fselect}
#' * Interface to real (out-of-memory) data bases: \CRANpkg{mlr3db}
#' * Performance measures as plain functions: \CRANpkg{mlr3measures}
#'
#' @section Suggested packages:
#' * Parallelization framework: \CRANpkg{future}
#' * Progress bars: \CRANpkg{progressr}
#' * Encapsulated evaluation: \CRANpkg{evaluate}, \CRANpkg{callr} (external process)
#'
#' @section Package Options:
#' * `"mlr3.debug"`: If set to `TRUE`, parallelization via \CRANpkg{future} is
#'   disabled to simplify debugging and provide more concise tracebacks. Note that
#'   results computed with debug mode enabled use a different seeding mechanism
#'   and are not reproducible.
#' * `"mlr3.allow_utf8_names"`: If set to `TRUE`, checks on the feature names
#'   are relaxed, allowing non-ascii characters in column names. This is an
#'   experimental and temporary option to pave the way for text analysis, and will
#'   likely be removed in a future version of the package.
#'
#' @references
#' `r tools::toRd(citation("mlr3spatial"))`
"_PACKAGE"

# Load hook: registers the spatiotemporal task types and the "leipzig" example
# task with mlr3 and binds the package logger `lg` in the package namespace.
.onLoad = function(libname, pkgname) {
  # nocov start

  # reflections
  reflections = getFromNamespace("mlr_reflections", ns = "mlr3")

  # rows describing the two spatiotemporal task types
  st_task_types = rowwise_table(
    ~type, ~package, ~task, ~learner, ~prediction, ~prediction_data, ~measure,
    "regr_st", "mlr3spatial", "TaskRegrST", "LearnerRegr", "PredictionRegr", "PredictionDataRegr", "MeasureRegr",
    "classif_st", "mlr3spatial", "TaskClassifST", "LearnerClassif", "PredictionClassif", "PredictionDataClassif", "MeasureClassif"
  )

  # remove any existing rows for these types first so that the table never
  # holds duplicates, then append the rows and restore the key
  reflections$task_types = reflections$task_types[!c("regr_st", "classif_st")]
  reflections$task_types = setkeyv(rbind(reflections$task_types, st_task_types), "type")

  # each *_st type inherits column roles and properties from its plain
  # counterpart; the column roles are extended by the spatiotemporal ones
  st_roles = c("coordinate", "space", "time")
  for (base_type in c("classif", "regr")) {
    st_type = paste0(base_type, "_st")
    reflections$task_col_roles[[st_type]] = c(reflections$task_col_roles[[base_type]], st_roles)
    reflections$task_properties[[st_type]] = reflections$task_properties[[base_type]]
  }
  reflections$task_col_roles[["unsupervised"]] = reflections$task_col_roles[["regr"]]

  reflections$default_measures[["classif_st"]] = "classif.ce"
  reflections$default_measures[["regr_st"]] = "regr.mse"

  # task
  task_dictionary = getFromNamespace("mlr_tasks", ns = "mlr3")
  task_dictionary$add("leipzig", load_task_leipzig)

  # setup logger: `lg` lives in the enclosing environment of this function
  logger = lgr::get_logger("mlr3")
  assign("lg", logger, envir = parent.env(environment()))
  if (Sys.getenv("IN_PKGDOWN") == "true") {
    logger$set_threshold("warn")
  }
} # nocov end


# Unload hook: removes the example task that was added to mlr3's task dictionary.
.onUnload = function(libpaths) { # nolint
  task_dictionary = mlr3::mlr_tasks
  task_dictionary$remove("leipzig")
}

leanify_package() # nocov
-------------------------------------------------------------------------------- /README.Rmd: --------------------------------------------------------------------------------
---
output: github_document
---

```{r, include=FALSE}
library(mlr3)
library(mlr3spatial)
library(terra, exclude = "resample")
library(sf)

lgr::get_logger("mlr3")$set_threshold("warn")
lgr::get_logger("mlr3spatial")$set_threshold("warn")
set.seed(1)
options(
  datatable.print.nrows = 10,
  datatable.print.class = FALSE,
  datatable.print.keys = FALSE,
  width = 100)
```

# mlr3spatial

Package website: [release](https://mlr3spatial.mlr-org.com/) | [dev](https://mlr3spatial.mlr-org.com/dev/)


[![r-cmd-check](https://github.com/mlr-org/mlr3spatial/actions/workflows/r-cmd-check.yml/badge.svg)](https://github.com/mlr-org/mlr3spatial/actions/workflows/r-cmd-check.yml)
[![CRAN status](https://www.r-pkg.org/badges/version/mlr3spatial)](https://CRAN.R-project.org/package=mlr3spatial)
[![StackOverflow](https://img.shields.io/badge/stackoverflow-mlr3-orange.svg)](https://stackoverflow.com/questions/tagged/mlr3)
[![Mattermost](https://img.shields.io/badge/chat-mattermost-orange.svg)](https://lmmisld-lmu-stats-slds.srv.mwn.de/mlr_invite/)


*mlr3spatial* is the package for spatial objects within the [`mlr3`](https://mlr-org.com) ecosystem.
The package directly loads data from [`sf`](https://CRAN.R-project.org/package=sf) objects to train any mlr3 learner.
34 | The learner can predict on various raster formats ([`terra`](https://CRAN.R-project.org/package=terra), [`raster`](https://CRAN.R-project.org/package=raster) and [`stars`](https://CRAN.R-project.org/package=stars)) and writes the prediction raster to disk. 35 | mlr3spatial reads large raster objects in chunks to avoid memory issues and predicts the chunks in parallel. 36 | Check out [`mlr3spatiotempcv`](https://github.com/mlr-org/mlr3spatiotempcv) for spatiotemporal resampling within mlr3. 37 | 38 | ## Resources 39 | 40 | There are sections about spatial data in the [mlr3book](https://mlr3book.mlr-org.com). 41 | 42 | * Learn how to [predict](https://mlr3book.mlr-org.com/chapters/chapter13/beyond_regression_and_classification.html#sec-spatial-prediction) a spatial raster image. 43 | * Estimate the performance of a model with [spatial cross-validation](https://mlr3book.mlr-org.com/chapters/chapter13/beyond_regression_and_classification.html#sec-spatiotemporal). 44 | 45 | The gallery features articles about spatial data in the mlr3 ecosystem. 46 | 47 | * Learn the basics with a [land cover classification](https://mlr-org.com/gallery/technical/2023-02-27-land-cover-classification/) of the city of Leipzig. 48 | 49 | ## Installation 50 | 51 | Install the last release from CRAN: 52 | 53 | ```{r, eval=FALSE} 54 | install.packages("mlr3spatial") 55 | ``` 56 | 57 | Install the development version from GitHub: 58 | 59 | ```{r, eval=FALSE} 60 | remotes::install_github("mlr-org/mlr3spatial") 61 | ``` 62 | 63 | ## Example 64 | 65 | Our goal is to map the land cover of the city of Leipzig. 66 | The `mlr3spatial` package contains a Sentinel-2 scene of the city of Leipzig and a point vector with training sites. 67 | The Sentinel-2 scene is a 10m resolution multispectral image with 7 bands and the NDVI. 68 | The points represent samples of the four land cover classes: Forest, Pastures, Urban and Water. 
69 | We load the raster with the [`terra`](https://CRAN.R-project.org/package=terra) package and the vector with the [`sf`](https://CRAN.R-project.org/package=sf) package in the R Session. 70 | 71 | ```{r} 72 | library(mlr3verse) 73 | library(mlr3spatial) 74 | library(terra, exclude = "resample") 75 | library(sf) 76 | 77 | leipzig = read_sf(system.file("extdata", "leipzig_points.gpkg", package = "mlr3spatial"), stringsAsFactors = TRUE) 78 | 79 | leipzig_raster = rast(system.file("extdata", "leipzig_raster.tif", package = "mlr3spatial")) 80 | ``` 81 | 82 | The function `as_task_classif_st()` converts the `sf::sf` object to a spatial classification task. 83 | 84 | ```{r} 85 | task = as_task_classif_st(leipzig, target = "land_cover") 86 | task 87 | ``` 88 | 89 | The points are located in the district of Lindenau and Zentrum-West. 90 | 91 | 92 | 93 | Now we train a classification tree on the leipzig task. 94 | 95 | ```{r} 96 | learner = lrn("classif.rpart") 97 | learner$train(task) 98 | ``` 99 | 100 | As a last step, we predict the land cover class for the whole area of interest. 101 | For this, we pass the Sentinel-2 scene and the trained learner to the `predict_spatial()` function. 102 | 103 | ```{r} 104 | land_cover = predict_spatial(leipzig_raster, learner) 105 | ``` 106 | 107 | 108 | 109 | ## FAQ 110 | 111 |
112 | Will mlr3spatial support spatial learners? 113 |
114 | Eventually. It is not yet clear whether these would live in mlr3extralearners or in mlr3spatial. 115 | So far there are none. 116 | 
117 | 118 |
119 | Why are there two packages, mlr3spatial and mlr3spatiotempcv? 120 |
121 | mlr3spatiotempcv is solely devoted to resampling techniques. 122 | There are quite a few of them, and keeping packages small is one of the development philosophies of the mlr3 framework. 123 | Also, back when mlr3spatiotempcv was developed, it was not yet clear how we wanted to structure additional spatial components such as prediction support for spatial classes and so on. 124 | 
125 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | # mlr3spatial 3 | 4 | Package website: [release](https://mlr3spatial.mlr-org.com/) \| 5 | [dev](https://mlr3spatial.mlr-org.com/dev/) 6 | 7 | 8 | 9 | [![r-cmd-check](https://github.com/mlr-org/mlr3spatial/actions/workflows/r-cmd-check.yml/badge.svg)](https://github.com/mlr-org/mlr3spatial/actions/workflows/r-cmd-check.yml) 10 | [![CRAN 11 | status](https://www.r-pkg.org/badges/version/mlr3spatial)](https://CRAN.R-project.org/package=mlr3spatial) 12 | [![StackOverflow](https://img.shields.io/badge/stackoverflow-mlr3-orange.svg)](https://stackoverflow.com/questions/tagged/mlr3) 13 | [![Mattermost](https://img.shields.io/badge/chat-mattermost-orange.svg)](https://lmmisld-lmu-stats-slds.srv.mwn.de/mlr_invite/) 14 | 15 | 16 | *mlr3spatial* is the package for spatial objects within the 17 | [`mlr3`](https://mlr-org.com) ecosystem. The package directly loads data 18 | from [`sf`](https://CRAN.R-project.org/package=sf) objects to train any 19 | mlr3 learner. The learner can predict on various raster formats 20 | ([`terra`](https://CRAN.R-project.org/package=terra), 21 | [`raster`](https://CRAN.R-project.org/package=raster) and 22 | [`stars`](https://CRAN.R-project.org/package=stars)) and writes the 23 | prediction raster to disk. mlr3spatial reads large raster objects in 24 | chunks to avoid memory issues and predicts the chunks in parallel. Check 25 | out [`mlr3spatiotempcv`](https://github.com/mlr-org/mlr3spatiotempcv) 26 | for spatiotemporal resampling within mlr3. 27 | 28 | ## Resources 29 | 30 | There are sections about spatial data in the 31 | [mlr3book](https://mlr3book.mlr-org.com). 32 | 33 | - Learn how to 34 | [predict](https://mlr3book.mlr-org.com/chapters/chapter13/beyond_regression_and_classification.html#sec-spatial-prediction) 35 | a spatial raster image. 
36 | - Estimate the performance of a model with [spatial 37 | cross-validation](https://mlr3book.mlr-org.com/chapters/chapter13/beyond_regression_and_classification.html#sec-spatiotemporal). 38 | 39 | The gallery features articles about spatial data in the mlr3 ecosystem. 40 | 41 | - Learn the basics with a [land cover 42 | classification](https://mlr-org.com/gallery/technical/2023-02-27-land-cover-classification/) 43 | of the city of Leipzig. 44 | 45 | ## Installation 46 | 47 | Install the last release from CRAN: 48 | 49 | ``` r 50 | install.packages("mlr3spatial") 51 | ``` 52 | 53 | Install the development version from GitHub: 54 | 55 | ``` r 56 | remotes::install_github("mlr-org/mlr3spatial") 57 | ``` 58 | 59 | ## Example 60 | 61 | Our goal is to map the land cover of the city of Leipzig. The 62 | `mlr3spatial` package contains a Sentinel-2 scene of the city of Leipzig 63 | and a point vector with training sites. The Sentinel-2 scene is a 10m 64 | resolution multispectral image with 7 bands and the NDVI. The points 65 | represent samples of the four land cover classes: Forest, Pastures, 66 | Urban and Water. We load the raster with the 67 | [`terra`](https://CRAN.R-project.org/package=terra) package and the 68 | vector with the [`sf`](https://CRAN.R-project.org/package=sf) package in 69 | the R Session. 70 | 71 | ``` r 72 | library(mlr3verse) 73 | library(mlr3spatial) 74 | library(terra, exclude = "resample") 75 | library(sf) 76 | 77 | leipzig = read_sf(system.file("extdata", "leipzig_points.gpkg", package = "mlr3spatial"), stringsAsFactors = TRUE) 78 | 79 | leipzig_raster = rast(system.file("extdata", "leipzig_raster.tif", package = "mlr3spatial")) 80 | ``` 81 | 82 | The function `as_task_classif_st()` converts the `sf::sf` object to a 83 | spatial classification task. 
84 | 85 | ``` r 86 | task = as_task_classif_st(leipzig, target = "land_cover") 87 | task 88 | ``` 89 | 90 | ## (97 x 9) 91 | ## * Target: land_cover 92 | ## * Properties: multiclass 93 | ## * Features (8): 94 | ## - dbl (8): b02, b03, b04, b06, b07, b08, b11, ndvi 95 | ## * Coordinates: 96 | ## X Y 97 | ## 1: 732480.1 5693957 98 | ## 2: 732217.4 5692769 99 | ## 3: 732737.2 5692469 100 | ## 4: 733169.3 5692777 101 | ## 5: 732202.2 5692644 102 | ## --- 103 | ## 93: 733018.7 5692342 104 | ## 94: 732551.4 5692887 105 | ## 95: 732520.4 5692589 106 | ## 96: 732542.2 5692204 107 | ## 97: 732437.8 5692300 108 | 109 | The points are located in the district of Lindenau and Zentrum-West. 110 | 111 | 112 | 113 | Now we train a classification tree on the leipzig task. 114 | 115 | ``` r 116 | learner = lrn("classif.rpart") 117 | learner$train(task) 118 | ``` 119 | 120 | As a last step, we predict the land cover class for the whole area of 121 | interest. For this, we pass the Sentinel-2 scene and the trained learner 122 | to the `predict_spatial()` function. 123 | 124 | ``` r 125 | land_cover = predict_spatial(leipzig_raster, learner) 126 | ``` 127 | 128 | 129 | 130 | ## FAQ 131 | 132 |
133 | 134 | Will mlr3spatial support spatial learners? 135 | 136 |
Eventually. It is not yet clear whether these would live in 137 | mlr3extralearners or in mlr3spatial. So far there are none. 138 | 
139 |
140 | 141 | Why are there two packages, mlr3spatial and mlr3spatiotempcv? 142 | 143 |
mlr3spatiotempcv is solely devoted to resampling techniques. There 144 | are quite a few of them, and keeping packages small is one of the development 145 | philosophies of the mlr3 framework. Also, back when 146 | mlr3spatiotempcv was developed, it was not yet clear how we wanted to 147 | structure additional spatial components such as prediction support for 148 | spatial classes and so on. 149 | 
150 | -------------------------------------------------------------------------------- /attic/benchmark.R: -------------------------------------------------------------------------------- 1 | library(mlr3spatial) 2 | library(mlr3learners) 3 | library(tictoc) 4 | 5 | # 10 MB - in memory ------------------------------------------------------------ 6 | 7 | # train task 8 | stack = generate_stack(list( 9 | numeric_layer("x_1"), 10 | factor_layer("y", levels = c("a", "b"))), 11 | layer_size = 10) 12 | vector = sample_stack(stack, n = 100) 13 | task_train = as_task_classif_st(vector, id = "test_vector", target = "y") 14 | learner = lrn("classif.ranger") 15 | learner$train(task_train) 16 | 17 | # predict task 18 | dt = as.data.table(terra::values(stack)) 19 | dt[, y := factor(y, levels = c("a", "b"))] 20 | backend = as_data_backend(dt) 21 | task = as_task_classif(dt, id = "test", target = "y") 22 | 23 | tic() 24 | pred = learner$predict(task) # 74 seconds 25 | toc() 26 | 27 | 28 | # 10 MB - terra ---------------------------------------------------------------- 29 | 30 | # train task 31 | stack = generate_stack(list( 32 | numeric_layer("x_1"), 33 | factor_layer("y", levels = c("a", "b"))), 34 | layer_size = 10) 35 | vector = sample_stack(stack, n = 100) 36 | task_train = as_task_classif_st(vector, id = "test_vector", target = "y") 37 | learner = lrn("classif.ranger") 38 | learner$train(task_train) 39 | 40 | # predict task 41 | stack$y = NULL 42 | model = learner$model 43 | 44 | predfun = function(model, data) { 45 | library(ranger) 46 | predict(model, data)$predictions 47 | } 48 | 49 | tic() 50 | terra::predict(stack, model, fun = predfun) # 78 seconds 51 | toc() 52 | 53 | tic() 54 | terra::predict(stack, model, fun = predfun, cores = 4) # 36 seconds 55 | toc() 56 | 57 | tic() 58 | terra::predict(stack, learner, fun = predfun, cores = 8) # 47 seconds 59 | toc() 60 | 61 | 62 | # 10 MB - 1 chunk -------------------------------------------------------------- 63 | 64 | # train 
task 65 | stack = generate_stack(list( 66 | numeric_layer("x_1"), 67 | factor_layer("y", levels = c("a", "b"))), 68 | layer_size = 10) 69 | vector = sample_stack(stack, n = 100) 70 | task_train = as_task_classif_st(vector, id = "test_vector", target = "y") 71 | learner = lrn("classif.ranger") 72 | learner$train(task_train) 73 | 74 | # predict task 75 | stack$y = NULL 76 | task_predict = as_task_unsupervised(stack, id = "test") 77 | learner$parallel_predict = TRUE 78 | 79 | ras = predict_spatial(task_predict, learner, chunksize = 10L) # 71 seconds 80 | 81 | with_future("multicore", workers = 2, { 82 | ras = predict_spatial(task_predict, learner, chunksize = 10L) # 38 seconds 83 | }) 84 | 85 | with_future("multicore", workers = 4, { 86 | ras = predict_spatial(task_predict, learner, chunksize = 10L) # 21 seconds 87 | }) 88 | 89 | with_future("multicore", workers = 8, { 90 | ras = predict_spatial(task_predict, learner, chunksize = 10L) # 19 seconds 91 | }) 92 | 93 | # 50 MB - 5 chunks ------------------------------------------------------------- 94 | 95 | # train task 96 | stack = generate_stack(list( 97 | numeric_layer("x_1"), 98 | factor_layer("y", levels = c("a", "b"))), 99 | layer_size = 50) 100 | vector = sample_stack(stack, n = 100) 101 | task_train = as_task_classif_st(vector, id = "test_vector", target = "y") 102 | learner = lrn("classif.ranger") 103 | learner$train(task_train) 104 | 105 | # predict task 106 | stack$y = NULL 107 | task_predict = as_task_unsupervised(stack, id = "test") 108 | learner$parallel_predict = TRUE 109 | 110 | ras = predict_spatial(task_predict, learner, chunksize = 10L) # 386 seconds 111 | 112 | with_future("multicore", workers = 2, { 113 | ras = predict_spatial(task_predict, learner, chunksize = 10L) # 114 | }) 115 | 116 | with_future("multicore", workers = 4, { 117 | ras = predict_spatial(task_predict, learner, chunksize = 10L) # 142 seconds 118 | }) 119 | 120 | with_future("multicore", workers = 8, { 121 | ras = 
predict_spatial(task_predict, learner, chunksize = 10L) # 122 | }) 123 | 124 | # 20 MB - 1 chunk -------------------------------------------------------------- 125 | 126 | # train task 127 | stack = generate_stack(list( 128 | numeric_layer("x_1"), 129 | factor_layer("y", levels = c("a", "b"))), 130 | layer_size = 20) 131 | vector = sample_stack(stack, n = 100) 132 | task_train = as_task_classif_st(vector, id = "test_vector", target = "y") 133 | learner = lrn("classif.ranger") 134 | learner$train(task_train) 135 | 136 | # predict task 137 | stack$y = NULL 138 | task_predict = as_task_unsupervised(stack, id = "test") 139 | learner$parallel_predict = TRUE 140 | 141 | ras = predict_spatial(task_predict, learner, chunksize = 20L) # 221 seconds 142 | 143 | with_future("multicore", workers = 2, { 144 | ras = predict_spatial(task_predict, learner, chunksize = 10L) # 145 | }) 146 | 147 | with_future("multicore", workers = 4, { 148 | ras = predict_spatial(task_predict, learner, chunksize = 20L) # 61 seconds 149 | }) 150 | 151 | with_future("multicore", workers = 8, { 152 | ras = predict_spatial(task_predict, learner, chunksize = 10L) # 153 | }) 154 | -------------------------------------------------------------------------------- /attic/benchmark.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Benchmarking parallel predictions" 3 | output: rmarkdown::html_vignette 4 | vignette: > 5 | %\VignetteIndexEntry{Benchmarking parallel predictions} 6 | %\VignetteEngine{knitr::rmarkdown} 7 | %\VignetteEncoding{UTF-8} 8 | --- 9 | 10 | 11 | 12 | This benchmark was run on a MacBook Pro 2021 (arm64) with the following specs 13 | 14 | - M1 Pro 15 | - 32 GB RAM 16 | 17 | Note that the differences between the parallel and sequential timings will increase for larger objects as the overhead for starting the parallel workers and collecting the results will decrease. 
18 | 19 | It is not fully clear why the parallel approach of the {terra} package is slower than its sequential counterpart, but it might relate to the single-core performance of the machine the benchmark was run on in combination with the overhead associated with starting the parallel cluster the way it is done in the {terra} package. 20 | 21 | 22 | ```r 23 | library(mlr3) 24 | library(mlr3spatial) 25 | library(future) 26 | library(bench) 27 | library(stars) 28 | #> Loading required package: abind 29 | #> Loading required package: sf 30 | #> Linking to GEOS 3.9.1, GDAL 3.4.2, PROJ 8.2.1; sf_use_s2() is TRUE 31 | library(rpart) 32 | ``` 33 | 34 | ## Small files 35 | 36 | - ~ 250k values 37 | - 48 MB on disk 38 | 39 | 40 | ```r 41 | # SpatRaster demo stack 42 | stack_terra = demo_stack_spatraster(50) 43 | value = data.table::data.table(ID = c(0, 1), y = c("negative", "positive")) 44 | terra::set.cats(stack_terra, layer = "y", value = value) 45 | colnames = names(stack_terra) 46 | file_terra = tempfile("terra", fileext = ".tif") 47 | terra::writeRaster(stack_terra, file_terra) 48 | 49 | # RasterBrick demo stack 50 | stack_raster = demo_stack_rasterbrick(50) 51 | colnames_raster = names(stack_raster) 52 | file_raster = tempfile("raster", fileext = ".tif") 53 | raster::writeRaster(stack_raster, file_raster) 54 | #> Warning in .gd_SetProject(object, ...): NOT UPDATED FOR PROJ >= 6 55 | 56 | # tasks 57 | stack_terra = terra::rast(file_terra) 58 | backend_terra = DataBackendRaster$new(stack_terra) 59 | task_terra = as_task_regr(backend_terra, target = "x_1") 60 | 61 | stack_raster = raster::brick(file_raster) 62 | names(stack_raster) = colnames_raster 63 | backend_raster = DataBackendRaster$new(stack_raster) 64 | #> Warning in raster::wkt(from): no wkt comment 65 | task_raster = as_task_regr(backend_raster, target = "x_1") 66 | 67 | # Train learners 68 | set.seed(42) 69 | row_ids = sample(1:task_terra$nrow, 50) 70 | 71 | learner_task_terra = lrn("regr.rpart") 72 | 
learner_task_terra$parallel_predict = TRUE 73 | learner_task_terra$train(task_terra, row_ids = row_ids) 74 | 75 | learner_task_raster = lrn("regr.rpart") 76 | learner_task_terra$parallel_predict = TRUE 77 | learner_task_raster$train(task_raster, row_ids = row_ids) 78 | 79 | # non-mlr3 models 80 | rpart_task_terra = rpart::rpart(x_1 ~ ., task_terra$data(rows = row_ids)) 81 | rpart_task_raster = rpart::rpart(x_1 ~ ., task_raster$data(rows = row_ids)) 82 | ``` 83 | 84 | 85 | ```r 86 | bm = bench::mark( 87 | 88 | "01-mlr3-terra-4-cores" = { 89 | plan(multicore, workers = 4) 90 | predict_spatial(task_terra, learner_task_terra, chunksize = 2000L) 91 | }, 92 | 93 | "02-terra-4-cores" = terra::predict(stack_terra, rpart_task_terra, cores = 4, cpkgs = "rpart"), 94 | 95 | "03-mlr3-raster-4-cores" = { 96 | plan(multicore, workers = 4) 97 | predict_spatial(task_raster, learner_task_raster, chunksize = 2000L, format = "raster") 98 | }, 99 | 100 | "04-raster-4-cores" = { 101 | library(raster) 102 | library(rpart) 103 | beginCluster(4, type = "PSOCK") 104 | clusterR(stack_raster, predict, args = list(model = rpart_task_raster)) 105 | }, 106 | 107 | check = FALSE, filter_gc = FALSE, min_iterations = 3, 108 | max_iterations = 3, memory = FALSE) 109 | #> Loading required package: sp 110 | 111 | bm$`itr/sec` = NULL 112 | bm$result = NULL 113 | bm$`gc/sec` = NULL 114 | bm$memory = NULL 115 | bm$mem_alloc = NULL 116 | 117 | print(bm) 118 | #> # A tibble: 4 × 8 119 | #> expression min median n_itr n_gc total_time time gc 120 | #> 121 | #> 1 01-mlr3-terra-4-cores 852.65ms 915.91ms 3 22 2.77s 122 | #> 2 02-terra-4-cores 9.54s 9.7s 3 34 29s 123 | #> 3 03-mlr3-raster-4-cores 1.72s 1.8s 3 28 5.5s 124 | #> 4 04-raster-4-cores 3.76s 3.79s 3 1 11.41s 125 | ``` 126 | 127 | 128 | ```r 129 | library(ggplot2) 130 | autoplot(bm, type = "ridge") 131 | #> Loading required namespace: tidyr 132 | #> Picking joint bandwidth of 0.0103 133 | ``` 134 | 135 | ![](./plot-benchmark-small-1.png) 136 | 137 | 138 
| 139 | ## Large files 140 | 141 | - ~ 25 Mio. values 142 | 143 | - 485 MB on disk 144 | 145 | 146 | ```r 147 | # SpatRaster demo stack 148 | stack_terra = demo_stack_spatraster(500) 149 | value = data.table::data.table(ID = c(0, 1), y = c("negative", "positive")) 150 | terra::set.cats(stack_terra, layer = "y", value = value) 151 | colnames = names(stack_terra) 152 | file_terra = tempfile("terra", fileext = ".tif") 153 | terra::writeRaster(stack_terra, file_terra) 154 | 155 | # RasterBrick demo stack 156 | stack_raster = demo_stack_rasterbrick(500) 157 | colnames_raster = names(stack_raster) 158 | file_raster = tempfile("raster", fileext = ".tif") 159 | raster::writeRaster(stack_raster, file_raster) 160 | #> Warning in .gd_SetProject(object, ...): NOT UPDATED FOR PROJ >= 6 161 | 162 | # tasks 163 | stack_terra = terra::rast(file_terra) 164 | backend_terra = DataBackendRaster$new(stack_terra) 165 | task_terra = as_task_regr(backend_terra, target = "x_1") 166 | 167 | stack_raster = raster::brick(file_raster) 168 | names(stack_raster) = colnames_raster 169 | backend_raster = DataBackendRaster$new(stack_raster) 170 | #> Warning in raster::wkt(from): no wkt comment 171 | task_raster = as_task_regr(backend_raster, target = "x_1") 172 | 173 | # Train learners 174 | set.seed(42) 175 | row_ids = sample(1:task_terra$nrow, 50) 176 | 177 | learner_task_terra = lrn("regr.rpart") 178 | learner_task_terra$parallel_predict = TRUE 179 | learner_task_terra$train(task_terra, row_ids = row_ids) 180 | 181 | learner_task_raster = lrn("regr.rpart") 182 | learner_task_terra$parallel_predict = TRUE 183 | learner_task_raster$train(task_raster, row_ids = row_ids) 184 | 185 | # non-mlr3 models 186 | rpart_task_terra = rpart::rpart(x_1 ~ ., task_terra$data(rows = row_ids)) 187 | rpart_task_raster = rpart::rpart(x_1 ~ ., task_raster$data(rows = row_ids)) 188 | ``` 189 | 190 | 191 | ```r 192 | bm = bench::mark( 193 | 194 | "01-mlr3-terra-4-cores" = { 195 | plan(multicore, workers = 4) 196 | 
predict_spatial(task_terra, learner_task_terra, chunksize = 2000L) 197 | }, 198 | 199 | "02-terra-4-cores" = terra::predict(stack_terra, rpart_task_terra, cores = 4, cpkgs = "rpart"), 200 | 201 | "03-mlr3-raster-4-cores" = { 202 | plan(multicore, workers = 4) 203 | predict_spatial(task_raster, learner_task_raster, chunksize = 2000L, format = "raster") 204 | }, 205 | 206 | "04-raster-4-cores" = { 207 | library(raster) 208 | library(rpart) 209 | beginCluster(4, type = "PSOCK") 210 | clusterR(stack_raster, predict, args = list(model = rpart_task_raster)) 211 | }, 212 | 213 | check = FALSE, filter_gc = FALSE, min_iterations = 3, 214 | max_iterations = 3, memory = FALSE) 215 | 216 | bm$`itr/sec` = NULL 217 | bm$result = NULL 218 | bm$`gc/sec` = NULL 219 | bm$memory = NULL 220 | bm$mem_alloc = NULL 221 | 222 | print(bm) 223 | #> # A tibble: 4 × 8 224 | #> expression min median n_itr n_gc total_time time gc 225 | #> 226 | #> 1 01-mlr3-terra-4-cores 11.93s 12.01s 3 167 37.23s 227 | #> 2 02-terra-4-cores 41.14s 42.54s 3 32 2.52m 228 | #> 3 03-mlr3-raster-4-cores 15.81s 16.09s 3 31 48.85s 229 | #> 4 04-raster-4-cores 9.46s 9.85s 3 0 29.36s 230 | ``` 231 | 232 | 233 | ```r 234 | library(ggplot2) 235 | autoplot(bm, type = "ridge") 236 | #> Picking joint bandwidth of 0.0215 237 | ``` 238 | 239 | ![](./plot-benchmark-large-1.png) 240 | 241 | 242 | -------------------------------------------------------------------------------- /attic/benchmark.Rmd.orig: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Benchmarking parallel predictions" 3 | output: rmarkdown::html_vignette 4 | vignette: > 5 | %\VignetteIndexEntry{Benchmarking parallel predictions} 6 | %\VignetteEngine{knitr::rmarkdown} 7 | %\VignetteEncoding{UTF-8} 8 | --- 9 | 10 | ```{r setup, include = FALSE} 11 | knitr::opts_chunk$set( 12 | collapse = TRUE, 13 | comment = "#>", 14 | options(width = 120), 15 | fig.path = "./" 16 | ) 17 | 
lgr::get_logger("bbotk")$set_threshold("warn") 18 | lgr::get_logger("mlr3")$set_threshold("warn") 19 | lgr::get_logger("mlr3spatial")$set_threshold("warn") 20 | ``` 21 | 22 | This benchmark was run on a MacBook Pro 2021 (arm64) with the following specs 23 | 24 | - M1 Pro 25 | - 32 GB RAM 26 | 27 | Note that the differences between the parallel and sequential timings will increase for larger objects as the overhead for starting the parallel workers and collecting the results will decrease. 28 | 29 | It is not fully clear why the parallel approach of the {terra} package is slower than its sequential counterpart, but it might relate to the single-core performance of the machine the benchmark was run on in combination with the overhead associated with starting the parallel cluster the way it is done in the {terra} package. 30 | 31 | ```{r prepare} 32 | library(mlr3) 33 | library(mlr3spatial) 34 | library(future) 35 | library(bench) 36 | library(stars) 37 | library(rpart) 38 | ``` 39 | 40 | ## Small files 41 | 42 | - ~ 250k values 43 | - 48 MB on disk 44 | 45 | ```{r prepare-small} 46 | # SpatRaster demo stack 47 | stack_terra = demo_stack_spatraster(50) 48 | value = data.table::data.table(ID = c(0, 1), y = c("negative", "positive")) 49 | terra::set.cats(stack_terra, layer = "y", value = value) 50 | colnames = names(stack_terra) 51 | file_terra = tempfile("terra", fileext = ".tif") 52 | terra::writeRaster(stack_terra, file_terra) 53 | 54 | # RasterBrick demo stack 55 | stack_raster = demo_stack_rasterbrick(50) 56 | colnames_raster = names(stack_raster) 57 | file_raster = tempfile("raster", fileext = ".tif") 58 | raster::writeRaster(stack_raster, file_raster) 59 | 60 | # tasks 61 | stack_terra = terra::rast(file_terra) 62 | backend_terra = DataBackendRaster$new(stack_terra) 63 | task_terra = as_task_regr(backend_terra, target = "x_1") 64 | 65 | stack_raster = raster::brick(file_raster) 66 | names(stack_raster) = colnames_raster 67 | backend_raster = 
DataBackendRaster$new(stack_raster) 68 | task_raster = as_task_regr(backend_raster, target = "x_1") 69 | 70 | # Train learners 71 | set.seed(42) 72 | row_ids = sample(1:task_terra$nrow, 50) 73 | 74 | learner_task_terra = lrn("regr.rpart") 75 | learner_task_terra$parallel_predict = TRUE 76 | learner_task_terra$train(task_terra, row_ids = row_ids) 77 | 78 | learner_task_raster = lrn("regr.rpart") 79 | learner_task_terra$parallel_predict = TRUE 80 | learner_task_raster$train(task_raster, row_ids = row_ids) 81 | 82 | # non-mlr3 models 83 | rpart_task_terra = rpart::rpart(x_1 ~ ., task_terra$data(rows = row_ids)) 84 | rpart_task_raster = rpart::rpart(x_1 ~ ., task_raster$data(rows = row_ids)) 85 | ``` 86 | 87 | ```{r benchmark-small} 88 | bm = bench::mark( 89 | 90 | "01-mlr3-terra-4-cores" = { 91 | plan(multicore, workers = 4) 92 | predict_spatial(task_terra, learner_task_terra, chunksize = 2000L) 93 | }, 94 | 95 | "02-terra-4-cores" = terra::predict(stack_terra, rpart_task_terra, cores = 4, cpkgs = "rpart"), 96 | 97 | "03-mlr3-raster-4-cores" = { 98 | plan(multicore, workers = 4) 99 | predict_spatial(task_raster, learner_task_raster, chunksize = 2000L, format = "raster") 100 | }, 101 | 102 | "04-raster-4-cores" = { 103 | library(raster) 104 | library(rpart) 105 | beginCluster(4, type = "PSOCK") 106 | clusterR(stack_raster, predict, args = list(model = rpart_task_raster)) 107 | }, 108 | 109 | check = FALSE, filter_gc = FALSE, min_iterations = 3, 110 | max_iterations = 3, memory = FALSE) 111 | 112 | bm$`itr/sec` = NULL 113 | bm$result = NULL 114 | bm$`gc/sec` = NULL 115 | bm$memory = NULL 116 | bm$mem_alloc = NULL 117 | 118 | print(bm) 119 | ``` 120 | 121 | ```{r plot-benchmark-small, fig.cap=""} 122 | library(ggplot2) 123 | autoplot(bm, type = "ridge") 124 | ``` 125 | 126 | ```{r save-plot, echo = FALSE, message = FALSE} 127 | ggsave("plot-benchmark-small-1.png") 128 | ``` 129 | 130 | ## Large files 131 | 132 | - ~ 25 Mio. 
values 133 | 134 | - 485 MB on disk 135 | 136 | ```{r prepare-large} 137 | # SpatRaster demo stack 138 | stack_terra = demo_stack_spatraster(500) 139 | value = data.table::data.table(ID = c(0, 1), y = c("negative", "positive")) 140 | terra::set.cats(stack_terra, layer = "y", value = value) 141 | colnames = names(stack_terra) 142 | file_terra = tempfile("terra", fileext = ".tif") 143 | terra::writeRaster(stack_terra, file_terra) 144 | 145 | # RasterBrick demo stack 146 | stack_raster = demo_stack_rasterbrick(500) 147 | colnames_raster = names(stack_raster) 148 | file_raster = tempfile("raster", fileext = ".tif") 149 | raster::writeRaster(stack_raster, file_raster) 150 | 151 | # tasks 152 | stack_terra = terra::rast(file_terra) 153 | backend_terra = DataBackendRaster$new(stack_terra) 154 | task_terra = as_task_regr(backend_terra, target = "x_1") 155 | 156 | stack_raster = raster::brick(file_raster) 157 | names(stack_raster) = colnames_raster 158 | backend_raster = DataBackendRaster$new(stack_raster) 159 | task_raster = as_task_regr(backend_raster, target = "x_1") 160 | 161 | # Train learners on the same 50 sampled cells (fixed seed for reproducibility) 162 | set.seed(42) 163 | row_ids = sample(seq_len(task_terra$nrow), 50) 164 | 165 | learner_task_terra = lrn("regr.rpart") 166 | learner_task_terra$parallel_predict = TRUE 167 | learner_task_terra$train(task_terra, row_ids = row_ids) 168 | 169 | learner_task_raster = lrn("regr.rpart") 170 | learner_task_raster$parallel_predict = TRUE 171 | learner_task_raster$train(task_raster, row_ids = row_ids) 172 | 173 | # non-mlr3 models fitted on the same rows, used by the terra/raster reference predictions 174 | rpart_task_terra = rpart::rpart(x_1 ~ ., task_terra$data(rows = row_ids)) 175 | rpart_task_raster = rpart::rpart(x_1 ~ ., task_raster$data(rows = row_ids)) 176 | ``` 177 | 178 | ```{r benchmark} 179 | bm = bench::mark( 180 | 181 | "01-mlr3-terra-4-cores" = { 182 | plan(multicore, workers = 4) 183 | predict_spatial(task_terra, learner_task_terra, chunksize = 2000L) 184 | }, 185 | 186 | "02-terra-4-cores" = terra::predict(stack_terra, rpart_task_terra, cores = 
4, cpkgs = "rpart"), 187 | 188 | "03-mlr3-raster-4-cores" = { 189 | plan(multicore, workers = 4) 190 | predict_spatial(task_raster, learner_task_raster, chunksize = 2000L, format = "raster") 191 | }, 192 | 193 | "04-raster-4-cores" = { 194 | library(raster) 195 | library(rpart) 196 | beginCluster(4, type = "PSOCK") 197 | clusterR(stack_raster, predict, args = list(model = rpart_task_raster)) 198 | }, 199 | 200 | check = FALSE, filter_gc = FALSE, min_iterations = 3, 201 | max_iterations = 3, memory = FALSE) 202 | 203 | bm$`itr/sec` = NULL 204 | bm$result = NULL 205 | bm$`gc/sec` = NULL 206 | bm$memory = NULL 207 | bm$mem_alloc = NULL 208 | 209 | print(bm) 210 | ``` 211 | 212 | ```{r plot-benchmark-large, fig.cap=""} 213 | library(ggplot2) 214 | autoplot(bm, type = "ridge") 215 | ``` 216 | 217 | ```{r save-plot-large, echo = FALSE, message = FALSE} 218 | ggsave("plot-benchmark-large-1.png") 219 | ``` 220 | -------------------------------------------------------------------------------- /attic/plot-benchmark-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlr-org/mlr3spatial/f8e417455697159103f126ae89fc858a70861af8/attic/plot-benchmark-1.png -------------------------------------------------------------------------------- /attic/plot-benchmark-large-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlr-org/mlr3spatial/f8e417455697159103f126ae89fc858a70861af8/attic/plot-benchmark-large-1.png -------------------------------------------------------------------------------- /attic/plot-benchmark-small-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlr-org/mlr3spatial/f8e417455697159103f126ae89fc858a70861af8/attic/plot-benchmark-small-1.png -------------------------------------------------------------------------------- /data/leipzig.rda: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlr-org/mlr3spatial/f8e417455697159103f126ae89fc858a70861af8/data/leipzig.rda -------------------------------------------------------------------------------- /inst/WORDLIST: -------------------------------------------------------------------------------- 1 | DataBackend 2 | DataBackendDataTable 3 | DataBackendVector 4 | EPSG 5 | GeoPackage 6 | GeoTIFF 7 | Hyperband 8 | MacBook 9 | Mattermost 10 | NDVI 11 | ORCID 12 | OpenML 13 | Parallelization 14 | SpatRaster 15 | Spatiotemporal 16 | StackOverflow 17 | TaskClassif 18 | TaskClassifST 19 | TaskRegr 20 | TaskRegrST 21 | TaskUnsupervised 22 | WKT 23 | chunksize 24 | cloneable 25 | cmd 26 | dev 27 | github 28 | https 29 | mlr 30 | parallelization 31 | rasters 32 | sfc 33 | socker 34 | spatiotempcv 35 | spatiotemporal 36 | subparts 37 | terra 38 | tracebacks 39 | -------------------------------------------------------------------------------- /inst/extdata/leipzig_points.gpkg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlr-org/mlr3spatial/f8e417455697159103f126ae89fc858a70861af8/inst/extdata/leipzig_points.gpkg -------------------------------------------------------------------------------- /inst/extdata/leipzig_raster.tif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlr-org/mlr3spatial/f8e417455697159103f126ae89fc858a70861af8/inst/extdata/leipzig_raster.tif -------------------------------------------------------------------------------- /man-roxygen/param-chunksize.R: -------------------------------------------------------------------------------- 1 | #' @param chunksize (`integer(1)`)\cr 2 | #' The chunksize determines into how many subparts the prediction task will be 3 | #' split. 
The value can be roughly thought of as megabytes of a raster file 4 | #' on disk. For example, if a prediction on a 1 GB file would be carried out 5 | #' with `chunksize = 100L`, the prediction would happen in 10 chunks. 6 | #' 7 | #' The default of `chunksize = 1000L` might be a good compromise between speed 8 | #' and memory usage. If you find yourself running out of memory, reduce this 9 | #' value. 10 | -------------------------------------------------------------------------------- /man-roxygen/param-data.R: -------------------------------------------------------------------------------- 1 | #' @param data ([terra::SpatRaster])\cr 2 | #' The input [terra::SpatRaster]. 3 | -------------------------------------------------------------------------------- /man-roxygen/param-primary-key.R: -------------------------------------------------------------------------------- 1 | #' @param primary_key (`character(1)` | `integer()`)\cr 2 | #' Name of the primary key column, or integer vector of row ids. 3 | -------------------------------------------------------------------------------- /man-roxygen/param-quiet.R: -------------------------------------------------------------------------------- 1 | #' @param quiet `[logical]`\cr 2 | #' Whether to suppress possible console output. 3 | -------------------------------------------------------------------------------- /man-roxygen/param-response-is-factor.R: -------------------------------------------------------------------------------- 1 | #' @param response_is_factor (`logical(1)`)\cr 2 | #' When this backend should be used in a [mlr3::TaskClassif], set `response_is_factor = TRUE`. 3 | -------------------------------------------------------------------------------- /man-roxygen/param-response.R: -------------------------------------------------------------------------------- 1 | #' @param response ([`character`])\cr 2 | #' The name of the response variable. Only needed when `response_is_factor = TRUE`. 
3 | -------------------------------------------------------------------------------- /man-roxygen/param-task.R: -------------------------------------------------------------------------------- 1 | #' @param task `[Task]`\cr 2 | #' mlr3 task. 3 | -------------------------------------------------------------------------------- /man-roxygen/param_backend.R: -------------------------------------------------------------------------------- 1 | #' @param backend ([DataBackend])\cr 2 | #' Either a [DataBackend], or any object which is convertible to a [DataBackend] with `as_data_backend()`. 3 | #' E.g., an `sf` will be converted to a [DataBackendDataTable]. 4 | -------------------------------------------------------------------------------- /man-roxygen/param_coordinate_names.R: -------------------------------------------------------------------------------- 1 | #' @param coordinate_names (`character(1)`)\cr 2 | #' The column names of the coordinates in the data. 3 | -------------------------------------------------------------------------------- /man-roxygen/param_coords_as_features.R: -------------------------------------------------------------------------------- 1 | #' @param coords_as_features (`logical(1)`)\cr 2 | #' If `TRUE`, coordinates are used as features. 3 | -------------------------------------------------------------------------------- /man-roxygen/param_crs.R: -------------------------------------------------------------------------------- 1 | #' @param crs (`character(1)`)\cr 2 | #' Coordinate reference system. 3 | #' WKT2 or EPSG string. 4 | -------------------------------------------------------------------------------- /man-roxygen/param_extra_args.R: -------------------------------------------------------------------------------- 1 | #' @param extra_args (named `list()`)\cr 2 | #' Named list of constructor arguments, required for converting task types via [convert_task()]. 
3 | -------------------------------------------------------------------------------- /man-roxygen/param_id.R: -------------------------------------------------------------------------------- 1 | #' @param id (`character(1)`)\cr 2 | #' Identifier for the new instance. 3 | -------------------------------------------------------------------------------- /man-roxygen/param_label.R: -------------------------------------------------------------------------------- 1 | #' @param label (`character(1)`)\cr 2 | #' Label for the new instance. 3 | -------------------------------------------------------------------------------- /man-roxygen/param_positive.R: -------------------------------------------------------------------------------- 1 | #' @param positive (`character(1)`)\cr 2 | #' Only for binary classification: Name of the positive class. 3 | #' The levels of the target columns are reordered accordingly, so that the first element of `$class_names` is the positive class, and the second element is the negative class. 4 | -------------------------------------------------------------------------------- /man-roxygen/param_target.R: -------------------------------------------------------------------------------- 1 | #' @param target (`character(1)`)\cr 2 | #' Name of the target column. 3 | -------------------------------------------------------------------------------- /man/DataBackendRaster.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/DataBackendRaster.R 3 | \name{DataBackendRaster} 4 | \alias{DataBackendRaster} 5 | \title{DataBackend for Raster Objects} 6 | \description{ 7 | \link[mlr3:DataBackend]{mlr3::DataBackend} for \link[terra:SpatRaster-class]{terra::SpatRaster} raster objects. 
8 | } 9 | \section{Read mode}{ 10 | 11 | There are two different ways the reading of values is performed internally: 12 | \itemize{ 13 | \item "Block mode" reads complete rows of the raster file and subsets the requested cells. 14 | This mode is faster than "cell mode" if the complete raster file is iterated over. 15 | \item "Cell mode" reads individual cells. 16 | This is faster than "block mode" if only a few cells are sampled. 17 | } 18 | 19 | "Block mode" is activated if \verb{$data(rows)} is used with an increasing integer sequence, e.g. \code{200:300}. 20 | If only a single cell is requested, "cell mode" is used. 21 | } 22 | 23 | \section{Super class}{ 24 | \code{\link[mlr3:DataBackend]{mlr3::DataBackend}} -> \code{DataBackendRaster} 25 | } 26 | \section{Active bindings}{ 27 | \if{html}{\out{
}} 28 | \describe{ 29 | \item{\code{rownames}}{(\code{integer()})\cr 30 | Returns vector of all distinct row identifiers, i.e. the contents of the primary key column.} 31 | 32 | \item{\code{colnames}}{(\code{character()})\cr 33 | Returns vector of all column names.} 34 | 35 | \item{\code{nrow}}{(\code{integer(1)})\cr 36 | Number of rows (observations).} 37 | 38 | \item{\code{ncol}}{(\code{integer(1)})\cr 39 | Number of columns (variables).} 40 | 41 | \item{\code{stack}}{(\code{SpatRaster})\cr 42 | Raster stack.} 43 | } 44 | \if{html}{\out{
}} 45 | } 46 | \section{Methods}{ 47 | \subsection{Public methods}{ 48 | \itemize{ 49 | \item \href{#method-DataBackendRaster-new}{\code{DataBackendRaster$new()}} 50 | \item \href{#method-DataBackendRaster-data}{\code{DataBackendRaster$data()}} 51 | \item \href{#method-DataBackendRaster-head}{\code{DataBackendRaster$head()}} 52 | \item \href{#method-DataBackendRaster-distinct}{\code{DataBackendRaster$distinct()}} 53 | \item \href{#method-DataBackendRaster-missings}{\code{DataBackendRaster$missings()}} 54 | \item \href{#method-DataBackendRaster-coordinates}{\code{DataBackendRaster$coordinates()}} 55 | } 56 | } 57 | \if{html}{\out{ 58 |
Inherited methods 59 | 63 |
64 | }} 65 | \if{html}{\out{
}} 66 | \if{html}{\out{}} 67 | \if{latex}{\out{\hypertarget{method-DataBackendRaster-new}{}}} 68 | \subsection{Method \code{new()}}{ 69 | Creates a new instance of this \link[R6:R6Class]{R6} class. 70 | \subsection{Usage}{ 71 | \if{html}{\out{
}}\preformatted{DataBackendRaster$new(data)}\if{html}{\out{
}} 72 | } 73 | 74 | \subsection{Arguments}{ 75 | \if{html}{\out{
}} 76 | \describe{ 77 | \item{\code{data}}{(\link[terra:SpatRaster-class]{terra::SpatRaster})\cr 78 | The input \link[terra:SpatRaster-class]{terra::SpatRaster}.} 79 | } 80 | \if{html}{\out{
}} 81 | } 82 | } 83 | \if{html}{\out{
}} 84 | \if{html}{\out{}} 85 | \if{latex}{\out{\hypertarget{method-DataBackendRaster-data}{}}} 86 | \subsection{Method \code{data()}}{ 87 | Returns a slice of the raster in the specified format. 88 | Currently, the only supported format is \code{"data.table"}. 89 | 90 | The rows must be addressed as a vector of cell indices, columns must be 91 | referred to via layer names. Queries for rows with no matching row id and 92 | queries for columns with no matching column name are silently ignored. 93 | 94 | Rows are guaranteed to be returned in the same order as \code{rows}, columns 95 | may be returned in an arbitrary order. Duplicated row ids result in 96 | duplicated rows, duplicated column names lead to an exception. 97 | \subsection{Usage}{ 98 | \if{html}{\out{
}}\preformatted{DataBackendRaster$data(rows, cols, data_format = "data.table")}\if{html}{\out{
}} 99 | } 100 | 101 | \subsection{Arguments}{ 102 | \if{html}{\out{
}} 103 | \describe{ 104 | \item{\code{rows}}{\code{integer()}\cr 105 | Row indices. Row indices start with 1 in the upper left corner in the 106 | raster, increase from left to right and then from top to bottom. The last 107 | cell is in the bottom right corner and the row index equals the number of 108 | cells in the raster.} 109 | 110 | \item{\code{cols}}{\code{character()}\cr 111 | Column names.} 112 | 113 | \item{\code{data_format}}{(\code{character(1)})\cr 114 | Desired data format. Currently only \code{"data.table"} supported.} 115 | } 116 | \if{html}{\out{
}} 117 | } 118 | } 119 | \if{html}{\out{
}} 120 | \if{html}{\out{}} 121 | \if{latex}{\out{\hypertarget{method-DataBackendRaster-head}{}}} 122 | \subsection{Method \code{head()}}{ 123 | Retrieve the first \code{n} rows. 124 | \subsection{Usage}{ 125 | \if{html}{\out{
}}\preformatted{DataBackendRaster$head(n = 6L)}\if{html}{\out{
}} 126 | } 127 | 128 | \subsection{Arguments}{ 129 | \if{html}{\out{
}} 130 | \describe{ 131 | \item{\code{n}}{(\code{integer(1)})\cr 132 | Number of rows.} 133 | } 134 | \if{html}{\out{
}} 135 | } 136 | \subsection{Returns}{ 137 | \code{\link[data.table:data.table]{data.table::data.table()}} of the first \code{n} rows. 138 | } 139 | } 140 | \if{html}{\out{
}} 141 | \if{html}{\out{}} 142 | \if{latex}{\out{\hypertarget{method-DataBackendRaster-distinct}{}}} 143 | \subsection{Method \code{distinct()}}{ 144 | Returns a named list of vectors of distinct values for each column 145 | specified. If \code{na_rm} is \code{TRUE}, missing values are removed from the 146 | returned vectors of distinct values. Non-existing rows and columns are 147 | silently ignored. 148 | \subsection{Usage}{ 149 | \if{html}{\out{
}}\preformatted{DataBackendRaster$distinct(rows, cols, na_rm = TRUE)}\if{html}{\out{
}} 150 | } 151 | 152 | \subsection{Arguments}{ 153 | \if{html}{\out{
}} 154 | \describe{ 155 | \item{\code{rows}}{\code{integer()}\cr 156 | Row indices. Row indices start with 1 in the upper left corner in the 157 | raster, increase from left to right and then from top to bottom. The last 158 | cell is in the bottom right corner and the row index equals the number of 159 | cells in the raster.} 160 | 161 | \item{\code{cols}}{\code{character()}\cr 162 | Column names.} 163 | 164 | \item{\code{na_rm}}{\code{logical(1)}\cr 165 | Whether to remove NAs or not.} 166 | } 167 | \if{html}{\out{
}} 168 | } 169 | \subsection{Returns}{ 170 | Named \code{list()} of distinct values. 171 | } 172 | } 173 | \if{html}{\out{
}} 174 | \if{html}{\out{}} 175 | \if{latex}{\out{\hypertarget{method-DataBackendRaster-missings}{}}} 176 | \subsection{Method \code{missings()}}{ 177 | Returns the number of missing values per column in the specified slice 178 | of data. Non-existing rows and columns are silently ignored. 179 | \subsection{Usage}{ 180 | \if{html}{\out{
}}\preformatted{DataBackendRaster$missings(rows, cols)}\if{html}{\out{
}} 181 | } 182 | 183 | \subsection{Arguments}{ 184 | \if{html}{\out{
}} 185 | \describe{ 186 | \item{\code{rows}}{\code{integer()}\cr 187 | Row indices. Row indices start with 1 in the upper left corner in the 188 | raster, increase from left to right and then from top to bottom. The last 189 | cell is in the bottom right corner and the row index equals the number of 190 | cells in the raster.} 191 | 192 | \item{\code{cols}}{\code{character()}\cr 193 | Column names.} 194 | } 195 | \if{html}{\out{
}} 196 | } 197 | \subsection{Returns}{ 198 | Total of missing values per column (named \code{numeric()}). 199 | } 200 | } 201 | \if{html}{\out{
}} 202 | \if{html}{\out{}} 203 | \if{latex}{\out{\hypertarget{method-DataBackendRaster-coordinates}{}}} 204 | \subsection{Method \code{coordinates()}}{ 205 | Returns the coordinates of \code{rows}. 206 | If \code{rows} is missing, all coordinates are returned. 207 | \subsection{Usage}{ 208 | \if{html}{\out{
}}\preformatted{DataBackendRaster$coordinates(rows)}\if{html}{\out{
}} 209 | } 210 | 211 | \subsection{Arguments}{ 212 | \if{html}{\out{
}} 213 | \describe{ 214 | \item{\code{rows}}{\code{integer()}\cr 215 | Row indices. Row indices start with 1 in the upper left corner in the 216 | raster, increase from left to right and then from top to bottom. The last 217 | cell is in the bottom right corner and the row index equals the number of 218 | cells in the raster.} 219 | } 220 | \if{html}{\out{
}} 221 | } 222 | \subsection{Returns}{ 223 | \code{\link[data.table:data.table]{data.table::data.table()}} of coordinates of \code{rows}. 224 | } 225 | } 226 | } 227 | -------------------------------------------------------------------------------- /man/DataBackendVector.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/DataBackendVector.R 3 | \name{DataBackendVector} 4 | \alias{DataBackendVector} 5 | \title{DataBackend for Vector Objects} 6 | \description{ 7 | \link[mlr3:DataBackend]{mlr3::DataBackend} for \link[sf:sf]{sf::sf} vector objects. 8 | } 9 | \section{Super classes}{ 10 | \code{\link[mlr3:DataBackend]{mlr3::DataBackend}} -> \code{\link[mlr3:DataBackendDataTable]{mlr3::DataBackendDataTable}} -> \code{DataBackendVector} 11 | } 12 | \section{Active bindings}{ 13 | \if{html}{\out{
}} 14 | \describe{ 15 | \item{\code{sfc}}{(\link[sf:sfc]{sf::sfc})\cr 16 | Returns the sfc object.} 17 | } 18 | \if{html}{\out{
}} 19 | } 20 | \section{Methods}{ 21 | \subsection{Public methods}{ 22 | \itemize{ 23 | \item \href{#method-DataBackendVector-new}{\code{DataBackendVector$new()}} 24 | } 25 | } 26 | \if{html}{\out{ 27 |
Inherited methods 28 | 36 |
37 | }} 38 | \if{html}{\out{
}} 39 | \if{html}{\out{}} 40 | \if{latex}{\out{\hypertarget{method-DataBackendVector-new}{}}} 41 | \subsection{Method \code{new()}}{ 42 | Creates a new instance of this \link[R6:R6Class]{R6} class. 43 | \subsection{Usage}{ 44 | \if{html}{\out{
}}\preformatted{DataBackendVector$new(data, primary_key)}\if{html}{\out{
}} 45 | } 46 | 47 | \subsection{Arguments}{ 48 | \if{html}{\out{
}} 49 | \describe{ 50 | \item{\code{data}}{(\code{sf})\cr 51 | A vector object.} 52 | 53 | \item{\code{primary_key}}{(\code{character(1)} | \code{integer()})\cr 54 | Name of the primary key column, or integer vector of row ids.} 55 | } 56 | \if{html}{\out{
}} 57 | } 58 | } 59 | } 60 | -------------------------------------------------------------------------------- /man/TaskClassifST.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/TaskClassifST.R 3 | \name{TaskClassifST} 4 | \alias{TaskClassifST} 5 | \title{Spatiotemporal Classification Task} 6 | \description{ 7 | This task specializes \link{TaskClassif} for spatiotemporal classification problems. 8 | 9 | A spatial example task is available via \code{tsk("ecuador")}. 10 | 11 | The coordinate reference system passed during initialization must match the one which was used during data creation, otherwise offsets of multiple meters may occur. 12 | By default, coordinates are not used as features. 13 | This can be changed by setting \code{coords_as_features = TRUE}. 14 | } 15 | \section{Super classes}{ 16 | \code{\link[mlr3:Task]{mlr3::Task}} -> \code{\link[mlr3:TaskSupervised]{mlr3::TaskSupervised}} -> \code{\link[mlr3:TaskClassif]{mlr3::TaskClassif}} -> \code{TaskClassifST} 17 | } 18 | \section{Active bindings}{ 19 | \if{html}{\out{
}} 20 | \describe{ 21 | \item{\code{crs}}{(\code{character(1)})\cr 22 | Returns coordinate reference system of task.} 23 | 24 | \item{\code{coordinate_names}}{(\code{character()})\cr 25 | Returns coordinate names.} 26 | 27 | \item{\code{coords_as_features}}{(\code{logical(1)})\cr 28 | If \code{TRUE}, coordinates are used as features.} 29 | } 30 | \if{html}{\out{
}} 31 | } 32 | \section{Methods}{ 33 | \subsection{Public methods}{ 34 | \itemize{ 35 | \item \href{#method-TaskClassifST-new}{\code{TaskClassifST$new()}} 36 | \item \href{#method-TaskClassifST-coordinates}{\code{TaskClassifST$coordinates()}} 37 | \item \href{#method-TaskClassifST-print}{\code{TaskClassifST$print()}} 38 | \item \href{#method-TaskClassifST-clone}{\code{TaskClassifST$clone()}} 39 | } 40 | } 41 | \if{html}{\out{ 42 |
Inherited methods 43 | 63 |
64 | }} 65 | \if{html}{\out{
}} 66 | \if{html}{\out{}} 67 | \if{latex}{\out{\hypertarget{method-TaskClassifST-new}{}}} 68 | \subsection{Method \code{new()}}{ 69 | Creates a new instance of this \link[R6:R6Class]{R6} class. 70 | The function \code{\link[=as_task_classif_st]{as_task_classif_st()}} provides an alternative way to construct classification tasks. 71 | \subsection{Usage}{ 72 | \if{html}{\out{
}}\preformatted{TaskClassifST$new( 73 | id, 74 | backend, 75 | target, 76 | positive = NULL, 77 | label = NA_character_, 78 | coordinate_names, 79 | crs = NA_character_, 80 | coords_as_features = FALSE, 81 | extra_args = list() 82 | )}\if{html}{\out{
}} 83 | } 84 | 85 | \subsection{Arguments}{ 86 | \if{html}{\out{
}} 87 | \describe{ 88 | \item{\code{id}}{(\code{character(1)})\cr 89 | Identifier for the new instance.} 90 | 91 | \item{\code{backend}}{(\link{DataBackend})\cr 92 | Either a \link{DataBackend}, or any object which is convertible to a \link{DataBackend} with \code{as_data_backend()}. 93 | E.g., an \code{sf} will be converted to a \link{DataBackendDataTable}.} 94 | 95 | \item{\code{target}}{(\code{character(1)})\cr 96 | Name of the target column.} 97 | 98 | \item{\code{positive}}{(\code{character(1)})\cr 99 | Only for binary classification: Name of the positive class. 100 | The levels of the target columns are reordered accordingly, so that the first element of \verb{$class_names} is the positive class, and the second element is the negative class.} 101 | 102 | \item{\code{label}}{(\code{character(1)})\cr 103 | Label for the new instance.} 104 | 105 | \item{\code{coordinate_names}}{(\code{character(1)})\cr 106 | The column names of the coordinates in the data.} 107 | 108 | \item{\code{crs}}{(\code{character(1)})\cr 109 | Coordinate reference system. 110 | WKT2 or EPSG string.} 111 | 112 | \item{\code{coords_as_features}}{(\code{logical(1)})\cr 113 | If \code{TRUE}, coordinates are used as features.} 114 | 115 | \item{\code{extra_args}}{(named \code{list()})\cr 116 | Named list of constructor arguments, required for converting task types via \code{\link[=convert_task]{convert_task()}}.} 117 | } 118 | \if{html}{\out{
}} 119 | } 120 | } 121 | \if{html}{\out{
}} 122 | \if{html}{\out{}} 123 | \if{latex}{\out{\hypertarget{method-TaskClassifST-coordinates}{}}} 124 | \subsection{Method \code{coordinates()}}{ 125 | Returns coordinates of observations. 126 | \subsection{Usage}{ 127 | \if{html}{\out{
}}\preformatted{TaskClassifST$coordinates(row_ids = NULL)}\if{html}{\out{
}} 128 | } 129 | 130 | \subsection{Arguments}{ 131 | \if{html}{\out{
}} 132 | \describe{ 133 | \item{\code{row_ids}}{(\code{integer()})\cr 134 | Vector of row indices as subset of \code{task$row_ids}.} 135 | } 136 | \if{html}{\out{
}} 137 | } 138 | \subsection{Returns}{ 139 | \code{\link[data.table:data.table]{data.table::data.table()}} 140 | } 141 | } 142 | \if{html}{\out{
}} 143 | \if{html}{\out{}} 144 | \if{latex}{\out{\hypertarget{method-TaskClassifST-print}{}}} 145 | \subsection{Method \code{print()}}{ 146 | Print the task. 147 | \subsection{Usage}{ 148 | \if{html}{\out{
}}\preformatted{TaskClassifST$print(...)}\if{html}{\out{
}} 149 | } 150 | 151 | \subsection{Arguments}{ 152 | \if{html}{\out{
}} 153 | \describe{ 154 | \item{\code{...}}{Arguments passed to the \verb{$print()} method of the superclass.} 155 | } 156 | \if{html}{\out{
}} 157 | } 158 | } 159 | \if{html}{\out{
}} 160 | \if{html}{\out{}} 161 | \if{latex}{\out{\hypertarget{method-TaskClassifST-clone}{}}} 162 | \subsection{Method \code{clone()}}{ 163 | The objects of this class are cloneable with this method. 164 | \subsection{Usage}{ 165 | \if{html}{\out{
}}\preformatted{TaskClassifST$clone(deep = FALSE)}\if{html}{\out{
}} 166 | } 167 | 168 | \subsection{Arguments}{ 169 | \if{html}{\out{
}} 170 | \describe{ 171 | \item{\code{deep}}{Whether to make a deep clone.} 172 | } 173 | \if{html}{\out{
}} 174 | } 175 | } 176 | } 177 | -------------------------------------------------------------------------------- /man/TaskRegrST.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/TaskRegrST.R 3 | \name{TaskRegrST} 4 | \alias{TaskRegrST} 5 | \title{Spatiotemporal Regression Task} 6 | \description{ 7 | This task specializes \link{TaskRegr} for spatiotemporal regression problems. 8 | 9 | A spatial example task is available via \code{tsk("cookfarm_mlr3")}. 10 | 11 | The coordinate reference system passed during initialization must match the one which was used during data creation, otherwise offsets of multiple meters may occur. 12 | By default, coordinates are not used as features. 13 | This can be changed by setting \code{coords_as_features = TRUE}. 14 | } 15 | \section{Super classes}{ 16 | \code{\link[mlr3:Task]{mlr3::Task}} -> \code{\link[mlr3:TaskSupervised]{mlr3::TaskSupervised}} -> \code{\link[mlr3:TaskRegr]{mlr3::TaskRegr}} -> \code{TaskRegrST} 17 | } 18 | \section{Active bindings}{ 19 | \if{html}{\out{
}} 20 | \describe{ 21 | \item{\code{crs}}{(\code{character(1)})\cr 22 | Returns coordinate reference system of the task.} 23 | 24 | \item{\code{coordinate_names}}{(\code{character()})\cr 25 | Returns coordinate names.} 26 | 27 | \item{\code{coords_as_features}}{(\code{logical(1)})\cr 28 | If \code{TRUE}, coordinates are used as features.} 29 | } 30 | \if{html}{\out{
}} 31 | } 32 | \section{Methods}{ 33 | \subsection{Public methods}{ 34 | \itemize{ 35 | \item \href{#method-TaskRegrST-new}{\code{TaskRegrST$new()}} 36 | \item \href{#method-TaskRegrST-coordinates}{\code{TaskRegrST$coordinates()}} 37 | \item \href{#method-TaskRegrST-print}{\code{TaskRegrST$print()}} 38 | \item \href{#method-TaskRegrST-clone}{\code{TaskRegrST$clone()}} 39 | } 40 | } 41 | \if{html}{\out{ 42 |
Inherited methods 43 | 63 |
64 | }} 65 | \if{html}{\out{
}} 66 | \if{html}{\out{}} 67 | \if{latex}{\out{\hypertarget{method-TaskRegrST-new}{}}} 68 | \subsection{Method \code{new()}}{ 69 | Creates a new instance of this \link[R6:R6Class]{R6} class. 70 | The function \code{\link[=as_task_regr_st]{as_task_regr_st()}} provides an alternative way to construct regression tasks. 71 | \subsection{Usage}{ 72 | \if{html}{\out{
}}\preformatted{TaskRegrST$new( 73 | id, 74 | backend, 75 | target, 76 | label = NA_character_, 77 | coordinate_names, 78 | crs = NA_character_, 79 | coords_as_features = FALSE, 80 | extra_args = list() 81 | )}\if{html}{\out{
}} 82 | } 83 | 84 | \subsection{Arguments}{ 85 | \if{html}{\out{
}} 86 | \describe{ 87 | \item{\code{id}}{(\code{character(1)})\cr 88 | Identifier for the new instance.} 89 | 90 | \item{\code{backend}}{(\link{DataBackend})\cr 91 | Either a \link{DataBackend}, or any object which is convertible to a \link{DataBackend} with \code{as_data_backend()}. 92 | E.g., an \code{sf} will be converted to a \link{DataBackendDataTable}.} 93 | 94 | \item{\code{target}}{(\code{character(1)})\cr 95 | Name of the target column.} 96 | 97 | \item{\code{label}}{(\code{character(1)})\cr 98 | Label for the new instance.} 99 | 100 | \item{\code{coordinate_names}}{(\code{character(1)})\cr 101 | The column names of the coordinates in the data.} 102 | 103 | \item{\code{crs}}{(\code{character(1)})\cr 104 | Coordinate reference system. 105 | WKT2 or EPSG string.} 106 | 107 | \item{\code{coords_as_features}}{(\code{logical(1)})\cr 108 | If \code{TRUE}, coordinates are used as features.} 109 | 110 | \item{\code{extra_args}}{(named \code{list()})\cr 111 | Named list of constructor arguments, required for converting task types via \code{\link[=convert_task]{convert_task()}}.} 112 | } 113 | \if{html}{\out{
}} 114 | } 115 | } 116 | \if{html}{\out{
}} 117 | \if{html}{\out{}} 118 | \if{latex}{\out{\hypertarget{method-TaskRegrST-coordinates}{}}} 119 | \subsection{Method \code{coordinates()}}{ 120 | Returns coordinates of observations. 121 | \subsection{Usage}{ 122 | \if{html}{\out{
}}\preformatted{TaskRegrST$coordinates(row_ids = NULL)}\if{html}{\out{
}} 123 | } 124 | 125 | \subsection{Arguments}{ 126 | \if{html}{\out{
}} 127 | \describe{ 128 | \item{\code{row_ids}}{(\code{integer()})\cr 129 | Vector of row indices as subset of \code{task$row_ids}.} 130 | } 131 | \if{html}{\out{
}} 132 | } 133 | \subsection{Returns}{ 134 | \code{\link[data.table:data.table]{data.table::data.table()}} 135 | } 136 | } 137 | \if{html}{\out{
}} 138 | \if{html}{\out{}} 139 | \if{latex}{\out{\hypertarget{method-TaskRegrST-print}{}}} 140 | \subsection{Method \code{print()}}{ 141 | Print the task. 142 | \subsection{Usage}{ 143 | \if{html}{\out{
}}\preformatted{TaskRegrST$print(...)}\if{html}{\out{
}} 144 | } 145 | 146 | \subsection{Arguments}{ 147 | \if{html}{\out{
}} 148 | \describe{ 149 | \item{\code{...}}{Arguments passed to the \verb{$print()} method of the superclass.} 150 | } 151 | \if{html}{\out{
}} 152 | } 153 | } 154 | \if{html}{\out{
}} 155 | \if{html}{\out{}} 156 | \if{latex}{\out{\hypertarget{method-TaskRegrST-clone}{}}} 157 | \subsection{Method \code{clone()}}{ 158 | The objects of this class are cloneable with this method. 159 | \subsection{Usage}{ 160 | \if{html}{\out{
}}\preformatted{TaskRegrST$clone(deep = FALSE)}\if{html}{\out{
}} 161 | } 162 | 163 | \subsection{Arguments}{ 164 | \if{html}{\out{
}} 165 | \describe{ 166 | \item{\code{deep}}{Whether to make a deep clone.} 167 | } 168 | \if{html}{\out{
}} 169 | } 170 | } 171 | } 172 | -------------------------------------------------------------------------------- /man/as_data_backend.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/DataBackendRaster.R, R/DataBackendVector.R 3 | \name{as_data_backend.stars} 4 | \alias{as_data_backend.stars} 5 | \alias{as_data_backend.SpatRaster} 6 | \alias{as_data_backend.RasterBrick} 7 | \alias{as_data_backend.RasterStack} 8 | \alias{as_data_backend.sf} 9 | \title{Coerce to spatial DataBackend} 10 | \usage{ 11 | \method{as_data_backend}{stars}(data, primary_key = NULL, ...) 12 | 13 | \method{as_data_backend}{SpatRaster}(data, primary_key = NULL, ...) 14 | 15 | \method{as_data_backend}{RasterBrick}(data, primary_key = NULL, ...) 16 | 17 | \method{as_data_backend}{RasterStack}(data, primary_key = NULL, ...) 18 | 19 | \method{as_data_backend}{sf}(data, primary_key = NULL, keep_rownames = FALSE, ...) 20 | } 21 | \arguments{ 22 | \item{data}{(\link[terra:SpatRaster-class]{terra::SpatRaster})\cr 23 | The input \link[terra:SpatRaster-class]{terra::SpatRaster}.} 24 | 25 | \item{primary_key}{(\code{character(1)} | \code{integer()})\cr 26 | Name of the primary key column, or integer vector of row ids.} 27 | 28 | \item{...}{(\code{any})\cr 29 | Not used.} 30 | 31 | \item{keep_rownames}{(\code{logical(1)} | \code{character(1)})\cr 32 | If \code{TRUE} or a single string, keeps the row names of \code{data} as a new column. 33 | The column is named like the provided string, defaulting to \code{"..rownames"} for \code{keep_rownames == TRUE}. 34 | Note that the created column will be used as a regular feature by the task unless you manually change the column role. 35 | Also see \code{\link[data.table:as.data.table]{data.table::as.data.table()}}.} 36 | } 37 | \value{ 38 | \link{DataBackend}. 39 | } 40 | \description{ 41 | Wraps a \link{DataBackend} around spatial objects. 
42 | Currently these S3 methods are only alternative ways for writing \code{DataBackendRaster$new()}. 43 | They do not support coercing from other backends yet. 44 | } 45 | -------------------------------------------------------------------------------- /man/as_task_classif_st.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/as_task_classif_st.R 3 | \name{as_task_classif_st} 4 | \alias{as_task_classif_st} 5 | \alias{as_task_classif_st.TaskClassifST} 6 | \alias{as_task_classif_st.data.frame} 7 | \alias{as_task_classif_st.DataBackend} 8 | \alias{as_task_classif_st.sf} 9 | \alias{as_task_classif_st.TaskRegrST} 10 | \title{Convert to a Spatiotemporal Classification Task} 11 | \usage{ 12 | as_task_classif_st(x, ...) 13 | 14 | \method{as_task_classif_st}{TaskClassifST}(x, clone = FALSE, ...) 15 | 16 | \method{as_task_classif_st}{data.frame}( 17 | x, 18 | target, 19 | id = deparse(substitute(x)), 20 | positive = NULL, 21 | coordinate_names, 22 | crs = NA_character_, 23 | coords_as_features = FALSE, 24 | label = NA_character_, 25 | ... 26 | ) 27 | 28 | \method{as_task_classif_st}{DataBackend}( 29 | x, 30 | target, 31 | id = deparse(substitute(x)), 32 | positive = NULL, 33 | coordinate_names, 34 | crs, 35 | coords_as_features = FALSE, 36 | label = NA_character_, 37 | ... 38 | ) 39 | 40 | \method{as_task_classif_st}{sf}( 41 | x, 42 | target = NULL, 43 | id = deparse(substitute(x)), 44 | positive = NULL, 45 | coords_as_features = FALSE, 46 | label = NA_character_, 47 | ... 48 | ) 49 | 50 | \method{as_task_classif_st}{TaskRegrST}( 51 | x, 52 | target = NULL, 53 | drop_original_target = FALSE, 54 | drop_levels = TRUE, 55 | ... 
56 | ) 57 | } 58 | \arguments{ 59 | \item{x}{(any)\cr 60 | Object to convert.} 61 | 62 | \item{...}{(any)\cr 63 | Additional arguments.} 64 | 65 | \item{clone}{(\code{logical(1)})\cr 66 | If \code{TRUE}, ensures that the returned object is not the same as the input \code{x}.} 67 | 68 | \item{target}{(\code{character(1)})\cr 69 | Name of the target column.} 70 | 71 | \item{id}{(\code{character(1)})\cr 72 | Id for the new task. 73 | Defaults to the (deparsed and substituted) name of the data argument.} 74 | 75 | \item{positive}{(\code{character(1)})\cr 76 | Level of the positive class. See \link[mlr3]{TaskClassif}.} 77 | 78 | \item{coordinate_names}{(\code{character(1)})\cr 79 | The column names of the coordinates in the data.} 80 | 81 | \item{crs}{(\code{character(1)})\cr 82 | Coordinate reference system. 83 | WKT2 or EPSG string.} 84 | 85 | \item{coords_as_features}{(\code{logical(1)})\cr 86 | If \code{TRUE}, coordinates are used as features.} 87 | 88 | \item{label}{(\code{character(1)})\cr 89 | Label for the new instance.} 90 | 91 | \item{drop_original_target}{(\code{logical(1)})\cr 92 | If \code{FALSE} (default), the original target is added as a feature. 93 | Otherwise the original target is dropped.} 94 | 95 | \item{drop_levels}{(\code{logical(1)})\cr 96 | If \code{TRUE} (default), unused levels of the new target variable are dropped.} 97 | } 98 | \value{ 99 | \link{TaskClassifST} 100 | } 101 | \description{ 102 | Convert object to a \link{TaskClassifST}. 103 | This is a S3 generic, specialized for at least the following objects: 104 | \enumerate{ 105 | \item \link{TaskClassifST}: Ensure the identity. 106 | \item \code{\link[=data.frame]{data.frame()}} and \link{DataBackend}: Provides an alternative to the constructor of \link{TaskClassifST}. 107 | \item \link[sf:sf]{sf::sf}: Extracts spatial meta data before construction. 108 | \item \link{TaskRegr}: Calls \code{\link[=convert_task]{convert_task()}}. 
109 | } 110 | } 111 | -------------------------------------------------------------------------------- /man/as_task_regr_st.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/as_task_regr_st.R 3 | \name{as_task_regr_st} 4 | \alias{as_task_regr_st} 5 | \alias{as_task_regr_st.TaskRegrST} 6 | \alias{as_task_regr_st.data.frame} 7 | \alias{as_task_regr_st.DataBackend} 8 | \alias{as_task_regr_st.sf} 9 | \alias{as_task_regr_st.TaskClassifST} 10 | \title{Convert to a Spatiotemporal Regression Task} 11 | \usage{ 12 | as_task_regr_st(x, ...) 13 | 14 | \method{as_task_regr_st}{TaskRegrST}(x, clone = FALSE, ...) 15 | 16 | \method{as_task_regr_st}{data.frame}( 17 | x, 18 | target, 19 | id = deparse(substitute(x)), 20 | coordinate_names, 21 | crs = NA_character_, 22 | coords_as_features = FALSE, 23 | label = NA_character_, 24 | ... 25 | ) 26 | 27 | \method{as_task_regr_st}{DataBackend}( 28 | x, 29 | target, 30 | id = deparse(substitute(x)), 31 | coordinate_names, 32 | crs, 33 | coords_as_features = FALSE, 34 | label = NA_character_, 35 | ... 36 | ) 37 | 38 | \method{as_task_regr_st}{sf}( 39 | x, 40 | target = NULL, 41 | id = deparse(substitute(x)), 42 | coords_as_features = FALSE, 43 | label = NA_character_, 44 | ... 45 | ) 46 | 47 | \method{as_task_regr_st}{TaskClassifST}( 48 | x, 49 | target = NULL, 50 | drop_original_target = FALSE, 51 | drop_levels = TRUE, 52 | ... 53 | ) 54 | } 55 | \arguments{ 56 | \item{x}{(any)\cr 57 | Object to convert.} 58 | 59 | \item{...}{(any)\cr 60 | Additional arguments.} 61 | 62 | \item{clone}{(\code{logical(1)})\cr 63 | If \code{TRUE}, ensures that the returned object is not the same as the input \code{x}.} 64 | 65 | \item{target}{(\code{character(1)})\cr 66 | Name of the target column.} 67 | 68 | \item{id}{(\code{character(1)})\cr 69 | Id for the new task. 
70 | Defaults to the (deparsed and substituted) name of the data argument.} 71 | 72 | \item{coordinate_names}{(\code{character(1)})\cr 73 | The column names of the coordinates in the data.} 74 | 75 | \item{crs}{(\code{character(1)})\cr 76 | Coordinate reference system. 77 | WKT2 or EPSG string.} 78 | 79 | \item{coords_as_features}{(\code{logical(1)})\cr 80 | If \code{TRUE}, coordinates are used as features.} 81 | 82 | \item{label}{(\code{character(1)})\cr 83 | Label for the new instance.} 84 | 85 | \item{drop_original_target}{(\code{logical(1)})\cr 86 | If \code{FALSE} (default), the original target is added as a feature. 87 | Otherwise the original target is dropped.} 88 | 89 | \item{drop_levels}{(\code{logical(1)})\cr 90 | If \code{TRUE} (default), unused levels of the new target variable are dropped.} 91 | } 92 | \value{ 93 | \link{TaskRegrST} 94 | } 95 | \description{ 96 | Convert object to a \link{TaskRegrST}. 97 | This is a S3 generic, specialized for at least the following objects: 98 | \enumerate{ 99 | \item \link{TaskRegrST}: Ensure the identity. 100 | \item \code{\link[=data.frame]{data.frame()}} and \link{DataBackend}: Provides an alternative to the constructor of \link{TaskRegrST}. 101 | \item \link[sf:sf]{sf::sf}: Extracts spatial meta data before construction. 102 | \item \link{TaskClassif}: Calls \code{\link[=convert_task]{convert_task()}}. 
103 | } 104 | } 105 | -------------------------------------------------------------------------------- /man/block_size.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/helper.R 3 | \name{block_size} 4 | \alias{block_size} 5 | \title{Split Raster Into Chunks} 6 | \usage{ 7 | block_size(raster, chunksize) 8 | } 9 | \arguments{ 10 | \item{raster}{(\link[terra:SpatRaster-class]{terra::SpatRaster})\cr 11 | Raster to be split into chunks.} 12 | 13 | \item{chunksize}{(\code{integer(1)})\cr 14 | The chunksize determines in how many subparts the prediction task will be 15 | split into. The value can be roughly thought of as megabyte of a raster file 16 | on disk. For example, if a prediction on a 1 GB file would be carried out 17 | with \code{chunksize = 100L}, the prediction would happen in 10 chunks. 18 | 19 | The default of \code{chunksize = 1000L} might be a good compromise between speed 20 | and memory usage. If you find yourself running out of memory, reduce this 21 | value.} 22 | } 23 | \description{ 24 | Splits raster into chunks. 
25 | } 26 | \keyword{internal} 27 | -------------------------------------------------------------------------------- /man/factor_layer.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data.R 3 | \name{factor_layer} 4 | \alias{factor_layer} 5 | \title{Factor Layer Generator} 6 | \usage{ 7 | factor_layer(id, levels, in_memory = FALSE) 8 | } 9 | \arguments{ 10 | \item{id}{(\code{character(1)})\cr 11 | Layer id.} 12 | 13 | \item{levels}{(\code{character()})\cr 14 | Factor levels.} 15 | 16 | \item{in_memory}{(\code{logical(1)})\cr 17 | If \code{FALSE} (default), layer is written to disk.} 18 | } 19 | \value{ 20 | Named \code{list()} 21 | } 22 | \description{ 23 | Generates a factor layer when passed to \code{\link[=generate_stack]{generate_stack()}}. 24 | } 25 | \keyword{internal} 26 | -------------------------------------------------------------------------------- /man/figures/land_cover.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlr-org/mlr3spatial/f8e417455697159103f126ae89fc858a70861af8/man/figures/land_cover.png -------------------------------------------------------------------------------- /man/figures/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlr-org/mlr3spatial/f8e417455697159103f126ae89fc858a70861af8/man/figures/logo.png -------------------------------------------------------------------------------- /man/figures/sentinel.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlr-org/mlr3spatial/f8e417455697159103f126ae89fc858a70861af8/man/figures/sentinel.png -------------------------------------------------------------------------------- /man/generate_stack.Rd: 
-------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data.R 3 | \name{generate_stack} 4 | \alias{generate_stack} 5 | \title{Generate Raster Stack} 6 | \usage{ 7 | generate_stack( 8 | layers, 9 | layer_size = NULL, 10 | dimension = NULL, 11 | multi_layer_file = FALSE 12 | ) 13 | } 14 | \arguments{ 15 | \item{layers}{(List of \code{\link[=numeric_layer]{numeric_layer()}} and \code{\link[=factor_layer]{factor_layer()}})\cr 16 | List of layers.} 17 | 18 | \item{layer_size}{(\code{numeric(1)})\cr 19 | Size of a single layer in megabytes.} 20 | 21 | \item{dimension}{(\code{integer(1)})\cr 22 | Dimension of the squared layers.} 23 | 24 | \item{multi_layer_file}{(\code{logical(1)})\cr 25 | If \code{TRUE}, raster is written to disk as a single multi-layer file. 26 | Overrides the \code{in_memory} argument of \code{numeric_layer()} and \code{factor_layer()}. 27 | 28 | \code{layer_size} and \code{dimension} are mutually exclusive.} 29 | } 30 | \value{ 31 | \link[terra:SpatRaster-class]{terra::SpatRaster} 32 | } 33 | \description{ 34 | Generates a raster stack. 35 | } 36 | \keyword{internal} 37 | -------------------------------------------------------------------------------- /man/leipzig.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/TaskClassif_leipzig.R 3 | \docType{data} 4 | \name{leipzig} 5 | \alias{leipzig} 6 | \alias{mlr_tasks_leipzig} 7 | \title{Leipzig Land Cover Task} 8 | \source{ 9 | Copernicus Sentinel Data (2021). Retrieved from Copernicus Open Access Hub and processed by European Space Agency. 10 | } 11 | \description{ 12 | Point survey of land cover in Leipzig. 13 | Includes Sentinel-2 spectral bands and NDVI. 
14 | } 15 | \examples{ 16 | if (requireNamespace("sf")) { 17 | library(sf) 18 | data("leipzig", package = "mlr3spatial") 19 | print(leipzig) 20 | } 21 | } 22 | \keyword{data} 23 | -------------------------------------------------------------------------------- /man/mask_stack.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data.R 3 | \name{mask_stack} 4 | \alias{mask_stack} 5 | \title{Mask Raster Stack} 6 | \usage{ 7 | mask_stack(stack) 8 | } 9 | \arguments{ 10 | \item{stack}{(\link[terra:SpatRaster-class]{terra::SpatRaster})\cr 11 | Raster stack.} 12 | } 13 | \value{ 14 | \link[terra:SpatRaster-class]{terra::SpatRaster} 15 | } 16 | \description{ 17 | Masks stack to a circular area of interest. 18 | } 19 | \keyword{internal} 20 | -------------------------------------------------------------------------------- /man/mlr3spatial-package.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/zzz.R 3 | \docType{package} 4 | \name{mlr3spatial-package} 5 | \alias{mlr3spatial} 6 | \alias{mlr3spatial-package} 7 | \title{mlr3spatial: Support for Spatial Objects Within the 'mlr3' Ecosystem} 8 | \description{ 9 | \if{html}{\figure{logo.png}{options: style='float: right' alt='logo' width='120'}} 10 | 11 | Extends the 'mlr3' ML framework with methods for spatial objects. Data storage and prediction are supported for packages 'terra', 'raster' and 'stars'. 
12 | } 13 | \section{Learn mlr3}{ 14 | 15 | \itemize{ 16 | \item Book on mlr3: \url{https://mlr3book.mlr-org.com} 17 | \item Use cases and examples gallery: \url{https://mlr3gallery.mlr-org.com} 18 | \item Cheat Sheets: \url{https://github.com/mlr-org/mlr3cheatsheets} 19 | } 20 | } 21 | 22 | \section{mlr3 extensions}{ 23 | 24 | \itemize{ 25 | \item Preprocessing and machine learning pipelines: \CRANpkg{mlr3pipelines} 26 | \item Analysis of benchmark experiments: \CRANpkg{mlr3benchmark} 27 | \item More classification and regression tasks: \CRANpkg{mlr3data} 28 | \item Connector to \href{https://www.openml.org}{OpenML}: \CRANpkg{mlr3oml} 29 | \item Solid selection of good classification and regression learners: \CRANpkg{mlr3learners} 30 | \item Even more learners: \url{https://github.com/mlr-org/mlr3extralearners} 31 | \item Tuning of hyperparameters: \CRANpkg{mlr3tuning} 32 | \item Hyperband tuner: \CRANpkg{mlr3hyperband} 33 | \item Visualizations for many \pkg{mlr3} objects: \CRANpkg{mlr3viz} 34 | \item Survival analysis and probabilistic regression: \CRANpkg{mlr3proba} 35 | \item Cluster analysis: \CRANpkg{mlr3cluster} 36 | \item Feature selection filters: \CRANpkg{mlr3filters} 37 | \item Feature selection wrappers: \CRANpkg{mlr3fselect} 38 | \item Interface to real (out-of-memory) data bases: \CRANpkg{mlr3db} 39 | \item Performance measures as plain functions: \CRANpkg{mlr3measures} 40 | } 41 | } 42 | 43 | \section{Suggested packages}{ 44 | 45 | \itemize{ 46 | \item Parallelization framework: \CRANpkg{future} 47 | \item Progress bars: \CRANpkg{progressr} 48 | \item Encapsulated evaluation: \CRANpkg{evaluate}, \CRANpkg{callr} (external process) 49 | } 50 | } 51 | 52 | \section{Package Options}{ 53 | 54 | \itemize{ 55 | \item \code{"mlr3.debug"}: If set to \code{TRUE}, parallelization via \CRANpkg{future} is 56 | disabled to simplify debugging and provide more concise tracebacks. 
Note that 57 | results computed with debug mode enabled use a different seeding mechanism 58 | and are not reproducible. 59 | \item \code{"mlr3.allow_utf8_names"}: If set to \code{TRUE}, checks on the feature names 60 | are relaxed, allowing non-ASCII characters in column names. This is an 61 | experimental and temporary option to pave the way for text analysis, and will 62 | likely be removed in a future version of the package. 63 | } 64 | } 65 | 66 | \references{ 67 | Becker M, Schratz P (2024). 68 | \emph{mlr3spatial: Support for Spatial Objects Within the 'mlr3' Ecosystem}. 69 | https://mlr3spatial.mlr-org.com, 70 | https://github.com/mlr-org/mlr3spatial. 71 | } 72 | \seealso{ 73 | Useful links: 74 | \itemize{ 75 | \item \url{https://mlr3spatial.mlr-org.com} 76 | \item \url{https://github.com/mlr-org/mlr3spatial} 77 | \item Report bugs at \url{https://github.com/mlr-org/mlr3spatial/issues} 78 | } 79 | 80 | } 81 | \author{ 82 | \strong{Maintainer}: Marc Becker \email{marcbecker@posteo.de} (\href{https://orcid.org/0000-0002-8115-0400}{ORCID}) 83 | 84 | Authors: 85 | \itemize{ 86 | \item Patrick Schratz \email{patrick.schratz@gmail.com} (\href{https://orcid.org/0000-0003-0748-6624}{ORCID}) 87 | } 88 | 89 | } 90 | -------------------------------------------------------------------------------- /man/numeric_layer.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data.R 3 | \name{numeric_layer} 4 | \alias{numeric_layer} 5 | \title{Numeric Layer Generator} 6 | \usage{ 7 | numeric_layer(id, in_memory = FALSE) 8 | } 9 | \arguments{ 10 | \item{id}{(\code{character(1)})\cr 11 | Layer id.} 12 | 13 | \item{in_memory}{(\code{logical(1)})\cr 14 | If \code{FALSE} (default), layer is written to disk.} 15 | } 16 | \value{ 17 | Named \code{list()} 18 | } 19 | \description{ 20 | Generates a numeric layer when passed to 
\code{\link[=generate_stack]{generate_stack()}}. 21 | } 22 | \keyword{internal} 23 | -------------------------------------------------------------------------------- /man/predict_spatial.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/predict_spatial.R 3 | \name{predict_spatial} 4 | \alias{predict_spatial} 5 | \title{Predict on Spatial Objects with mlr3 Learners} 6 | \usage{ 7 | predict_spatial( 8 | newdata, 9 | learner, 10 | chunksize = 200L, 11 | format = "terra", 12 | filename = NULL 13 | ) 14 | } 15 | \arguments{ 16 | \item{newdata}{(\link[terra:SpatRaster-class]{terra::SpatRaster} | \code{stars::stars} | \link[sf:sf]{sf::sf} | \code{raster::RasterStack} | \code{raster::RasterBrick}). 17 | New data to predict on. All spatial data formats convertible by \code{as_data_backend()} are supported, e.g., \link[terra:SpatRaster-class]{terra::SpatRaster} or \link[sf:sf]{sf::sf}.} 18 | 19 | \item{learner}{(\link{Learner}). 20 | Learner with trained model.} 21 | 22 | \item{chunksize}{(\code{integer(1)})\cr 23 | The chunksize determines how many subparts the prediction task will be 24 | split into. The value can be roughly thought of as megabytes of a raster file 25 | on disk. For example, if a prediction on a 1 GB file would be carried out 26 | with \code{chunksize = 100L}, the prediction would happen in 10 chunks. 27 | 28 | The default of \code{chunksize = 200L} might be a good compromise between speed 29 | and memory usage. If you find yourself running out of memory, reduce this 30 | value.} 31 | 32 | \item{format}{(\code{character(1)})\cr 33 | Output class of the resulting object. 34 | Accepted values are \code{"raster"}, \code{"stars"} and \code{"terra"} if the input is a raster. 
35 | Note that when choosing something other than \code{"terra"}, the spatial object is converted into the respective format, which might cause overhead both in runtime and memory allocation. 36 | For vector data only \code{"sf"} is supported.} 37 | 38 | \item{filename}{(\code{character(1)})\cr 39 | Path where the spatial object should be written to.} 40 | } 41 | \value{ 42 | Spatial object of class given in argument \code{format}. 43 | } 44 | \description{ 45 | This function predicts directly on various spatial objects using trained mlr3 learners. 46 | } 47 | \examples{ 48 | library(terra, exclude = "resample") 49 | 50 | # fit rpart on training points 51 | task_train = tsk("leipzig") 52 | learner = lrn("classif.rpart") 53 | learner$train(task_train) 54 | 55 | # load raster 56 | stack = rast(system.file("extdata", "leipzig_raster.tif", package = "mlr3spatial")) 57 | 58 | # predict land cover classes 59 | pred = predict_spatial(stack, learner, chunksize = 1L) 60 | } 61 | -------------------------------------------------------------------------------- /man/sample_stack.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data.R 3 | \name{sample_stack} 4 | \alias{sample_stack} 5 | \title{Sample Points in Raster Stack} 6 | \usage{ 7 | sample_stack(stack, n = 100) 8 | } 9 | \arguments{ 10 | \item{stack}{(\link[terra:SpatRaster-class]{terra::SpatRaster})\cr 11 | Raster stack.} 12 | 13 | \item{n}{(\code{integer(1)})\cr 14 | Number of points.} 15 | } 16 | \value{ 17 | \link[sf:sf]{sf::sf} 18 | } 19 | \description{ 20 | Samples \code{n} points of a raster stack. 
21 | } 22 | \keyword{internal} 23 | -------------------------------------------------------------------------------- /man/write_raster.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/helper.R 3 | \name{write_raster} 4 | \alias{write_raster} 5 | \title{Write a Raster in Chunks} 6 | \usage{ 7 | write_raster(data) 8 | } 9 | \arguments{ 10 | \item{data}{\verb{[SpatRaster]}\cr 11 | \code{SpatRaster} object.} 12 | } 13 | \description{ 14 | Writes square raster to disk in chunks. 15 | Internal helper function. 16 | } 17 | \keyword{internal} 18 | -------------------------------------------------------------------------------- /mlr3spatial.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: Default 4 | SaveWorkspace: Default 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | 15 | AutoAppendNewline: Yes 16 | StripTrailingWhitespace: Yes 17 | 18 | BuildType: Package 19 | PackageUseDevtools: Yes 20 | PackageCleanBeforeInstall: Yes 21 | PackageInstallArgs: --no-multiarch --with-keep.source 22 | PackageRoxygenize: rd,collate,namespace 23 | -------------------------------------------------------------------------------- /pkgdown/_pkgdown.yml: -------------------------------------------------------------------------------- 1 | url: https://mlr3spatial.mlr-org.com 2 | 3 | template: 4 | bootstrap: 5 5 | package: mlr3pkgdowntemplate 6 | 7 | development: 8 | mode: auto 9 | version_label: default 10 | version_tooltip: "Version" 11 | 12 | authors: 13 | Patrick Schratz: 14 | href: https://pat-s.me 15 | 16 | navbar: 17 | structure: 18 | left: [reference, news, book, articles] 19 | right: [github, mattermost, stackoverflow, rss] 20 | components: 21 | home: ~ 
22 | reference: 23 | icon: fa fa-file-alt 24 | text: Reference 25 | href: reference/index.html 26 | mattermost: 27 | icon: fa fa-comments 28 | href: https://lmmisld-lmu-stats-slds.srv.mwn.de/mlr_invite/ 29 | book: 30 | text: mlr3book 31 | icon: fa fa-link 32 | href: https://mlr3book.mlr-org.com 33 | stackoverflow: 34 | icon: fab fa-stack-overflow 35 | href: https://stackoverflow.com/questions/tagged/mlr3 36 | rss: 37 | icon: fa-rss 38 | href: https://mlr-org.com/ 39 | 40 | reference: 41 | - title: mlr3spatial 42 | contents: 43 | - mlr3spatial-package 44 | - title: Spatiotemporal Tasks 45 | contents: 46 | - contains("Task") 47 | - contains("as_task_") 48 | - leipzig 49 | - title: Data Backends 50 | contents: 51 | - contains("Backend") 52 | - contains("_backend") 53 | - title: Prediction 54 | contents: 55 | - predict_spatial 56 | -------------------------------------------------------------------------------- /pkgdown/apple-touch-icon-120x120.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlr-org/mlr3spatial/f8e417455697159103f126ae89fc858a70861af8/pkgdown/apple-touch-icon-120x120.png -------------------------------------------------------------------------------- /pkgdown/apple-touch-icon-152x152.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlr-org/mlr3spatial/f8e417455697159103f126ae89fc858a70861af8/pkgdown/apple-touch-icon-152x152.png -------------------------------------------------------------------------------- /pkgdown/apple-touch-icon-180x180.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlr-org/mlr3spatial/f8e417455697159103f126ae89fc858a70861af8/pkgdown/apple-touch-icon-180x180.png -------------------------------------------------------------------------------- /pkgdown/apple-touch-icon-60x60.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlr-org/mlr3spatial/f8e417455697159103f126ae89fc858a70861af8/pkgdown/apple-touch-icon-60x60.png -------------------------------------------------------------------------------- /pkgdown/apple-touch-icon-76x76.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlr-org/mlr3spatial/f8e417455697159103f126ae89fc858a70861af8/pkgdown/apple-touch-icon-76x76.png -------------------------------------------------------------------------------- /pkgdown/apple-touch-icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlr-org/mlr3spatial/f8e417455697159103f126ae89fc858a70861af8/pkgdown/apple-touch-icon.png -------------------------------------------------------------------------------- /pkgdown/favicon-16x16.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlr-org/mlr3spatial/f8e417455697159103f126ae89fc858a70861af8/pkgdown/favicon-16x16.png -------------------------------------------------------------------------------- /pkgdown/favicon-32x32.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlr-org/mlr3spatial/f8e417455697159103f126ae89fc858a70861af8/pkgdown/favicon-32x32.png -------------------------------------------------------------------------------- /pkgdown/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlr-org/mlr3spatial/f8e417455697159103f126ae89fc858a70861af8/pkgdown/favicon.ico -------------------------------------------------------------------------------- /pkgdown/favicon/apple-touch-icon-120x120.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/mlr-org/mlr3spatial/f8e417455697159103f126ae89fc858a70861af8/pkgdown/favicon/apple-touch-icon-120x120.png -------------------------------------------------------------------------------- /pkgdown/favicon/apple-touch-icon-152x152.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlr-org/mlr3spatial/f8e417455697159103f126ae89fc858a70861af8/pkgdown/favicon/apple-touch-icon-152x152.png -------------------------------------------------------------------------------- /pkgdown/favicon/apple-touch-icon-180x180.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlr-org/mlr3spatial/f8e417455697159103f126ae89fc858a70861af8/pkgdown/favicon/apple-touch-icon-180x180.png -------------------------------------------------------------------------------- /pkgdown/favicon/apple-touch-icon-60x60.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlr-org/mlr3spatial/f8e417455697159103f126ae89fc858a70861af8/pkgdown/favicon/apple-touch-icon-60x60.png -------------------------------------------------------------------------------- /pkgdown/favicon/apple-touch-icon-76x76.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlr-org/mlr3spatial/f8e417455697159103f126ae89fc858a70861af8/pkgdown/favicon/apple-touch-icon-76x76.png -------------------------------------------------------------------------------- /pkgdown/favicon/apple-touch-icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlr-org/mlr3spatial/f8e417455697159103f126ae89fc858a70861af8/pkgdown/favicon/apple-touch-icon.png -------------------------------------------------------------------------------- /pkgdown/favicon/favicon-16x16.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlr-org/mlr3spatial/f8e417455697159103f126ae89fc858a70861af8/pkgdown/favicon/favicon-16x16.png -------------------------------------------------------------------------------- /pkgdown/favicon/favicon-32x32.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlr-org/mlr3spatial/f8e417455697159103f126ae89fc858a70861af8/pkgdown/favicon/favicon-32x32.png -------------------------------------------------------------------------------- /pkgdown/favicon/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlr-org/mlr3spatial/f8e417455697159103f126ae89fc858a70861af8/pkgdown/favicon/favicon.ico -------------------------------------------------------------------------------- /tests/testthat.R: -------------------------------------------------------------------------------- 1 | if (requireNamespace("testthat", quietly = TRUE)) { 2 | library("checkmate") 3 | library("testthat") 4 | library("mlr3spatial") 5 | test_check("mlr3spatial") 6 | } 7 | -------------------------------------------------------------------------------- /tests/testthat/helper_expectations.R: -------------------------------------------------------------------------------- 1 | expect_backend = function(b) { 2 | checkmate::expect_r6(b, cloneable = FALSE, 3 | public = c("nrow", "ncol", "colnames", "rownames", "head", "data", "hash"), 4 | private = c(".data", ".hash", ".calculate_hash")) 5 | checkmate::expect_subset(b$data_formats, mlr3::mlr_reflections$data_formats, empty.ok = FALSE) 6 | testthat::expect_output(print(b), "^ 0 zero length 76 | 77 | ## missings are handled by distinct? 
78 | d = b$distinct(rn, cn, na_rm = TRUE) 79 | checkmate::qexpectr(d, "V") 80 | 81 | d = b$distinct(rn, cn, na_rm = FALSE) 82 | m = b$missings(rn, cn) 83 | testthat::expect_equal(vapply(d, checkmate::anyMissing, FUN.VALUE = logical(1)), m > 0L) 84 | 85 | # $missings() 86 | x = b$missings(b$rownames, b$colnames) 87 | checkmate::expect_integerish(x, lower = 0L, upper = b$nrow, any.missing = FALSE) 88 | checkmate::expect_names(names(x), permutation.of = b$colnames) 89 | checkmate::expect_integerish(b$missings(b$rownames, "_not_existing_"), len = 0L, names = "named") 90 | checkmate::expect_integerish(b$missings(b$rownames[0L], b$colnames), len = b$ncol, names = "unique") 91 | checkmate::expect_integerish(b$missings(b$rownames[0L], "_not_existing_"), len = 0L, names = "unique") 92 | 93 | # $hash 94 | checkmate::expect_string(b$hash) 95 | } 96 | -------------------------------------------------------------------------------- /tests/testthat/helper_future.R: -------------------------------------------------------------------------------- 1 | with_future = function(backend, expr, ...) { 2 | requireNamespace("future") 3 | oplan = force(future::plan(backend, ...)) 4 | on.exit(future::plan(oplan), add = TRUE) 5 | force(expr) 6 | } 7 | -------------------------------------------------------------------------------- /tests/testthat/helper_learner.R: -------------------------------------------------------------------------------- 1 | # Predicts first feature in task 2 | LearnerRegrFeature = R6Class("LearnerRegrFeature", inherit = LearnerRegr, 3 | public = list( 4 | #' @description 5 | #' Creates a new instance of this [R6][R6::R6Class] class. 
6 | initialize = function() { 7 | super$initialize( 8 | id = "regr.featureless", 9 | feature_types = mlr_reflections$task_feature_types, 10 | predict_types = "response", 11 | param_set = paradox::ps(), 12 | properties = character(0), 13 | label = "Feature Regression Learner", 14 | man = "mlr3::mlr_learners_regr.featureless", 15 | ) 16 | } 17 | ), 18 | 19 | private = list( 20 | .train = function(task) { 21 | set_class(list(feature = task$feature_names[1]), "regr.feature_model") 22 | }, 23 | 24 | .predict = function(task) { 25 | list(response = task$data()[, self$model$feature, with = FALSE][[1]]) 26 | } 27 | ) 28 | ) 29 | -------------------------------------------------------------------------------- /tests/testthat/setup.R: -------------------------------------------------------------------------------- 1 | old_opts = options( 2 | warnPartialMatchArgs = TRUE, 3 | warnPartialMatchAttr = TRUE, 4 | warnPartialMatchDollar = TRUE 5 | ) 6 | 7 | # https://github.com/HenrikBengtsson/Wishlist-for-R/issues/88 8 | old_opts = lapply(old_opts, function(x) if (is.null(x)) FALSE else x) 9 | 10 | # logger 11 | lg_mlr3 = lgr::get_logger("mlr3") 12 | old_threshold_mlr3 = lg_mlr3$threshold 13 | lg_mlr3$set_threshold("warn") 14 | lg_bbotk = lgr::get_logger("bbotk") 15 | old_threshold_bbotk = lg_bbotk$threshold 16 | lg_bbotk$set_threshold("warn") 17 | lg_mlr3spatial = lgr::get_logger("mlr3spatial") 18 | old_threshold_mlr3spatial = lg_mlr3spatial$threshold 19 | lg_mlr3spatial$set_threshold("warn") 20 | 21 | # future 22 | old_plan = future::plan() 23 | lg$set_threshold("warn") 24 | future::plan("sequential") 25 | -------------------------------------------------------------------------------- /tests/testthat/teardown.R: -------------------------------------------------------------------------------- 1 | options(old_opts) 2 | lg_mlr3$set_threshold(old_threshold_mlr3) 3 | lg_bbotk$set_threshold(old_threshold_bbotk) 4 | lg_mlr3spatial$set_threshold(old_threshold_mlr3spatial) 5 | 
future::plan(old_plan) 6 | -------------------------------------------------------------------------------- /tests/testthat/test_DataBackendVector.R: -------------------------------------------------------------------------------- 1 | test_that("DataBackendVector works", { 2 | vector = sf::read_sf(system.file("extdata", "leipzig_points.gpkg", package = "mlr3spatial"), stringsAsFactors = TRUE) 3 | primary_key = "..row_id" 4 | vector[[primary_key]] = seq_row(vector) 5 | backend = DataBackendVector$new(vector, primary_key = primary_key) 6 | 7 | expect_class(backend, "DataBackendVector") 8 | expect_class(backend$sfc, "sfc") 9 | }) 10 | 11 | test_that("as_data_backend.sf works", { 12 | vector = sf::read_sf(system.file("extdata", "leipzig_points.gpkg", package = "mlr3spatial"), stringsAsFactors = TRUE) 13 | backend = as_data_backend(vector) 14 | 15 | expect_class(backend, "DataBackendVector") 16 | expect_class(backend$sfc, "sfc") 17 | }) 18 | -------------------------------------------------------------------------------- /tests/testthat/test_LearnerClassifSpatial.R: -------------------------------------------------------------------------------- 1 | test_that("LearnerClassifSpatial ignores observations with missing values", { 2 | skip_if_not_installed("mlr3learners") 3 | require_namespaces("mlr3learners") 4 | 5 | # train task 6 | stack = generate_stack(list( 7 | numeric_layer("x_1"), 8 | factor_layer("y", levels = c("a", "b"))), 9 | dimension = 100) 10 | vector = sample_stack(stack, n = 100) 11 | task_train = as_task_classif_st(vector, id = "test_vector", target = "y") 12 | learner = lrn("classif.ranger") 13 | learner$train(task_train) 14 | 15 | # predict task 16 | stack$y = NULL 17 | stack = mask_stack(stack) 18 | task_predict = as_task_unsupervised(stack, id = "test") 19 | learner_spatial = LearnerClassifSpatial$new(learner) 20 | pred = learner_spatial$predict(task_predict) 21 | 22 | expect_true(all(is.na(pred$response[seq(100)]))) 23 | expect_numeric(pred$response, 
any.missing = TRUE, all.missing = FALSE) 24 | }) 25 | -------------------------------------------------------------------------------- /tests/testthat/test_LearnerRegrSpatial.R: -------------------------------------------------------------------------------- 1 | test_that("LearnerRegrSpatial ignores observations with missing values", { 2 | skip_if_not_installed("mlr3learners") 3 | require_namespaces("mlr3learners") 4 | 5 | # train task 6 | stack = generate_stack(list( 7 | factor_layer("c_1", levels = c("a", "b")), 8 | numeric_layer("y")), 9 | dimension = 100) 10 | vector = sample_stack(stack, n = 100) 11 | task_train = as_task_regr_st(vector, id = "test_vector", target = "y") 12 | learner = lrn("regr.ranger") 13 | learner$train(task_train) 14 | 15 | # predict task 16 | stack$y = NULL 17 | stack = mask_stack(stack) 18 | task_predict = as_task_unsupervised(stack, id = "test") 19 | learner_spatial = LearnerRegrSpatial$new(learner) 20 | pred = learner_spatial$predict(task_predict) 21 | 22 | expect_true(all(is.na(pred$response[seq(100)]))) 23 | expect_numeric(pred$response, any.missing = TRUE, all.missing = FALSE) 24 | }) 25 | -------------------------------------------------------------------------------- /tests/testthat/test_TaskClassifST.R: -------------------------------------------------------------------------------- 1 | test_that("TaskClassifST throws an error when backend is an sf object", { 2 | stack = generate_stack(list( 3 | numeric_layer("x_1"), 4 | factor_layer("y", levels = c("a", "b"))), 5 | dimension = 100) 6 | vector = st_as_sf(sample_stack(stack, n = 100)) 7 | 8 | expect_error(TaskClassifST$new(id = "test", backend = vector, target = "y"), "convert an sf objects into a task") 9 | }) 10 | -------------------------------------------------------------------------------- /tests/testthat/test_TaskRegrST.R: -------------------------------------------------------------------------------- 1 | test_that("TaskRegrST throws an error when backend is an sf object", 
{ 2 | stack = generate_stack(list( 3 | numeric_layer("x_1"), 4 | numeric_layer("y")), 5 | dimension = 100) 6 | vector = st_as_sf(sample_stack(stack, n = 100)) 7 | 8 | expect_error(TaskRegrST$new(id = "test", backend = vector, target = "y"), "convert an sf objects into a task") 9 | }) 10 | -------------------------------------------------------------------------------- /tests/testthat/test_as_task_classif_st.R: -------------------------------------------------------------------------------- 1 | test_that("as_task_classif_st works on data.frame objects", { 2 | stack = generate_stack(list( 3 | numeric_layer("x_1"), 4 | factor_layer("y", levels = c("a", "b"))), 5 | dimension = 100) 6 | vector = st_as_sf(sample_stack(stack, n = 100)) 7 | data = as.data.frame(vector) 8 | data$geometry = NULL 9 | data = cbind(data, st_coordinates(vector)) 10 | 11 | task = as_task_classif_st(data, target = "y", coordinate_names = c("X", "Y"), crs = "EPSG:4326") 12 | expect_class(task, "TaskClassifST") 13 | expect_data_table(task$coordinates(), types = "numeric", ncols = 2, nrows = 100) 14 | expect_names(colnames(task$coordinates()), identical.to = c("X", "Y")) 15 | expect_equal(task$coordinate_names, c("X", "Y")) 16 | expect_equal(task$crs, "EPSG:4326") 17 | expect_equal(task$col_roles$feature, "x_1") 18 | expect_equal(task$col_roles$coordinate, c("X", "Y")) 19 | }) 20 | 21 | test_that("as_task_classif_st works on DataBackendDataTable objects", { 22 | stack = generate_stack(list( 23 | numeric_layer("x_1"), 24 | factor_layer("y", levels = c("a", "b"))), 25 | dimension = 100) 26 | vector = st_as_sf(sample_stack(stack, n = 100)) 27 | data = as.data.frame(vector) 28 | data$geometry = NULL 29 | data = cbind(data, st_coordinates(vector)) 30 | backend = as_data_backend(data) 31 | 32 | task = as_task_classif_st(backend, target = "y", coordinate_names = c("X", "Y"), crs = "EPSG:4326") 33 | expect_class(task, "TaskClassifST") 34 | expect_data_table(task$coordinates(), types = "numeric", ncols = 2, 
nrows = 100) 35 | expect_names(colnames(task$coordinates()), identical.to = c("X", "Y")) 36 | expect_equal(task$coordinate_names, c("X", "Y")) 37 | expect_equal(task$crs, "EPSG:4326") 38 | expect_equal(task$col_roles$feature, "x_1") 39 | expect_equal(task$col_roles$coordinate, c("X", "Y")) 40 | }) 41 | 42 | test_that("as_task_classif_st works on sf objects", { 43 | stack = generate_stack(list( 44 | numeric_layer("x_1"), 45 | factor_layer("y", levels = c("a", "b"))), 46 | dimension = 100) 47 | vector = st_as_sf(sample_stack(stack, n = 100)) 48 | 49 | task = as_task_classif_st(vector, target = "y") 50 | expect_class(task, "TaskClassifST") 51 | expect_data_table(task$coordinates(), types = "numeric", ncols = 2, nrows = 100) 52 | expect_names(colnames(task$coordinates()), identical.to = c("X", "Y")) 53 | expect_equal(task$coordinate_names, c("X", "Y")) 54 | expect_equal(task$crs, st_crs(vector)$wkt) 55 | expect_equal(task$col_roles$feature, "x_1") 56 | expect_equal(task$col_roles$coordinate, c("X", "Y")) 57 | }) 58 | 59 | test_that("as_task_classif_st works on TaskClassifST objects", { 60 | stack = generate_stack(list( 61 | numeric_layer("x_1"), 62 | factor_layer("y", levels = c("a", "b"))), 63 | dimension = 100) 64 | vector = st_as_sf(sample_stack(stack, n = 100)) 65 | 66 | task = as_task_classif_st(vector, target = "y") 67 | task = as_task_classif_st(task) 68 | expect_class(task, "TaskClassifST") 69 | expect_data_table(task$coordinates(), types = "numeric", ncols = 2, nrows = 100) 70 | expect_names(colnames(task$coordinates()), identical.to = c("X", "Y")) 71 | expect_equal(task$coordinate_names, c("X", "Y")) 72 | expect_equal(task$crs, st_crs(vector)$wkt) 73 | expect_equal(task$col_roles$feature, "x_1") 74 | expect_equal(task$col_roles$coordinate, c("X", "Y")) 75 | }) 76 | 77 | test_that("convert from TaskRegrST to TaskClassifST", { 78 | stack = generate_stack(list( 79 | numeric_layer("x_1"), 80 | factor_layer("x_2", levels = c("a", "b")), 81 | numeric_layer("y")), 
82 | dimension = 100) 83 | vector = st_as_sf(sample_stack(stack, n = 100)) 84 | 85 | task = as_task_regr_st(vector, target = "y") 86 | task = as_task_classif_st(task, target = "x_2", drop_original_target = TRUE) 87 | expect_class(task, "TaskClassifST") 88 | expect_data_table(task$coordinates(), types = "numeric", ncols = 2, nrows = 100) 89 | expect_names(colnames(task$coordinates()), identical.to = c("X", "Y")) 90 | expect_equal(task$coordinate_names, c("X", "Y")) 91 | expect_equal(task$crs, st_crs(vector)$wkt) 92 | expect_equal(task$col_roles$feature, c("x_1", "y")) 93 | expect_equal(task$col_roles$coordinate, c("X", "Y")) 94 | }) 95 | -------------------------------------------------------------------------------- /tests/testthat/test_as_task_regr_st.R: -------------------------------------------------------------------------------- 1 | 2 | test_that("as_task_regr_st works on data.frame objects", { 3 | stack = generate_stack(list( 4 | numeric_layer("x_1"), 5 | numeric_layer("y")), 6 | dimension = 100) 7 | vector = st_as_sf(sample_stack(stack, n = 100)) 8 | data = as.data.frame(vector) 9 | data$geometry = NULL 10 | data = cbind(data, st_coordinates(vector)) 11 | 12 | task = as_task_regr_st(data, target = "y", coordinate_names = c("X", "Y"), crs = "EPSG:4326") 13 | expect_class(task, "TaskRegrST") 14 | expect_data_table(task$coordinates(), types = "numeric", ncols = 2, nrows = 100) 15 | expect_names(colnames(task$coordinates()), identical.to = c("X", "Y")) 16 | expect_equal(task$coordinate_names, c("X", "Y")) 17 | expect_equal(task$crs, "EPSG:4326") 18 | expect_equal(task$col_roles$feature, "x_1") 19 | expect_equal(task$col_roles$coordinate, c("X", "Y")) 20 | }) 21 | 22 | test_that("as_task_regr_st works on DataBackendDataTable objects", { 23 | stack = generate_stack(list( 24 | numeric_layer("x_1"), 25 | numeric_layer("y")), 26 | dimension = 100) 27 | vector = st_as_sf(sample_stack(stack, n = 100)) 28 | data = as.data.frame(vector) 29 | data$geometry = NULL 30 | 
data = cbind(data, st_coordinates(vector)) 31 | backend = as_data_backend(data) 32 | 33 | task = as_task_regr_st(backend, target = "y", coordinate_names = c("X", "Y"), crs = "EPSG:4326") 34 | expect_class(task, "TaskRegrST") 35 | expect_data_table(task$coordinates(), types = "numeric", ncols = 2, nrows = 100) 36 | expect_names(colnames(task$coordinates()), identical.to = c("X", "Y")) 37 | expect_equal(task$coordinate_names, c("X", "Y")) 38 | expect_equal(task$crs, "EPSG:4326") 39 | expect_equal(task$col_roles$feature, "x_1") 40 | expect_equal(task$col_roles$coordinate, c("X", "Y")) 41 | }) 42 | 43 | test_that("as_task_regr_st works on sf objects", { 44 | stack = generate_stack(list( 45 | numeric_layer("x_1"), 46 | numeric_layer("y")), 47 | dimension = 100) 48 | vector = st_as_sf(sample_stack(stack, n = 100)) 49 | 50 | task = as_task_regr_st(vector, target = "y") 51 | expect_class(task, "TaskRegrST") 52 | expect_data_table(task$coordinates(), types = "numeric", ncols = 2, nrows = 100) 53 | expect_names(colnames(task$coordinates()), identical.to = c("X", "Y")) 54 | expect_equal(task$coordinate_names, c("X", "Y")) 55 | expect_equal(task$crs, st_crs(vector)$wkt) 56 | expect_equal(task$col_roles$feature, "x_1") 57 | expect_equal(task$col_roles$coordinate, c("X", "Y")) 58 | }) 59 | 60 | test_that("as_task_regr_st works on TaskRegrST objects", { 61 | stack = generate_stack(list( 62 | numeric_layer("x_1"), 63 | numeric_layer("y")), 64 | dimension = 100) 65 | vector = st_as_sf(sample_stack(stack, n = 100)) 66 | 67 | task = as_task_regr_st(vector, target = "y") 68 | task = as_task_regr_st(task) 69 | expect_class(task, "TaskRegrST") 70 | expect_data_table(task$coordinates(), types = "numeric", ncols = 2, nrows = 100) 71 | expect_names(colnames(task$coordinates()), identical.to = c("X", "Y")) 72 | expect_equal(task$coordinate_names, c("X", "Y")) 73 | expect_equal(task$crs, st_crs(vector)$wkt) 74 | expect_equal(task$col_roles$feature, "x_1") 75 | 
expect_equal(task$col_roles$coordinate, c("X", "Y")) 76 | }) 77 | 78 | test_that("convert from TaskClassifST to TaskRegrST", { 79 | stack = generate_stack(list( 80 | numeric_layer("x_1"), 81 | numeric_layer("x_2"), 82 | factor_layer("y", levels = c("a", "b"))), 83 | dimension = 100) 84 | vector = st_as_sf(sample_stack(stack, n = 100)) 85 | 86 | task = as_task_classif_st(vector, target = "y") 87 | task = as_task_regr_st(task, target = "x_2", drop_original_target = TRUE) 88 | expect_class(task, "TaskRegrST") 89 | expect_data_table(task$coordinates(), types = "numeric", ncols = 2, nrows = 100) 90 | expect_names(colnames(task$coordinates()), identical.to = c("X", "Y")) 91 | expect_equal(task$coordinate_names, c("X", "Y")) 92 | expect_equal(task$crs, st_crs(vector)$wkt) 93 | expect_equal(task$col_roles$feature, c("x_1", "y")) 94 | expect_equal(task$col_roles$coordinate, c("X", "Y")) 95 | }) 96 | -------------------------------------------------------------------------------- /tests/testthat/test_as_task_unsupervised.R: -------------------------------------------------------------------------------- 1 | test_that("as_task_unsupervised works on stars objects", { 2 | skip_if_not_installed("stars") 3 | requireNamespace("stars", quietly = TRUE) 4 | 5 | stack = generate_stack(list( 6 | numeric_layer("x_1"), 7 | numeric_layer("y")), 8 | dimension = 100) 9 | stack = invoke(stars::st_as_stars, .x = stack, .opts = allow_partial_matching) 10 | 11 | expect_class(as_task_unsupervised(stack), "TaskUnsupervised") 12 | }) 13 | 14 | test_that("as_task_unsupervised works on SpatRaster objects", { 15 | stack = generate_stack(list( 16 | numeric_layer("x_1"), 17 | numeric_layer("y")), 18 | dimension = 100) 19 | 20 | expect_class(as_task_unsupervised(stack), "TaskUnsupervised") 21 | }) 22 | 23 | test_that("as_task_unsupervised works on RasterBrick objects", { 24 | skip_if_not_installed("raster") 25 | requireNamespace("raster", quietly = TRUE) 26 | 27 | stack = generate_stack(list( 28 | 
numeric_layer("x_1"), 29 | numeric_layer("y")), 30 | dimension = 100, multi_layer_file = TRUE) 31 | stack = invoke(raster::brick, stack, .opts = allow_partial_matching) 32 | 33 | expect_class(as_task_unsupervised(stack), "TaskUnsupervised") 34 | }) 35 | 36 | test_that("as_task_unsupervised works on RasterStack objects", { 37 | skip_if_not_installed("raster") 38 | requireNamespace("raster", quietly = TRUE) 39 | 40 | stack = generate_stack(list( 41 | numeric_layer("x_1"), 42 | numeric_layer("y")), 43 | dimension = 100) 44 | stack = invoke(raster::stack, x = stack, .opts = allow_partial_matching) 45 | raster::crs(stack) = "EPSG:4326" 46 | 47 | expect_class(as_task_unsupervised(stack), "TaskUnsupervised") 48 | }) 49 | 50 | test_that("as_task_unsupervised works on sf objects", { 51 | vector = sf::read_sf(system.file("extdata", "leipzig_points.gpkg", package = "mlr3spatial"), stringsAsFactors = TRUE) 52 | vector$land_cover = NULL 53 | 54 | expect_class(as_task_unsupervised(vector), "TaskUnsupervised") 55 | }) 56 | -------------------------------------------------------------------------------- /tests/testthat/test_bock_size.R: -------------------------------------------------------------------------------- 1 | test_that("chunk size is 1 out of 8 rows", { 2 | raster = generate_stack(list( 3 | numeric_layer("x_1")), 4 | dimension = 8) 5 | 6 | bs = block_size(raster, chunksize = 64 * 1e-6) 7 | expect_equal(bs$cells_seq, cumsum(c(1, rep(8, 7)))) 8 | expect_equal(bs$cells_to_read, rep(8, 8)) 9 | }) 10 | 11 | test_that("chunk size is 2 out of 8 rows", { 12 | raster = generate_stack(list( 13 | numeric_layer("x_1")), 14 | dimension = 8) 15 | 16 | bs = block_size(raster, chunksize = 64 * 2 * 1e-6) 17 | expect_equal(bs$cells_seq, cumsum(c(1, rep(16, 3)))) 18 | expect_equal(bs$cells_to_read, rep(16, 4)) 19 | }) 20 | 21 | test_that("chunk size is 1/2 out of 8 rows", { 22 | raster = generate_stack(list( 23 | numeric_layer("x_1")), 24 | dimension = 8) 25 | 26 | bs = block_size(raster, 
chunksize = 32 * 1e-6) # chunk size is round up to 1 rows 27 | expect_equal(bs$cells_seq, cumsum(c(1, rep(8, 7)))) 28 | expect_equal(bs$cells_to_read, rep(8, 8)) 29 | }) 30 | 31 | test_that("chunk size is 1 1/2 out of 8 rows", { 32 | raster = generate_stack(list( 33 | numeric_layer("x_1")), 34 | dimension = 8) 35 | 36 | bs = block_size(raster, chunksize = 96 * 1e-6) # chunk size is round up to 2 rows 37 | expect_equal(bs$cells_seq, cumsum(c(1, rep(16, 3)))) 38 | expect_equal(bs$cells_to_read, rep(16, 4)) 39 | }) 40 | 41 | test_that("chunk size is 3 rows out of 8 rows", { 42 | raster = generate_stack(list( 43 | numeric_layer("x_1")), 44 | dimension = 8) 45 | 46 | bs = block_size(raster, chunksize = 64 * 3 * 1e-6) 47 | expect_equal(bs$cells_seq, cumsum(c(1, rep(24, 2)))) 48 | expect_equal(bs$cells_to_read, c(24, 24, 16)) 49 | }) 50 | 51 | test_that("chunk size is 8 out of 8 rows", { 52 | raster = generate_stack(list( 53 | numeric_layer("x_1")), 54 | dimension = 8) 55 | 56 | bs = block_size(raster, chunksize = 64 * 8 * 1e-6) 57 | expect_equal(bs$cells_seq, 1) 58 | expect_equal(bs$cells_to_read, 64) 59 | }) 60 | 61 | test_that("chunk size is 9 out of 8 rows", { 62 | raster = generate_stack(list( 63 | numeric_layer("x_1")), 64 | dimension = 8) 65 | 66 | bs = block_size(raster, chunksize = 64 * 9 * 1e-6) 67 | expect_equal(bs$cells_seq, 1) 68 | expect_equal(bs$cells_to_read, 64) 69 | }) 70 | 71 | test_that("chunk size is 16 out of 8 rows", { 72 | raster = generate_stack(list( 73 | numeric_layer("x_1")), 74 | dimension = 8) 75 | 76 | bs = block_size(raster, chunksize = 64 * 16 * 1e-6) 77 | expect_equal(bs$cells_seq, 1) 78 | expect_equal(bs$cells_to_read, 64) 79 | }) 80 | -------------------------------------------------------------------------------- /tests/testthat/test_data.R: -------------------------------------------------------------------------------- 1 | test_that("categorical layer is set", { 2 | stack = generate_stack(list( 3 | factor_layer("y", levels = c("a", 
"b"))), 4 | dimension = 2) 5 | task = as_task_unsupervised(stack) 6 | expect_factor(task$data()$y, levels = c("a", "b"), len = 4) 7 | }) 8 | -------------------------------------------------------------------------------- /tests/testthat/test_predict_spatial.R: -------------------------------------------------------------------------------- 1 | # raster predictions ----------------------------------------------------------- 2 | 3 | test_that("predictions are written to raster", { 4 | skip_if_not_installed("paradox") 5 | # [1] [2] [2] 6 | # [1] [1] [1] 7 | # [2] [2] [1] 8 | # [2] [2] [2] 9 | c_1 = y = c(1, 2, 2, 1, 1, 1, 2, 2, 1, 2, 2, 2) 10 | raster = terra::rast(matrix(c_1, ncol = 3, byrow = TRUE)) 11 | terra::set.names(raster, "c_1") 12 | 13 | # train task 14 | task = as_task_regr(data.table(cbind(c_1, y)), id = "test", target = "y") 15 | 16 | learner = LearnerRegrFeature$new() 17 | learner$train(task) 18 | 19 | # predict task 20 | task_predict = as_task_unsupervised(raster, id = "test") 21 | 22 | # chunk size is 3 out of 12 cells 23 | raster = predict_spatial(task_predict, learner, chunksize = 8 * 3 * 1e-6) 24 | expect_equal(terra::values(raster)[, 1], y) 25 | 26 | # chunk size is 4 out of 12 cells 27 | raster = predict_spatial(task_predict, learner, chunksize = 8 * 4 * 1e-6) 28 | expect_equal(terra::values(raster)[, 1], y) 29 | 30 | # chunk size is 7 out of 12 cells 31 | raster = predict_spatial(task_predict, learner, chunksize = 8 * 7 * 1e-6) 32 | expect_equal(terra::values(raster)[, 1], y) 33 | 34 | # chunk size is 12 out of 12 cells 35 | raster = predict_spatial(task_predict, learner, chunksize = 8 * 12 * 1e-6) 36 | expect_equal(terra::values(raster)[, 1], y) 37 | 38 | # chunk size is 13 out of 12 cells 39 | raster = predict_spatial(task_predict, learner, chunksize = 8 * 12 * 1e-6) 40 | expect_equal(terra::values(raster)[, 1], y) 41 | 42 | # chunk size is 25 out of 12 cells 43 | raster = predict_spatial(task_predict, learner, chunksize = 8 * 12 * 1e-6) 44 | 
expect_equal(terra::values(raster)[, 1], y) 45 | }) 46 | 47 | # sequential raster predict --------------------------------------------------- 48 | 49 | test_that("sequential execution works", { 50 | # train 51 | stack = generate_stack(list( 52 | numeric_layer("x_1"), 53 | factor_layer("y", levels = c("a", "b"))), 54 | layer_size = 1) 55 | vector = sample_stack(stack, n = 100) 56 | task_train = as_task_classif_st(vector, id = "test_vector", target = "y") 57 | learner = lrn("classif.rpart") 58 | learner$train(task_train) 59 | 60 | # predict 61 | stack$y = NULL 62 | task_predict = as_task_unsupervised(stack, id = "test") 63 | pred = predict_spatial(task_predict, learner, chunksize = 1L) 64 | expect_class(pred, "SpatRaster") 65 | }) 66 | 67 | test_that("sequential execution works in chunks", { 68 | # train 69 | stack = generate_stack(list( 70 | numeric_layer("x_1"), 71 | factor_layer("y", levels = c("a", "b"))), 72 | layer_size = 2) 73 | vector = sample_stack(stack, n = 100) 74 | task_train = as_task_classif_st(vector, id = "test_vector", target = "y") 75 | learner = lrn("classif.rpart") 76 | learner$train(task_train) 77 | 78 | # predict 79 | stack$y = NULL 80 | task_predict = as_task_unsupervised(stack, id = "test") 81 | pred = predict_spatial(task_predict, learner, chunksize = 1L) 82 | expect_class(pred, "SpatRaster") 83 | }) 84 | 85 | # parallel raster predict ------------------------------------------------------ 86 | 87 | test_that("parallel execution works with multicore", { 88 | skip_on_os("windows") 89 | # train 90 | stack = generate_stack(list( 91 | numeric_layer("x_1"), 92 | factor_layer("y", levels = c("a", "b"))), 93 | layer_size = 2) 94 | vector = sample_stack(stack, n = 100) 95 | task_train = as_task_classif_st(vector, id = "test_vector", target = "y") 96 | learner = lrn("classif.rpart") 97 | learner$parallel_predict = TRUE 98 | learner$train(task_train) 99 | 100 | # predict 101 | stack$y = NULL 102 | task_predict = as_task_unsupervised(stack, id = 
"test") 103 | with_future("multicore", workers = 2, { 104 | pred = predict_spatial(task_predict, learner, chunksize = 1L) 105 | }) 106 | expect_class(pred, "SpatRaster") 107 | }) 108 | 109 | test_that("parallel execution works with multisession", { 110 | # train 111 | stack = generate_stack(list( 112 | numeric_layer("x_1"), 113 | factor_layer("y", levels = c("a", "b"))), 114 | layer_size = 2) 115 | vector = sample_stack(stack, n = 100) 116 | task_train = as_task_classif_st(vector, id = "test_vector", target = "y") 117 | learner = lrn("classif.rpart") 118 | learner$parallel_predict = TRUE 119 | learner$train(task_train) 120 | 121 | # predict 122 | stack$y = NULL 123 | task_predict = as_task_unsupervised(stack, id = "test") 124 | with_future("multisession", workers = 2, { 125 | pred = predict_spatial(task_predict, learner, chunksize = 1L) 126 | }) 127 | expect_class(pred, "SpatRaster") 128 | }) 129 | 130 | test_that("parallel execution works with callr", { 131 | # train 132 | stack = generate_stack(list( 133 | numeric_layer("x_1"), 134 | factor_layer("y", levels = c("a", "b"))), 135 | layer_size = 2) 136 | vector = sample_stack(stack, n = 100) 137 | task_train = as_task_classif_st(vector, id = "test_vector", target = "y") 138 | learner = lrn("classif.rpart") 139 | learner$parallel_predict = TRUE 140 | learner$train(task_train) 141 | 142 | # predict 143 | stack$y = NULL 144 | task_predict = as_task_unsupervised(stack, id = "test") 145 | with_future(future.callr::callr, workers = 2, { 146 | pred = predict_spatial(task_predict, learner, chunksize = 1L) 147 | }) 148 | expect_class(pred, "SpatRaster") 149 | }) 150 | 151 | # raster output formats -------------------------------------------------------- 152 | 153 | test_that("stars output works", { 154 | skip_if_not_installed("stars") 155 | skip_on_os("mac") 156 | 157 | # train 158 | stack = generate_stack(list( 159 | numeric_layer("x_1"), 160 | numeric_layer("y")), 161 | layer_size = 2) 162 | terra::crs(stack) = 
"EPSG:4326" 163 | vector = sample_stack(stack, n = 100) 164 | task_train = as_task_regr_st(vector, id = "test_vector", target = "y") 165 | learner = lrn("regr.rpart") 166 | learner$train(task_train) 167 | 168 | # predict 169 | stack$y = NULL 170 | task_predict = as_task_unsupervised(stack, id = "test") 171 | pred = predict_spatial(task_predict, learner, chunksize = 1L, format = "stars") 172 | expect_class(pred, "stars") 173 | }) 174 | 175 | test_that("raster output works", { 176 | skip_if_not_installed("raster") 177 | library(raster) 178 | 179 | # train 180 | stack = generate_stack(list( 181 | numeric_layer("x_1"), 182 | numeric_layer("y")), 183 | layer_size = 2) 184 | vector = sample_stack(stack, n = 100) 185 | task_train = as_task_regr_st(vector, id = "test_vector", target = "y") 186 | learner = lrn("regr.rpart") 187 | learner$train(task_train) 188 | 189 | # predict 190 | stack$y = NULL 191 | task_predict = as_task_unsupervised(stack, id = "test") 192 | pred = predict_spatial(task_predict, learner, chunksize = 1L, format = "raster") 193 | expect_class(pred, "RasterLayer") 194 | }) 195 | 196 | # raster with missing values --------------------------------------------------- 197 | 198 | test_that("prediction on classification task works with missing values", { 199 | skip_if_not_installed("mlr3learners") 200 | require_namespaces("mlr3learners") 201 | 202 | # train task 203 | stack = generate_stack(list( 204 | numeric_layer("x_1"), 205 | factor_layer("y", levels = c("a", "b"))), 206 | dimension = 100) 207 | vector = sample_stack(stack, n = 100) 208 | task_train = as_task_classif_st(vector, id = "test_vector", target = "y") 209 | learner = lrn("classif.ranger") 210 | learner$train(task_train) 211 | 212 | # predict task 213 | stack$y = NULL 214 | stack = mask_stack(stack) 215 | task_predict = as_task_unsupervised(stack, id = "test") 216 | pred = predict_spatial(task_predict, learner, chunksize = 1L) 217 | expect_class(pred, "SpatRaster") 218 | 
expect_true(all(is.na(terra::values(pred[["y"]])[seq(10)]))) 219 | expect_numeric(terra::values(pred[["y"]]), any.missing = TRUE, all.missing = FALSE) 220 | }) 221 | 222 | test_that("prediction on regression task works with missing values", { 223 | skip_if_not_installed("mlr3learners") 224 | require_namespaces("mlr3learners") 225 | 226 | # train task 227 | stack = generate_stack(list( 228 | numeric_layer("x_1"), 229 | numeric_layer("y")), 230 | dimension = 100) 231 | vector = sample_stack(stack, n = 10) 232 | task_train = as_task_regr_st(vector, id = "test_vector", target = "y") 233 | learner = lrn("regr.ranger") 234 | learner$train(task_train) 235 | 236 | # predict task 237 | stack$y = NULL 238 | stack = mask_stack(stack) 239 | task_predict = as_task_unsupervised(stack, id = "test") 240 | pred = predict_spatial(task_predict, learner, chunksize = 1L) 241 | expect_true(all(is.na(terra::values(pred[["y"]])[seq(10)]))) 242 | expect_numeric(terra::values(pred[["y"]]), any.missing = TRUE, all.missing = FALSE) 243 | }) 244 | 245 | # vector prediction ------------------------------------------------------------ 246 | 247 | test_that("prediction are written to sf vector", { 248 | task = tsk("leipzig") 249 | learner = lrn("classif.rpart") 250 | learner$train(task) 251 | 252 | vector = sf::read_sf(system.file("extdata", "leipzig_points.gpkg", package = "mlr3spatial"), stringsAsFactors = TRUE) 253 | vector$land_cover = NULL 254 | task_predict = as_task_unsupervised(vector) 255 | pred = predict_spatial(task_predict, learner) 256 | expect_class(pred, "sf") 257 | expect_equal(nrow(pred), 97) 258 | expect_named(pred, c("land_cover", "geometry")) 259 | expect_class(pred$geometry, "sfc") 260 | }) 261 | -------------------------------------------------------------------------------- /vignettes/.gitignore: -------------------------------------------------------------------------------- 1 | *.html 2 | *.R 3 | 
-------------------------------------------------------------------------------- /vignettes/benchmark.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Benchmark Parallel Predictions" 3 | output: rmarkdown::html_vignette 4 | vignette: > 5 | %\VignetteIndexEntry{Benchmark Parallel Predictions} 6 | %\VignetteEngine{knitr::rmarkdown} 7 | %\VignetteEncoding{UTF-8} 8 | --- 9 | 10 | 11 | 12 | This benchmark was run on a MacBook Pro 2021 M1 Pro. 13 | If you rerun this benchmark on your machine, your results will differ - both in relative and absolute values, and possibly by a lot. 14 | This is due to the multicore performance of your CPU and how efficiently the overhead introduced by parallelization is handled (i.e. splitting and combining the chunks). 15 | 16 | {terra} uses socket-based parallelization by default (which the user cannot change). 17 | The equivalent in {future} is `plan("multisession")`. 18 | Using `plan(multicore)` on UNIX-based systems might speed up the {mlr3} approach even more. 19 | This might also be a major part of the speedup of the `mlr3-all-cores` setting. 20 | 21 | Also note that using all available cores does not always result in faster processing. 22 | The parallelization overhead can be substantial and for small tasks you might be better off using fewer cores. 23 | Nevertheless, if your processing time is in the range of minutes or higher, you will usually be better off using all cores (if possible). 
24 | 25 | 26 | ```r 27 | library(mlr3spatial) 28 | library(mlr3learners) 29 | library(future) 30 | ``` 31 | 32 | ## Preparations 33 | 34 | 35 | ```r 36 | stack = generate_stack(list( 37 | numeric_layer("x_1"), 38 | factor_layer("y", levels = c("a", "b"))), 39 | layer_size = 10) 40 | vector = sample_stack(stack, n = 500) 41 | task_train = as_task_classif_st(vector, id = "test_vector", target = "y") 42 | learner = lrn("classif.ranger", num.threads = 1) 43 | learner$train(task_train) 44 | ``` 45 | 46 | 47 | ```r 48 | terra_rf = ranger::ranger(y ~ ., data = task_train$data(), num.threads = 1) 49 | ``` 50 | 51 | 52 | ```r 53 | stack$y = NULL 54 | task_predict = as_task_unsupervised(stack, id = "test") 55 | learner$parallel_predict = TRUE 56 | ``` 57 | 58 | ## Benchmark 59 | 60 | 61 | ```r 62 | bm = bench::mark( 63 | 64 | "01-mlr3-4-cores" = { 65 | plan(multicore, workers = 4) 66 | pred = predict_spatial(task_predict, learner, chunksize = 10L) 67 | }, 68 | 69 | "02-terra-4-cores" = { 70 | library(terra) 71 | pred = predict(stack, terra_rf, cores = 4, cpkgs = "ranger", 72 | fun = function(model, ...) predict(model, ...)$predictions) 73 | }, 74 | 75 | "03-mlr3-all-cores" = { 76 | plan(multicore) 77 | pred = predict_spatial(task_predict, learner, chunksize = 10L) 78 | }, 79 | 80 | "04-terra-all-cores" = { 81 | library(terra) 82 | pred = predict(stack, terra_rf, cores = parallelly::availableCores(), cpkgs = "ranger", 83 | fun = function(model, ...) 
predict(model, ...)$predictions) 84 | }, 85 | 86 | check = FALSE, filter_gc = FALSE, min_iterations = 3, 87 | max_iterations = 3, memory = FALSE) 88 | 89 | bm$`itr/sec` = NULL 90 | bm$result = NULL 91 | bm$`gc/sec` = NULL 92 | bm$memory = NULL 93 | bm$mem_alloc = NULL 94 | 95 | print(bm) 96 | #> # A tibble: 4 × 8 97 | #> expression min median n_itr n_gc total_time time gc 98 | #> 99 | #> 1 01-mlr3-4-cores 20.7s 21.4s 3 57 1.07m 100 | #> 2 02-terra-4-cores 21.9s 23.1s 3 31 1.14m 101 | #> 3 03-mlr3-all-cores 12.5s 12.5s 3 51 37.45s 102 | #> 4 04-terra-all-cores 32.6s 32.7s 3 77 1.64m 103 | ``` 104 | 105 | 106 | ```r 107 | library(ggplot2) 108 | autoplot(bm, type = "ridge") 109 | #> Picking joint bandwidth of 0.00417 110 | ``` 111 | 112 | ![](./plot-benchmark-1.png) 113 | 114 | 115 | -------------------------------------------------------------------------------- /vignettes/benchmark.Rmd.orig: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Benchmark Parallel Predictions" 3 | output: rmarkdown::html_vignette 4 | vignette: > 5 | %\VignetteIndexEntry{Benchmark Parallel Predictions} 6 | %\VignetteEngine{knitr::rmarkdown} 7 | %\VignetteEncoding{UTF-8} 8 | --- 9 | 10 | ```{r, include = FALSE} 11 | knitr::opts_chunk$set( 12 | collapse = TRUE, 13 | comment = "#>", 14 | fig.path = "./" 15 | ) 16 | lgr::get_logger("bbotk")$set_threshold("warn") 17 | lgr::get_logger("mlr3")$set_threshold("warn") 18 | lgr::get_logger("mlr3spatial")$set_threshold("warn") 19 | ``` 20 | 21 | This benchmark was run on a MacBook Pro 2021 M1 Pro. 22 | If you rerun this benchmark on your machine, your results will differ - both in relative and absolute values and maybe by a lot. 23 | This is due to the multicore performance component of your CPU and how efficiently the overhead introduced by parallelization is handled (i.e. splitting and combining the chunks). 
24 | 25 | {terra} uses socket-based parallelization by default (which the user cannot change). 26 | The equivalent in {future} is `plan("multisession")`. 27 | Using `plan(multicore)` on UNIX-based systems might speed up the {mlr3} approach even more. 28 | This might also be a major part of the speedup of the `mlr3-all-cores` setting. 29 | 30 | Also note that using all available cores does not always result in faster processing. 31 | The parallelization overhead can be substantial and for small tasks you might be better off using fewer cores. 32 | Nevertheless, if your processing time is in the range of minutes or higher, you might usually be better off using all cores (if possible). 33 | 34 | ```{r setup} 35 | library(mlr3spatial) 36 | library(mlr3learners) 37 | library(future) 38 | ``` 39 | 40 | ## Preparations 41 | 42 | ```{r} 43 | stack = generate_stack(list( 44 | numeric_layer("x_1"), 45 | factor_layer("y", levels = c("a", "b"))), 46 | layer_size = 10) 47 | vector = sample_stack(stack, n = 500) 48 | task_train = as_task_classif_st(vector, id = "test_vector", target = "y") 49 | learner = lrn("classif.ranger", num.threads = 1) 50 | learner$train(task_train) 51 | ``` 52 | 53 | ```{r} 54 | terra_rf = ranger::ranger(y ~ ., data = task_train$data(), num.threads = 1) 55 | ``` 56 | 57 | ```{r} 58 | stack$y = NULL 59 | task_predict = as_task_unsupervised(stack, id = "test") 60 | learner$parallel_predict = TRUE 61 | ``` 62 | 63 | ## Benchmark 64 | 65 | ```{r} 66 | bm = bench::mark( 67 | 68 | "01-mlr3-4-cores" = { 69 | plan(multicore, workers = 4) 70 | pred = predict_spatial(task_predict, learner, chunksize = 10L) 71 | }, 72 | 73 | "02-terra-4-cores" = { 74 | library(terra) 75 | pred = predict(stack, terra_rf, cores = 4, cpkgs = "ranger", 76 | fun = function(model, ...) 
predict(model, ...)$predictions) 77 | }, 78 | 79 | "03-mlr3-all-cores" = { 80 | plan(multicore) 81 | pred = predict_spatial(task_predict, learner, chunksize = 10L) 82 | }, 83 | 84 | "04-terra-all-cores" = { 85 | library(terra) 86 | pred = predict(stack, terra_rf, cores = parallelly::availableCores(), cpkgs = "ranger", 87 | fun = function(model, ...) predict(model, ...)$predictions) 88 | }, 89 | 90 | check = FALSE, filter_gc = FALSE, min_iterations = 3, 91 | max_iterations = 3, memory = FALSE) 92 | 93 | bm$`itr/sec` = NULL 94 | bm$result = NULL 95 | bm$`gc/sec` = NULL 96 | bm$memory = NULL 97 | bm$mem_alloc = NULL 98 | 99 | print(bm) 100 | ``` 101 | 102 | ```{r plot-benchmark, fig.cap=""} 103 | library(ggplot2) 104 | autoplot(bm, type = "ridge") 105 | ``` 106 | 107 | ```{r save-plot, echo = FALSE, message = FALSE} 108 | ggsave("plot-benchmark-1.png") 109 | ``` 110 | -------------------------------------------------------------------------------- /vignettes/plot-benchmark-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlr-org/mlr3spatial/f8e417455697159103f126ae89fc858a70861af8/vignettes/plot-benchmark-1.png -------------------------------------------------------------------------------- /vignettes/precompile.R: -------------------------------------------------------------------------------- 1 | # Must manually move image files from eia/ to eia/vignettes/ after knit 2 | 3 | time = Sys.time() 4 | library(knitr) 5 | knit(here::here("vignettes/benchmark.Rmd.orig"), here::here("vignettes/benchmark.Rmd")) 6 | # unlink(here::here("plot-benchmark-small-1.png")) 7 | unlink(here::here("plot-benchmark-1.png")) 8 | Sys.time() - time 9 | --------------------------------------------------------------------------------