├── .github ├── .gitignore └── workflows │ ├── pkgdown.yaml │ ├── R-CMD-check.yaml │ └── rhub.yaml ├── vignettes ├── .gitignore ├── main2_ssr.Rmd.orig ├── si2_sct.Rmd.orig ├── main2_ssr.Rmd ├── main4_gpc.Rmd.orig ├── main1_pkgintro.Rmd.orig ├── main6_scpcm.Rmd.orig └── si2_sct.Rmd ├── src ├── .gitignore ├── Makevars ├── Makevars.win ├── DeLongPlacements.h ├── NumericUtils.h ├── CppCombn.h ├── Entropy.h ├── SpatialBlockBootstrap.h ├── CppDistances.h ├── SignatureProjection.h ├── MultiViewEmbedding.h ├── CrossMappingCardinality.h ├── DeLongPlacements.cpp ├── SpatialBlockBootstrap.cpp ├── CppStats.h ├── GCCM4Lattice.h ├── IntersectionCardinality.h ├── FalseNearestNeighbors.h ├── SGC4Grid.h ├── SGC4Lattice.h └── DataStruct.h ├── inst ├── case │ ├── npp.tif │ └── columbus.gpkg └── CITATION ├── cran-comments.md ├── man ├── figures │ ├── spEDM.png │ ├── gccm │ │ ├── fig1-1.png │ │ └── fig2-1.png │ ├── gcmc │ │ ├── fig1-1.png │ │ └── fig2-1.png │ ├── gpc │ │ ├── fig1-1.png │ │ └── fig2-1.png │ ├── slm │ │ ├── slm1-1.png │ │ ├── slm2-1.png │ │ └── sim_trispecies-1.png │ └── ssr │ │ ├── fig1-1.png │ │ └── fig2-1.png ├── detectThreads.Rd ├── embedded.Rd ├── ic.Rd ├── fnn.Rd ├── sc.test.Rd ├── multiview.Rd ├── simplex.Rd ├── gcmc.Rd ├── slm.Rd ├── pc.Rd ├── smap.Rd ├── gccm.Rd └── gpc.Rd ├── R ├── zzz.R ├── spEDM-package.R ├── detectThreads.R ├── globals.R ├── Agenerics.R ├── ic.R ├── embedded.R ├── fnn.R ├── multiview.R ├── smap.R ├── pc.R ├── simplex.R ├── sctest.R ├── slm.R ├── gpc.R ├── gccm.R └── gcmc.R ├── pkgdown ├── favicon │ ├── favicon.ico │ ├── favicon-16x16.png │ ├── favicon-32x32.png │ ├── favicon-48x48.png │ ├── favicon-96x96.png │ ├── apple-touch-icon.png │ ├── apple-touch-icon-60x60.png │ ├── apple-touch-icon-76x76.png │ ├── apple-touch-icon-120x120.png │ ├── apple-touch-icon-152x152.png │ ├── apple-touch-icon-180x180.png │ ├── web-app-manifest-192x192.png │ ├── web-app-manifest-512x512.png │ └── site.webmanifest └── extra.css ├── .gitignore ├── .Rbuildignore 
├── spEDM.Rproj ├── NAMESPACE ├── DESCRIPTION ├── _pkgdown.yml ├── README.md └── README.Rmd /.github/.gitignore: -------------------------------------------------------------------------------- 1 | *.html 2 | -------------------------------------------------------------------------------- /vignettes/.gitignore: -------------------------------------------------------------------------------- 1 | *.html 2 | -------------------------------------------------------------------------------- /src/.gitignore: -------------------------------------------------------------------------------- 1 | *.o 2 | *.so 3 | *.dll 4 | -------------------------------------------------------------------------------- /inst/case/npp.tif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stscl/spEDM/HEAD/inst/case/npp.tif -------------------------------------------------------------------------------- /cran-comments.md: -------------------------------------------------------------------------------- 1 | ## R CMD check results 2 | 3 | 0 errors | 0 warnings | 0 notes 4 | -------------------------------------------------------------------------------- /inst/case/columbus.gpkg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stscl/spEDM/HEAD/inst/case/columbus.gpkg -------------------------------------------------------------------------------- /man/figures/spEDM.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stscl/spEDM/HEAD/man/figures/spEDM.png -------------------------------------------------------------------------------- /R/zzz.R: -------------------------------------------------------------------------------- 1 | .onLoad = function(...) 
{ 2 | loadNamespace("sf") 3 | loadNamespace("terra") 4 | } 5 | -------------------------------------------------------------------------------- /man/figures/gccm/fig1-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stscl/spEDM/HEAD/man/figures/gccm/fig1-1.png -------------------------------------------------------------------------------- /man/figures/gccm/fig2-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stscl/spEDM/HEAD/man/figures/gccm/fig2-1.png -------------------------------------------------------------------------------- /man/figures/gcmc/fig1-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stscl/spEDM/HEAD/man/figures/gcmc/fig1-1.png -------------------------------------------------------------------------------- /man/figures/gcmc/fig2-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stscl/spEDM/HEAD/man/figures/gcmc/fig2-1.png -------------------------------------------------------------------------------- /man/figures/gpc/fig1-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stscl/spEDM/HEAD/man/figures/gpc/fig1-1.png -------------------------------------------------------------------------------- /man/figures/gpc/fig2-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stscl/spEDM/HEAD/man/figures/gpc/fig2-1.png -------------------------------------------------------------------------------- /man/figures/slm/slm1-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stscl/spEDM/HEAD/man/figures/slm/slm1-1.png 
-------------------------------------------------------------------------------- /man/figures/slm/slm2-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stscl/spEDM/HEAD/man/figures/slm/slm2-1.png -------------------------------------------------------------------------------- /man/figures/ssr/fig1-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stscl/spEDM/HEAD/man/figures/ssr/fig1-1.png -------------------------------------------------------------------------------- /man/figures/ssr/fig2-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stscl/spEDM/HEAD/man/figures/ssr/fig2-1.png -------------------------------------------------------------------------------- /pkgdown/favicon/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stscl/spEDM/HEAD/pkgdown/favicon/favicon.ico -------------------------------------------------------------------------------- /pkgdown/favicon/favicon-16x16.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stscl/spEDM/HEAD/pkgdown/favicon/favicon-16x16.png -------------------------------------------------------------------------------- /pkgdown/favicon/favicon-32x32.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stscl/spEDM/HEAD/pkgdown/favicon/favicon-32x32.png -------------------------------------------------------------------------------- /pkgdown/favicon/favicon-48x48.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stscl/spEDM/HEAD/pkgdown/favicon/favicon-48x48.png 
-------------------------------------------------------------------------------- /pkgdown/favicon/favicon-96x96.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stscl/spEDM/HEAD/pkgdown/favicon/favicon-96x96.png -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .Rproj.user 2 | .Rhistory 3 | .Rdata 4 | .httr-oauth 5 | .DS_Store 6 | .quarto 7 | inst/doc 8 | docs 9 | -------------------------------------------------------------------------------- /man/figures/slm/sim_trispecies-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stscl/spEDM/HEAD/man/figures/slm/sim_trispecies-1.png -------------------------------------------------------------------------------- /pkgdown/favicon/apple-touch-icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stscl/spEDM/HEAD/pkgdown/favicon/apple-touch-icon.png -------------------------------------------------------------------------------- /pkgdown/favicon/apple-touch-icon-60x60.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stscl/spEDM/HEAD/pkgdown/favicon/apple-touch-icon-60x60.png -------------------------------------------------------------------------------- /pkgdown/favicon/apple-touch-icon-76x76.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stscl/spEDM/HEAD/pkgdown/favicon/apple-touch-icon-76x76.png -------------------------------------------------------------------------------- /R/spEDM-package.R: -------------------------------------------------------------------------------- 1 | ## usethis namespace: start 2 | #' @useDynLib spEDM, .registration 
= TRUE 3 | ## usethis namespace: end 4 | NULL 5 | -------------------------------------------------------------------------------- /pkgdown/favicon/apple-touch-icon-120x120.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stscl/spEDM/HEAD/pkgdown/favicon/apple-touch-icon-120x120.png -------------------------------------------------------------------------------- /pkgdown/favicon/apple-touch-icon-152x152.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stscl/spEDM/HEAD/pkgdown/favicon/apple-touch-icon-152x152.png -------------------------------------------------------------------------------- /pkgdown/favicon/apple-touch-icon-180x180.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stscl/spEDM/HEAD/pkgdown/favicon/apple-touch-icon-180x180.png -------------------------------------------------------------------------------- /pkgdown/favicon/web-app-manifest-192x192.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stscl/spEDM/HEAD/pkgdown/favicon/web-app-manifest-192x192.png -------------------------------------------------------------------------------- /pkgdown/favicon/web-app-manifest-512x512.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stscl/spEDM/HEAD/pkgdown/favicon/web-app-manifest-512x512.png -------------------------------------------------------------------------------- /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^spEDM\.Rproj$ 2 | ^\.Rproj\.user$ 3 | ^README\.Rmd$ 4 | ^_pkgdown\.yml$ 5 | ^docs$ 6 | ^pkgdown$ 7 | ^\.github$ 8 | ^build-vignettes\.md$ 9 | ^cran-comments\.md$ 10 | ^CRAN-SUBMISSION$ 11 | 
-------------------------------------------------------------------------------- /R/detectThreads.R: -------------------------------------------------------------------------------- 1 | #' detect the number of available threads 2 | #' 3 | #' @return An integer 4 | #' @export 5 | #' 6 | #' @examples 7 | #' detectThreads() 8 | #' 9 | detectThreads = \() { 10 | return(DetectMaxNumThreads()) 11 | } 12 | -------------------------------------------------------------------------------- /R/globals.R: -------------------------------------------------------------------------------- 1 | utils::globalVariables(c("libsizes","x_xmap_y_mean", "y_xmap_x_mean", 2 | "x_xmap_y_lower", "x_xmap_y_upper", 3 | "y_xmap_x_lower", "y_xmap_x_upper", 4 | "x_xmap_y_sig", "y_xmap_x_sig", 5 | "causality", "type", "q50")) 6 | -------------------------------------------------------------------------------- /inst/CITATION: -------------------------------------------------------------------------------- 1 | citHeader("To cite package spEDM in publications, please use:") 2 | 3 | bibentry( 4 | bibtype = "Manual", 5 | title = "{spEDM: Spatial Empirical Dynamic Modeling}", 6 | author = "Wenbo Lv", 7 | year = "2025", 8 | note = "R package version 1.10", 9 | doi = "10.32614/CRAN.package.spEDM" 10 | ) 11 | -------------------------------------------------------------------------------- /src/Makevars: -------------------------------------------------------------------------------- 1 | #CXX_STD = CXX11 2 | #PKG_CXXFLAGS = $(SHLIB_OPENMP_CXXFLAGS) 3 | #PKG_LIBS = $(SHLIB_OPENMP_CXXFLAGS) $(LAPACK_LIBS) $(BLAS_LIBS) $(FLIBS) 4 | 5 | #PKG_LIBS = `"$(R_HOME)/bin/Rscript" -e "RcppThread::LdFlags()"` 6 | 7 | PKG_CXXFLAGS = -DARMA_USE_CURRENT 8 | PKG_LIBS = $(LAPACK_LIBS) $(BLAS_LIBS) $(FLIBS) \ 9 | `"$(R_HOME)/bin/Rscript" -e "RcppThread::LdFlags()"` 10 | -------------------------------------------------------------------------------- /src/Makevars.win: 
-------------------------------------------------------------------------------- 1 | #CXX_STD = CXX11 2 | #PKG_CXXFLAGS = $(SHLIB_OPENMP_CXXFLAGS) 3 | #PKG_LIBS = $(SHLIB_OPENMP_CXXFLAGS) $(LAPACK_LIBS) $(BLAS_LIBS) $(FLIBS) 4 | 5 | #PKG_LIBS = `"$(R_HOME)/bin/Rscript" -e "RcppThread::LdFlags()"` 6 | 7 | PKG_CXXFLAGS = -DARMA_USE_CURRENT 8 | PKG_LIBS = $(LAPACK_LIBS) $(BLAS_LIBS) $(FLIBS) \ 9 | `"$(R_HOME)/bin/Rscript" -e "RcppThread::LdFlags()"` 10 | -------------------------------------------------------------------------------- /man/detectThreads.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/detectThreads.R 3 | \name{detectThreads} 4 | \alias{detectThreads} 5 | \title{detect the number of available threads} 6 | \usage{ 7 | detectThreads() 8 | } 9 | \value{ 10 | An integer 11 | } 12 | \description{ 13 | detect the number of available threads 14 | } 15 | \examples{ 16 | detectThreads() 17 | 18 | } 19 | -------------------------------------------------------------------------------- /R/Agenerics.R: -------------------------------------------------------------------------------- 1 | register_generic = \(name, def = NULL) { 2 | if (!methods::isGeneric(name)) { 3 | if (is.null(def)) { 4 | def = eval(bquote(function(data, ...) 
standardGeneric(.(name)))) 5 | } 6 | methods::setGeneric(name, def) 7 | } 8 | } 9 | 10 | for (gen in c("embedded", "fnn", "slm", "simplex", "smap", "ic", "pc", 11 | "multiview", "sc.test", "gccm", "gpc", "gcmc", "scpcm")) { 12 | register_generic(gen) 13 | } 14 | -------------------------------------------------------------------------------- /pkgdown/favicon/site.webmanifest: -------------------------------------------------------------------------------- 1 | { 2 | "name": "", 3 | "short_name": "", 4 | "icons": [ 5 | { 6 | "src": "/web-app-manifest-192x192.png", 7 | "sizes": "192x192", 8 | "type": "image/png", 9 | "purpose": "maskable" 10 | }, 11 | { 12 | "src": "/web-app-manifest-512x512.png", 13 | "sizes": "512x512", 14 | "type": "image/png", 15 | "purpose": "maskable" 16 | } 17 | ], 18 | "theme_color": "#ffffff", 19 | "background_color": "#ffffff", 20 | "display": "standalone" 21 | } -------------------------------------------------------------------------------- /spEDM.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | ProjectId: 4ef0e75d-2307-4f5d-9fc9-e08b517989ab 3 | 4 | RestoreWorkspace: No 5 | SaveWorkspace: No 6 | AlwaysSaveHistory: Default 7 | 8 | EnableCodeIndexing: Yes 9 | UseSpacesForTab: Yes 10 | NumSpacesForTab: 2 11 | Encoding: UTF-8 12 | 13 | RnwWeave: Sweave 14 | LaTeX: XeLaTeX 15 | 16 | AutoAppendNewline: Yes 17 | StripTrailingWhitespace: Yes 18 | LineEndingConversion: Posix 19 | 20 | BuildType: Package 21 | PackageUseDevtools: Yes 22 | PackageInstallArgs: --no-multiarch --with-keep.source 23 | PackageRoxygenize: rd,collate,namespace 24 | -------------------------------------------------------------------------------- /pkgdown/extra.css: -------------------------------------------------------------------------------- 1 | @import url('https://fonts.googleapis.com/css?family=Raleway|Ubuntu+Mono'); 2 | 3 | body{ 4 | font-family: 'Raleway', sans-serif; 5 | } 6 | 7 | code{ 8 | font-family: 
'Ubuntu Mono', monospace; 9 | } 10 | 11 | .navbar-brand.me-2 { 12 | color: #a19586; 13 | } 14 | 15 | h1,h2,h3 { 16 | font-weight: bold; 17 | color: #00001c; 18 | } 19 | 20 | a, code a { 21 | color: #a19586; 22 | } 23 | 24 | a:hover, code a:hover { 25 | color: #24e1bd; 26 | } 27 | 28 | a:not([href]) { 29 | color: inherit; 30 | } 31 | 32 | a:not([href]):hover { 33 | color: inherit; 34 | } 35 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | S3method(plot,ccm_res) 4 | S3method(plot,cmc_res) 5 | S3method(plot,pcm_res) 6 | S3method(plot,rpc_res) 7 | S3method(print,ccm_res) 8 | S3method(print,cmc_res) 9 | S3method(print,pc_res) 10 | S3method(print,pcm_res) 11 | S3method(print,rpc_res) 12 | S3method(print,sc_res) 13 | S3method(print,xmap_self) 14 | export(detectThreads) 15 | exportMethods(embedded) 16 | exportMethods(fnn) 17 | exportMethods(gccm) 18 | exportMethods(gcmc) 19 | exportMethods(gpc) 20 | exportMethods(ic) 21 | exportMethods(multiview) 22 | exportMethods(pc) 23 | exportMethods(sc.test) 24 | exportMethods(simplex) 25 | exportMethods(slm) 26 | exportMethods(smap) 27 | useDynLib(spEDM, .registration = TRUE) 28 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: spEDM 2 | Title: Spatial Empirical Dynamic Modeling 3 | Version: 1.10 4 | Authors@R: 5 | person(given = "Wenbo", family = "Lv", 6 | email = "lyu.geosocial@gmail.com", 7 | role = c("aut", "cre", "cph"), 8 | comment = c(ORCID = "0009-0002-6003-3800")) 9 | Description: Inferring causation from spatial cross-sectional data through empirical dynamic modeling (EDM), with methodological extensions including geographical convergent cross mapping from Gao et al. 
(2023) , as well as the spatial causality test following the approach of Herrera et al. (2016) , together with geographical pattern causality proposed in Zhang et al. (2025) . 10 | License: GPL-3 11 | Encoding: UTF-8 12 | Roxygen: list(markdown = TRUE) 13 | RoxygenNote: 7.3.3 14 | URL: https://stscl.github.io/spEDM/, https://github.com/stscl/spEDM 15 | BugReports: https://github.com/stscl/spEDM/issues 16 | Depends: 17 | R (>= 4.1.0) 18 | LinkingTo: 19 | Rcpp, 20 | RcppThread, 21 | RcppArmadillo 22 | Imports: 23 | dplyr, 24 | ggplot2, 25 | methods, 26 | sdsfun (>= 0.7.0), 27 | sf, 28 | terra 29 | Suggests: 30 | knitr, 31 | Rcpp, 32 | RcppThread, 33 | RcppArmadillo, 34 | rmarkdown, 35 | readr, 36 | plot3D 37 | VignetteBuilder: knitr 38 | -------------------------------------------------------------------------------- /.github/workflows/pkgdown.yaml: -------------------------------------------------------------------------------- 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples 2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help 3 | on: 4 | push: 5 | branches: [main, master] 6 | pull_request: 7 | release: 8 | types: [published] 9 | workflow_dispatch: 10 | 11 | name: pkgdown 12 | 13 | permissions: read-all 14 | 15 | jobs: 16 | pkgdown: 17 | runs-on: ubuntu-latest 18 | # Only restrict concurrency for non-PR jobs 19 | concurrency: 20 | group: pkgdown-${{ github.event_name != 'pull_request' || github.run_id }} 21 | env: 22 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 23 | permissions: 24 | contents: write 25 | steps: 26 | - uses: actions/checkout@v4 27 | 28 | - uses: r-lib/actions/setup-pandoc@v2 29 | 30 | - uses: r-lib/actions/setup-r@v2 31 | with: 32 | use-public-rspm: true 33 | 34 | - uses: r-lib/actions/setup-r-dependencies@v2 35 | with: 36 | extra-packages: any::pkgdown, local::. 
37 | needs: website 38 | 39 | - name: Build site 40 | run: pkgdown::build_site_github_pages(new_process = FALSE, install = FALSE) 41 | shell: Rscript {0} 42 | 43 | - name: Deploy to GitHub pages 🚀 44 | if: github.event_name != 'pull_request' 45 | uses: JamesIves/github-pages-deploy-action@v4.5.0 46 | with: 47 | clean: false 48 | branch: gh-pages 49 | folder: docs 50 | -------------------------------------------------------------------------------- /.github/workflows/R-CMD-check.yaml: -------------------------------------------------------------------------------- 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples 2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help 3 | on: 4 | push: 5 | branches: [main, master] 6 | pull_request: 7 | 8 | name: R-CMD-check 9 | 10 | permissions: read-all 11 | 12 | jobs: 13 | R-CMD-check: 14 | runs-on: ${{ matrix.config.os }} 15 | 16 | name: ${{ matrix.config.os }} (${{ matrix.config.r }}) 17 | 18 | strategy: 19 | fail-fast: false 20 | matrix: 21 | config: 22 | - {os: macos-latest, r: 'release'} 23 | - {os: windows-latest, r: 'release'} 24 | - {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'} 25 | - {os: ubuntu-latest, r: 'release'} 26 | - {os: ubuntu-latest, r: 'oldrel-1'} 27 | 28 | env: 29 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 30 | R_KEEP_PKG_SOURCE: yes 31 | 32 | steps: 33 | - uses: actions/checkout@v4 34 | 35 | - uses: r-lib/actions/setup-pandoc@v2 36 | 37 | - uses: r-lib/actions/setup-r@v2 38 | with: 39 | r-version: ${{ matrix.config.r }} 40 | http-user-agent: ${{ matrix.config.http-user-agent }} 41 | use-public-rspm: true 42 | 43 | - uses: r-lib/actions/setup-r-dependencies@v2 44 | with: 45 | extra-packages: any::rcmdcheck 46 | needs: check 47 | 48 | - uses: r-lib/actions/check-r-package@v2 49 | with: 50 | upload-snapshots: true 51 | build_args: 'c("--no-manual","--compact-vignettes=gs+qpdf")' 52 | 
-------------------------------------------------------------------------------- /src/DeLongPlacements.h: -------------------------------------------------------------------------------- 1 | #ifndef DeLongPlacements_H 2 | #define DeLongPlacements_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include "DataStruct.h" 10 | 11 | /** 12 | * @brief Computes DeLong placements for ROC analysis. 13 | * 14 | * This function implements the DeLong method to calculate placements for cases and controls 15 | * in the context of ROC (Receiver Operating Characteristic) curve analysis. The method is 16 | * used for non-parametric estimation of the area under the curve (AUC) and supports handling 17 | * tied values efficiently. 18 | * 19 | * The function accepts two sets of data points: `cases` and `controls`. Depending on the 20 | * specified `direction` (either ">" or "<"), it processes the data accordingly by inverting 21 | * the sign of the input values if necessary. 22 | * 23 | * @reference https://github.com/xrobin/pROC/blob/master/src/delong.cpp 24 | * 25 | * @param cases A vector of numeric values representing positive cases. 26 | * @param controls A vector of numeric values representing negative controls. 27 | * @param direction A string indicating the comparison direction. If set to ">", values are inverted. 28 | * 29 | * @return A structure containing: 30 | * - theta: The estimated AUC value. 31 | * - X: A vector of normalized placement values for cases. 32 | * - Y: A vector of normalized placement values for controls. 
33 | */ 34 | DeLongPlacementsRes CppDeLongPlacements(const std::vector& cases, 35 | const std::vector& controls, 36 | const std::string& direction); 37 | 38 | #endif // DeLongPlacements_H 39 | -------------------------------------------------------------------------------- /_pkgdown.yml: -------------------------------------------------------------------------------- 1 | url: https://stscl.github.io/spEDM/ 2 | template: 3 | bootstrap: 5 4 | math-rendering: mathjax 5 | light-switch: true 6 | 7 | navbar: 8 | structure: 9 | right: [search, github] 10 | left: [reference, articles, news] 11 | bg: custom 12 | components: 13 | articles: 14 | text: Vignettes 15 | menu: 16 | - text: 1. Introduction to the spEDM package 17 | href: articles/main1_pkgintro.html 18 | - text: 2. State Space Reconstruction 19 | href: articles/main2_ssr.html 20 | - text: 3. Geographical Convergent Cross Mapping 21 | href: articles/main3_gccm.html 22 | - text: 4. Geographical Pattern Causality 23 | href: articles/main4_gpc.html 24 | - text: 5. Geographical Cross Mapping Cardinality 25 | href: articles/main5_gcmc.html 26 | - text: SI1. Spatial Logistic Map 27 | href: articles/si1_slm.html 28 | - text: SI2. 
Spatial Causality Test 29 | href: articles/si2_sct.html 30 | 31 | home: 32 | title: | 33 | spEDM | Spatial Empirical Dynamic Modeling 34 | authors: 35 | Wenbo Lv: 36 | href: https://spatlyu.github.io/ 37 | 38 | reference: 39 | 40 | - title: Spatial Empirical Dynamic Modeling 41 | 42 | - subtitle: State Space Reconstructions 43 | contents: 44 | - embedded 45 | - fnn 46 | 47 | - subtitle: Mutual Cross Mapping 48 | contents: 49 | - simplex 50 | - smap 51 | - ic 52 | - pc 53 | - gccm 54 | - gcmc 55 | - gpc 56 | 57 | - subtitle: Nonlinear forecasting 58 | contents: 59 | - multiview 60 | 61 | - subtitle: Spatiotemporal Chaos 62 | contents: 63 | - slm 64 | 65 | - title: Causality in Information Flux (for Comparison) 66 | contents: 67 | - sc.test 68 | 69 | - title: Miscellaneous Utility Functions 70 | contents: 71 | - detectThreads 72 | -------------------------------------------------------------------------------- /man/embedded.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/embedded.R 3 | \name{embedded} 4 | \alias{embedded} 5 | \alias{embedded,sf-method} 6 | \alias{embedded,SpatRaster-method} 7 | \title{embedding spatial cross sectional data} 8 | \usage{ 9 | \S4method{embedded}{sf}( 10 | data, 11 | target, 12 | E = 3, 13 | tau = 1, 14 | style = 1, 15 | stack = FALSE, 16 | detrend = FALSE, 17 | nb = NULL 18 | ) 19 | 20 | \S4method{embedded}{SpatRaster}( 21 | data, 22 | target, 23 | E = 3, 24 | tau = 1, 25 | style = 1, 26 | stack = FALSE, 27 | detrend = FALSE, 28 | grid.coord = TRUE, 29 | embed.direction = 0 30 | ) 31 | } 32 | \arguments{ 33 | \item{data}{observation data.} 34 | 35 | \item{target}{name of target variable.} 36 | 37 | \item{E}{(optional) embedding dimensions.} 38 | 39 | \item{tau}{(optional) step of spatial lags.} 40 | 41 | \item{style}{(optional) embedding style (\code{0} includes current state, \code{1} excludes it).} 42 | 43 | 
\item{stack}{(optional) whether to stack embeddings.} 44 | 45 | \item{detrend}{(optional) whether to remove the linear trend.} 46 | 47 | \item{nb}{(optional) neighbours list.} 48 | 49 | \item{grid.coord}{(optional) whether to detrend using cell center coordinates (\code{TRUE}) or row/column numbers (\code{FALSE}).} 50 | 51 | \item{embed.direction}{(optional) direction selector for embeddings (\code{0} returns all directions, \code{1-8} correspond to NW, N, NE, W, E, SW, S, SE).} 52 | } 53 | \value{ 54 | A matrix (when \code{stack} is \code{FALSE}) or list. 55 | } 56 | \description{ 57 | embedding spatial cross sectional data 58 | } 59 | \examples{ 60 | columbus = sf::read_sf(system.file("case/columbus.gpkg",package="spEDM")) 61 | v = embedded(columbus,"crime") 62 | v[1:5,] 63 | 64 | npp = terra::rast(system.file("case/npp.tif",package="spEDM")) 65 | r = embedded(npp,"npp") 66 | r[which(!is.na(r),arr.ind = TRUE)[1:5],] 67 | 68 | } 69 | -------------------------------------------------------------------------------- /src/NumericUtils.h: -------------------------------------------------------------------------------- 1 | #ifndef NUMERIC_UTILS_H 2 | #define NUMERIC_UTILS_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | /** 10 | * @file NumericUtils.h 11 | * @brief Utility functions for safe and consistent floating-point operations. 12 | * 13 | * Provides helper functions for: 14 | * - Floating-point comparison with combined relative and absolute tolerance. 15 | * - Portable numeric constants (epsilon and tolerance). 16 | * 17 | * Intended for scientific computation where double precision stability matters. 
// ==============================
// Common numeric constants
// ==============================
constexpr double DOUBLE_EPS     = std::numeric_limits<double>::epsilon(); // ≈ 2.22e-16
constexpr double DOUBLE_TOL_ABS = 1.5e-16;                                // Absolute tolerance
constexpr double DOUBLE_TOL_REL = 1.5e-8;                                 // Relative tolerance

// ==============================
// Floating-point comparison
// ==============================
/**
 * @brief Compare two double values with combined relative and absolute tolerance.
 *
 * For finite inputs the test is
 *   |x - y| <= max(rel_tol * max(|x|, |y|, 1.0), abs_tol)
 *
 * Because the scale factor is clamped to at least 1.0, the relative term is
 * never smaller than rel_tol itself; with the default tolerances it therefore
 * always dominates, and abs_tol only takes effect when a caller passes an
 * abs_tol larger than rel_tol * scale.
 *
 * Non-finite inputs:
 *   - An infinity is equal only to an infinity of the same sign.
 *   - NaN is never equal to anything (including another NaN).
 *
 * @param x       First value
 * @param y       Second value
 * @param rel_tol Relative tolerance (default DOUBLE_TOL_REL)
 * @param abs_tol Absolute tolerance (default DOUBLE_TOL_ABS)
 * @return true if x and y are considered equal within tolerance
 */
inline bool doubleNearlyEqual(double x, double y,
                              double rel_tol = DOUBLE_TOL_REL,
                              double abs_tol = DOUBLE_TOL_ABS) noexcept {
  // Infinities must be decided by exact comparison: the tolerance formula
  // degenerates for them (inf - inf is NaN, and a scale of inf makes the
  // bound inf, so every value would look "nearly equal" to infinity).
  if (std::isinf(x) || std::isinf(y)) {
    return x == y;
  }

  const double diff  = std::fabs(x - y);
  const double scale = std::max({1.0, std::fabs(x), std::fabs(y)});

  // If either input is NaN, diff is NaN and this comparison is false.
  return diff <= std::max(rel_tol * scale, abs_tol);
}
/**
 * @brief Generate all combinations of m elements from a given vector vec.
 *
 * Combinations are produced in lexicographic order of the element positions
 * in vec (for {a, b, c} and m = 2: {a, b}, {a, c}, {b, c}); elements inside
 * each combination keep their original relative order.
 *
 * Edge cases:
 *   - m == 0 yields exactly one (empty) combination.
 *   - m < 0 or m > vec.size() yields an empty result.
 *
 * @tparam T The type of elements in the vector.
 * @param vec The input vector to generate combinations from.
 * @param m The number of elements in each combination.
 * @return std::vector<std::vector<T>> A vector containing all combinations.
 */
template <typename T>
std::vector<std::vector<T>> CppCombn(const std::vector<T>& vec, int m) {
  std::vector<std::vector<T>> result;
  const int vec_size = static_cast<int>(vec.size());

  // No combination exists for an out-of-range m. Rejecting a negative m here
  // also prevents the position bound below from running past the end of vec.
  if (m < 0 || m > vec_size) {
    return result;
  }

  // idx holds the positions of the current combination, strictly increasing.
  std::vector<int> idx(m);
  for (int i = 0; i < m; ++i) {
    idx[i] = i;
  }

  while (true) {
    // Materialise the combination described by idx directly in the result.
    result.emplace_back();
    std::vector<T>& comb = result.back();
    comb.reserve(idx.size());
    for (int pos : idx) {
      comb.push_back(vec[pos]);
    }

    // Find the rightmost slot that has not yet reached its maximum position
    // (slot s can go up to vec_size - m + s).
    int slot = m - 1;
    while (slot >= 0 && idx[slot] == vec_size - m + slot) {
      --slot;
    }
    if (slot < 0) {
      break;  // every slot is at its maximum: enumeration is complete
    }

    // Advance that slot and reset everything to its right to the smallest
    // strictly increasing continuation.
    ++idx[slot];
    for (int j = slot + 1; j < m; ++j) {
      idx[j] = idx[j - 1] + 1;
    }
  }

  return result;
}
/**
 * @brief Generate all non-empty subsets of a given vector.
 *
 * Subsets are emitted grouped by size: first every subset of size 1, then
 * every subset of size 2, and so on up to the full set. Within one size the
 * order is whatever CppCombn produces for that size.
 *
 * @tparam T The type of elements in the vector.
 * @param set The input vector to generate subsets from.
 * @return std::vector<std::vector<T>> A vector containing all non-empty subsets.
 */
template <typename T>
std::vector<std::vector<T>> CppGenSubsets(const std::vector<T>& set) {
  const int total = static_cast<int>(set.size());
  std::vector<std::vector<T>> subsets;

  int size = 1;
  while (size <= total) {
    // Append every combination of the current size, keeping CppCombn's order.
    for (const auto& comb : CppCombn(set, size)) {
      subsets.push_back(comb);
    }
    ++size;
  }

  return subsets;
}
# Embedding for vector (sf) data.
# The target variable is read through .uni_lattice(); when no neighbours list
# is supplied one is derived from `data`. `stack` selects which compiled
# routine builds the embedding (plain vs. the "Com" variant).
.embedded_sf_method = function(data, target, E = 3, tau = 1, style = 1,
                               stack = FALSE, detrend = FALSE, nb = NULL) {
  vec = .uni_lattice(data, target, detrend)
  if (is.null(nb)) {
    nb = .internal_lattice_nb(data)
  }
  res = if (!stack) {
    RcppGenLatticeEmbeddings(vec, nb, E, tau, style)
  } else {
    RcppGenLatticeEmbeddingsCom(vec, nb, E, tau, style)
  }
  return(res)
}

# Embedding for raster (SpatRaster) data.
# The target layer is read through .uni_grid(). `embed.direction` is only
# forwarded to the stacked ("Com") routine; the plain routine does not take it.
.embedded_spatraster_method = function(data, target, E = 3, tau = 1, style = 1, stack = FALSE,
                                       detrend = FALSE, grid.coord = TRUE, embed.direction = 0) {
  mat = .uni_grid(data, target, detrend, grid.coord)
  res = if (!stack) {
    RcppGenGridEmbeddings(mat, E, tau, style)
  } else {
    RcppGenGridEmbeddingsCom(mat, E, tau, style, embed.direction)
  }
  return(res)
}
31 | #' @param stack (optional) whether to stack embeddings. 32 | #' @param detrend (optional) whether to remove the linear trend. 33 | #' @param nb (optional) neighbours list. 34 | #' 35 | #' @return A matrix (when `stack` is `FALSE`) or list. 36 | #' @export 37 | #' @name embedded 38 | #' @aliases embedded,sf-method 39 | #' 40 | #' @examples 41 | #' columbus = sf::read_sf(system.file("case/columbus.gpkg",package="spEDM")) 42 | #' v = embedded(columbus,"crime") 43 | #' v[1:5,] 44 | #' 45 | #' npp = terra::rast(system.file("case/npp.tif",package="spEDM")) 46 | #' r = embedded(npp,"npp") 47 | #' r[which(!is.na(r),arr.ind = TRUE)[1:5],] 48 | #' 49 | methods::setMethod("embedded", "sf", .embedded_sf_method) 50 | 51 | #' @rdname embedded 52 | #' @param grid.coord (optional) whether to detrend using cell center coordinates (`TRUE`) or row/column numbers (`FALSE`). 53 | #' @param embed.direction (optional) direction selector for embeddings (`0` returns all directions, `1-8` correspond to NW, N, NE, W, E, SW, S, SE). 
54 | methods::setMethod("embedded", "SpatRaster", .embedded_spatraster_method) 55 | -------------------------------------------------------------------------------- /src/Entropy.h: -------------------------------------------------------------------------------- 1 | #ifndef Entropy_H 2 | #define Entropy_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include "CppStats.h" 12 | #include "CppDistances.h" 13 | 14 | double CppEntropy_Cont(const std::vector& vec, size_t k, 15 | double base = 10, bool NA_rm = false); 16 | 17 | double CppJoinEntropy_Cont(const std::vector>& mat, 18 | const std::vector& columns, size_t k, 19 | double base = 10, bool NA_rm = false); 20 | 21 | double CppMutualInformation_Cont(const std::vector>& mat, 22 | const std::vector& columns1, 23 | const std::vector& columns2, 24 | size_t k, int alg = 1, 25 | bool normalize = false, bool NA_rm = false); 26 | 27 | double CppConditionalEntropy_Cont(const std::vector>& mat, 28 | const std::vector& target_columns, 29 | const std::vector& conditional_columns, 30 | size_t k, double base = 10, bool NA_rm = false); 31 | 32 | double CppEntropy_Disc(const std::vector& vec, 33 | double base = 10, bool NA_rm = false); 34 | 35 | double CppJoinEntropy_Disc(const std::vector>& mat, 36 | const std::vector& columns, 37 | double base = 10, bool NA_rm = false); 38 | 39 | double CppMutualInformation_Disc(const std::vector>& mat, 40 | const std::vector& columns1, 41 | const std::vector& columns2, 42 | double base = 10, bool NA_rm = false); 43 | 44 | double CppConditionalEntropy_Disc(const std::vector>& mat, 45 | const std::vector& target_columns, 46 | const std::vector& conditional_columns, 47 | double base = 10, bool NA_rm = false); 48 | 49 | #endif // Entropy_H 50 | -------------------------------------------------------------------------------- /src/SpatialBlockBootstrap.h: -------------------------------------------------------------------------------- 1 | #ifndef 
SpatialBlockBootstrap_H 2 | #define SpatialBlockBootstrap_H 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | /** 9 | * @brief Generate a spatial block bootstrap resample of block indices based on predefined blocks. 10 | * 11 | * This function follows the Spatial Block Bootstrap (SBB) procedure described by Carlstein (1986) 12 | * and Herrera et al. (2013), as used in spatial Granger causality frameworks. It samples blocks 13 | * of indices with replacement, preserving spatial contiguity defined by a block ID vector. 14 | * 15 | * 16 | * @param block Vector of block IDs assigning each element to a contiguous block. 17 | * Elements with the same integer value belong to the same block. 18 | * @param seed Random seed for reproducibility (optional). 19 | * @return std::vector The bootstrap-resampled vector of indices based on block IDs. 20 | */ 21 | std::vector SpatialBlockBootstrap( 22 | const std::vector& block, 23 | unsigned int seed = 42 24 | ); 25 | 26 | /** 27 | * Generate a spatial block bootstrap sample of indices using an external random number generator. 28 | * 29 | * This function performs block-based resampling from spatial or grouped data. The input vector `block` 30 | * specifies a block ID for each observation (e.g., spatial unit, group, or time block). The function: 31 | * 32 | * 1. Groups indices by block ID; 33 | * 2. Randomly samples block IDs with replacement using the provided `std::mt19937_64` RNG; 34 | * 3. Concatenates the indices of the selected blocks to form the bootstrap sample; 35 | * 4. Trims the result to ensure the same length as the original data. 36 | * 37 | * The sampling is done at the block level rather than at the individual level, preserving local structure. 38 | * This is particularly useful for spatial or temporal data where neighboring observations may be dependent. 39 | * 40 | * @param block A vector of block identifiers (one per observation), e.g., spatial or temporal blocks.
# False-nearest-neighbours for vector (sf) data.
# `rt` and `eps` are passed through .check_inputelementnum() together with
# the largest requested embedding dimension. Defaults: library = positions of
# non-missing values, prediction set = library, neighbours derived from `data`.
.fnn_sf_method = function(data, target, E = 1:10, tau = 1, style = 1, stack = FALSE, lib = NULL, pred = NULL,
                          dist.metric = "L1", rt = 10, eps = 2, threads = detectThreads(), detrend = TRUE, nb = NULL) {
  vec = .uni_lattice(data, target, detrend)
  max_dim = max(E)
  rt = .check_inputelementnum(rt, max_dim)
  eps = .check_inputelementnum(eps, max_dim)
  if (is.null(lib)) {
    lib = which(!is.na(vec))
  }
  if (is.null(pred)) {
    pred = lib
  }
  if (is.null(nb)) {
    nb = .internal_lattice_nb(data)
  }
  return(RcppFNN4Lattice(vec, nb, rt, eps, lib, pred, E, tau, style,
                         stack, .check_distmetric(dist.metric), threads))
}

# False-nearest-neighbours for raster (SpatRaster) data.
# Same flow as the sf method, but the library defaults to the row/column
# positions (arr.ind = TRUE) of non-missing cells and no neighbours list is
# involved.
.fnn_spatraster_method = function(data, target, E = 1:10, tau = 1, style = 1, stack = FALSE, lib = NULL, pred = NULL, dist.metric = "L1",
                                  rt = 10, eps = 2, threads = detectThreads(), detrend = TRUE, grid.coord = TRUE, embed.direction = 0) {
  mat = .uni_grid(data, target, detrend, grid.coord)
  max_dim = max(E)
  rt = .check_inputelementnum(rt, max_dim)
  eps = .check_inputelementnum(eps, max_dim)
  if (is.null(lib)) {
    lib = which(!is.na(mat), arr.ind = TRUE)
  }
  if (is.null(pred)) {
    pred = lib
  }
  return(RcppFNN4Grid(mat, rt, eps, lib, pred, E, tau, style, stack,
                      .check_distmetric(dist.metric), embed.direction, threads))
}
# Multiview embedding forecast for vector (sf) data.
# Defaults: library from .internal_library() on the bound predictors and
# target, prediction set = library, neighbours derived from `data`, and
# top = 0 when not given.
.multiview_sf_method = function(data, column, target, nvar, E = 3, k = E+2, tau = 1, style = 1, stack = FALSE, lib = NULL, pred = NULL,
                                dist.metric = "L2", dist.average = TRUE, top = NULL, threads = detectThreads(), detrend = TRUE, nb = NULL) {
  xmat = .multivar_lattice(data, column, detrend)
  yvec = .uni_lattice(data, target, detrend)
  if (is.null(lib)) {
    lib = .internal_library(cbind(xmat,yvec))
  }
  if (is.null(pred)) {
    pred = lib
  }
  if (is.null(nb)) {
    nb = .internal_lattice_nb(data)
  }
  top = if (is.null(top)) 0 else top
  res = RcppMultiView4Lattice(xmat, yvec, nb, lib, pred, E, tau, k, top, nvar, style, stack,
                              .check_distmetric(dist.metric), dist.average, threads)
  return(res)
}

# Multiview embedding forecast for raster (SpatRaster) data.
# Both predictors and target are read with .multivar_grid(); the default
# library comes from .internal_library(..., TRUE).
.multiview_spatraster_method = function(data, column, target, nvar, E = 3, k = E+2, tau = 1, style = 1, stack = FALSE, lib = NULL, pred = NULL,
                                        dist.metric = "L2", dist.average = TRUE, top = NULL, threads = detectThreads(), detrend = TRUE, grid.coord = TRUE) {
  xmat = .multivar_grid(data, column, detrend, grid.coord)
  ymat = .multivar_grid(data, target, detrend, grid.coord)
  if (is.null(lib)) {
    lib = .internal_library(cbind(xmat,ymat),TRUE)
  }
  if (is.null(pred)) {
    pred = lib
  }
  top = if (is.null(top)) 0 else top
  res = RcppMultiView4Grid(xmat, ymat, lib, pred, E, tau, k, top, nvar, style, stack,
                           .check_distmetric(dist.metric), dist.average, threads)
  return(res)
}
38 | #' 39 | #' @examples 40 | #' columbus = sf::read_sf(system.file("case/columbus.gpkg",package="spEDM")) 41 | #' \donttest{ 42 | #' multiview(columbus, 43 | #' column = c("inc","crime","open","plumb","discbd"), 44 | #' target = "hoval", nvar = 3) 45 | #' } 46 | methods::setMethod("multiview", "sf", .multiview_sf_method) 47 | 48 | #' @rdname multiview 49 | methods::setMethod("multiview", "SpatRaster", .multiview_spatraster_method) 50 | -------------------------------------------------------------------------------- /man/ic.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/ic.R 3 | \name{ic} 4 | \alias{ic} 5 | \alias{ic,sf-method} 6 | \alias{ic,SpatRaster-method} 7 | \title{optimal parameter search for intersection cardinality} 8 | \usage{ 9 | \S4method{ic}{sf}( 10 | data, 11 | column, 12 | target, 13 | E = 2:10, 14 | k = E + 2, 15 | tau = 1, 16 | style = 1, 17 | lib = NULL, 18 | pred = NULL, 19 | dist.metric = "L2", 20 | threads = detectThreads(), 21 | detrend = FALSE, 22 | nb = NULL 23 | ) 24 | 25 | \S4method{ic}{SpatRaster}( 26 | data, 27 | column, 28 | target, 29 | E = 2:10, 30 | k = E + 2, 31 | tau = 1, 32 | style = 1, 33 | lib = NULL, 34 | pred = NULL, 35 | dist.metric = "L2", 36 | threads = detectThreads(), 37 | detrend = FALSE, 38 | grid.coord = TRUE 39 | ) 40 | } 41 | \arguments{ 42 | \item{data}{observation data.} 43 | 44 | \item{column}{name of library variable.} 45 | 46 | \item{target}{name of target variable.} 47 | 48 | \item{E}{(optional) embedding dimensions.} 49 | 50 | \item{k}{(optional) number of nearest neighbors used.} 51 | 52 | \item{tau}{(optional) step of spatial lags.} 53 | 54 | \item{style}{(optional) embedding style (\code{0} includes current state, \code{1} excludes it).} 55 | 56 | \item{lib}{(optional) libraries indices (input needed: \code{vector} - spatial vector, \code{matrix} - spatial raster).} 57 | 58 | 
\item{pred}{(optional) predictions indices (input requirement same as \code{lib}).} 59 | 60 | \item{dist.metric}{(optional) distance metric (\code{L1}: Manhattan, \code{L2}: Euclidean).} 61 | 62 | \item{threads}{(optional) number of threads to use.} 63 | 64 | \item{detrend}{(optional) whether to remove the linear trend.} 65 | 66 | \item{nb}{(optional) neighbours list.} 67 | 68 | \item{grid.coord}{(optional) whether to detrend using cell center coordinates (\code{TRUE}) or row/column numbers (\code{FALSE}).} 69 | } 70 | \value{ 71 | A list 72 | \describe{ 73 | \item{\code{xmap}}{cross mapping performance} 74 | \item{\code{varname}}{name of target variable} 75 | \item{\code{method}}{method of cross mapping} 76 | } 77 | } 78 | \description{ 79 | optimal parameter search for intersection cardinality 80 | } 81 | \examples{ 82 | columbus = sf::read_sf(system.file("case/columbus.gpkg",package="spEDM")) 83 | \donttest{ 84 | ic(columbus,"hoval","crime",E = 7,k = 15:25) 85 | } 86 | } 87 | \references{ 88 | Tao, P., Wang, Q., Shi, J., Hao, X., Liu, X., Min, B., Zhang, Y., Li, C., Cui, H., Chen, L., 2023. Detecting dynamical causality by intersection cardinal concavity. Fundamental Research. 
# S-map parameter search for vector (sf) data.
# Defaults: library from .internal_library() on the bound column/target
# vectors, prediction set = library, neighbours derived from `data`.
# The raw result is wrapped by .bind_xmapself() with method label "smap".
.smap_sf_method = function(data, column, target, E = 3, k = E+2, tau = 1, style = 1, stack = FALSE, lib = NULL, pred = NULL, dist.metric = "L2", dist.average = TRUE,
                           theta = c(0, 1e-04, 3e-04, 0.001, 0.003, 0.01, 0.03, 0.1, 0.3, 0.5, 0.75, 1, 1.5, 2, 3, 4, 6, 8), threads = detectThreads(), detrend = TRUE, nb = NULL) {
  vx = .uni_lattice(data, column, detrend)
  vy = .uni_lattice(data, target, detrend)
  if (is.null(lib)) {
    lib = .internal_library(cbind(vx,vy))
  }
  if (is.null(pred)) {
    pred = lib
  }
  if (is.null(nb)) {
    nb = .internal_lattice_nb(data)
  }
  res = RcppSMap4Lattice(vx, vy, nb, lib, pred, theta, E, tau, k, style, stack,
                         .check_distmetric(dist.metric), dist.average, threads)
  return(.bind_xmapself(res, target, "smap"))
}

# S-map parameter search for raster (SpatRaster) data.
# The default library is every cell where both layers are non-missing,
# expressed as row/column positions (arr.ind = TRUE).
.smap_spatraster_method = function(data, column, target, E = 3, k = E+2, tau = 1, style = 1, stack = FALSE, lib = NULL, pred = NULL, dist.metric = "L2",
                                   dist.average = TRUE, theta = c(0, 1e-04, 3e-04, 0.001, 0.003, 0.01, 0.03, 0.1, 0.3, 0.5, 0.75, 1, 1.5, 2, 3, 4, 6, 8),
                                   threads = detectThreads(), detrend = TRUE, grid.coord = TRUE, embed.direction = 0) {
  mx = .uni_grid(data, column, detrend, grid.coord)
  my = .uni_grid(data, target, detrend, grid.coord)
  if (is.null(lib)) {
    lib = which(!is.na(mx) & !is.na(my), arr.ind = TRUE)
  }
  if (is.null(pred)) {
    pred = lib
  }
  res = RcppSMap4Grid(mx, my, lib, pred, theta, E, tau, k, style, stack,
                      .check_distmetric(dist.metric), dist.average, embed.direction, threads)
  return(.bind_xmapself(res, target, "smap"))
}
29 | #' 30 | #' @return A list 31 | #' \describe{ 32 | #' \item{\code{xmap}}{forecast performance} 33 | #' \item{\code{varname}}{name of target variable} 34 | #' \item{\code{method}}{method of cross mapping} 35 | #' } 36 | #' @export 37 | #' @name smap 38 | #' @aliases smap,sf-method 39 | #' @references 40 | #' Sugihara G. 1994. Nonlinear forecasting for the classification of natural time series. Philosophical Transactions: Physical Sciences and Engineering, 348 (1688):477-495. 41 | #' 42 | #' @examples 43 | #' columbus = sf::read_sf(system.file("case/columbus.gpkg",package="spEDM")) 44 | #' \donttest{ 45 | #' smap(columbus,"inc","crime",E = 5,k = 6) 46 | #' } 47 | methods::setMethod("smap", "sf", .smap_sf_method) 48 | 49 | #' @rdname smap 50 | methods::setMethod("smap", "SpatRaster", .smap_spatraster_method) 51 | -------------------------------------------------------------------------------- /man/fnn.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/fnn.R 3 | \name{fnn} 4 | \alias{fnn} 5 | \alias{fnn,sf-method} 6 | \alias{fnn,SpatRaster-method} 7 | \title{false nearest neighbours} 8 | \usage{ 9 | \S4method{fnn}{sf}( 10 | data, 11 | target, 12 | E = 1:10, 13 | tau = 1, 14 | style = 1, 15 | stack = FALSE, 16 | lib = NULL, 17 | pred = NULL, 18 | dist.metric = "L1", 19 | rt = 10, 20 | eps = 2, 21 | threads = detectThreads(), 22 | detrend = TRUE, 23 | nb = NULL 24 | ) 25 | 26 | \S4method{fnn}{SpatRaster}( 27 | data, 28 | target, 29 | E = 1:10, 30 | tau = 1, 31 | style = 1, 32 | stack = FALSE, 33 | lib = NULL, 34 | pred = NULL, 35 | dist.metric = "L1", 36 | rt = 10, 37 | eps = 2, 38 | threads = detectThreads(), 39 | detrend = TRUE, 40 | grid.coord = TRUE, 41 | embed.direction = 0 42 | ) 43 | } 44 | \arguments{ 45 | \item{data}{observation data.} 46 | 47 | \item{target}{name of target variable.} 48 | 49 | \item{E}{(optional) embedding dimensions.} 50 
| 51 | \item{tau}{(optional) step of spatial lags.} 52 | 53 | \item{style}{(optional) embedding style (\code{0} includes current state, \code{1} excludes it).} 54 | 55 | \item{stack}{(optional) whether to stack embeddings.} 56 | 57 | \item{lib}{(optional) libraries indices (input needed: \code{vector} - spatial vector, \code{matrix} - spatial raster).} 58 | 59 | \item{pred}{(optional) predictions indices (input requirement same as \code{lib}).} 60 | 61 | \item{dist.metric}{(optional) distance metric (\code{L1}: Manhattan, \code{L2}: Euclidean).} 62 | 63 | \item{rt}{(optional) escape factor.} 64 | 65 | \item{eps}{(optional) neighborhood diameter.} 66 | 67 | \item{threads}{(optional) number of threads to use.} 68 | 69 | \item{detrend}{(optional) whether to remove the linear trend.} 70 | 71 | \item{nb}{(optional) neighbours list.} 72 | 73 | \item{grid.coord}{(optional) whether to detrend using cell center coordinates (\code{TRUE}) or row/column numbers (\code{FALSE}).} 74 | 75 | \item{embed.direction}{(optional) direction selector for embeddings (\code{0} returns all directions, \code{1-8} correspond to NW, N, NE, W, E, SW, S, SE).} 76 | } 77 | \value{ 78 | A vector 79 | } 80 | \description{ 81 | false nearest neighbours 82 | } 83 | \examples{ 84 | columbus = sf::read_sf(system.file("case/columbus.gpkg",package="spEDM")) 85 | \donttest{ 86 | fnn(columbus,"crime") 87 | } 88 | } 89 | \references{ 90 | Kennel M. B., Brown R. and Abarbanel H. D. I., Determining embedding dimension for phase-space reconstruction using a geometrical construction, Phys. Rev. A, Volume 45, 3403 (1992). 
91 | } 92 | -------------------------------------------------------------------------------- /man/sc.test.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/sctest.R 3 | \name{sc.test} 4 | \alias{sc.test} 5 | \alias{sc.test,sf-method} 6 | \alias{sc.test,SpatRaster-method} 7 | \title{spatial causality test} 8 | \usage{ 9 | \S4method{sc.test}{sf}( 10 | data, 11 | cause, 12 | effect, 13 | k, 14 | block = 3, 15 | boot = 399, 16 | seed = 42L, 17 | base = 2, 18 | lib = NULL, 19 | pred = NULL, 20 | threads = detectThreads(), 21 | detrend = TRUE, 22 | normalize = FALSE, 23 | progressbar = FALSE, 24 | nb = NULL 25 | ) 26 | 27 | \S4method{sc.test}{SpatRaster}( 28 | data, 29 | cause, 30 | effect, 31 | k, 32 | block = 3, 33 | boot = 399, 34 | seed = 42L, 35 | base = 2, 36 | lib = NULL, 37 | pred = NULL, 38 | threads = detectThreads(), 39 | detrend = TRUE, 40 | normalize = FALSE, 41 | progressbar = FALSE, 42 | grid.coord = TRUE 43 | ) 44 | } 45 | \arguments{ 46 | \item{data}{observation data.} 47 | 48 | \item{cause}{name of causal variable.} 49 | 50 | \item{effect}{name of effect variable.} 51 | 52 | \item{k}{(optional) number of nearest neighbors used in symbolization.} 53 | 54 | \item{block}{(optional) number of blocks used in spatial block bootstrap.} 55 | 56 | \item{boot}{(optional) number of bootstraps to perform.} 57 | 58 | \item{seed}{(optional) random seed.} 59 | 60 | \item{base}{(optional) logarithm base.} 61 | 62 | \item{lib}{(optional) libraries indices (input needed: \code{vector} - spatial vector, \code{matrix} - spatial raster).} 63 | 64 | \item{pred}{(optional) predictions indices (input requirement same as \code{lib}).} 65 | 66 | \item{threads}{(optional) number of threads to use.} 67 | 68 | \item{detrend}{(optional) whether to remove the linear trend.} 69 | 70 | \item{normalize}{(optional) whether to normalize the result.} 71 | 72 | 
\item{progressbar}{(optional) whether to show the progress bar.} 73 | 74 | \item{nb}{(optional) neighbours list.} 75 | 76 | \item{grid.coord}{(optional) whether to detrend using cell center coordinates (\code{TRUE}) or row/column numbers (\code{FALSE}).} 77 | } 78 | \value{ 79 | A list 80 | \describe{ 81 | \item{\code{sc}}{statistic for spatial causality} 82 | \item{\code{varname}}{names of causal and effect variables} 83 | } 84 | } 85 | \description{ 86 | spatial causality test 87 | } 88 | \examples{ 89 | columbus = sf::read_sf(system.file("case/columbus.gpkg",package="spEDM")) 90 | \donttest{ 91 | sc.test(columbus,"hoval","crime",k = 15) 92 | } 93 | } 94 | \references{ 95 | Herrera, M., Mur, J., & Ruiz, M. (2016). Detecting causal relationships between spatial processes. Papers in Regional Science, 95(3), 577–595. 96 | } 97 | -------------------------------------------------------------------------------- /src/CppDistances.h: -------------------------------------------------------------------------------- 1 | #ifndef CppDistances_H 2 | #define CppDistances_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include "NumericUtils.h" 12 | #include 13 | 14 | double CppDistance(const std::vector& vec1, 15 | const std::vector& vec2, 16 | bool L1norm = false, 17 | bool NA_rm = false); 18 | 19 | double CppChebyshevDistance(const std::vector& vec1, 20 | const std::vector& vec2, 21 | bool NA_rm = false); 22 | 23 | std::vector CppKNearestDistance(const std::vector& vec, size_t k, 24 | bool L1norm = false, bool NA_rm = false); 25 | 26 | std::vector CppMatKNearestDistance(const std::vector>& mat, 27 | size_t k, bool NA_rm = false); 28 | 29 | std::vector> CppMatDistance( 30 | const std::vector>& mat, 31 | bool L1norm = false, 32 | bool NA_rm = false); 33 | 34 | std::vector> CppMatChebyshevDistance( 35 | const std::vector>& mat, 36 | bool NA_rm = false); 37 | 38 | std::vector CppNeighborsNum( 39 | const std::vector& vec, 40 | 
const std::vector& radius, 41 | bool equal = false, 42 | bool L1norm = false, 43 | bool NA_rm = false); 44 | 45 | std::vector CppMatNeighborsNum( 46 | const std::vector>& mat, 47 | const std::vector& radius, 48 | bool equal = false, 49 | bool NA_rm = false); 50 | 51 | std::vector CppKNNIndice( 52 | const std::vector>& embedding_space, 53 | size_t target_idx, 54 | size_t k, 55 | const std::vector& lib, 56 | bool include_self = false); 57 | 58 | std::vector CppDistKNNIndice( 59 | const std::vector>& dist_mat, 60 | size_t target_idx, 61 | size_t k, 62 | const std::vector& lib, 63 | bool include_self = false); 64 | 65 | std::vector> CppDistSortedIndice( 66 | const std::vector>& dist_mat, 67 | const std::vector& lib, 68 | size_t k, 69 | bool include_self = false); 70 | 71 | std::vector> CppMatKNNeighbors( 72 | const std::vector>& embedding_space, 73 | const std::vector& lib, 74 | size_t k, 75 | size_t threads, 76 | bool L1norm = false, 77 | bool include_self = false); 78 | 79 | #endif // CppDistances_H 80 | -------------------------------------------------------------------------------- /R/pc.R: -------------------------------------------------------------------------------- 1 | .pc_sf_method = \(data, column, target, E = 2:10, k = E+2, tau = 1, style = 1, lib = NULL, pred = NULL, dist.metric = "L2", zero.tolerance = max(k), 2 | relative = TRUE, weighted = TRUE, maximize = "positive", threads = detectThreads(), detrend = FALSE, nb = NULL){ 3 | vx = .uni_lattice(data,column,detrend) 4 | vy = .uni_lattice(data,target,detrend) 5 | if (is.null(lib)) lib = .internal_library(cbind(vx,vy)) 6 | if (is.null(pred)) pred = lib 7 | if (is.null(nb)) nb = .internal_lattice_nb(data) 8 | res = RcppPC4Lattice(vx, vy, nb, lib, pred, E, k, tau, style, zero.tolerance, 9 | .check_distmetric(dist.metric),relative,weighted,threads) 10 | return(.bind_xmapself(res,target,"pc",maximize = maximize)) 11 | } 12 | 13 | .pc_spatraster_method = \(data, column, target, E = 2:10, k = E+2, tau = 1, style 
= 1, lib = NULL, pred = NULL, dist.metric = "L2", zero.tolerance = max(k), 14 | relative = TRUE, weighted = TRUE, maximize = "positive", threads = detectThreads(), detrend = FALSE, grid.coord = TRUE){ 15 | mx = .uni_grid(data,column,detrend,grid.coord) 16 | my = .uni_grid(data,target,detrend,grid.coord) 17 | if (is.null(lib)) lib = which(!(is.na(mx) | is.na(my)), arr.ind = TRUE) 18 | if (is.null(pred)) pred = lib 19 | res = RcppPC4Grid(mx, my, lib, pred, E, k, tau, style, zero.tolerance, 20 | .check_distmetric(dist.metric),relative,weighted,threads) 21 | return(.bind_xmapself(res,target,"pc",maximize = maximize)) 22 | } 23 | 24 | #' optimal parameter search for pattern causality 25 | #' 26 | #' @inheritParams simplex 27 | #' @param zero.tolerance (optional) maximum number of zeros tolerated in signature space. 28 | #' @param relative (optional) whether to calculate relative changes in embeddings. 29 | #' @param weighted (optional) whether to weight causal strength. 30 | #' @param maximize (optional) causality metric to maximize: one of "positive", "negative", or "dark". 31 | #' 32 | #' @return A list 33 | #' \describe{ 34 | #' \item{\code{xmap}}{cross mapping performance} 35 | #' \item{\code{varname}}{name of target variable} 36 | #' \item{\code{method}}{method of cross mapping} 37 | #' \item{\code{maximize}}{maximized causality metric} 38 | #' } 39 | #' @export 40 | #' @name pc 41 | #' @aliases pc,sf-method 42 | #' @references 43 | #' Stavroglou, S.K., Pantelous, A.A., Stanley, H.E., Zuev, K.M., 2020. Unveiling causal interactions in complex systems. Proceedings of the National Academy of Sciences 117, 7599–7605. 
44 | #' 45 | #' @examples 46 | #' columbus = sf::read_sf(system.file("case/columbus.gpkg",package="spEDM")) 47 | #' \donttest{ 48 | #' pc(columbus,"hoval","crime",E = 5:10,maximize = "negative") 49 | #' } 50 | methods::setMethod("pc", "sf", .pc_sf_method) 51 | 52 | #' @rdname pc 53 | methods::setMethod("pc", "SpatRaster", .pc_spatraster_method) 54 | -------------------------------------------------------------------------------- /R/simplex.R: -------------------------------------------------------------------------------- 1 | .simplex_sf_method = \(data, column, target, E = 2:10, k = E+2, tau = 1, style = 1, stack = FALSE, lib = NULL, pred = NULL, 2 | dist.metric = "L2", dist.average = TRUE, threads = detectThreads(), detrend = TRUE, nb = NULL){ 3 | vx = .uni_lattice(data,column,detrend) 4 | vy = .uni_lattice(data,target,detrend) 5 | if (is.null(lib)) lib = .internal_library(cbind(vx,vy)) 6 | if (is.null(pred)) pred = lib 7 | if (is.null(nb)) nb = .internal_lattice_nb(data) 8 | res = RcppSimplex4Lattice(vx,vy,nb,lib,pred,E,k,tau,style,stack, 9 | .check_distmetric(dist.metric),dist.average,threads) 10 | return(.bind_xmapself(res,target,"simplex")) 11 | } 12 | 13 | .simplex_spatraster_method = \(data, column, target, E = 2:10, k = E+2, tau = 1, style = 1, stack = FALSE, lib = NULL, pred = NULL, dist.metric = "L2", 14 | dist.average = TRUE, threads = detectThreads(), detrend = TRUE, grid.coord = TRUE, embed.direction = 0){ 15 | mx = .uni_grid(data,column,detrend,grid.coord) 16 | my = .uni_grid(data,target,detrend,grid.coord) 17 | if (is.null(lib)) lib = which(!(is.na(mx) | is.na(my)), arr.ind = TRUE) 18 | if (is.null(pred)) pred = lib 19 | res = RcppSimplex4Grid(mx, my, lib, pred, E, k, tau, style, stack, 20 | .check_distmetric(dist.metric),dist.average,embed.direction,threads) 21 | return(.bind_xmapself(res,target,"simplex")) 22 | } 23 | 24 | #' optimal parameter search for simplex forecasting 25 | #' 26 | #' @inheritParams embedded 27 | #' @param column name of 
library variable. 28 | #' @param k (optional) number of nearest neighbors used. 29 | #' @param lib (optional) libraries indices (input needed: `vector` - spatial vector, `matrix` - spatial raster). 30 | #' @param pred (optional) predictions indices (input requirement same as `lib`). 31 | #' @param dist.metric (optional) distance metric (`L1`: Manhattan, `L2`: Euclidean). 32 | #' @param dist.average (optional) whether to average distance. 33 | #' @param threads (optional) number of threads to use. 34 | #' 35 | #' @return A list 36 | #' \describe{ 37 | #' \item{\code{xmap}}{forecast performance} 38 | #' \item{\code{varname}}{name of target variable} 39 | #' \item{\code{method}}{method of cross mapping} 40 | #' } 41 | #' @export 42 | #' @name simplex 43 | #' @aliases simplex,sf-method 44 | #' @references 45 | #' Sugihara G. and May R. 1990. Nonlinear forecasting as a way of distinguishing chaos from measurement error in time series. Nature, 344:734-741. 46 | #' 47 | #' @examples 48 | #' columbus = sf::read_sf(system.file("case/columbus.gpkg",package="spEDM")) 49 | #' \donttest{ 50 | #' simplex(columbus,"inc","crime") 51 | #' } 52 | methods::setMethod("simplex", "sf", .simplex_sf_method) 53 | 54 | #' @rdname simplex 55 | methods::setMethod("simplex", "SpatRaster", .simplex_spatraster_method) 56 | -------------------------------------------------------------------------------- /src/SignatureProjection.h: -------------------------------------------------------------------------------- 1 | #ifndef SignatureProjection_H 2 | #define SignatureProjection_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include "NumericUtils.h" 10 | #include 11 | /** 12 | * @brief Predicts signature vectors for a subset of target points using weighted nearest neighbors. 13 | * 14 | * This function performs local weighted prediction in the signature space as follows: 15 | * 1. 
For each prediction index `p` in `pred_indices`, find its `num_neighbors` nearest neighbors 16 | * among `lib_indices` based on distances in `Dx[p][*]`, ignoring NaN distances. 17 | * 2. Compute exponential weights scaled by the total distance sum to emphasize close points. 18 | * If all distances are zero, uniform weights are used instead. 19 | * 3. For each dimension of the signature space: 20 | * - Count how many neighbor signatures are exactly zero. 21 | * - If the zero count exceeds `zero_tolerance`, set the predicted value to 0. 22 | * - Otherwise, compute a weighted average of valid (non-NaN) neighbor signatures. 23 | * 4. Predictions are stored and updated only for indices in `pred_indices`; other entries remain undefined (NaN). 24 | * 25 | * Parallelization: 26 | * - Controlled by the parameter `threads`. 27 | * - If `threads <= 1`, computation is serial (standard for-loop). 28 | * - Otherwise, the loop over prediction indices is executed in parallel via RcppThread::parallelFor. 29 | * 30 | * @param SMy Signature space of the target variable Y. Shape: (N_obs, E−1) 31 | * @param Dx Distance matrix from prediction points to library points. Shape: (SMy.size(), SMy.size()) 32 | * @param lib_indices Indices of valid library points used for neighbor search (subset of [0, SMy.size())). 33 | * @param pred_indices Indices of points to predict (subset of [0, SMy.size())). 34 | * @param num_neighbors Number of nearest neighbors to use. If <= 0, defaults to E+1. 35 | * @param zero_tolerance Maximum allowed zero values per dimension before forcing prediction to zero. 36 | * If <= 0, defaults to E−1. 37 | * @param threads Number of threads to use. If <= 1, runs serially; otherwise runs parallel. 38 | * 39 | * @return A matrix of predicted signature vectors, sized SMy.size() × (E−1). 
40 | */ 41 | std::vector> SignatureProjection( 42 | const std::vector>& SMy, 43 | const std::vector>& Dx, 44 | const std::vector& lib_indices, 45 | const std::vector& pred_indices, 46 | int num_neighbors = 0, /* = std::numeric_limits::min() */ 47 | int zero_tolerance = 0, /* = std::numeric_limits::max() */ 48 | size_t threads = 1 49 | ); 50 | 51 | #endif // SignatureProjection_H 52 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | # spEDM 5 | 6 | 7 | 8 | [![CRAN](https://www.r-pkg.org/badges/version/spEDM)](https://CRAN.R-project.org/package=spEDM) 9 | [![CRAN 10 | Release](https://www.r-pkg.org/badges/last-release/spEDM)](https://CRAN.R-project.org/package=spEDM) 11 | [![CRAN 12 | Checks](https://badges.cranchecks.info/worst/spEDM.svg)](https://cran.r-project.org/web/checks/check_results_spEDM.html) 13 | [![Downloads_all](https://badgen.net/cran/dt/spEDM?color=orange)](https://CRAN.R-project.org/package=spEDM) 14 | [![Downloads_month](https://cranlogs.r-pkg.org/badges/spEDM)](https://CRAN.R-project.org/package=spEDM) 15 | [![License](https://img.shields.io/badge/license-GPL--3-brightgreen.svg?style=flat)](http://www.gnu.org/licenses/gpl-3.0.html) 16 | [![Lifecycle: 17 | stable](https://img.shields.io/badge/lifecycle-stable-20b2aa.svg)](https://lifecycle.r-lib.org/articles/stages.html#stable) 18 | [![R-CMD-check](https://github.com/stscl/spEDM/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/stscl/spEDM/actions/workflows/R-CMD-check.yaml) 19 | [![R-universe](https://stscl.r-universe.dev/badges/spEDM?color=cyan)](https://stscl.r-universe.dev/spEDM) 20 | 21 | 22 | 23 | spEDM website: https://stscl.github.io/spEDM/ 24 | 25 | ***Sp**atial **E**mpirical **D**ynamic **M**odeling* 26 | 27 | *spEDM* is an R package for spatial causal discovery. 
It extends 28 | Empirical Dynamic Modeling (EDM) from time series to spatial 29 | cross-sectional data, provides seamless support for vector and raster 30 | spatial data via tight integration with the 31 | [*sf*](https://CRAN.R-project.org/package=sf) and 32 | [*terra*](https://CRAN.R-project.org/package=terra) packages, and 33 | enables data-driven causal inference from spatial snapshots. 34 | 35 | > *Refer to the package documentation 36 | > for more detailed information.* 37 | 38 | ## Installation 39 | 40 | - Install from [CRAN](https://CRAN.R-project.org/package=spEDM) with: 41 | 42 | ``` r 43 | install.packages("spEDM", dep = TRUE) 44 | ``` 45 | 46 | - Install binary version from 47 | [R-universe](https://stscl.r-universe.dev/spEDM) with: 48 | 49 | ``` r 50 | install.packages("spEDM", 51 | repos = c("https://stscl.r-universe.dev", 52 | "https://cloud.r-project.org"), 53 | dep = TRUE) 54 | ``` 55 | 56 | - Install from source code on [GitHub](https://github.com/stscl/spEDM) 57 | with: 58 | 59 | ``` r 60 | if (!requireNamespace("devtools")) { 61 | install.packages("devtools") 62 | } 63 | devtools::install_github("stscl/spEDM", 64 | build_vignettes = TRUE, 65 | dep = TRUE) 66 | ``` 67 | -------------------------------------------------------------------------------- /man/multiview.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/multiview.R 3 | \name{multiview} 4 | \alias{multiview} 5 | \alias{multiview,sf-method} 6 | \alias{multiview,SpatRaster-method} 7 | \title{multiview embedding forecast} 8 | \usage{ 9 | \S4method{multiview}{sf}( 10 | data, 11 | column, 12 | target, 13 | nvar, 14 | E = 3, 15 | k = E + 2, 16 | tau = 1, 17 | style = 1, 18 | stack = FALSE, 19 | lib = NULL, 20 | pred = NULL, 21 | dist.metric = "L2", 22 | dist.average = TRUE, 23 | top = NULL, 24 | threads = detectThreads(), 25 | detrend = TRUE, 26 | nb = NULL 27 | ) 28 | 29 | 
\S4method{multiview}{SpatRaster}( 30 | data, 31 | column, 32 | target, 33 | nvar, 34 | E = 3, 35 | k = E + 2, 36 | tau = 1, 37 | style = 1, 38 | stack = FALSE, 39 | lib = NULL, 40 | pred = NULL, 41 | dist.metric = "L2", 42 | dist.average = TRUE, 43 | top = NULL, 44 | threads = detectThreads(), 45 | detrend = TRUE, 46 | grid.coord = TRUE 47 | ) 48 | } 49 | \arguments{ 50 | \item{data}{observation data.} 51 | 52 | \item{column}{name of library variable.} 53 | 54 | \item{target}{name of target variable.} 55 | 56 | \item{nvar}{number of variable combinations.} 57 | 58 | \item{E}{(optional) embedding dimensions.} 59 | 60 | \item{k}{(optional) number of nearest neighbors used.} 61 | 62 | \item{tau}{(optional) step of spatial lags.} 63 | 64 | \item{style}{(optional) embedding style (\code{0} includes current state, \code{1} excludes it).} 65 | 66 | \item{stack}{(optional) whether to stack embeddings.} 67 | 68 | \item{lib}{(optional) libraries indices (input needed: \code{vector} - spatial vector, \code{matrix} - spatial raster).} 69 | 70 | \item{pred}{(optional) predictions indices (input requirement same as \code{lib}).} 71 | 72 | \item{dist.metric}{(optional) distance metric (\code{L1}: Manhattan, \code{L2}: Euclidean).} 73 | 74 | \item{dist.average}{(optional) whether to average distance.} 75 | 76 | \item{top}{(optional) number of reconstructions used in MVE forecast.} 77 | 78 | \item{threads}{(optional) number of threads to use.} 79 | 80 | \item{detrend}{(optional) whether to remove the linear trend.} 81 | 82 | \item{nb}{(optional) neighbours list.} 83 | 84 | \item{grid.coord}{(optional) whether to detrend using cell center coordinates (\code{TRUE}) or row/column numbers (\code{FALSE}).} 85 | } 86 | \value{ 87 | A vector (when the input is an sf object) or a matrix (when the input is a SpatRaster object) 88 | } 89 | \description{ 90 | multiview embedding forecast 91 | } 92 | \examples{ 93 | columbus = sf::read_sf(system.file("case/columbus.gpkg",package="spEDM")) 94 | \donttest{ 95 | multiview(columbus, 96 | column
= c("inc","crime","open","plumb","discbd"), 97 | target = "hoval", nvar = 3) 98 | } 99 | } 100 | \references{ 101 | Ye H., and G. Sugihara, 2016. Information leverage in interconnected ecosystems: Overcoming the curse of dimensionality. Science 353:922-925. 102 | } 103 | -------------------------------------------------------------------------------- /man/simplex.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/simplex.R 3 | \name{simplex} 4 | \alias{simplex} 5 | \alias{simplex,sf-method} 6 | \alias{simplex,SpatRaster-method} 7 | \title{optimal parameter search for simplex forecasting} 8 | \usage{ 9 | \S4method{simplex}{sf}( 10 | data, 11 | column, 12 | target, 13 | E = 2:10, 14 | k = E + 2, 15 | tau = 1, 16 | style = 1, 17 | stack = FALSE, 18 | lib = NULL, 19 | pred = NULL, 20 | dist.metric = "L2", 21 | dist.average = TRUE, 22 | threads = detectThreads(), 23 | detrend = TRUE, 24 | nb = NULL 25 | ) 26 | 27 | \S4method{simplex}{SpatRaster}( 28 | data, 29 | column, 30 | target, 31 | E = 2:10, 32 | k = E + 2, 33 | tau = 1, 34 | style = 1, 35 | stack = FALSE, 36 | lib = NULL, 37 | pred = NULL, 38 | dist.metric = "L2", 39 | dist.average = TRUE, 40 | threads = detectThreads(), 41 | detrend = TRUE, 42 | grid.coord = TRUE, 43 | embed.direction = 0 44 | ) 45 | } 46 | \arguments{ 47 | \item{data}{observation data.} 48 | 49 | \item{column}{name of library variable.} 50 | 51 | \item{target}{name of target variable.} 52 | 53 | \item{E}{(optional) embedding dimensions.} 54 | 55 | \item{k}{(optional) number of nearest neighbors used.} 56 | 57 | \item{tau}{(optional) step of spatial lags.} 58 | 59 | \item{style}{(optional) embedding style (\code{0} includes current state, \code{1} excludes it).} 60 | 61 | \item{stack}{(optional) whether to stack embeddings.} 62 | 63 | \item{lib}{(optional) libraries indices (input needed: \code{vector} - spatial vector, 
\code{matrix} - spatial raster).} 64 | 65 | \item{pred}{(optional) predictions indices (input requirement same as \code{lib}).} 66 | 67 | \item{dist.metric}{(optional) distance metric (\code{L1}: Manhattan, \code{L2}: Euclidean).} 68 | 69 | \item{dist.average}{(optional) whether to average distance.} 70 | 71 | \item{threads}{(optional) number of threads to use.} 72 | 73 | \item{detrend}{(optional) whether to remove the linear trend.} 74 | 75 | \item{nb}{(optional) neighbours list.} 76 | 77 | \item{grid.coord}{(optional) whether to detrend using cell center coordinates (\code{TRUE}) or row/column numbers (\code{FALSE}).} 78 | 79 | \item{embed.direction}{(optional) direction selector for embeddings (\code{0} returns all directions, \code{1-8} correspond to NW, N, NE, W, E, SW, S, SE).} 80 | } 81 | \value{ 82 | A list 83 | \describe{ 84 | \item{\code{xmap}}{forecast performance} 85 | \item{\code{varname}}{name of target variable} 86 | \item{\code{method}}{method of cross mapping} 87 | } 88 | } 89 | \description{ 90 | optimal parameter search for simplex forecasting 91 | } 92 | \examples{ 93 | columbus = sf::read_sf(system.file("case/columbus.gpkg",package="spEDM")) 94 | \donttest{ 95 | simplex(columbus,"inc","crime") 96 | } 97 | } 98 | \references{ 99 | Sugihara G. and May R. 1990. Nonlinear forecasting as a way of distinguishing chaos from measurement error in time series. Nature, 344:734-741. 
100 | } 101 | -------------------------------------------------------------------------------- /README.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | output: github_document 3 | --- 4 | 5 | 6 | 7 | ```{r, include = FALSE} 8 | knitr::opts_chunk$set( 9 | collapse = TRUE, 10 | comment = "##", 11 | fig.path = "man/figures/README-", 12 | out.width = "100%" 13 | ) 14 | ``` 15 | 16 | # spEDM 17 | 18 | 19 | 20 | [![CRAN](https://www.r-pkg.org/badges/version/spEDM)](https://CRAN.R-project.org/package=spEDM) 21 | [![CRAN Release](https://www.r-pkg.org/badges/last-release/spEDM)](https://CRAN.R-project.org/package=spEDM) 22 | [![CRAN Checks](https://badges.cranchecks.info/worst/spEDM.svg)](https://cran.r-project.org/web/checks/check_results_spEDM.html) 23 | [![Downloads_all](https://badgen.net/cran/dt/spEDM?color=orange)](https://CRAN.R-project.org/package=spEDM) 24 | [![Downloads_month](https://cranlogs.r-pkg.org/badges/spEDM)](https://CRAN.R-project.org/package=spEDM) 25 | [![License](https://img.shields.io/badge/license-GPL--3-brightgreen.svg?style=flat)](http://www.gnu.org/licenses/gpl-3.0.html) 26 | [![Lifecycle: stable](https://img.shields.io/badge/lifecycle-stable-20b2aa.svg)](https://lifecycle.r-lib.org/articles/stages.html#stable) 27 | [![R-CMD-check](https://github.com/stscl/spEDM/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/stscl/spEDM/actions/workflows/R-CMD-check.yaml) 28 | [![R-universe](https://stscl.r-universe.dev/badges/spEDM?color=cyan)](https://stscl.r-universe.dev/spEDM) 29 | 30 | 31 | 32 | spEDM website: https://stscl.github.io/spEDM/ 33 | 34 | ***Sp**atial **E**mpirical **D**ynamic **M**odeling* 35 | 36 | *spEDM* is an R package for spatial causal discovery. 
It extends Empirical Dynamic Modeling (EDM) from time series to spatial cross-sectional data, provides seamless support for vector and raster spatial data via tight integration with the [*sf*](https://CRAN.R-project.org/package=sf) and [*terra*](https://CRAN.R-project.org/package=terra) packages, and enables data-driven causal inference from spatial snapshots. 37 | 38 | > *Refer to the package documentation for more detailed information.* 39 | 40 | ## Installation 41 | 42 | - Install from [CRAN](https://CRAN.R-project.org/package=spEDM) with: 43 | 44 | ``` r 45 | install.packages("spEDM", dep = TRUE) 46 | ``` 47 | 48 | - Install binary version from [R-universe](https://stscl.r-universe.dev/spEDM) with: 49 | 50 | ``` r 51 | install.packages("spEDM", 52 | repos = c("https://stscl.r-universe.dev", 53 | "https://cloud.r-project.org"), 54 | dep = TRUE) 55 | ``` 56 | 57 | - Install from source code on [GitHub](https://github.com/stscl/spEDM) with: 58 | 59 | ```r 60 | if (!requireNamespace("devtools")) { 61 | install.packages("devtools") 62 | } 63 | devtools::install_github("stscl/spEDM", 64 | build_vignettes = TRUE, 65 | dep = TRUE) 66 | ``` 67 | -------------------------------------------------------------------------------- /man/gcmc.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/gcmc.R 3 | \name{gcmc} 4 | \alias{gcmc} 5 | \alias{gcmc,sf-method} 6 | \alias{gcmc,SpatRaster-method} 7 | \title{geographical cross mapping cardinality} 8 | \usage{ 9 | \S4method{gcmc}{sf}( 10 | data, 11 | cause, 12 | effect, 13 | libsizes = NULL, 14 | E = 3, 15 | k = pmin(E^2), 16 | tau = 1, 17 | style = 1, 18 | lib = NULL, 19 | pred = NULL, 20 | dist.metric = "L2", 21 | threads = detectThreads(), 22 | detrend = FALSE, 23 | parallel.level = "low", 24 | bidirectional = TRUE, 25 | progressbar = TRUE, 26 | nb = NULL 27 | ) 28 | 29 | \S4method{gcmc}{SpatRaster}( 30 | data, 
31 | cause, 32 | effect, 33 | libsizes = NULL, 34 | E = 3, 35 | k = pmin(E^2), 36 | tau = 1, 37 | style = 1, 38 | lib = NULL, 39 | pred = NULL, 40 | dist.metric = "L2", 41 | threads = detectThreads(), 42 | detrend = FALSE, 43 | parallel.level = "low", 44 | bidirectional = TRUE, 45 | progressbar = TRUE, 46 | grid.coord = TRUE 47 | ) 48 | } 49 | \arguments{ 50 | \item{data}{observation data.} 51 | 52 | \item{cause}{name of causal variable.} 53 | 54 | \item{effect}{name of effect variable.} 55 | 56 | \item{libsizes}{(optional) number of spatial units used (input needed: \code{vector} - spatial vector, \code{matrix} - spatial raster).} 57 | 58 | \item{E}{(optional) embedding dimensions.} 59 | 60 | \item{k}{(optional) number of nearest neighbors.} 61 | 62 | \item{tau}{(optional) step of spatial lags.} 63 | 64 | \item{style}{(optional) embedding style (\code{0} includes current state, \code{1} excludes it).} 65 | 66 | \item{lib}{(optional) libraries indices (input requirement same as \code{libsizes}).} 67 | 68 | \item{pred}{(optional) predictions indices (input requirement same as \code{libsizes}).} 69 | 70 | \item{dist.metric}{(optional) distance metric (\code{L1}: Manhattan, \code{L2}: Euclidean).} 71 | 72 | \item{threads}{(optional) number of threads to use.} 73 | 74 | \item{detrend}{(optional) whether to remove the linear trend.} 75 | 76 | \item{parallel.level}{(optional) level of parallelism, \code{low} or \code{high}.} 77 | 78 | \item{bidirectional}{(optional) whether to examine bidirectional causality.} 79 | 80 | \item{progressbar}{(optional) whether to show the progress bar.} 81 | 82 | \item{nb}{(optional) neighbours list.} 83 | 84 | \item{grid.coord}{(optional) whether to detrend using cell center coordinates (\code{TRUE}) or row/column numbers (\code{FALSE}).} 85 | } 86 | \value{ 87 | A list 88 | \describe{ 89 | \item{\code{xmap}}{cross mapping results} 90 | \item{\code{cs}}{causal strength} 91 | \item{\code{varname}}{names of causal and effect variables} 92 | 
\item{\code{bidirectional}}{whether to examine bidirectional causality} 93 | } 94 | } 95 | \description{ 96 | geographical cross mapping cardinality 97 | } 98 | \examples{ 99 | columbus = sf::read_sf(system.file("case/columbus.gpkg",package="spEDM")) 100 | \donttest{ 101 | g = gcmc(columbus,"hoval","crime",E = 7,k = 19) 102 | g 103 | } 104 | } 105 | -------------------------------------------------------------------------------- /man/slm.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/slm.R 3 | \name{slm} 4 | \alias{slm} 5 | \alias{slm,sf-method} 6 | \alias{slm,SpatRaster-method} 7 | \title{spatial logistic map} 8 | \usage{ 9 | \S4method{slm}{sf}( 10 | data, 11 | x = NULL, 12 | y = NULL, 13 | z = NULL, 14 | k = 4, 15 | step = 15, 16 | alpha_x = 0.28, 17 | alpha_y = 0.25, 18 | alpha_z = 0.22, 19 | beta_xy = 0.05, 20 | beta_xz = 0.05, 21 | beta_yx = 0.2, 22 | beta_yz = 0.2, 23 | beta_zx = 0.35, 24 | beta_zy = 0.35, 25 | threshold = Inf, 26 | transient = 1, 27 | interact = "local", 28 | aggregate_fn = NULL, 29 | noise = 0, 30 | seed = 42L, 31 | nb = NULL 32 | ) 33 | 34 | \S4method{slm}{SpatRaster}( 35 | data, 36 | x = NULL, 37 | y = NULL, 38 | z = NULL, 39 | k = 4, 40 | step = 15, 41 | alpha_x = 0.28, 42 | alpha_y = 0.25, 43 | alpha_z = 0.22, 44 | beta_xy = 0.05, 45 | beta_xz = 0.05, 46 | beta_yx = 0.2, 47 | beta_yz = 0.2, 48 | beta_zx = 0.35, 49 | beta_zy = 0.35, 50 | threshold = Inf, 51 | transient = 1, 52 | interact = "local", 53 | aggregate_fn = NULL, 54 | noise = 0, 55 | seed = 42L 56 | ) 57 | } 58 | \arguments{ 59 | \item{data}{observation data.} 60 | 61 | \item{x}{(optional) name of first spatial variable.} 62 | 63 | \item{y}{(optional) name of second spatial variable.} 64 | 65 | \item{z}{(optional) name of third spatial variable.} 66 | 67 | \item{k}{(optional) number of neighbors to use.} 68 | 69 | \item{step}{(optional) number of
simulation time steps.} 70 | 71 | \item{alpha_x}{(optional) growth parameter for x.} 72 | 73 | \item{alpha_y}{(optional) growth parameter for y.} 74 | 75 | \item{alpha_z}{(optional) growth parameter for z.} 76 | 77 | \item{beta_xy}{(optional) cross-inhibition from x to y.} 78 | 79 | \item{beta_xz}{(optional) cross-inhibition from x to z.} 80 | 81 | \item{beta_yx}{(optional) cross-inhibition from y to x.} 82 | 83 | \item{beta_yz}{(optional) cross-inhibition from y to z.} 84 | 85 | \item{beta_zx}{(optional) cross-inhibition from z to x.} 86 | 87 | \item{beta_zy}{(optional) cross-inhibition from z to y.} 88 | 89 | \item{threshold}{(optional) set to \code{NaN} if the absolute value exceeds this threshold.} 90 | 91 | \item{transient}{(optional) transients to be excluded from the results.} 92 | 93 | \item{interact}{(optional) type of cross-variable interaction (\code{local} or \code{neighbors}).} 94 | 95 | \item{aggregate_fn}{(optional) custom aggregation function (must accept a numeric vector and return a single numeric value).} 96 | 97 | \item{noise}{(optional) standard deviation of white noise.} 98 | 99 | \item{seed}{(optional) random seed.} 100 | 101 | \item{nb}{(optional) neighbours list.} 102 | } 103 | \value{ 104 | A list 105 | } 106 | \description{ 107 | spatial logistic map 108 | } 109 | \examples{ 110 | columbus = sf::read_sf(system.file("case/columbus.gpkg",package="spEDM")) 111 | columbus$inc = sdsfun::normalize_vector(columbus$inc) 112 | slm(columbus,"inc") 113 | 114 | } 115 | \references{ 116 | Willeboordse, F.H., The spatial logistic map as a simple prototype for spatiotemporal chaos, Chaos, 533–540 (2003). 
117 | } 118 | -------------------------------------------------------------------------------- /R/sctest.R: -------------------------------------------------------------------------------- 1 | .sc_sf_method = \(data, cause, effect, k, block = 3, boot = 399, seed = 42L, base = 2, lib = NULL, pred = NULL, 2 | threads = detectThreads(), detrend = TRUE, normalize = FALSE, progressbar = FALSE, nb = NULL){ 3 | varname = .check_character(cause, effect) 4 | if (is.null(nb)) nb = .internal_lattice_nb(data) 5 | block = RcppDivideLattice(nb,block) 6 | cause = .uni_lattice(data,cause,detrend) 7 | effect = .uni_lattice(data,effect,detrend) 8 | if (is.null(lib)) lib = which(!(is.na(cause) | is.na(effect))) 9 | if (is.null(pred)) pred = lib 10 | return(.bind_sc(RcppSGC4Lattice(cause,effect,nb,lib,pred,block,k,threads,boot,base,seed,TRUE,normalize,progressbar),varname)) 11 | } 12 | 13 | .sc_spatraster_method = \(data, cause, effect, k, block = 3, boot = 399, seed = 42L, base = 2, lib = NULL, pred = NULL, 14 | threads = detectThreads(), detrend = TRUE, normalize = FALSE, progressbar = FALSE, grid.coord = TRUE){ 15 | varname = .check_character(cause, effect) 16 | cause = .uni_grid(data,cause,detrend,grid.coord) 17 | effect = .uni_grid(data,effect,detrend,grid.coord) 18 | block = matrix(RcppDivideGrid(effect,block),ncol = 1) 19 | if (is.null(lib)) lib = which(!(is.na(cause) | is.na(effect)), arr.ind = TRUE) 20 | if (is.null(pred)) pred = lib 21 | return(.bind_sc(RcppSGC4Grid(cause,effect,lib,pred,block,k,threads,boot,base,seed,TRUE,normalize,progressbar),varname)) 22 | } 23 | 24 | #' spatial causality test 25 | #' 26 | #' @param data observation data. 27 | #' @param cause name of causal variable. 28 | #' @param effect name of effect variable. 29 | #' @param k number of nearest neighbors used in symbolization (required, no default). 30 | #' @param block (optional) number of blocks used in spatial block bootstrap. 31 | #' @param boot (optional) number of bootstraps to perform.
32 | #' @param seed (optional) random seed. 33 | #' @param base (optional) logarithm base. 34 | #' @param lib (optional) libraries indices (input needed: `vector` - spatial vector, `matrix` - spatial raster). 35 | #' @param pred (optional) predictions indices (input requirement same as `lib`). 36 | #' @param threads (optional) number of threads to use. 37 | #' @param detrend (optional) whether to remove the linear trend. 38 | #' @param normalize (optional) whether to normalize the result. 39 | #' @param progressbar (optional) whether to show the progress bar. 40 | #' @param nb (optional) neighbours list. 41 | #' 42 | #' @return A list 43 | #' \describe{ 44 | #' \item{\code{sc}}{statistic for spatial causality} 45 | #' \item{\code{varname}}{names of causal and effect variables} 46 | #' } 47 | #' @export 48 | #' @name sc.test 49 | #' @aliases sc.test,sf-method 50 | #' @references 51 | #' Herrera, M., Mur, J., & Ruiz, M. (2016). Detecting causal relationships between spatial processes. Papers in Regional Science, 95(3), 577–595. 52 | #' 53 | #' @examples 54 | #' columbus = sf::read_sf(system.file("case/columbus.gpkg",package="spEDM")) 55 | #' \donttest{ 56 | #' sc.test(columbus,"hoval","crime",k = 15) 57 | #' } 58 | methods::setMethod("sc.test", "sf", .sc_sf_method) 59 | 60 | #' @rdname sc.test 61 | #' @param grid.coord (optional) whether to detrend using cell center coordinates (`TRUE`) or row/column numbers (`FALSE`). 
62 | methods::setMethod("sc.test", "SpatRaster", .sc_spatraster_method) 63 | -------------------------------------------------------------------------------- /man/pc.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/pc.R 3 | \name{pc} 4 | \alias{pc} 5 | \alias{pc,sf-method} 6 | \alias{pc,SpatRaster-method} 7 | \title{optimal parameter search for pattern causality} 8 | \usage{ 9 | \S4method{pc}{sf}( 10 | data, 11 | column, 12 | target, 13 | E = 2:10, 14 | k = E + 2, 15 | tau = 1, 16 | style = 1, 17 | lib = NULL, 18 | pred = NULL, 19 | dist.metric = "L2", 20 | zero.tolerance = max(k), 21 | relative = TRUE, 22 | weighted = TRUE, 23 | maximize = "positive", 24 | threads = detectThreads(), 25 | detrend = FALSE, 26 | nb = NULL 27 | ) 28 | 29 | \S4method{pc}{SpatRaster}( 30 | data, 31 | column, 32 | target, 33 | E = 2:10, 34 | k = E + 2, 35 | tau = 1, 36 | style = 1, 37 | lib = NULL, 38 | pred = NULL, 39 | dist.metric = "L2", 40 | zero.tolerance = max(k), 41 | relative = TRUE, 42 | weighted = TRUE, 43 | maximize = "positive", 44 | threads = detectThreads(), 45 | detrend = FALSE, 46 | grid.coord = TRUE 47 | ) 48 | } 49 | \arguments{ 50 | \item{data}{observation data.} 51 | 52 | \item{column}{name of library variable.} 53 | 54 | \item{target}{name of target variable.} 55 | 56 | \item{E}{(optional) embedding dimensions.} 57 | 58 | \item{k}{(optional) number of nearest neighbors used.} 59 | 60 | \item{tau}{(optional) step of spatial lags.} 61 | 62 | \item{style}{(optional) embedding style (\code{0} includes current state, \code{1} excludes it).} 63 | 64 | \item{lib}{(optional) libraries indices (input needed: \code{vector} - spatial vector, \code{matrix} - spatial raster).} 65 | 66 | \item{pred}{(optional) predictions indices (input requirement same as \code{lib}).} 67 | 68 | \item{dist.metric}{(optional) distance metric (\code{L1}: Manhattan, \code{L2}: 
Euclidean).} 69 | 70 | \item{zero.tolerance}{(optional) maximum number of zeros tolerated in signature space.} 71 | 72 | \item{relative}{(optional) whether to calculate relative changes in embeddings.} 73 | 74 | \item{weighted}{(optional) whether to weight causal strength.} 75 | 76 | \item{maximize}{(optional) causality metric to maximize: one of "positive", "negative", or "dark".} 77 | 78 | \item{threads}{(optional) number of threads to use.} 79 | 80 | \item{detrend}{(optional) whether to remove the linear trend.} 81 | 82 | \item{nb}{(optional) neighbours list.} 83 | 84 | \item{grid.coord}{(optional) whether to detrend using cell center coordinates (\code{TRUE}) or row/column numbers (\code{FALSE}).} 85 | } 86 | \value{ 87 | A list 88 | \describe{ 89 | \item{\code{xmap}}{cross mapping performance} 90 | \item{\code{varname}}{name of target variable} 91 | \item{\code{method}}{method of cross mapping} 92 | \item{\code{maximize}}{maximized causality metric} 93 | } 94 | } 95 | \description{ 96 | optimal parameter search for pattern causality 97 | } 98 | \examples{ 99 | columbus = sf::read_sf(system.file("case/columbus.gpkg",package="spEDM")) 100 | \donttest{ 101 | pc(columbus,"hoval","crime",E = 5:10,maximize = "negative") 102 | } 103 | } 104 | \references{ 105 | Stavroglou, S.K., Pantelous, A.A., Stanley, H.E., Zuev, K.M., 2020. Unveiling causal interactions in complex systems. Proceedings of the National Academy of Sciences 117, 7599–7605. 106 | } 107 | -------------------------------------------------------------------------------- /.github/workflows/rhub.yaml: -------------------------------------------------------------------------------- 1 | # R-hub's generic GitHub Actions workflow file. It's canonical location is at 2 | # https://github.com/r-hub/actions/blob/v1/workflows/rhub.yaml 3 | # You can update this file to a newer version using the rhub2 package: 4 | # 5 | # rhub::rhub_setup() 6 | # 7 | # It is unlikely that you need to modify this file manually. 
8 | 9 | name: R-hub 10 | run-name: "${{ github.event.inputs.id }}: ${{ github.event.inputs.name || format('Manually run by {0}', github.triggering_actor) }}" 11 | 12 | on: 13 | workflow_dispatch: 14 | inputs: 15 | config: 16 | description: 'A comma separated list of R-hub platforms to use.' 17 | type: string 18 | default: 'linux,windows,macos' 19 | name: 20 | description: 'Run name. You can leave this empty now.' 21 | type: string 22 | id: 23 | description: 'Unique ID. You can leave this empty now.' 24 | type: string 25 | 26 | jobs: 27 | 28 | setup: 29 | runs-on: ubuntu-latest 30 | outputs: 31 | containers: ${{ steps.rhub-setup.outputs.containers }} 32 | platforms: ${{ steps.rhub-setup.outputs.platforms }} 33 | 34 | steps: 35 | # NO NEED TO CHECKOUT HERE 36 | - uses: r-hub/actions/setup@v1 37 | with: 38 | config: ${{ github.event.inputs.config }} 39 | id: rhub-setup 40 | 41 | linux-containers: 42 | needs: setup 43 | if: ${{ needs.setup.outputs.containers != '[]' }} 44 | runs-on: ubuntu-latest 45 | name: ${{ matrix.config.label }} 46 | strategy: 47 | fail-fast: false 48 | matrix: 49 | config: ${{ fromJson(needs.setup.outputs.containers) }} 50 | container: 51 | image: ${{ matrix.config.container }} 52 | 53 | steps: 54 | - uses: r-hub/actions/checkout@v1 55 | - uses: r-hub/actions/platform-info@v1 56 | with: 57 | token: ${{ secrets.RHUB_TOKEN }} 58 | job-config: ${{ matrix.config.job-config }} 59 | - uses: r-hub/actions/setup-deps@v1 60 | with: 61 | token: ${{ secrets.RHUB_TOKEN }} 62 | job-config: ${{ matrix.config.job-config }} 63 | - uses: r-hub/actions/run-check@v1 64 | with: 65 | token: ${{ secrets.RHUB_TOKEN }} 66 | job-config: ${{ matrix.config.job-config }} 67 | 68 | other-platforms: 69 | needs: setup 70 | if: ${{ needs.setup.outputs.platforms != '[]' }} 71 | runs-on: ${{ matrix.config.os }} 72 | name: ${{ matrix.config.label }} 73 | strategy: 74 | fail-fast: false 75 | matrix: 76 | config: ${{ fromJson(needs.setup.outputs.platforms) }} 77 | 78 | steps: 79 | - 
uses: r-hub/actions/checkout@v1 80 | - uses: r-hub/actions/setup-r@v1 81 | with: 82 | job-config: ${{ matrix.config.job-config }} 83 | token: ${{ secrets.RHUB_TOKEN }} 84 | - uses: r-hub/actions/platform-info@v1 85 | with: 86 | token: ${{ secrets.RHUB_TOKEN }} 87 | job-config: ${{ matrix.config.job-config }} 88 | - uses: r-hub/actions/setup-deps@v1 89 | with: 90 | job-config: ${{ matrix.config.job-config }} 91 | token: ${{ secrets.RHUB_TOKEN }} 92 | - uses: r-hub/actions/run-check@v1 93 | with: 94 | job-config: ${{ matrix.config.job-config }} 95 | token: ${{ secrets.RHUB_TOKEN }} 96 | -------------------------------------------------------------------------------- /man/smap.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/smap.R 3 | \name{smap} 4 | \alias{smap} 5 | \alias{smap,sf-method} 6 | \alias{smap,SpatRaster-method} 7 | \title{optimal parameter search for smap forecasting} 8 | \usage{ 9 | \S4method{smap}{sf}( 10 | data, 11 | column, 12 | target, 13 | E = 3, 14 | k = E + 2, 15 | tau = 1, 16 | style = 1, 17 | stack = FALSE, 18 | lib = NULL, 19 | pred = NULL, 20 | dist.metric = "L2", 21 | dist.average = TRUE, 22 | theta = c(0, 1e-04, 3e-04, 0.001, 0.003, 0.01, 0.03, 0.1, 0.3, 0.5, 0.75, 1, 1.5, 2, 3, 23 | 4, 6, 8), 24 | threads = detectThreads(), 25 | detrend = TRUE, 26 | nb = NULL 27 | ) 28 | 29 | \S4method{smap}{SpatRaster}( 30 | data, 31 | column, 32 | target, 33 | E = 3, 34 | k = E + 2, 35 | tau = 1, 36 | style = 1, 37 | stack = FALSE, 38 | lib = NULL, 39 | pred = NULL, 40 | dist.metric = "L2", 41 | dist.average = TRUE, 42 | theta = c(0, 1e-04, 3e-04, 0.001, 0.003, 0.01, 0.03, 0.1, 0.3, 0.5, 0.75, 1, 1.5, 2, 3, 43 | 4, 6, 8), 44 | threads = detectThreads(), 45 | detrend = TRUE, 46 | grid.coord = TRUE, 47 | embed.direction = 0 48 | ) 49 | } 50 | \arguments{ 51 | \item{data}{observation data.} 52 | 53 | \item{column}{name of library 
variable.} 54 | 55 | \item{target}{name of target variable.} 56 | 57 | \item{E}{(optional) embedding dimensions.} 58 | 59 | \item{k}{(optional) number of nearest neighbors used.} 60 | 61 | \item{tau}{(optional) step of spatial lags.} 62 | 63 | \item{style}{(optional) embedding style (\code{0} includes current state, \code{1} excludes it).} 64 | 65 | \item{stack}{(optional) whether to stack embeddings.} 66 | 67 | \item{lib}{(optional) libraries indices (input needed: \code{vector} - spatial vector, \code{matrix} - spatial raster).} 68 | 69 | \item{pred}{(optional) predictions indices (input requirement same as \code{lib}).} 70 | 71 | \item{dist.metric}{(optional) distance metric (\code{L1}: Manhattan, \code{L2}: Euclidean).} 72 | 73 | \item{dist.average}{(optional) whether to average distance.} 74 | 75 | \item{theta}{(optional) weighting parameter for distances.} 76 | 77 | \item{threads}{(optional) number of threads to use.} 78 | 79 | \item{detrend}{(optional) whether to remove the linear trend.} 80 | 81 | \item{nb}{(optional) neighbours list.} 82 | 83 | \item{grid.coord}{(optional) whether to detrend using cell center coordinates (\code{TRUE}) or row/column numbers (\code{FALSE}).} 84 | 85 | \item{embed.direction}{(optional) direction selector for embeddings (\code{0} returns all directions, \code{1-8} correspond to NW, N, NE, W, E, SW, S, SE).} 86 | } 87 | \value{ 88 | A list 89 | \describe{ 90 | \item{\code{xmap}}{forecast performance} 91 | \item{\code{varname}}{name of target variable} 92 | \item{\code{method}}{method of cross mapping} 93 | } 94 | } 95 | \description{ 96 | optimal parameter search for smap forecasting 97 | } 98 | \examples{ 99 | columbus = sf::read_sf(system.file("case/columbus.gpkg",package="spEDM")) 100 | \donttest{ 101 | smap(columbus,"inc","crime",E = 5,k = 6) 102 | } 103 | } 104 | \references{ 105 | Sugihara G. 1994. Nonlinear forecasting for the classification of natural time series. 
Philosophical Transactions: Physical Sciences and Engineering, 348 (1688):477-495. 106 | } 107 | -------------------------------------------------------------------------------- /R/slm.R: -------------------------------------------------------------------------------- 1 | .slm_sf_method = \(data, x = NULL, y = NULL, z = NULL, 2 | k = 4, step = 15, alpha_x = 0.28, alpha_y = 0.25, alpha_z = 0.22, 3 | beta_xy = 0.05, beta_xz = 0.05, beta_yx = 0.2, beta_yz = 0.2, beta_zx = 0.35, beta_zy = 0.35, 4 | threshold = Inf, transient = 1, interact = "local", aggregate_fn = NULL, noise = 0, seed = 42L, nb = NULL){ 5 | vx = .uni_lattice(data,x,FALSE) 6 | vy = .uni_lattice(data,y,FALSE) 7 | vz = .uni_lattice(data,z,FALSE) 8 | if (is.null(nb)) nb = .internal_lattice_nb(data) 9 | return(.bind_slm(RcppSLMTri4Lattice(vx,vy,vz,nb,k,step,alpha_x,alpha_y,alpha_z,beta_xy,beta_xz,beta_yx,beta_yz,beta_zx,beta_zy,any(interact != "local"),noise,threshold,seed),x,y,z,transient,aggregate_fn)) 10 | } 11 | 12 | .slm_spatraster_method = \(data, x = NULL, y = NULL, z = NULL, 13 | k = 4, step = 15, alpha_x = 0.28, alpha_y = 0.25, alpha_z = 0.22, 14 | beta_xy = 0.05, beta_xz = 0.05, beta_yx = 0.2, beta_yz = 0.2, beta_zx = 0.35, beta_zy = 0.35, 15 | threshold = Inf, transient = 1, interact = "local", aggregate_fn = NULL, noise = 0, seed = 42L){ 16 | mx = .uni_grid(data,x,FALSE) 17 | my = .uni_grid(data,y,FALSE) 18 | mz = .uni_grid(data,z,FALSE) 19 | return(.bind_slm(RcppSLMTri4Grid(mx,my,mz,k,step,alpha_x,alpha_y,alpha_z,beta_xy,beta_xz,beta_yx,beta_yz,beta_zx,beta_zy,any(interact != "local"),noise,threshold,seed),x,y,z,transient,aggregate_fn)) 20 | } 21 | 22 | #' spatial logistic map 23 | #' 24 | #' @param data observation data. 25 | #' @param x (optional) name of first spatial variable. 26 | #' @param y (optional) name of second spatial variable. 27 | #' @param z (optional) name of third spatial variable. 28 | #' @param k (optional) number of neighbors to use.
29 | #' @param step (optional) number of simulation time steps. 30 | #' @param alpha_x (optional) growth parameter for x. 31 | #' @param alpha_y (optional) growth parameter for y. 32 | #' @param alpha_z (optional) growth parameter for z. 33 | #' @param beta_xy (optional) cross-inhibition from x to y. 34 | #' @param beta_xz (optional) cross-inhibition from x to z. 35 | #' @param beta_yx (optional) cross-inhibition from y to x. 36 | #' @param beta_yz (optional) cross-inhibition from y to z. 37 | #' @param beta_zx (optional) cross-inhibition from z to x. 38 | #' @param beta_zy (optional) cross-inhibition from z to y. 39 | #' @param threshold (optional) set to `NaN` if the absolute value exceeds this threshold. 40 | #' @param transient (optional) transients to be excluded from the results. 41 | #' @param interact (optional) type of cross-variable interaction (`local` or `neighbors`). 42 | #' @param aggregate_fn (optional) custom aggregation function (must accept a numeric vector and return a single numeric value). 43 | #' @param noise (optional) standard deviation of white noise. 44 | #' @param seed (optional) random seed. 45 | #' @param nb (optional) neighbours list. 46 | #' 47 | #' @return A list 48 | #' @export 49 | #' @name slm 50 | #' @aliases slm,sf-method 51 | #' @references 52 | #' Willeboordse, F.H., The spatial logistic map as a simple prototype for spatiotemporal chaos, Chaos, 533–540 (2003). 
53 | #' 54 | #' @examples 55 | #' columbus = sf::read_sf(system.file("case/columbus.gpkg",package="spEDM")) 56 | #' columbus$inc = sdsfun::normalize_vector(columbus$inc) 57 | #' slm(columbus,"inc") 58 | #' 59 | methods::setMethod("slm", "sf", .slm_sf_method) 60 | 61 | #' @rdname slm 62 | methods::setMethod("slm", "SpatRaster", .slm_spatraster_method) 63 | -------------------------------------------------------------------------------- /R/gpc.R: -------------------------------------------------------------------------------- 1 | .gpc_sf_method = \(data, cause, effect, libsizes = NULL, E = 3, k = E+2, tau = 1, style = 1, lib = NULL, pred = NULL, boot = 99, random = TRUE, seed = 42L, dist.metric = "L2", zero.tolerance = k, 2 | relative = TRUE, weighted = TRUE, threads = detectThreads(), detrend = FALSE, parallel.level = "low", bidirectional = TRUE, progressbar = TRUE, nb = NULL){ 3 | varname = .check_character(cause, effect) 4 | if (is.null(nb)) nb = .internal_lattice_nb(data) 5 | cause = .uni_lattice(data,cause,detrend) 6 | effect = .uni_lattice(data,effect,detrend) 7 | if (is.null(lib)) lib = which(!(is.na(cause) | is.na(effect))) 8 | if (is.null(pred)) pred = lib 9 | return(.run_gpc(cause,effect,E[1],k[1],tau[1],style,lib,pred,.check_distmetric(dist.metric), 10 | zero.tolerance, relative, weighted, threads, bidirectional, varname, nb, 11 | libsizes, boot, random, seed, parallel.level, progressbar)) 12 | } 13 | 14 | .gpc_spatraster_method = \(data, cause, effect, libsizes = NULL, E = 3, k = E+2, tau = 1, style = 1, lib = NULL, pred = NULL, boot = 99, random = TRUE, seed = 42L, dist.metric = "L2", zero.tolerance = k, 15 | relative = TRUE, weighted = TRUE, threads = detectThreads(), detrend = FALSE, parallel.level = "low", bidirectional = TRUE, progressbar = TRUE, grid.coord = TRUE){ 16 | varname = .check_character(cause, effect) 17 | cause = .uni_grid(data,cause,detrend,grid.coord) 18 | effect = .uni_grid(data,effect,detrend,grid.coord) 19 | if (is.null(lib)) lib = 
which(!(is.na(cause) | is.na(effect)), arr.ind = TRUE) 20 | if (is.null(pred)) pred = lib 21 | return(.run_gpc(cause,effect,E[1],k[1],tau[1],style,lib,pred,.check_distmetric(dist.metric), 22 | zero.tolerance, relative, weighted, threads, bidirectional, varname, NULL, 23 | libsizes, boot, random, seed, parallel.level, progressbar)) 24 | } 25 | 26 | #' geographical pattern causality 27 | #' 28 | #' @inheritParams gcmc 29 | #' @param boot (optional) number of bootstraps to perform. 30 | #' @param seed (optional) random seed. 31 | #' @param random (optional) whether to use random sampling. 32 | #' @param zero.tolerance (optional) maximum number of zeros tolerated in signature space. 33 | #' @param relative (optional) whether to calculate relative changes in embeddings. 34 | #' @param weighted (optional) whether to weight causal strength. 35 | #' 36 | #' @return A list 37 | #' \describe{ 38 | #' \item{\code{xmap}}{cross mapping results (only present if `libsizes` is not `NULL`)} 39 | #' \item{\code{causality}}{per-sample causality statistics (present if `libsizes` is `NULL`)} 40 | #' \item{\code{summary}}{overall causal strength (present if `libsizes` is `NULL`)} 41 | #' \item{\code{pattern}}{pairwise pattern relationships (present if `libsizes` is `NULL`)} 42 | #' \item{\code{varname}}{names of causal and effect variables} 43 | #' \item{\code{bidirectional}}{whether to examine bidirectional causality} 44 | #' } 45 | #' @export 46 | #' @name gpc 47 | #' @aliases gpc,sf-method 48 | #' @references 49 | #' Zhang, Z., Wang, J., 2025. A model to identify causality for geographic patterns. International Journal of Geographical Information Science 1–21. 
50 | #' 51 | #' @examples 52 | #' columbus = sf::read_sf(system.file("case/columbus.gpkg",package="spEDM")) 53 | #' \donttest{ 54 | #' gpc(columbus,"hoval","crime",E = 6,k = 9) 55 | #' 56 | #' # convergence diagnostics 57 | #' g = gpc(columbus,"hoval","crime",libsizes = seq(5,45,5),E = 6,k = 9) 58 | #' plot(g) 59 | #' } 60 | methods::setMethod("gpc", "sf", .gpc_sf_method) 61 | 62 | #' @rdname gpc 63 | #' @param grid.coord (optional) whether to detrend using cell center coordinates (`TRUE`) or row/column numbers (`FALSE`). 64 | methods::setMethod("gpc", "SpatRaster", .gpc_spatraster_method) 65 | -------------------------------------------------------------------------------- /src/MultiViewEmbedding.h: -------------------------------------------------------------------------------- 1 | #ifndef MultiViewEmbedding_H 2 | #define MultiViewEmbedding_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include "CppStats.h" 9 | #include "SimplexProjection.h" 10 | #include 11 | 12 | /** 13 | * Computes the multi-view embedding by evaluating multiple feature embeddings using simplex projection, 14 | * selecting top-performing embeddings, and aggregating their contributions. 15 | * 16 | * Parameters: 17 | * - vectors: 2D vector where each row represents a sample and each column a feature. 18 | * - target: Target spatial cross sectional series aligned with the samples in vectors. 19 | * - lib_indices: A vector of indices indicating the library (training) set. 20 | * - pred_indices: A vector of indices indicating the prediction set. 21 | * - num_neighbors: Number of neighbors used for simplex projection. 22 | * - top_num: Number of top-performing reconstructions to select. 23 | * - dist_metric: Distance metric selector (1: Manhattan, 2: Euclidean). 24 | * - dist_average: Whether to average distance by the number of valid vector components. 25 | * - threads: Number of threads used from the global pool. 
26 | * 27 | * Returns: 28 | * A vector where each element is the predicted value from the selected embedding columns. 29 | */ 30 | std::vector MultiViewEmbedding( 31 | const std::vector>& vectors, 32 | const std::vector& target, 33 | const std::vector& lib_indices, 34 | const std::vector& pred_indices, 35 | int num_neighbors = 4, 36 | int top_num = 3, 37 | int dist_metric = 2, 38 | int dist_average = true, // NOTE(review): declared int but defaulted with a bool literal; confirm against the definition 39 | int threads = 8 40 | ); 41 | 42 | /** 43 | * @brief Computes the multi-view embedding from stacked (3D) feature embeddings using simplex projection. 44 | * 45 | * This overload supports a 3D nested vector structure where each outer element 46 | * represents a feature stack or multivariate embedding set. Each stack contains 47 | * a 2D matrix (samples × embedded dimensions). The function evaluates each stack 48 | * independently using simplex projection (via SimplexBehavior), ranks the stacks 49 | * by their predictive performance, and constructs a new 3D subset of the top 50 | * performing stacks for final prediction. 51 | * 52 | * Unlike the 2D overload, this version preserves the structural independence of 53 | * each selected embedding stack, forming a 3D tensor of selected embeddings that 54 | * is passed to SimplexProjectionPrediction for final multi-view prediction. 55 | * 56 | * @param vectors 3D vector: [stack][sample][dimension], representing multiple feature stacks. 57 | * @param target Target spatial cross-sectional series aligned with sample indices. 58 | * @param lib_indices Indices for the library (training) set. 59 | * @param pred_indices Indices for the prediction set. 60 | * @param num_neighbors Number of neighbors used for simplex projection. 61 | * @param top_num Number of top-performing stacks to select. 62 | * @param dist_metric Distance metric selector (1: Manhattan, 2: Euclidean). 63 | * @param dist_average Whether to average distances by the number of valid vector components.
64 | * @param threads Number of threads used for parallel computation. 65 | * 66 | * @return A vector containing predicted values derived from the selected embedding stacks. 67 | */ 68 | std::vector MultiViewEmbedding( 69 | const std::vector>>& vectors, 70 | const std::vector& target, 71 | const std::vector& lib_indices, 72 | const std::vector& pred_indices, 73 | int num_neighbors = 4, 74 | int top_num = 3, 75 | int dist_metric = 2, 76 | int dist_average = true, 77 | int threads = 8 78 | ); 79 | 80 | #endif // MultiViewEmbedding_H 81 | -------------------------------------------------------------------------------- /src/CrossMappingCardinality.h: -------------------------------------------------------------------------------- 1 | #ifndef CrossMappingCardinality_H 2 | #define CrossMappingCardinality_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include "CppStats.h" 12 | #include "CppDistances.h" 13 | #include "DataStruct.h" 14 | #include "IntersectionCardinality.h" 15 | #include 16 | 17 | /** 18 | * @brief Computes the Cross Mapping Cardinality (CMC) causal strength score. 19 | * 20 | * This function evaluates the directional causal influence from one time series 21 | * to another using Cross Mapping Cardinality. It performs state-space reconstruction, 22 | * neighbor searching, and statistical evaluation across a range of library sizes. 23 | * 24 | * @param embedding_x State-space reconstructed time series of the potential cause. 25 | * @param embedding_y State-space reconstructed time series of the potential effect. 26 | * @param lib_sizes A vector of library sizes to use for subsampling during CMC analysis. 27 | * @param lib Indices of library points (0-based). 28 | * @param pred Indices of prediction points (0-based). 29 | * @param num_neighbors Number of neighbors used in cross mapping. 30 | * @param n_excluded Number of temporally excluded neighbors (Theiler window). 
31 | * @param dist_metric Distance metric selector (1: Manhattan, 2: Euclidean). 32 | * @param threads Number of threads for parallel processing. 33 | * @param parallel_level Level of parallelism to control nested parallel execution. 34 | * @param progressbar Boolean flag to show or hide a progress bar. 35 | * 36 | * @return CMCRes A struct containing: 37 | * - cross_mapping: A vector of AUC values for the largest library size. 38 | * - causal_strength: A 2D vector with rows [library size, mean AUC] across all lib sizes. 39 | */ 40 | CMCRes CrossMappingCardinality( 41 | const std::vector>& embedding_x, 42 | const std::vector>& embedding_y, 43 | const std::vector& lib_sizes, 44 | const std::vector& lib, 45 | const std::vector& pred, 46 | size_t num_neighbors = 4, 47 | size_t n_excluded = 0, 48 | int dist_metric = 2, 49 | int threads = 8, 50 | int parallel_level = 0, 51 | bool progressbar = true); 52 | 53 | // /** 54 | // * Computes the Cross Mapping Cardinality (CMC) causal strength score (adjusted based on Python logic). 55 | // * 56 | // * Parameters: 57 | // * embedding_x: State-space reconstruction (embedded) of the potential cause variable. 58 | // * embedding_y: State-space reconstruction (embedded) of the potential effect variable. 59 | // * lib: Library index vector (1-based in R, converted to 0-based). 60 | // * pred: Prediction index vector (1-based in R, converted to 0-based). 61 | // * num_neighbors: Vector of numbers of neighbors used for cross mapping (corresponding to n_neighbor in python package crossmapy). 62 | // * n_excluded: Vector of numbers of neighbors excluded from the distance matrix (corresponding to n_excluded in python package crossmapy). 63 | // * threads: Number of parallel threads. 64 | // * parallel_level: the level of parallelization 65 | // * progressbar: Whether to display a progress bar. 
66 | // * 67 | // * Returns: 68 | // * A vector of the results of the DeLong test for the AUC values: [number of neighbors, IC score, p-value, confidence interval upper bound, confidence interval lower bound], one for each entry in num_neighbors. 69 | // * The result contains multiple rows, each corresponding to a different number of neighbors. 70 | // */ 71 | // std::vector> CrossMappingCardinality( 72 | // const std::vector>& embedding_x, 73 | // const std::vector>& embedding_y, 74 | // const std::vector& lib, 75 | // const std::vector& pred, 76 | // const std::vector& num_neighbors, 77 | // const std::vector& n_excluded, 78 | // int threads, 79 | // int parallel_level = 0, 80 | // bool progressbar = true); 81 | 82 | #endif // CrossMappingCardinality_H 83 | -------------------------------------------------------------------------------- /man/gccm.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/gccm.R 3 | \name{gccm} 4 | \alias{gccm} 5 | \alias{gccm,sf-method} 6 | \alias{gccm,SpatRaster-method} 7 | \title{geographical convergent cross mapping} 8 | \usage{ 9 | \S4method{gccm}{sf}( 10 | data, 11 | cause, 12 | effect, 13 | libsizes = NULL, 14 | E = 3, 15 | k = E + 2, 16 | tau = 1, 17 | style = 1, 18 | stack = FALSE, 19 | lib = NULL, 20 | pred = NULL, 21 | dist.metric = "L2", 22 | dist.average = TRUE, 23 | theta = 1, 24 | algorithm = "simplex", 25 | threads = detectThreads(), 26 | detrend = TRUE, 27 | parallel.level = "low", 28 | bidirectional = TRUE, 29 | progressbar = TRUE, 30 | nb = NULL 31 | ) 32 | 33 | \S4method{gccm}{SpatRaster}( 34 | data, 35 | cause, 36 | effect, 37 | libsizes = NULL, 38 | E = 3, 39 | k = E + 2, 40 | tau = 1, 41 | style = 1, 42 | stack = FALSE, 43 | lib = NULL, 44 | pred = NULL, 45 | dist.metric = "L2", 46 | dist.average = TRUE, 47 | theta = 1, 48 | algorithm = "simplex", 49 | threads = detectThreads(), 50 | detrend = TRUE, 51
| parallel.level = "low", 52 | bidirectional = TRUE, 53 | progressbar = TRUE, 54 | grid.coord = TRUE, 55 | embed.direction = 0, 56 | win.ratio = 0 57 | ) 58 | } 59 | \arguments{ 60 | \item{data}{observation data.} 61 | 62 | \item{cause}{name of causal variable.} 63 | 64 | \item{effect}{name of effect variable.} 65 | 66 | \item{libsizes}{(optional) number of spatial units used (input needed: \code{vector} - spatial vector, \code{matrix} - spatial raster).} 67 | 68 | \item{E}{(optional) embedding dimensions.} 69 | 70 | \item{k}{(optional) number of nearest neighbors.} 71 | 72 | \item{tau}{(optional) step of spatial lags.} 73 | 74 | \item{style}{(optional) embedding style (\code{0} includes current state, \code{1} excludes it).} 75 | 76 | \item{stack}{(optional) whether to stack embeddings.} 77 | 78 | \item{lib}{(optional) libraries indices (input requirement same as \code{libsizes}).} 79 | 80 | \item{pred}{(optional) predictions indices (input requirement same as \code{libsizes}).} 81 | 82 | \item{dist.metric}{(optional) distance metric (\code{L1}: Manhattan, \code{L2}: Euclidean).} 83 | 84 | \item{dist.average}{(optional) whether to average distance.} 85 | 86 | \item{theta}{(optional) weighting parameter for distances, useful when \code{algorithm} is \code{smap}.} 87 | 88 | \item{algorithm}{(optional) prediction algorithm.} 89 | 90 | \item{threads}{(optional) number of threads to use.} 91 | 92 | \item{detrend}{(optional) whether to remove the linear trend.} 93 | 94 | \item{parallel.level}{(optional) level of parallelism, \code{low} or \code{high}.} 95 | 96 | \item{bidirectional}{(optional) whether to examine bidirectional causality.} 97 | 98 | \item{progressbar}{(optional) whether to show the progress bar.} 99 | 100 | \item{nb}{(optional) neighbours list.} 101 | 102 | \item{grid.coord}{(optional) whether to detrend using cell center coordinates (\code{TRUE}) or row/column numbers (\code{FALSE}).} 103 | 104 | \item{embed.direction}{(optional) direction selector for 
embeddings (\code{0} returns all directions, \code{1-8} correspond to NW, N, NE, W, E, SW, S, SE).} 105 | 106 | \item{win.ratio}{(optional) ratio of sliding window scale to speed up state-space predictions.} 107 | } 108 | \value{ 109 | A list 110 | \describe{ 111 | \item{\code{xmap}}{cross mapping results} 112 | \item{\code{varname}}{names of causal and effect variables} 113 | \item{\code{bidirectional}}{whether to examine bidirectional causality} 114 | } 115 | } 116 | \description{ 117 | geographical convergent cross mapping 118 | } 119 | \examples{ 120 | columbus = sf::read_sf(system.file("case/columbus.gpkg",package="spEDM")) 121 | \donttest{ 122 | g = gccm(columbus,"hoval","crime",libsizes = seq(5,45,5),E = 6) 123 | g 124 | plot(g,ylimits = c(0,0.85)) 125 | } 126 | } 127 | \references{ 128 | Gao, B., Yang, J., Chen, Z. et al. Causal inference from cross-sectional earth system data with geographical convergent cross mapping. Nat Commun 14, 5875 (2023). 129 | } 130 | -------------------------------------------------------------------------------- /vignettes/main2_ssr.Rmd.orig: -------------------------------------------------------------------------------- 1 | --- 2 | title: "State Space Reconstruction" 3 | author: "Wenbo Lv" 4 | date: | 5 | | Last update: 2025-12-15 6 | | Last run: `r Sys.Date()` 7 | output: rmarkdown::html_vignette 8 | vignette: > 9 | %\VignetteIndexEntry{2. State Space Reconstruction} 10 | %\VignetteEngine{knitr::rmarkdown} 11 | %\VignetteEncoding{UTF-8} 12 | --- 13 | 14 | ```{r, include = FALSE} 15 | knitr::opts_chunk$set( 16 | collapse = TRUE, 17 | comment = "##", 18 | fig.path = "man/figures/ssr/" 19 | ) 20 | ``` 21 | 22 | ## Methodological Background 23 | 24 | Takens' theorem states that for a dynamical system $\phi$, if its trajectory converges to an attractor manifold $M$—a bounded and invariant set of states—then there exists a smooth mapping between the system $\phi$ and its attractor $M$.
Consequently, the time series observations of $\phi$ can be used to reconstruct the structure of $M$ through delay embedding. 25 | 26 | According to the generalized embedding theorem, for a compact $d$-dimensional manifold $M$ and a set of observation functions $\langle h_1, h_2, \ldots, h_L \rangle$, the mapping $\psi_{\phi,h}(x) = \langle h_1(x), h_2(x), \ldots, h_L(x) \rangle$ is an embedding of $M$ when $L \geq 2d + 1$. Here, *embedding* refers to a one-to-one map that resolves all singularities of the original manifold. The observation functions $h_i$ can take the form of time-lagged values from a single time series, lags from multiple time series, or even completely distinct measurements. The former two are simply special cases of the third. 27 | 28 | This embedding framework can be extended to *spatial cross-sectional data*, which lack temporal ordering but are observed over a spatial domain. In this context, the observation functions can be defined using the values of a variable at a focal spatial unit and its surrounding neighbors (known as *spatial lags* in spatial statistics). Specifically, for a spatial location $s$, the embedding can be written as: 29 | 30 | $$ 31 | \psi(x, s) = \langle h_s(x), h_{s(1)}(x), \ldots, h_{s(L-1)}(x) \rangle, 32 | $$ 33 | 34 | where $h_{s(i)}(x)$ denotes the observation function of the $i$-th order spatial lag unit relative to $s$. These spatial lags provide the necessary diversity of observations for effective manifold reconstruction. In practice, if a given spatial lag order involves multiple units, summary statistics such as the mean or directionally-weighted averages can be used as the observation function to maintain a one-to-one embedding. 
35 | 36 | ## Usage examples 37 | 38 | ### Example of spatial vector data 39 | 40 | Load the `spEDM` package and its county-level population density data: 41 | 42 | ```{r load_lattice_data} 43 | library(spEDM) 44 | 45 | popd_nb = spdep::read.gal(system.file("case/popd_nb.gal",package = "spEDM")) 46 | popd = readr::read_csv(system.file("case/popd.csv",package = "spEDM")) 47 | popd_sf = sf::st_as_sf(popd, coords = c("lon","lat"), crs = 4326) 48 | popd_sf 49 | ``` 50 | 51 | Embedding the variable `popd` from county-level population density: 52 | 53 | ```{r embed_lattice_data} 54 | v = spEDM::embedded(popd_sf,"popd",E = 10) 55 | v[1:5,c(4,5,10)] 56 | ``` 57 | 58 | ```{r fig1,fig.width=4.25,fig.height=4.5,fig.dpi=100,fig.cap=knitr::asis_output("**Figure 1**. The reconstructed shadow manifolds for the variable `popd`.")} 59 | plot3D::scatter3D(v[,4], v[,5], v[,10], colvar = NULL, pch = 19, 60 | col = "red", theta = 45, phi = 10, cex = 0.35, 61 | bty = "f", clab = NA, tickmarks = FALSE) 62 | ``` 63 | 64 |
65 | 66 | ### Example of spatial raster data 67 | 68 | Load the `spEDM` package and its farmland npp data: 69 | 70 | ```{r load_grid_data} 71 | library(spEDM) 72 | 73 | npp = terra::rast(system.file("case/npp.tif", package = "spEDM")) 74 | npp 75 | ``` 76 | 77 | Embedding the variable `npp` from farmland npp data: 78 | 79 | ```{r embed_grid_data} 80 | r = spEDM::embedded(npp,"npp",E = 5,tau = 20) 81 | r[which(!is.na(r),arr.ind = T)[1:5],1:3] 82 | ``` 83 | 84 | ```{r fig2,fig.width=4.25,fig.height=4.5,fig.dpi=100,fig.cap=knitr::asis_output("**Figure 2**. The reconstructed shadow manifolds for the variable `npp`.")} 85 | plot3D::scatter3D(r[,1], r[,2], r[,3], colvar = NULL, pch = 19, 86 | col = "#e77854", theta = 45, phi = 10, cex = 0.01, 87 | bty = "f", clab = NA, tickmarks = FALSE) 88 | ``` 89 | -------------------------------------------------------------------------------- /man/gpc.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/gpc.R 3 | \name{gpc} 4 | \alias{gpc} 5 | \alias{gpc,sf-method} 6 | \alias{gpc,SpatRaster-method} 7 | \title{geographical pattern causality} 8 | \usage{ 9 | \S4method{gpc}{sf}( 10 | data, 11 | cause, 12 | effect, 13 | libsizes = NULL, 14 | E = 3, 15 | k = E + 2, 16 | tau = 1, 17 | style = 1, 18 | lib = NULL, 19 | pred = NULL, 20 | boot = 99, 21 | random = TRUE, 22 | seed = 42L, 23 | dist.metric = "L2", 24 | zero.tolerance = k, 25 | relative = TRUE, 26 | weighted = TRUE, 27 | threads = detectThreads(), 28 | detrend = FALSE, 29 | parallel.level = "low", 30 | bidirectional = TRUE, 31 | progressbar = TRUE, 32 | nb = NULL 33 | ) 34 | 35 | \S4method{gpc}{SpatRaster}( 36 | data, 37 | cause, 38 | effect, 39 | libsizes = NULL, 40 | E = 3, 41 | k = E + 2, 42 | tau = 1, 43 | style = 1, 44 | lib = NULL, 45 | pred = NULL, 46 | boot = 99, 47 | random = TRUE, 48 | seed = 42L, 49 | dist.metric = "L2", 50 | zero.tolerance = k, 51 | 
relative = TRUE, 52 | weighted = TRUE, 53 | threads = detectThreads(), 54 | detrend = FALSE, 55 | parallel.level = "low", 56 | bidirectional = TRUE, 57 | progressbar = TRUE, 58 | grid.coord = TRUE 59 | ) 60 | } 61 | \arguments{ 62 | \item{data}{observation data.} 63 | 64 | \item{cause}{name of causal variable.} 65 | 66 | \item{effect}{name of effect variable.} 67 | 68 | \item{libsizes}{(optional) number of spatial units used (input needed: \code{vector} - spatial vector, \code{matrix} - spatial raster).} 69 | 70 | \item{E}{(optional) embedding dimensions.} 71 | 72 | \item{k}{(optional) number of nearest neighbors.} 73 | 74 | \item{tau}{(optional) step of spatial lags.} 75 | 76 | \item{style}{(optional) embedding style (\code{0} includes current state, \code{1} excludes it).} 77 | 78 | \item{lib}{(optional) libraries indices (input requirement same as \code{libsizes}).} 79 | 80 | \item{pred}{(optional) predictions indices (input requirement same as \code{libsizes}).} 81 | 82 | \item{boot}{(optional) number of bootstraps to perform.} 83 | 84 | \item{random}{(optional) whether to use random sampling.} 85 | 86 | \item{seed}{(optional) random seed.} 87 | 88 | \item{dist.metric}{(optional) distance metric (\code{L1}: Manhattan, \code{L2}: Euclidean).} 89 | 90 | \item{zero.tolerance}{(optional) maximum number of zeros tolerated in signature space.} 91 | 92 | \item{relative}{(optional) whether to calculate relative changes in embeddings.} 93 | 94 | \item{weighted}{(optional) whether to weight causal strength.} 95 | 96 | \item{threads}{(optional) number of threads to use.} 97 | 98 | \item{detrend}{(optional) whether to remove the linear trend.} 99 | 100 | \item{parallel.level}{(optional) level of parallelism, \code{low} or \code{high}.} 101 | 102 | \item{bidirectional}{(optional) whether to examine bidirectional causality.} 103 | 104 | \item{progressbar}{(optional) whether to show the progress bar.} 105 | 106 | \item{nb}{(optional) neighbours list.} 107 | 108 | 
\item{grid.coord}{(optional) whether to detrend using cell center coordinates (\code{TRUE}) or row/column numbers (\code{FALSE}).} 109 | } 110 | \value{ 111 | A list 112 | \describe{ 113 | \item{\code{xmap}}{cross mapping results (only present if \code{libsizes} is not \code{NULL})} 114 | \item{\code{causality}}{per-sample causality statistics (present if \code{libsizes} is \code{NULL})} 115 | \item{\code{summary}}{overall causal strength (present if \code{libsizes} is \code{NULL})} 116 | \item{\code{pattern}}{pairwise pattern relationships (present if \code{libsizes} is \code{NULL})} 117 | \item{\code{varname}}{names of causal and effect variables} 118 | \item{\code{bidirectional}}{whether to examine bidirectional causality} 119 | } 120 | } 121 | \description{ 122 | geographical pattern causality 123 | } 124 | \examples{ 125 | columbus = sf::read_sf(system.file("case/columbus.gpkg",package="spEDM")) 126 | \donttest{ 127 | gpc(columbus,"hoval","crime",E = 6,k = 9) 128 | 129 | # convergence diagnostics 130 | g = gpc(columbus,"hoval","crime",libsizes = seq(5,45,5),E = 6,k = 9) 131 | plot(g) 132 | } 133 | } 134 | \references{ 135 | Zhang, Z., Wang, J., 2025. A model to identify causality for geographic patterns. International Journal of Geographical Information Science 1–21. 136 | } 137 | -------------------------------------------------------------------------------- /vignettes/si2_sct.Rmd.orig: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Spatial Causality Test" 3 | author: "Wenbo Lv" 4 | date: | 5 | | Last update: 2025-12-15 6 | | Last run: `r Sys.Date()` 7 | output: rmarkdown::html_vignette 8 | vignette: > 9 | %\VignetteIndexEntry{SI2. 
Spatial Causality Test} 10 | %\VignetteEngine{knitr::rmarkdown} 11 | %\VignetteEncoding{UTF-8} 12 | --- 13 | 14 | ```{r, include = FALSE} 15 | knitr::opts_chunk$set( 16 | collapse = TRUE, 17 | comment = "##", 18 | fig.path = "man/figures/sct/" 19 | ) 20 | ``` 21 | 22 | ## Methodological Background 23 | 24 | Let us begin by defining two spatial cross-sectional series $\{x_s\}_{s \in S}$ and $\{y_s\}_{s \in S}$, where $S$ represents the study area. 25 | 26 | We use: 27 | 28 | $$ 29 | \mathcal{X}_W = \{ W_i x \mid W_i \in \mathcal{W}(x, y) \} 30 | $$ 31 | 32 | $$ 33 | \mathcal{Y}_W = \{ W_i y \mid W_i \in \mathcal{W}(x, y) \} 34 | $$ 35 | 36 | to denote the sets of spatial lags of $x$ and $y$ given by all the weighting matrices in $\mathcal{W}(x, y)$ (that is, the set of spatial 37 | dependence structures between $x$ and $y$). 38 | 39 | 40 | We say that $\{x_s\}_{s \in S}$ does not cause $\{y_s\}_{s \in S}$ **under the spatial structures** $\mathcal{X}_W$ and $\mathcal{Y}_W$ if 41 | 42 | $$ 43 | h_{y |\mathcal{Y}_W}(m) = h_{y | \mathcal{Y}_W,\mathcal{X}_W}(m) 44 | $$ 45 | A unilateral non-parametric test can be applied to assess the spatial causality via the following null hypothesis: 46 | 47 | $$ 48 | H_0 : \{x_s\}_{s \in S} \text{ does not cause } \{y_s\}_{s \in S} \text{ under the spatial structures } \mathcal{X}_W \text{ and } \mathcal{Y}_W, $$ 49 | 50 | with the following statistic: 51 | 52 | $$ 53 | \hat{\delta}(\mathcal{Y}_W, \mathcal{X}_W) = \hat{h}_{y |\mathcal{Y}_W}(m) - \hat{h}_{y | \mathcal{Y}_W,\mathcal{X}_W}(m) 54 | $$ 55 | 56 | where $\hat{h}_*(m)$ is the estimated conditional symbolic entropy using Shannon’s entropy with $m-1$ nearest neighbors. The alternative is that the null hypothesis is not true. 57 | 58 | If $\mathcal{X}_W$ does not contain extra information about $y$ then $\hat{\delta}(\mathcal{Y}_W, \mathcal{X}_W) = 0$, otherwise, $\hat{\delta}(\mathcal{Y}_W, \mathcal{X}_W) > 0$.
59 | 60 | $h_{y |\mathcal{Y}_W}(m)$ measures the uncertainty of the distribution of symbols of $y$, conditional on the symbols of its spatial lag, $\mathcal{Y}_W$. Moreover, $h_{y | \mathcal{Y}_W,\mathcal{X}_W}(m)$ measures the uncertainty of the distribution of symbols of $y$, conditional on the symbols of the spatial lags of $y$, $\mathcal{Y}_W$, and of $x$, $\mathcal{X}_W$. If the second variable, $x$, indeed causes the first one then there should be a significant decrease in the entropy, and the statistic $\hat{\delta}(\mathcal{Y}_W, \mathcal{X}_W)$ will take on high positive values. If there is only a spatial correlation, but not causation, the difference between both entropies will be small. The statistical significance of $\hat{\delta}(\mathcal{Y}_W, \mathcal{X}_W)$ is assessed using a spatial block bootstrap. 61 | 62 | ## Usage examples 63 | 64 | ### Example of spatial vector data 65 | 66 | Load the `spEDM` package and the Columbus, OH dataset: 67 | 68 | ```{r load_lattice_data} 69 | library(spEDM) 70 | 71 | columbus = sf::read_sf(system.file("case/columbus.gpkg", package="spEDM")) 72 | columbus 73 | ``` 74 | 75 | Detect spatial causality among the variables *inc*, *crime*, and *hoval*: 76 | 77 | ```{r case_lattice} 78 | # house value and crime 79 | sc.test(columbus, "hoval", "crime", k = 15) 80 | 81 | # household income and crime 82 | sc.test(columbus, "inc", "crime", k = 15) 83 | 84 | # household income and house value 85 | sc.test(columbus, "inc", "hoval", k = 15) 86 | ``` 87 | 88 | ### Example of spatial raster data 89 | 90 | Load the `spEDM` package and its farmland NPP data: 91 | 92 | ```{r load_grid_data} 93 | library(spEDM) 94 | 95 | npp = terra::rast(system.file("case/npp.tif", package = "spEDM")) 96 | # To save computation time, aggregate the data by a factor of 3 97 | npp = terra::aggregate(npp, fact = 3, na.rm = TRUE) 98 | npp 99 | ``` 100 | 101 | Detect spatial causality among the variables *pre*, *tem*, and *npp*: 102 | 103 | ```{r
case_grid} 104 | # precipitation and npp 105 | sc.test(npp,"pre","npp",k = 30) 106 | 107 | # temperature and npp 108 | sc.test(npp,"tem","npp",k = 30) 109 | 110 | # precipitation and temperature 111 | sc.test(npp,"pre","tem",k = 30) 112 | ``` 113 | -------------------------------------------------------------------------------- /src/DeLongPlacements.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include "DataStruct.h" 7 | 8 | /** 9 | * @brief Computes DeLong placements for ROC analysis. 10 | * 11 | * This function implements the DeLong method to calculate placements for cases and controls 12 | * in the context of ROC (Receiver Operating Characteristic) curve analysis. The method is 13 | * used for non-parametric estimation of the area under the curve (AUC) and supports handling 14 | * tied values efficiently. 15 | * 16 | * The function accepts two sets of data points: `cases` and `controls`. Depending on the 17 | * specified `direction` (either ">" or "<"), it processes the data accordingly by inverting 18 | * the sign of the input values if necessary. 19 | * 20 | * @reference https://github.com/xrobin/pROC/blob/master/src/delong.cpp 21 | * 22 | * @param cases A vector of numeric values representing positive cases. 23 | * @param controls A vector of numeric values representing negative controls. 24 | * @param direction A string indicating the comparison direction. If set to ">", values are inverted. 25 | * 26 | * @return A structure containing: 27 | * - theta: The estimated AUC value. 28 | * - X: A vector of normalized placement values for cases. 29 | * - Y: A vector of normalized placement values for controls. 
30 | */ 31 | DeLongPlacementsRes CppDeLongPlacements(const std::vector& cases, 32 | const std::vector& controls, 33 | const std::string& direction) { 34 | // Initialize variables 35 | size_t m = cases.size(); 36 | size_t n = controls.size(); 37 | size_t L = m + n; 38 | 39 | // Create working copies to handle direction 40 | std::vector proc_cases = cases; 41 | std::vector proc_controls = controls; 42 | 43 | // Handle direction parameter 44 | if (direction == ">") { 45 | for (auto& val : proc_cases) val = -val; 46 | for (auto& val : proc_controls) val = -val; 47 | } 48 | 49 | // Create combined vector with indices and class labels 50 | // Use size_t instead of int for indices and Replace vector with vector 51 | std::vector> Z; 52 | std::vector labels(L, 0); 53 | 54 | // Populate case data 55 | for (size_t i = 0; i < m; ++i) { 56 | Z.emplace_back(i, proc_cases[i]); 57 | labels[i] = 1; 58 | } 59 | 60 | // Populate control data 61 | for (size_t j = 0; j < n; ++j) { 62 | Z.emplace_back(m + j, proc_controls[j]); 63 | labels[m + j] = 0; 64 | } 65 | 66 | // Sort combined data by value 67 | std::sort(Z.begin(), Z.end(), [](const std::pair& a, const std::pair& b) { 68 | return a.second < b.second; 69 | }); 70 | 71 | // Calculate XY placements 72 | std::vector XY(L, 0.0); 73 | size_t current_m = 0, current_n = 0; 74 | size_t i = 0; 75 | 76 | while (i < L) { 77 | std::vector case_indices, control_indices; 78 | size_t case_count = 0, control_count = 0; 79 | 80 | // Handle tied values 81 | while (true) { 82 | size_t index = Z[i].first; 83 | if (labels[index]) { 84 | ++case_count; 85 | case_indices.push_back(index); 86 | } else { 87 | ++control_count; 88 | control_indices.push_back(index); 89 | } 90 | 91 | if (i == L-1 || Z[i].second != Z[i+1].second) break; 92 | ++i; 93 | } 94 | 95 | // Update XY values for cases 96 | for (size_t idx : case_indices) { 97 | XY[idx] = current_n + control_count/2.0; 98 | } 99 | 100 | // Update XY values for controls 101 | for (size_t idx : 
control_indices) { 102 | XY[idx] = current_m + case_count/2.0; 103 | } 104 | 105 | // Accumulate counts 106 | current_m += case_count; 107 | current_n += control_count; 108 | ++i; 109 | } 110 | 111 | // Calculate final X and Y vectors 112 | DeLongPlacementsRes ret; 113 | // ret.X.reserve(n); 114 | // ret.Y.reserve(m); 115 | 116 | double sum = 0.0; 117 | const double norm_n = static_cast(n); 118 | const double norm_m = static_cast(m); 119 | 120 | for (size_t k = 0; k < L; ++k) { 121 | if (labels[k]) { 122 | sum += XY[k]; 123 | ret.X.push_back(XY[k] / norm_n); 124 | } else { 125 | ret.Y.push_back(1.0 - XY[k] / norm_m); 126 | } 127 | } 128 | 129 | // Calculate theta (AUC estimate) 130 | ret.theta = sum / (norm_m * norm_n); 131 | 132 | return ret; 133 | } 134 | -------------------------------------------------------------------------------- /src/SpatialBlockBootstrap.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | /** 6 | * @brief Generate a spatial block bootstrap resample of block indices based on predefined blocks. 7 | * 8 | * This function follows the Spatial Block Bootstrap (SBB) procedure described by Carlstein (1986) 9 | * and Herrera et al. (2013), as used in spatial Granger causality frameworks. It samples blocks 10 | * of indices with replacement, preserving spatial contiguity defined by a block ID vector. 11 | * 12 | * 13 | * @param block Vector of block IDs assigning each element to a contiguous block. 14 | * Elements with the same integer value belong to the same block. 15 | * @param seed Random seed for reproducibility (optional). 16 | * @return std::vector The bootstrap-resampled vector of indices based on block IDs. 
17 | */ 18 | std::vector SpatialBlockBootstrap( 19 | const std::vector& block, 20 | unsigned int seed = 42 21 | ) { 22 | size_t N = block.size(); 23 | 24 | // Step 1: Group indices by block ID 25 | std::unordered_map> block_to_indices; 26 | for (size_t i = 0; i < N; ++i) { 27 | block_to_indices[block[i]].push_back(i); 28 | } 29 | 30 | // Step 2: Extract unique block IDs 31 | std::vector block_ids; 32 | for (const auto& pair : block_to_indices) { 33 | block_ids.push_back(pair.first); 34 | } 35 | 36 | // Step 3: Random sampling of block IDs with replacement 37 | std::mt19937_64 rng(seed); // Random number generator with fixed seed 38 | std::uniform_int_distribution<> dist(0, block_ids.size() - 1); 39 | 40 | // Step 4: Generate bootstrap sample by sampling blocks 41 | std::vector bootstrapped_indices; 42 | while (bootstrapped_indices.size() < N) { 43 | int sampled_block_id = block_ids[dist(rng)]; 44 | const auto& indices = block_to_indices[sampled_block_id]; 45 | 46 | // Append block indices to the bootstrap sample 47 | for (size_t idx : indices) { 48 | bootstrapped_indices.push_back(idx); 49 | } 50 | } 51 | 52 | // Trim to original size (in case last block pushed size beyond N) 53 | if (bootstrapped_indices.size() > N) { 54 | bootstrapped_indices.resize(N); 55 | } 56 | 57 | return bootstrapped_indices; 58 | } 59 | 60 | /** 61 | * Generate a spatial block bootstrap sample of indices using an external random number generator. 62 | * 63 | * This function performs block-based resampling from spatial or grouped data. The input vector `block` 64 | * specifies a block ID for each observation (e.g., spatial unit, group, or time block). The function: 65 | * 66 | * 1. Groups indices by block ID; 67 | * 2. Randomly samples block IDs with replacement using the provided `std::mt19937_64` RNG; 68 | * 3. Concatenates the indices of the selected blocks to form the bootstrap sample; 69 | * 4. Trims the result to ensure the same length as the original data.
70 | * 71 | * The sampling is done at the block level rather than at the individual level, preserving local structure. 72 | * This is particularly useful for spatial or temporal data where neighboring observations may be dependent. 73 | * 74 | * @param block A vector of block identifiers (one per observation), e.g., spatial or temporal blocks. 75 | * @param rng A reference to an externally managed random number generator (e.g., from a parallel RNG pool). 76 | * @return A vector of resampled indices with the same length as the input data. 77 | */ 78 | std::vector SpatialBlockBootstrapRNG( 79 | const std::vector& block, 80 | std::mt19937_64& rng 81 | ) { 82 | size_t N = block.size(); 83 | 84 | // Step 1: Group indices by block ID 85 | std::unordered_map> block_to_indices; 86 | for (size_t i = 0; i < N; ++i) { 87 | block_to_indices[block[i]].push_back(i); 88 | } 89 | 90 | // Step 2: Extract unique block IDs 91 | std::vector block_ids; 92 | for (const auto& pair : block_to_indices) { 93 | block_ids.push_back(pair.first); 94 | } 95 | 96 | // Step 3: Sampling of block IDs with replacement using external RNG 97 | std::uniform_int_distribution<> dist(0, block_ids.size() - 1); 98 | 99 | // Step 4: Generate bootstrap sample 100 | std::vector bootstrapped_indices; 101 | while (bootstrapped_indices.size() < N) { 102 | int sampled_block_id = block_ids[dist(rng)]; 103 | const auto& indices = block_to_indices[sampled_block_id]; 104 | bootstrapped_indices.insert(bootstrapped_indices.end(), indices.begin(), indices.end()); 105 | } 106 | 107 | // Trim to original size if needed 108 | if (bootstrapped_indices.size() > N) { 109 | bootstrapped_indices.resize(N); 110 | } 111 | 112 | return bootstrapped_indices; 113 | } 114 | -------------------------------------------------------------------------------- /src/CppStats.h: -------------------------------------------------------------------------------- 1 | #ifndef CppStats_H 2 | #define CppStats_H 3 | 4 | #include 5 | #include 6 | 
#include 7 | #include 8 | #include 9 | #include // for std::accumulate 10 | #include // for std::numeric_limits 11 | #include "DeLongPlacements.h" 12 | // #include 13 | #include 14 | 15 | bool isNA(double value); 16 | 17 | bool checkIntNA(int value); 18 | 19 | bool checkOneDimVectorHasNaN(const std::vector& vec); 20 | 21 | int checkOneDimVectorNotNanNum(const std::vector& vec); 22 | 23 | unsigned long long CppFactorial(unsigned int n); 24 | 25 | unsigned long long CppCombine(unsigned int n, unsigned int k); 26 | 27 | double CppDigamma(double x); 28 | 29 | double CppLog(double x, double base = 10); 30 | 31 | double CppMedian(const std::vector& vec, 32 | bool NA_rm = false); 33 | 34 | double CppMean(const std::vector& vec, 35 | bool NA_rm = false); 36 | 37 | double CppMin(const std::vector& vec, 38 | bool NA_rm = false); 39 | 40 | double CppMax(const std::vector& vec, 41 | bool NA_rm = false); 42 | 43 | double CppSum(const std::vector& vec, 44 | bool NA_rm = false); 45 | 46 | double CppMAE(const std::vector& x1, 47 | const std::vector& x2, 48 | bool NA_rm = false); 49 | 50 | double CppRMSE(const std::vector& x1, 51 | const std::vector& x2, 52 | bool NA_rm = false); 53 | 54 | std::vector CppCumSum(const std::vector& vec); 55 | 56 | std::vector CppAbsDiff(const std::vector& vec1, 57 | const std::vector& vec2); 58 | 59 | std::vector CppSumNormalize(const std::vector& vec, 60 | bool NA_rm = false); 61 | 62 | std::vector CppArithmeticSeq(double from, double to, size_t length_out); 63 | 64 | std::vector CppQuantile(const std::vector& vec, 65 | const std::vector& probs = {0.05, 0.5, 0.95}, 66 | bool NA_rm = true); 67 | 68 | double CppVariance(const std::vector& vec, bool NA_rm = false); 69 | 70 | double CppCovariance(const std::vector& vec1, 71 | const std::vector& vec2, 72 | bool NA_rm = false); 73 | 74 | double PearsonCor(const std::vector& y, 75 | const std::vector& y_hat, 76 | bool NA_rm = false); 77 | 78 | double SpearmanCor(const std::vector& y, 79 | const 
std::vector& y_hat, 80 | bool NA_rm = false); 81 | 82 | double KendallCor(const std::vector& y, 83 | const std::vector& y_hat, 84 | bool NA_rm = false); 85 | 86 | double PartialCor(const std::vector& y, 87 | const std::vector& y_hat, 88 | const std::vector>& controls, 89 | bool NA_rm = false, 90 | bool linear = false, 91 | double pinv_tol = 1e-10); 92 | 93 | double PartialCorTrivar(const std::vector& y, 94 | const std::vector& y_hat, 95 | const std::vector& control, 96 | bool NA_rm = false, 97 | bool linear = false, 98 | double pinv_tol = 1e-10); 99 | 100 | double CppCorSignificance(double r, size_t n, size_t k = 0); 101 | 102 | std::vector CppCorConfidence(double r, size_t n, size_t k = 0, 103 | double level = 0.05); 104 | 105 | double CppMeanCorSignificance(const std::vector& rho_vec, 106 | size_t n, size_t k = 0); 107 | 108 | std::vector CppMeanCorConfidence(const std::vector& rho_vec, 109 | size_t n, size_t k = 0, 110 | double level = 0.05); 111 | 112 | std::vector CppDeLongAUCConfidence(const std::vector& cases, 113 | const std::vector& controls, 114 | const std::string& direction, 115 | double level = 0.05); 116 | 117 | std::vector CppCMCTest(const std::vector& cases, 118 | const std::string& direction, 119 | double level = 0.05, 120 | size_t num_samples = 0); 121 | 122 | std::vector>> CppSVD(const std::vector>& X); 123 | 124 | std::vector LinearTrendRM(const std::vector& vec, 125 | const std::vector& xcoord, 126 | const std::vector& ycoord, 127 | bool NA_rm = false); 128 | 129 | #endif // CppStats_H 130 | -------------------------------------------------------------------------------- /R/gccm.R: -------------------------------------------------------------------------------- 1 | .gccm_sf_method = \(data, cause, effect, libsizes = NULL, E = 3, k = E+2, tau = 1, style = 1, stack = FALSE, lib = NULL, pred = NULL, dist.metric = "L2", dist.average = TRUE, 2 | theta = 1, algorithm = "simplex", threads = detectThreads(), detrend = TRUE, parallel.level = "low", 
bidirectional = TRUE, progressbar = TRUE, nb = NULL){ 3 | varname = .check_character(cause, effect) 4 | E = .check_inputelementnum(E,2) 5 | tau = .check_inputelementnum(tau,2) 6 | k = .check_inputelementnum(k,2) 7 | pl = .check_parallellevel(parallel.level) 8 | .varname = .internal_varname() 9 | if (is.null(nb)) nb = .internal_lattice_nb(data) 10 | coords = as.data.frame(sdsfun::sf_coordinates(data)) 11 | data = sf::st_drop_geometry(data) 12 | data = data[,varname] 13 | names(data) = .varname 14 | 15 | if (detrend){ 16 | data = .internal_detrend(data,.varname,coords) 17 | } 18 | cause = data[,"cause",drop = TRUE] 19 | effect = data[,"effect",drop = TRUE] 20 | 21 | if (is.null(lib)) lib = .internal_library(data) 22 | if (is.null(pred)) pred = lib 23 | if (is.null(libsizes)) libsizes = length(lib) 24 | 25 | simplex = ifelse(algorithm == "simplex", TRUE, FALSE) 26 | x_xmap_y = NULL 27 | if (bidirectional){ 28 | x_xmap_y = RcppGCCM4Lattice(cause,effect,nb,libsizes,lib,pred,E[1],tau[1],k[1],simplex,theta,threads,pl, 29 | style, stack, .check_distmetric(dist.metric), dist.average, TRUE, progressbar) 30 | } 31 | y_xmap_x = RcppGCCM4Lattice(effect,cause,nb,libsizes,lib,pred,E[2],tau[2],k[2],simplex,theta,threads,pl, 32 | style, stack, .check_distmetric(dist.metric), dist.average, TRUE, progressbar) 33 | 34 | return(.bind_xmapdf(varname,x_xmap_y,y_xmap_x,bidirectional)) 35 | } 36 | 37 | .gccm_spatraster_method = \(data, cause, effect, libsizes = NULL, E = 3, k = E+2, tau = 1, style = 1, stack = FALSE, lib = NULL, pred = NULL, dist.metric = "L2", dist.average = TRUE, theta = 1, algorithm = "simplex", 38 | threads = detectThreads(), detrend = TRUE, parallel.level = "low", bidirectional = TRUE, progressbar = TRUE, grid.coord = TRUE, embed.direction = 0, win.ratio = 0){ 39 | varname = .check_character(cause, effect) 40 | E = .check_inputelementnum(E,2) 41 | tau = .check_inputelementnum(tau,2) 42 | k = .check_inputelementnum(k,2) 43 | win.ratio = 
.check_inputelementnum(win.ratio,2) 44 | pl = .check_parallellevel(parallel.level) 45 | .varname = .internal_varname() 46 | data = data[[varname]] 47 | names(data) = .varname 48 | 49 | dtf = .internal_grid2df(data,grid.coord) 50 | if (detrend){ 51 | dtf = .internal_detrend(dtf,.varname) 52 | } 53 | causemat = matrix(dtf[,"cause"],nrow = terra::nrow(data),byrow = TRUE) 54 | effectmat = matrix(dtf[,"effect"],nrow = terra::nrow(data),byrow = TRUE) 55 | 56 | if (is.null(lib)) lib = .internal_library(dtf,TRUE) 57 | if (is.null(pred)) pred = lib 58 | if (is.null(libsizes)) libsizes = matrix(nrow(lib)) 59 | 60 | simplex = ifelse(algorithm == "simplex", TRUE, FALSE) 61 | x_xmap_y = NULL 62 | if (bidirectional){ 63 | x_xmap_y = RcppGCCM4Grid(causemat,effectmat,libsizes,lib,pred,E[1],tau[1],k[1],simplex,theta,threads,pl,style,stack, 64 | .check_distmetric(dist.metric),dist.average,TRUE,embed.direction,win.ratio,progressbar) 65 | } 66 | y_xmap_x = RcppGCCM4Grid(effectmat,causemat,libsizes,lib,pred,E[2],tau[2],k[2],simplex,theta,threads,pl,style,stack, 67 | .check_distmetric(dist.metric),dist.average,TRUE,embed.direction,win.ratio,progressbar) 68 | 69 | return(.bind_xmapdf(varname,x_xmap_y,y_xmap_x,bidirectional)) 70 | } 71 | 72 | #' geographical convergent cross mapping 73 | #' 74 | #' @inheritParams gcmc 75 | #' @param stack (optional) whether to stack embeddings. 76 | #' @param dist.average (optional) whether to average distance. 77 | #' @param theta (optional) weighting parameter for distances, useful when `algorithm` is `smap`. 78 | #' @param algorithm (optional) prediction algorithm. 79 | #' 80 | #' @return A list 81 | #' \describe{ 82 | #' \item{\code{xmap}}{cross mapping results} 83 | #' \item{\code{varname}}{names of causal and effect variables} 84 | #' \item{\code{bidirectional}}{whether to examine bidirectional causality} 85 | #' } 86 | #' @export 87 | #' @name gccm 88 | #' @aliases gccm,sf-method 89 | #' @references 90 | #' Gao, B., Yang, J., Chen, Z. et al. 
Causal inference from cross-sectional earth system data with geographical convergent cross mapping. Nat Commun 14, 5875 (2023). 91 | #' 92 | #' @examples 93 | #' columbus = sf::read_sf(system.file("case/columbus.gpkg",package="spEDM")) 94 | #' \donttest{ 95 | #' g = gccm(columbus,"hoval","crime",libsizes = seq(5,45,5),E = 6) 96 | #' g 97 | #' plot(g,ylimits = c(0,0.85)) 98 | #' } 99 | methods::setMethod("gccm", "sf", .gccm_sf_method) 100 | 101 | #' @rdname gccm 102 | #' @param embed.direction (optional) direction selector for embeddings (`0` returns all directions, `1-8` correspond to NW, N, NE, W, E, SW, S, SE). 103 | #' @param win.ratio (optional) ratio of sliding window scale to speed up state-space predictions. 104 | methods::setMethod("gccm", "SpatRaster", .gccm_spatraster_method) 105 | -------------------------------------------------------------------------------- /R/gcmc.R: -------------------------------------------------------------------------------- 1 | .gcmc_sf_method = \(data, cause, effect, libsizes = NULL, E = 3, k = pmin(E^2), tau = 1, style = 1, lib = NULL, pred = NULL, dist.metric = "L2", 2 | threads = detectThreads(), detrend = FALSE, parallel.level = "low", bidirectional = TRUE, progressbar = TRUE, nb = NULL){ 3 | varname = .check_character(cause, effect) 4 | E = .check_inputelementnum(E,2) 5 | tau = .check_inputelementnum(tau,2) 6 | k = .check_inputelementnum(k,2) 7 | pl = .check_parallellevel(parallel.level) 8 | .varname = .internal_varname() 9 | if (is.null(nb)) nb = .internal_lattice_nb(data) 10 | coords = as.data.frame(sdsfun::sf_coordinates(data)) 11 | data = sf::st_drop_geometry(data) 12 | data = data[,varname] 13 | names(data) = .varname 14 | 15 | if (detrend){ 16 | data = .internal_detrend(data,.varname,coords) 17 | } 18 | cause = data[,"cause",drop = TRUE] 19 | effect = data[,"effect",drop = TRUE] 20 | 21 | if (is.null(lib)) lib = .internal_library(data) 22 | if (is.null(pred)) pred = lib 23 | if (is.null(libsizes)) libsizes = 
length(lib) 24 | 25 | x_xmap_y = NULL 26 | if (bidirectional){ 27 | x_xmap_y = RcppGCMC4Lattice(cause,effect,nb,libsizes,lib,pred,E,tau,k[1],0,style, 28 | .check_distmetric(dist.metric),threads,pl,progressbar) 29 | } 30 | y_xmap_x = RcppGCMC4Lattice(effect,cause,nb,libsizes,lib,pred,rev(E),rev(tau),k[2],0, 31 | style, .check_distmetric(dist.metric),threads,pl,progressbar) 32 | 33 | return(.bind_intersectdf(varname,x_xmap_y,y_xmap_x,bidirectional)) 34 | } 35 | 36 | .gcmc_spatraster_method = \(data, cause, effect, libsizes = NULL, E = 3, k = pmin(E^2), tau = 1, style = 1, lib = NULL, pred = NULL, dist.metric = "L2", 37 | threads = detectThreads(), detrend = FALSE, parallel.level = "low", bidirectional = TRUE, progressbar = TRUE, grid.coord = TRUE){ 38 | varname = .check_character(cause, effect) 39 | E = .check_inputelementnum(E,2) 40 | tau = .check_inputelementnum(tau,2) 41 | k = .check_inputelementnum(k,2) 42 | pl = .check_parallellevel(parallel.level) 43 | .varname = .internal_varname() 44 | data = data[[varname]] 45 | names(data) = .varname 46 | 47 | dtf = .internal_grid2df(data,grid.coord) 48 | if (detrend){ 49 | dtf = .internal_detrend(dtf,.varname) 50 | } 51 | causemat = matrix(dtf[,"cause"],nrow = terra::nrow(data),byrow = TRUE) 52 | effectmat = matrix(dtf[,"effect"],nrow = terra::nrow(data),byrow = TRUE) 53 | 54 | if (is.null(lib)) lib = .internal_library(dtf,TRUE) 55 | if (is.null(pred)) pred = lib 56 | if (is.null(libsizes)) libsizes = matrix(nrow(lib)) 57 | 58 | x_xmap_y = NULL 59 | if (bidirectional){ 60 | x_xmap_y = RcppGCMC4Grid(causemat,effectmat,libsizes,lib,pred,E,tau,k[1],0,style, 61 | .check_distmetric(dist.metric),threads,pl,progressbar) 62 | } 63 | y_xmap_x = RcppGCMC4Grid(effectmat,causemat,libsizes,lib,pred,rev(E),rev(tau),k[2],0, 64 | style,.check_distmetric(dist.metric),threads,pl,progressbar) 65 | 66 | return(.bind_intersectdf(varname,x_xmap_y,y_xmap_x,bidirectional)) 67 | } 68 | 69 | #' geographical cross mapping cardinality 70 | #' 71 | #' 
@param data observation data. 72 | #' @param cause name of causal variable. 73 | #' @param effect name of effect variable. 74 | #' @param libsizes (optional) number of spatial units used (input needed: `vector` - spatial vector, `matrix` - spatial raster). 75 | #' @param E (optional) embedding dimensions. 76 | #' @param k (optional) number of nearest neighbors. 77 | #' @param tau (optional) step of spatial lags. 78 | #' @param style (optional) embedding style (`0` includes current state, `1` excludes it). 79 | #' @param lib (optional) libraries indices (input requirement same as `libsizes`). 80 | #' @param pred (optional) predictions indices (input requirement same as `libsizes`). 81 | #' @param dist.metric (optional) distance metric (`L1`: Manhattan, `L2`: Euclidean). 82 | #' @param threads (optional) number of threads to use. 83 | #' @param detrend (optional) whether to remove the linear trend. 84 | #' @param parallel.level (optional) level of parallelism, `low` or `high`. 85 | #' @param bidirectional (optional) whether to examine bidirectional causality. 86 | #' @param progressbar (optional) whether to show the progress bar. 87 | #' @param nb (optional) neighbours list. 88 | #' 89 | #' @return A list 90 | #' \describe{ 91 | #' \item{\code{xmap}}{cross mapping results} 92 | #' \item{\code{cs}}{causal strength} 93 | #' \item{\code{varname}}{names of causal and effect variables} 94 | #' \item{\code{bidirectional}}{whether to examine bidirectional causality} 95 | #' } 96 | #' @export 97 | #' @name gcmc 98 | #' @aliases gcmc,sf-method 99 | #' 100 | #' @examples 101 | #' columbus = sf::read_sf(system.file("case/columbus.gpkg",package="spEDM")) 102 | #' \donttest{ 103 | #' g = gcmc(columbus,"hoval","crime",E = 7,k = 19) 104 | #' g 105 | #' } 106 | methods::setMethod("gcmc", "sf", .gcmc_sf_method) 107 | 108 | #' @rdname gcmc 109 | #' @param grid.coord (optional) whether to detrend using cell center coordinates (`TRUE`) or row/column numbers (`FALSE`). 
110 | methods::setMethod("gcmc", "SpatRaster", .gcmc_spatraster_method) 111 | -------------------------------------------------------------------------------- /src/GCCM4Lattice.h: -------------------------------------------------------------------------------- 1 | #ifndef GCCM4Lattice_H 2 | #define GCCM4Lattice_H 3 | 4 | #include 5 | #include 6 | #include // Include for std::partial_sort 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include "CppStats.h" 12 | #include "CppLatticeUtils.h" 13 | #include "SimplexProjection.h" 14 | #include "SMap.h" 15 | #include 16 | 17 | /* 18 | * Perform GCCM on a single lib and pred for lattice data. 19 | * 20 | * Parameters: 21 | * - x_vectors: Reconstructed state-space (each row represents a separate vector/state). 22 | * - y: Spatial cross-section series used as the target (should align with x_vectors). 23 | * - lib_size: Size of the library used for cross mapping. 24 | * - lib_indices: Vector of indices indicating which states to include when searching for neighbors. 25 | * - pred_indices: Vector of indices indicating which states to predict from. 26 | * - b: Number of neighbors to use for simplex projection. 27 | * - simplex: If true, uses simplex projection for prediction; otherwise, uses s-mapping. 28 | * - theta: Distance weighting parameter for local neighbors in the manifold (used in s-mapping). 29 | * - threads: The number of threads to use for parallel processing. 30 | * - parallel_level: Level of parallel computing: 0 for `lower`, 1 for `higher`. 31 | * - dist_metric: Distance metric selector (1: Manhattan, 2: Euclidean). 32 | * - dist_average: Whether to average distance by the number of valid vector components. 33 | * 34 | * Returns: 35 | * A vector of pairs, where each pair consists of: 36 | * - An integer representing the library size. 37 | * - A double representing the Pearson correlation coefficient (rho) between predicted and actual values. 
38 | */ 39 | std::vector> GCCMSingle4Lattice( 40 | const std::vector>& x_vectors, 41 | const std::vector& y, 42 | int lib_size, 43 | const std::vector& lib_indices, 44 | const std::vector& pred_indices, 45 | int b, 46 | bool simplex, 47 | double theta, 48 | size_t threads, 49 | int parallel_level, 50 | int dist_metric, 51 | bool dist_average 52 | ); 53 | 54 | // Perform GCCM on a single lib and pred for lattice data (composite embeddings version). 55 | std::vector> GCCMSingle4Lattice( 56 | const std::vector>>& x_vectors, 57 | const std::vector& y, 58 | int lib_size, 59 | const std::vector& lib_indices, 60 | const std::vector& pred_indices, 61 | int b, 62 | bool simplex, 63 | double theta, 64 | size_t threads, 65 | int parallel_level, 66 | int dist_metric, 67 | bool dist_average 68 | ); 69 | 70 | /** 71 | * Performs GCCM on spatial lattice data. 72 | * 73 | * Parameters: 74 | * - x: Spatial cross-section series used as the predictor variable (**cross mapping from**). 75 | * - y: Spatial cross-section series used as the target variable (**cross mapping to**). 76 | * - nb_vec: A nested vector containing neighborhood information for lattice data. 77 | * - lib_sizes: A vector specifying different library sizes for GCCM analysis. 78 | * - lib: A vector representing the indices of spatial units to be the library. 79 | * - pred: A vector representing the indices of spatial units to be predicted. 80 | * - E: Embedding dimension for attractor reconstruction. 81 | * - tau: The step of spatial lags for prediction. 82 | * - b: Number of nearest neighbors used for prediction. 83 | * - simplex: Boolean flag indicating whether to use simplex projection (true) or S-mapping (false) for prediction. 84 | * - theta: Distance weighting parameter used for weighting neighbors in the S-mapping prediction. 85 | * - threads: Number of threads to use for parallel computation. 86 | * - parallel_level: Level of parallel computing: 0 for `lower`, 1 for `higher`. 
87 | * - style: Embedding style selector (0: includes current state, 1: excludes it). 88 | * - stack: Embedding arrangement selector (0: single - average lags, 1: composite - stack). Default is 0 (average lags). 89 | * - dist_metric: Distance metric selector (1: Manhattan, 2: Euclidean). 90 | * - dist_average: Whether to average distance by the number of valid vector components. 91 | * - single_sig: Whether to estimate significance and confidence intervals using a single rho value. 92 | * - progressbar: Boolean flag to indicate whether to display a progress bar during computation. 93 | * 94 | * Returns: 95 | * A 2D vector of results, where each row contains: 96 | * - The library size. 97 | * - The mean cross-mapping correlation. 98 | * - The statistical significance of the correlation. 99 | * - The lower bound of the confidence interval. 100 | * - The upper bound of the confidence interval. 101 | */ 102 | std::vector> GCCM4Lattice( 103 | const std::vector& x, 104 | const std::vector& y, 105 | const std::vector>& nb_vec, 106 | const std::vector& lib_sizes, 107 | const std::vector& lib, 108 | const std::vector& pred, 109 | int E, 110 | int tau, 111 | int b, 112 | bool simplex, 113 | double theta, 114 | int threads, 115 | int parallel_level, 116 | int style, 117 | int stack, 118 | int dist_metric, 119 | bool dist_average, 120 | bool single_sig, 121 | bool progressbar 122 | ); 123 | 124 | #endif // GCCM4Lattice_H 125 | -------------------------------------------------------------------------------- /vignettes/main2_ssr.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "State Space Reconstruction" 3 | author: "Wenbo Lv" 4 | date: | 5 | | Last update: 2025-12-15 6 | | Last run: 2025-12-15 7 | output: rmarkdown::html_vignette 8 | vignette: > 9 | %\VignetteIndexEntry{2. 
State Space Reconstruction} 10 | %\VignetteEngine{knitr::rmarkdown} 11 | %\VignetteEncoding{UTF-8} 12 | --- 13 | 14 | 15 | 16 | ## Methodological Background 17 | 18 | Takens theory proves that for a dynamical system $\phi$, if its trajectory converges to an attractor manifold $M$—a bounded and invariant set of states—then there exists a smooth mapping between the system $\phi$ and its attractor $M$. Consequently, the time series observations of $\phi$ can be used to reconstruct the structure of $M$ through delay embedding. 19 | 20 | According to the generalized embedding theorem, for a compact $d$-dimensional manifold $M$ and a set of observation functions $\langle h_1, h_2, \ldots, h_L \rangle$, the mapping $\psi_{\phi,h}(x) = \langle h_1(x), h_2(x), \ldots, h_L(x) \rangle$ is an embedding of $M$ when $L \geq 2d + 1$. Here, *embedding* refers to a one-to-one map that resolves all singularities of the original manifold. The observation functions $h_i$ can take the form of time-lagged values from a single time series, lags from multiple time series, or even completely distinct measurements. The former two are simply special cases of the third. 21 | 22 | This embedding framework can be extended to *spatial cross-sectional data*, which lack temporal ordering but are observed over a spatial domain. In this context, the observation functions can be defined using the values of a variable at a focal spatial unit and its surrounding neighbors (known as *spatial lags* in spatial statistics). Specifically, for a spatial location $s$, the embedding can be written as: 23 | 24 | $$ 25 | \psi(x, s) = \langle h_s(x), h_{s(1)}(x), \ldots, h_{s(L-1)}(x) \rangle, 26 | $$ 27 | 28 | where $h_{s(i)}(x)$ denotes the observation function of the $i$-th order spatial lag unit relative to $s$. These spatial lags provide the necessary diversity of observations for effective manifold reconstruction. 
In practice, if a given spatial lag order involves multiple units, summary statistics such as the mean or directionally-weighted averages can be used as the observation function to maintain a one-to-one embedding. 29 | 30 | ## Usage examples 31 | 32 | ### Example of spatial vector data 33 | 34 | Load the `spEDM` package and its county-level population density data: 35 | 36 | 37 | ``` r 38 | library(spEDM) 39 | 40 | popd_nb = spdep::read.gal(system.file("case/popd_nb.gal",package = "spEDM")) 41 | ## Warning in spdep::read.gal(system.file("case/popd_nb.gal", package = "spEDM")): 42 | ## neighbour object has 4 sub-graphs 43 | popd = readr::read_csv(system.file("case/popd.csv",package = "spEDM")) 44 | ## Rows: 2806 Columns: 7 45 | ## ── Column specification ───────────────────────────────────────────────────────── 46 | ## Delimiter: "," 47 | ## dbl (7): lon, lat, popd, elev, tem, pre, slope 48 | ## 49 | ## ℹ Use `spec()` to retrieve the full column specification for this data. 50 | ## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message. 51 | popd_sf = sf::st_as_sf(popd, coords = c("lon","lat"), crs = 4326) 52 | popd_sf 53 | ## Simple feature collection with 2806 features and 5 fields 54 | ## Geometry type: POINT 55 | ## Dimension: XY 56 | ## Bounding box: xmin: 74.9055 ymin: 18.2698 xmax: 134.269 ymax: 52.9346 57 | ## Geodetic CRS: WGS 84 58 | ## # A tibble: 2,806 × 6 59 | ## popd elev tem pre slope geometry 60 | ## * 61 | ## 1 780. 8 17.4 1528. 0.452 (116.912 30.4879) 62 | ## 2 395. 48 17.2 1487. 0.842 (116.755 30.5877) 63 | ## 3 261. 49 16.0 1456. 3.56 (116.541 30.7548) 64 | ## 4 258. 23 17.4 1555. 0.932 (116.241 30.104) 65 | ## 5 211. 101 16.3 1494. 3.34 (116.173 30.495) 66 | ## 6 386. 10 16.6 1382. 1.65 (116.935 30.9839) 67 | ## 7 350. 23 17.5 1569. 0.346 (116.677 30.2412) 68 | ## 8 470. 22 17.1 1493. 1.88 (117.066 30.6514) 69 | ## 9 1226. 11 17.4 1526. 0.208 (117.171 30.5558) 70 | ## 10 137. 598 13.9 1458. 
5.92 (116.208 30.8983) 71 | ## # ℹ 2,796 more rows 72 | ``` 73 | 74 | Embedding the variable `popd` from county-level population density: 75 | 76 | 77 | ``` r 78 | v = spEDM::embedded(popd_sf,"popd",E = 10) 79 | v[1:5,c(4,5,10)] 80 | ## [,1] [,2] [,3] 81 | ## [1,] 962.7204 1664.756 1581.4351 82 | ## [2,] 919.6000 2408.766 1494.8241 83 | ## [3,] 1435.0165 1958.686 813.9077 84 | ## [4,] 1488.2727 2066.748 1216.6986 85 | ## [5,] 2326.8429 1290.188 1038.3864 86 | ``` 87 | 88 | 89 | ``` r 90 | plot3D::scatter3D(v[,4], v[,5], v[,10], colvar = NULL, pch = 19, 91 | col = "red", theta = 45, phi = 10, cex = 0.35, 92 | bty = "f", clab = NA, tickmarks = FALSE) 93 | ``` 94 | 95 | ![**Figure 1**. The reconstructed shadow manifolds for the variable `popd`.](../man/figures/ssr/fig1-1.png) 96 | 97 |
98 | 99 | ### Example of spatial raster data 100 | 101 | Load the `spEDM` package and its farmland npp data: 102 | 103 | 104 | ``` r 105 | library(spEDM) 106 | 107 | npp = terra::rast(system.file("case/npp.tif", package = "spEDM")) 108 | npp 109 | ## class : SpatRaster 110 | ## size : 404, 483, 5 (nrow, ncol, nlyr) 111 | ## resolution : 10000, 10000 (x, y) 112 | ## extent : -2625763, 2204237, 1877078, 5917078 (xmin, xmax, ymin, ymax) 113 | ## coord. ref. : CGCS2000_Albers 114 | ## source : npp.tif 115 | ## names : npp, pre, tem, elev, hfp 116 | ## min values : 164.00, 384.3409, -47.8194, -122.2004, 0.03390418 117 | ## max values : 16606.33, 23878.3555, 263.6938, 5350.4902, 44.90312195 118 | ``` 119 | 120 | Embedding the variable `npp` from farmland npp data: 121 | 122 | 123 | ``` r 124 | r = spEDM::embedded(npp,"npp",E = 5,tau = 20) 125 | r[which(!is.na(r),arr.ind = T)[1:5],1:3] 126 | ## [,1] [,2] [,3] 127 | ## [1,] 2896.833 2933.926 2898.288 128 | ## [2,] 3664.286 3071.789 3003.966 129 | ## [3,] 3317.337 3090.832 2973.627 130 | ## [4,] 3196.011 3107.388 2942.477 131 | ## [5,] 3329.188 3080.360 2892.083 132 | ``` 133 | 134 | 135 | ``` r 136 | plot3D::scatter3D(r[,1], r[,2], r[,3], colvar = NULL, pch = 19, 137 | col = "#e77854", theta = 45, phi = 10, cex = 0.01, 138 | bty = "f", clab = NA, tickmarks = FALSE) 139 | ``` 140 | 141 | ![**Figure 2**. The reconstructed shadow manifolds for the variable `npp`.](../man/figures/ssr/fig2-1.png) 142 | -------------------------------------------------------------------------------- /vignettes/main4_gpc.Rmd.orig: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Geographical Pattern Causality" 3 | author: "Wenbo Lv" 4 | date: | 5 | | Last update: 2025-12-15 6 | | Last run: `r Sys.Date()` 7 | output: rmarkdown::html_vignette 8 | vignette: > 9 | %\VignetteIndexEntry{4. 
Geographical Pattern Causality} 10 | %\VignetteEngine{knitr::rmarkdown} 11 | %\VignetteEncoding{UTF-8} 12 | --- 13 | 14 | ```{r, include = FALSE} 15 | knitr::opts_chunk$set( 16 | collapse = TRUE, 17 | comment = "##", 18 | fig.path = "man/figures/gpc/" 19 | ) 20 | ``` 21 | 22 | ## Methodological Background 23 | 24 | Geographical Pattern Causality (GPC) infers causal relations from spatial cross-sectional data by reconstructing a symbolic approximation of the underlying spatial dynamical system. 25 | 26 | Let $x(s)$ and $y(s)$ denote two spatial cross-sections over locations $s \in \mathcal{S}$. 27 | 28 | **(1) Spatial Embedding** 29 | 30 | For each location $s_i$, GPC constructs an embedding vector 31 | 32 | $$ 33 | \mathbf{E}_{x(s_i)} = \big( x(s_{i}^{(1)}), x(s_{i}^{(2)}), \dots, x(s_{i}^{(E\tau)}) \big), 34 | $$ 35 | 36 | where $s_{i}^{(k)}$ denotes the $k$-th spatially lagged value of the spatial unit $s_i$, determined by embedding dimension $E$ and lag $\tau$. 37 | This yields two reconstructed state spaces $\mathcal{M}_x, \mathcal{M}_y \subset \mathbb{R}^E$. 38 | 39 | **(2) Symbolic Pattern Extraction** 40 | 41 | Local geometric transitions in each manifold are mapped to symbols 42 | 43 | $$ 44 | \sigma_x(s_i),\; \sigma_y(s_i) \in \mathcal{A}, 45 | $$ 46 | 47 | encoding increasing, decreasing, or non-changing modes. These symbolic trajectories summarize local pattern evolution. 48 | 49 | **(3) Cross-Pattern Mapping** 50 | 51 | Causality from $x \to y$ is assessed by predicting: 52 | 53 | $$ 54 | \hat{\sigma}_y(s_i) = F\big( \sigma_x(s_j): s_j \in \mathcal{N}_k(s_i) \big), 55 | $$ 56 | 57 | where $\mathcal{N}_k$ denotes the set of $k$ nearest neighbors in $\mathcal{M}_x$. 
58 | The agreement structure between $\hat{\sigma}_y(s_i)$ and $\sigma_y(s_i)$ determines the causal mode: 59 | 60 | * Positive: $\hat{\sigma}_y = \sigma_y$ 61 | * Negative: $\hat{\sigma}_y = -\sigma_y$ 62 | * Dark: neither agreement nor opposition 63 | 64 | **(4) Causal Strength** 65 | 66 | The global causal strength is the normalized consistency of symbol matches: 67 | 68 | $$ 69 | C_{x \to y} = \frac{1}{|\mathcal{S}|} \sum_{s_i \in \mathcal{S}} \mathbb{I}\big[ \hat{\sigma}_y(s_i) \bowtie \sigma_y(s_i) \big], 70 | $$ 71 | 72 | where $\bowtie$ encodes positive, negative, or dark matching rules. 73 | 74 | ## Usage examples 75 | 76 | ### Example of spatial vector data 77 | 78 | Load the `spEDM` package and its columbus spatial analysis data: 79 | 80 | ```{r load_lattice_data} 81 | library(spEDM) 82 | 83 | columbus = sf::read_sf(system.file("case/columbus.gpkg", package="spEDM")) 84 | columbus 85 | ``` 86 | 87 | The false nearest neighbours (FNN) method helps identify the appropriate minimal embedding dimension for reconstructing the state space of a time series or spatial cross-sectional data. 88 | 89 | ```{r fnn_lattice} 90 | spEDM::fnn(columbus, "crime", E = 1:10, eps = stats::sd(columbus$crime)) 91 | ``` 92 | 93 | The false nearest neighbours (FNN) ratio decreased to approximately 0.001 when the embedding dimension E reached 7, and remained relatively stable thereafter. Therefore, we adopted $E = 7$ as the minimal embedding dimension for subsequent parameter search. 
94 | 95 | Then, search optimal parameters: 96 | 97 | ```{r pc_lattice} 98 | # determine the type of causality using correlation 99 | stats::cor.test(columbus$hoval,columbus$crime) 100 | 101 | # since the correlation is -0.574, negative causality is selected as the metric to maximize in the optimal parameter search 102 | spEDM::pc(columbus, "hoval", "crime", E = 5:6, k = 7:10, tau = 1, maximize = "negative") 103 | ``` 104 | 105 | Run geographical pattern causality analysis 106 | 107 | ```{r gpc_lattice} 108 | spEDM::gpc(columbus, "hoval", "crime", E = 6, k = 9) 109 | ``` 110 | 111 | Convergence diagnostics 112 | 113 | ```{r fig1,fig.width=5.55,fig.height=3.05,fig.dpi=100,fig.cap=knitr::asis_output("**Figure 1**. **Convergence curves of causal strengths among house value and crime.**")} 114 | crime_convergence = spEDM::gpc(columbus, "hoval", "crime", 115 | libsizes = seq(5, 45, by = 5), 116 | E = 6, k = 9, progressbar = FALSE) 117 | crime_convergence 118 | plot(crime_convergence, ylimits = c(-0.01,1), 119 | xlimits = c(9,46), xbreaks = seq(10, 45, 10)) 120 | ``` 121 | 122 |
123 | 124 | ### Example of spatial raster data 125 | 126 | Load the `spEDM` package and its farmland NPP data: 127 | 128 | ```{r load_grid_data} 129 | library(spEDM) 130 | 131 | npp = terra::rast(system.file("case/npp.tif", package = "spEDM")) 132 | # To save the computation time, we will aggregate the data by 3 times 133 | npp = terra::aggregate(npp, fact = 3, na.rm = TRUE) 134 | npp 135 | 136 | # Check the validated cell number 137 | nnamat = terra::as.matrix(npp[[1]], wide = TRUE) 138 | nnaindice = which(!is.na(nnamat), arr.ind = TRUE) 139 | dim(nnaindice) 140 | ``` 141 | 142 | Determining minimal embedding dimension: 143 | 144 | ```{r fnn_grid} 145 | spEDM::fnn(npp, "npp", E = 1:15, 146 | eps = stats::sd(terra::values(npp[["npp"]]),na.rm = TRUE)) 147 | ``` 148 | 149 | At $E = 6$, the false nearest neighbor ratio stabilizes approximately at 0.0001 and remains constant thereafter. Therefore, $E = 6$ is selected as the minimal embedding dimension for the subsequent GPC analysis. 150 | 151 | Then, search optimal parameters: 152 | 153 | ```{r pc_grid} 154 | stats::cor.test(~ pre + npp, 155 | data = terra::values(npp[[c("pre","npp")]], 156 | dataframe = TRUE, na.rm = TRUE)) 157 | 158 | 159 | g1 = spEDM::pc(npp, "npp", "pre", E = 6:10, k = 12, tau = 1:5, maximize = "positive") 160 | g1 161 | ``` 162 | 163 | Run geographical pattern causality analysis 164 | 165 | ```{r gpc_grid} 166 | spEDM::gpc(npp, "pre", "npp", E = 8, k = 12, tau = 5) 167 | ``` 168 | 169 | Convergence diagnostics 170 | 171 | ```{r fig2,fig.width=5.55,fig.height=3.05,fig.dpi=100,fig.cap=knitr::asis_output("**Figure 2**. 
**Convergence curves of causal strengths among precipitation and NPP.**")} 172 | npp_convergence = spEDM::gpc(npp, "pre", "npp", 173 | libsizes = matrix(rep(seq(10,80,10),2),ncol = 2), 174 | E = 8, k = 12, tau = 5, progressbar = FALSE) 175 | npp_convergence 176 | plot(npp_convergence, ylimits = c(-0.01,0.65), 177 | xlimits = c(0,6500), xbreaks = seq(100, 6400, 500)) 178 | ``` 179 | -------------------------------------------------------------------------------- /src/IntersectionCardinality.h: -------------------------------------------------------------------------------- 1 | #ifndef IntersectionCardinality_H 2 | #define IntersectionCardinality_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include "CppStats.h" 12 | #include "CppDistances.h" 13 | #include "DataStruct.h" 14 | #include 15 | 16 | /** 17 | * Computes intersection-based mapping ratio sequences between two neighbor graphs 18 | * for use in Cross Mapping Cardinality (CMC) or similar causal inference frameworks. 19 | * 20 | * Parameters: 21 | * neighborsX - Precomputed sorted neighbor indices for embedding X 22 | * neighborsY - Precomputed sorted neighbor indices for embedding Y 23 | * lib_size - Size of the moving library used in mapping 24 | * lib_indices - Global indices from which to draw the sliding libraries 25 | * pred_indices - Indices at which to perform prediction (evaluation points) 26 | * num_neighbors - Number of neighbors used for mapping (after exclusion) 27 | * n_excluded - Number of nearest neighbors to exclude from the front 28 | * threads - Number of parallel threads for computation 29 | * parallel_level - Whether to use multithreaded (0) or serial (1) mode 30 | * 31 | * Returns: 32 | * A vector of IntersectionRes structures, each containing the average intersection 33 | * ratio sequence (IC curve) for a different starting point of the moving library. 
34 | * If lib_size == lib_indices.size(), returns a single result using full library. 35 | * 36 | * Notes: 37 | * - Neighbor lists must use std::numeric_limits::max() to indicate invalid entries. 38 | * - This function assumes that the neighbor vectors are sorted by ascending distance. 39 | * - Use in combination with AUC computation to assess causal strength. 40 | */ 41 | std::vector IntersectionCardinalitySingle( 42 | const std::vector>& neighborsX, 43 | const std::vector>& neighborsY, 44 | size_t lib_size, 45 | const std::vector& lib_indices, 46 | const std::vector& pred_indices, 47 | size_t num_neighbors, 48 | size_t n_excluded, 49 | size_t threads, 50 | int parallel_level 51 | ); 52 | 53 | /** 54 | * Computes the Intersection Cardinality (IC) curve for causal inference via cross mapping. 55 | * 56 | * This function evaluates the extent to which neighbors of the effect variable Y 57 | * are preserved when mapped through the neighbors of cause variable X. 58 | * Specifically, for each number of neighbors from 1 to `num_neighbors`, it computes 59 | * the intersection count between the k nearest neighbors of Y and the k nearest neighbors of X, 60 | * for each prediction point. 61 | * 62 | * The output is an Intersection Cardinality (IC) curve, which can be further processed 63 | * (e.g., calculating AUC, statistical significance) outside this function. 64 | * 65 | * @param embedding_x State-space embedding of the potential cause variable (NxE matrix). 66 | * @param embedding_y State-space embedding of the potential effect variable (NxE matrix). 67 | * @param lib Vector of library indices (should be 0-based in C++). 68 | * @param pred Vector of prediction indices (should be 0-based in C++). 69 | * @param num_neighbors Maximum number of neighbors to consider in intersection (e.g., from 1 to k). 70 | * @param n_excluded Number of nearest neighbors to exclude (e.g., due to temporal proximity). 
71 | * @param dist_metric Distance metric selector (1: Manhattan, 2: Euclidean). 72 | * @param threads Number of threads used for parallel computation. 73 | * @param parallel_level Parallel mode flag: 0 = parallel, 1 = serial. 74 | * 75 | * @return A vector of size `num_neighbors`: 76 | * - Each element represents the average number of overlapping neighbors 77 | * between X and Y across prediction points, for each neighbor count k = 1, 2, ..., num_neighbors. 78 | * 79 | * If inputs are invalid or no valid prediction points exist, the returned vector 80 | * is filled with `NaN` values. 81 | * 82 | * @note 83 | * - This function returns only the raw intersection values. To compute AUC or p-values, 84 | * use additional post-processing such as DeLong’s test. 85 | */ 86 | std::vector IntersectionCardinality( 87 | const std::vector>& embedding_x, 88 | const std::vector>& embedding_y, 89 | const std::vector& lib, 90 | const std::vector& pred, 91 | size_t num_neighbors, 92 | size_t n_excluded, 93 | int dist_metric = 2, 94 | int threads = 8, 95 | int parallel_level = 0); 96 | 97 | /** 98 | * Computes the Intersection Cardinality (IC) AUC-based causal strength score. 99 | * 100 | * This function evaluates the extent to which neighbors of the effect variable Y 101 | * are preserved when mapped through the neighbors of cause variable X, by calculating 102 | * the intersection ratio curve and evaluating its AUC (area under curve). 103 | * Statistical significance (p-value) and confidence interval are computed via DeLong's test. 104 | * 105 | * Parameters: 106 | * embedding_x - State-space reconstruction (embedding) of the potential cause variable. 107 | * embedding_y - State-space reconstruction (embedding) of the potential effect variable. 108 | * lib - Library index vector (should be 0-based in C++). 109 | * pred - Prediction index vector (should be 0-based in C++). 110 | * num_neighbors - Number of neighbors used for cross mapping (after exclusion). 
111 | * n_excluded - Number of nearest neighbors to exclude (e.g. temporal). 112 | * dist_metric - Distance metric selector (1: Manhattan, 2: Euclidean). 113 | * threads - Number of threads used in parallel computation. 114 | * parallel_level - Whether to use multithreaded (0) or serial (1) mode 115 | * 116 | * Returns: 117 | * A vector of 4 values: 118 | * [0] - AUC (Intersection Cardinality score, bounded [0, 1]) 119 | * [1] - p-value from DeLong test (testing whether AUC > 0.5) 120 | * [2] - Confidence interval lower bound 121 | * [3] - Confidence interval upper bound 122 | */ 123 | std::vector IntersectionCardinalityScores( 124 | const std::vector>& embedding_x, 125 | const std::vector>& embedding_y, 126 | const std::vector& lib, 127 | const std::vector& pred, 128 | size_t num_neighbors, 129 | size_t n_excluded, 130 | int dist_metric = 2, 131 | int threads = 8, 132 | int parallel_level = 0); 133 | 134 | #endif // IntersectionCardinality_H 135 | -------------------------------------------------------------------------------- /vignettes/main1_pkgintro.Rmd.orig: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Introduction to the spEDM package" 3 | author: "Wenbo Lv" 4 | date: | 5 | | Last update: 2025-12-01 6 | | Last run: `r Sys.Date()` 7 | output: rmarkdown::html_vignette 8 | vignette: > 9 | %\VignetteIndexEntry{1. Introduction to the spEDM package} 10 | %\VignetteEngine{knitr::rmarkdown} 11 | %\VignetteEncoding{UTF-8} 12 | --- 13 | 14 | ```{r, include = FALSE} 15 | knitr::opts_chunk$set( 16 | collapse = TRUE, 17 | comment = "##", 18 | fig.path = "man/figures/pkgintro/" 19 | ) 20 | ``` 21 | 22 | ## Overview 23 | 24 | The *spEDM* package is an open-source, computationally efficient toolkit designed to provide a unified API for **Spatial Empirical Dynamic Modeling**. 
It supports a suite of prediction-based causal discovery algorithms, including *spatial (Granger) causality test*, *geographical convergent cross mapping*, *geographical pattern causality*, and *geographical cross mapping cardinality*. 25 | 26 | To support both learning and practical application, *spEDM* offers three carefully curated spatial cross-sectional datasets, each paired with a detailed example showcasing the use of key algorithms in the package. 27 | 28 | ## Installation 29 | 30 | Install the stable version from [CRAN](https://CRAN.R-project.org/package=spEDM) with: 31 | 32 | ```r 33 | install.packages("spEDM", dep = TRUE) 34 | ``` 35 | 36 | Alternatively, you can install the development version from [R-universe](https://stscl.r-universe.dev/spEDM) with: 37 | 38 | ```r 39 | install.packages("spEDM", 40 | repos = c("https://stscl.r-universe.dev", 41 | "https://cloud.r-project.org"), 42 | dep = TRUE) 43 | ``` 44 | 45 | ## Data 46 | 47 | The *spEDM* package includes three illustrative datasets with well-known causation to demonstrate key functionalities: 48 | 49 | 1. **Columbus, OH** dataset — A classic spatial dataset widely used in spatial econometrics, included here for benchmarking and demonstration purposes. 50 | *Data files*: `columbus.gpkg` 51 | 52 | 2. **Population density and its potential drivers in mainland China** — A county-level dataset capturing population density alongside relevant socio-environmental drivers. 53 | *Data files*: `popd.csv`, `popd_nb.gal` 54 | 55 | 3. **Farmland NPP and related variables** — A raster dataset capturing net primary productivity (NPP) of farmland, key climatic variables, elevation, and human activity footprints across mainland China, suitable for analyzing the interactions between agricultural productivity, environmental conditions, and human activities. 
56 | *Data files*: `npp.tif` 57 | 58 | These datasets can be loaded as shown below: 59 | 60 | ### Columbus OH spatial analysis dataset 61 | 62 | ```{r columbus} 63 | library(spEDM) 64 | 65 | columbus = sf::read_sf(system.file("case/columbus.gpkg", package="spEDM")) 66 | columbus 67 | ``` 68 | 69 | ### County-level population density in mainland China 70 | 71 | ```{r popd} 72 | library(spEDM) 73 | 74 | popd_nb = spdep::read.gal(system.file("case/popd_nb.gal",package = "spEDM")) 75 | popd_nb 76 | 77 | popd = readr::read_csv(system.file("case/popd.csv",package = "spEDM")) 78 | popd 79 | 80 | popd_sf = sf::st_as_sf(popd, coords = c("lon","lat"), crs = 4326) 81 | popd_sf 82 | ``` 83 | 84 | ### Farmland NPP and related variables in mainland China 85 | 86 | ```{r npp} 87 | library(spEDM) 88 | npp = terra::rast(system.file("case/npp.tif", package = "spEDM")) 89 | npp 90 | ``` 91 | 92 | ## Usage 93 | 94 | Users can refer to several additional vignettes for more detailed examples of using *spEDM*, namely: 95 | 96 | | *Method* | *Vignette* | 97 | |---------------------|---------------------| 98 | | State Space Reconstruction | [SSR](https://stscl.github.io/spEDM/articles/main2_ssr.html) | 99 | | Geographical Convergent Cross Mapping | [GCCM](https://stscl.github.io/spEDM/articles/main3_gccm.html) | 100 | | Geographical Cross Mapping Cardinality | [GCMC](https://stscl.github.io/spEDM/articles/main5_gcmc.html) | 101 | | Geographical Pattern Causality |[GPC](https://stscl.github.io/spEDM/articles/main4_gpc.html) | 102 | | Spatial Logistic Map | [SLM](https://stscl.github.io/spEDM/articles/si1_slm.html) | 103 | | Spatial Causality Test |[SCT](https://stscl.github.io/spEDM/articles/si2_sct.html) | 104 | 105 | In addition to the spatial extensions provided by *spEDM*, several other R packages support Empirical Dynamic Modeling (EDM) for **time series data**: 106 | 107 | * **[tEDM](https://cran.r-project.org/package=tEDM)**: An R package developed by the author of `spEDM`, designed 
to implement the **Temporal Empirical Dynamic Modeling** framework. It aims for maximal consistency with `spEDM` in terms of API design and modeling workflow, facilitating seamless transition between spatial and temporal EDM applications. 108 | * **[rEDM](https://cran.r-project.org/package=rEDM)**: A foundational EDM package implementing simplex projection, S-map, and convergent cross mapping (CCM), widely used for analyzing nonlinear time series dynamics. 109 | * **[multispatialCCM](https://cran.r-project.org/package=multispatialCCM)**: Implements CCM for collections of short time series using bootstrapping, enabling causal inference across replicated or multi-site time series. 110 | * **[fastEDM](https://edm-developers.github.io/fastEDM-r/)**: A high-performance EDM package with a multi-threaded C++ backend. It supports large-scale time series analysis, handles panel data, and provides robust options for dealing with missing values using delay-tolerant algorithms or data exclusion. 111 | 112 | ## References 113 | 114 | Takens, F. (1981). Detecting strange attractors in turbulence. Dynamical Systems and Turbulence, Warwick 1980, 366–381. https://doi.org/10.1007/bfb0091924 115 | 116 | Mañé, R. (1981). On the dimension of the compact invariant sets of certain non-linear maps. Dynamical Systems and Turbulence, Warwick 1980, 230–242. https://doi.org/10.1007/bfb0091916 117 | 118 | Willeboordse, F.H., 2003. The spatial logistic map as a simple prototype for spatiotemporal chaos. Chaos: An Interdisciplinary Journal of Nonlinear Science 13, 533–540. https://doi.org/10.1063/1.1568692 119 | 120 | Herrera, M., Mur, J., & Ruiz, M. (2016). Detecting causal relationships between spatial processes. Papers in Regional Science, 95(3), 577–595. https://doi.org/10.1111/pirs.12144 121 | 122 | Gao, B., Yang, J., Chen, Z., Sugihara, G., Li, M., Stein, A., Kwan, M.-P., & Wang, J. (2023). Causal inference from cross-sectional earth system data with geographical convergent cross mapping. 
Nature Communications, 14(1). https://doi.org/10.1038/s41467-023-41619-6 123 | 124 | Zhang, Z., Wang, J., 2025. A model to identify causality for geographic patterns. International Journal of Geographical Information Science 1–21. https://doi.org/10.1080/13658816.2025.2581207 125 | 126 |
127 | -------------------------------------------------------------------------------- /vignettes/main6_scpcm.Rmd.orig: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Spatially Convergent Partial Cross Mapping" 3 | author: "Wenbo Lv" 4 | date: | 5 | | Last update: 2025-12-15 6 | | Last run: `r Sys.Date()` 7 | output: rmarkdown::html_vignette 8 | vignette: > 9 | %\VignetteIndexEntry{6. Spatially Convergent Partial Cross Mapping} 10 | %\VignetteEngine{knitr::rmarkdown} 11 | %\VignetteEncoding{UTF-8} 12 | --- 13 | 14 | ```{r, include = FALSE} 15 | knitr::opts_chunk$set( 16 | collapse = TRUE, 17 | comment = "##", 18 | fig.path = "man/figures/scpcm/" 19 | ) 20 | ``` 21 | 22 | ## Model principles 23 | 24 | The methodological details are pending peer review and will be made available thereafter. 25 | 26 | ## Usage examples 27 | 28 | ### Example of spatial vector data 29 | 30 | Load the `spEDM` package and columbus OH spatial analysis dataset: 31 | 32 | ```{r load_lattice_data} 33 | if (!requireNamespace("spEDM")) install.packages("spEDM") 34 | 35 | columbus = sf::read_sf(system.file("case/columbus.gpkg", package="spEDM")) 36 | columbus 37 | ``` 38 | 39 | We demonstrate how spatial vector data can be used in SCPCM analysis through a causal example examining the influences of the level of burglary incidents in a neighbourhood on house values, with neighbourhood household income included as a conditioning variable. 
40 | 41 | Determine minimum embedding dimensions: 42 | 43 | ```{r fnn_lattice} 44 | spEDM::fnn(columbus,"crime",E = 1:10) 45 | spEDM::fnn(columbus,"hoval",E = 1:10) 46 | spEDM::fnn(columbus,"inc",E = 1:10) 47 | ``` 48 | 49 | Self prediction for parameter tuning: 50 | 51 | ```{r simplex_lattice} 52 | spEDM::simplex(columbus,"crime","crime",E = 7:10,k=12) 53 | spEDM::simplex(columbus,"hoval","hoval",E = 7:10,k=12) 54 | spEDM::simplex(columbus,"inc","inc",E = 7:10,k=12) 55 | ``` 56 | 57 | Conduct SCPCM: 58 | 59 | ```{r scpcm_lattice} 60 | crime_hoval = spEDM::scpcm(data = columbus, 61 | cause = "crime", 62 | effect = "hoval", 63 | conds = "inc", 64 | libsizes = seq(5, 45, by = 5), 65 | E = c(8,7,8), 66 | k = 12) 67 | crime_hoval 68 | ``` 69 | 70 | Visualize the result: 71 | 72 | ```{r fig1,fig.width=6.25,fig.height=3.15,fig.dpi=100,fig.cap=knitr::asis_output("**Figure 1**. The causation between crime and house value. **a** Crime–hoval causality without accounting for covariates. **b** Crime–hoval causality controlling for household income.")} 73 | if (!requireNamespace("cowplot")) install.packages("cowplot") 74 | 75 | fig1a = plot(crime_hoval,partial = FALSE,ylimits = c(0.1,0.75)) 76 | fig1b = plot(crime_hoval,partial = TRUE,ylimits = c(-0.05,0.55)) 77 | fig1 = cowplot::plot_grid(fig1a,fig1b,ncol = 2,label_fontfamily = 'serif', 78 | labels = paste0('(',letters[1:2],')')) 79 | fig1 80 | ``` 81 | 82 |
83 | 84 | ### Example of spatial raster data 85 | 86 | Load the `spEDM` package and simulate raster data with a cyclic interaction structure $x \rightarrow y \rightarrow z \rightarrow x$: 87 | 88 | ```{r sim_grid_data} 89 | if (!requireNamespace("fields")) install.packages("fields") 90 | if (!requireNamespace("MASS")) install.packages("MASS") 91 | 92 | sim_trispecies = \(nx,ny,seed = 123){ 93 | grid = expand.grid(seq(0, 10, length.out = nx), 94 | seq(0, 10, length.out = ny)) 95 | cov.fun = \(d, range = 1.5, sill=1) sill * exp(-d/range) 96 | dist.mat = fields::rdist(grid) 97 | cov.mat = cov.fun(dist.mat, range=1.5, sill=1) 98 | set.seed(seed) 99 | res = replicate(3, { 100 | MASS::mvrnorm(1, rep(0, nrow(grid)), cov.mat) |> 101 | pmax(0) |> 102 | sdsfun::normalize_vector(0,1) |> 103 | matrix(nrow = nx, ncol = ny) |> 104 | terra::rast() 105 | }, simplify = FALSE) 106 | terra::rast(res) 107 | } 108 | 109 | species = sim_trispecies(20,20, seed = 42) 110 | names(species) = c("x","y","z") 111 | 112 | sim = spEDM::slm(species, x = "x", y = "y", z = "z", k = 4, 113 | step = 15, transient = 1, threshold = Inf, 114 | aggregate_fn = \(.x) mean(.x, na.rm = TRUE), 115 | alpha_x = 0.05, alpha_y = 0.05, alpha_z = 0.05, 116 | beta_xy = 1, beta_xz = 0, beta_yx = 0, beta_yz = 1, 117 | beta_zx = 1, beta_zy = 0) 118 | 119 | terra::values(species[["x"]]) = sim$x 120 | terra::values(species[["y"]]) = sim$y 121 | terra::values(species[["z"]]) = sim$z 122 | species 123 | ``` 124 | 125 | Determine minimum embedding dimensions: 126 | 127 | ```{r fnn_grid} 128 | spEDM::fnn(species, "x") 129 | spEDM::fnn(species, "y") 130 | spEDM::fnn(species, "z") 131 | ``` 132 | 133 | Self prediction for parameter tuning: 134 | 135 | ```{r simplex_grid} 136 | s1 = spEDM::simplex(species, "x", "x", E = 5:10, k = 15, tau = 1) 137 | s2 = spEDM::simplex(species, "y", "y", E = 5:10, k = 15, tau = 1) 138 | s3 = spEDM::simplex(species, "z", "z", E = 5:10, k = 15, tau = 1) 139 | list(s1,s2,s3) 140 | ``` 141 | 142 | 
Due to the statistical aggregation performed after simulating with the spatial logistic map, we adopt a uniform embedding dimension for x, y, and z to reduce inference bias: 143 | 144 | ``` {r fig2,fig.width=3.55,fig.height=2.85,fig.dpi=100,fig.cap=knitr::asis_output("**Figure 2**. Self cross prediction skills for three variables.")} 145 | if (!requireNamespace("purrr")) install.packages("purrr") 146 | simplex_res = purrr::map2_dfr(list(s1,s2,s3), c("x","y","z"), 147 | \(.list,.name) dplyr::mutate(.list$xmap,variable = .name)) 148 | ggplot2::ggplot(data = simplex_res) + 149 | ggplot2::geom_line(ggplot2::aes(x = E, y = rho, color = variable)) + 150 | ggplot2::theme_classic() 151 | ``` 152 | 153 |
154 | 155 | Since the self-prediction performance of the y variable is weaker than that of x and z, we select the embedding dimension that yields the strongest predictive performance for 156 | y as the final dimension used in the SCPCM analysis, that is $E = 9$. 157 | 158 | Investigate the causation between x and z, with y as the control variable: 159 | 160 | ```{r scpcm_grid} 161 | xz = spEDM::scpcm(species, "x", "z", "y", E = 9, k = 15, 162 | libsizes = matrix(seq(50,400,50), ncol = 1)) 163 | xz 164 | ``` 165 | 166 | Visualize the result: 167 | 168 | ```{r fig3,fig.width=5.55,fig.height=2.85,fig.dpi=100,fig.cap=knitr::asis_output("**Figure 3**. The cross prediction between x and z. **a** x–z causality without accounting for y. **b** x–z causality controlling for y.")} 169 | fig2a = plot(xz,partial = FALSE,ylimits = c(0.05,0.7)) 170 | fig2b = plot(xz,partial = TRUE,ylimits = c(0.05,0.7)) 171 | fig2 = cowplot::plot_grid(fig2a,fig2b,ncol = 2,label_fontfamily = 'serif', 172 | labels = paste0('(',letters[1:2],')')) 173 | fig2 174 | ``` 175 | -------------------------------------------------------------------------------- /vignettes/si2_sct.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Spatial Causality Test" 3 | author: "Wenbo Lv" 4 | date: | 5 | | Last update: 2025-12-15 6 | | Last run: 2025-12-15 7 | output: rmarkdown::html_vignette 8 | vignette: > 9 | %\VignetteIndexEntry{SI2. Spatial Causality Test} 10 | %\VignetteEngine{knitr::rmarkdown} 11 | %\VignetteEncoding{UTF-8} 12 | --- 13 | 14 | 15 | 16 | ## Methodological Background 17 | 18 | Let us begin by defining two spatial cross-sectional series $\{x_s\}_{s \in S}$ and $\{y_s\}_{s \in S}$, where $S$ represents the study area. 
19 | 20 | We use: 21 | 22 | $$ 23 | \mathcal{X}_W = \{ W_i x \mid W_i \in \mathcal{W}(x, y) \} 24 | $$ 25 | 26 | $$ 27 | \mathcal{Y}_W = \{ W_i y \mid W_i \in \mathcal{W}(x, y) \} 28 | $$ 29 | 30 | to denote the sets of spatial lags of $x$ and $y$ given by all the weighting matrices in $\mathcal{W}(x, y)$ (that is, the set of spatial 31 | dependence structures between $x$ and $y$). 32 | 33 | 34 | We say that $\{x_s\}_{s \in S}$ does not cause $\{y_s\}_{s \in S}$ **under the spatial structures** $\mathcal{X}_W$ and $\mathcal{Y}_W$ if 35 | 36 | $$ 37 | h_{y |\mathcal{Y}_W}(m) = h_{y | \mathcal{Y}_W,\mathcal{X}_W}(m) 38 | $$ 39 | A unilateral non-parametric test can be applied to assess the spatial causality via the following null hypothesis: 40 | 41 | $$ 42 | H_0 : \{x_s\}_{s \in S} \text{ does not cause } \{y_s\}_{s \in S} \text{ under the spatial structures } \mathcal{X}_W \text{ and } \mathcal{Y}_W, $$ 43 | 44 | with the following statistic: 45 | 46 | $$ 47 | \hat{\delta}(\mathcal{Y}_W, \mathcal{X}_W) = \hat{h}_{y |\mathcal{Y}_W}(m) - \hat{h}_{y | \mathcal{Y}_W,\mathcal{X}_W}(m) 48 | $$ 49 | 50 | where $\hat{h}_*(m)$ is the estimated conditional symbolic entropy using Shannon’s entropy with $m-1$ nearest neighbors. The alternative is that the null hypothesis is not true. 51 | 52 | If $\mathcal{X}_W$ does not contain extra information about $y$ then $\hat{\delta}(\mathcal{Y}_W, \mathcal{X}_W) = 0$, otherwise, $\hat{\delta}(\mathcal{Y}_W, \mathcal{X}_W) > 0$. 53 | 54 | $h_{y |\mathcal{Y}_W}(m)$ measures the uncertainty of the distribution of symbols of $y$, conditional on the symbols of its spatial lag, $\mathcal{Y}_W$. Moreover, $h_{y | \mathcal{Y}_W,\mathcal{X}_W}(m)$ measures the uncertainty of the distribution of symbols of $y$, conditional on the symbols of the spatial lags of $y$, $\mathcal{Y}_W$, and of $x$, $\mathcal{X}_W$. 
If the second variable, $x$, indeed causes the first one then there should be a significant decrease in the entropy, and the statistic $\hat{\delta}(\mathcal{Y}_W, \mathcal{X}_W)$ will take on high positive values. If there is only a spatial correlation, but not causation, the difference between both entropies will be small. The statistical significance of $\hat{\delta}(\mathcal{Y}_W, \mathcal{X}_W)$ is assessed using spatial block bootstrap. 55 | 56 | ## Usage examples 57 | 58 | ### Example of spatial vector data 59 | 60 | Load the `spEDM` package and the columbus OH dataset: 61 | 62 | 63 | ``` r 64 | library(spEDM) 65 | 66 | columbus = sf::read_sf(system.file("case/columbus.gpkg", package="spEDM")) 67 | columbus 68 | ## Simple feature collection with 49 features and 6 fields 69 | ## Geometry type: POLYGON 70 | ## Dimension: XY 71 | ## Bounding box: xmin: 5.874907 ymin: 10.78863 xmax: 11.28742 ymax: 14.74245 72 | ## Projected CRS: Undefined Cartesian SRS with unknown unit 73 | ## # A tibble: 49 × 7 74 | ## hoval inc crime open plumb discbd geom 75 | ## 76 | ## 1 80.5 19.5 15.7 2.85 0.217 5.03 ((8.624129 14.23698, 8.5597 14.74245, 8… 77 | ## 2 44.6 21.2 18.8 5.30 0.321 4.27 ((8.25279 14.23694, 8.282758 14.22994, … 78 | ## 3 26.4 16.0 30.6 4.53 0.374 3.89 ((8.653305 14.00809, 8.81814 14.00205, … 79 | ## 4 33.2 4.48 32.4 0.394 1.19 3.7 ((8.459499 13.82035, 8.473408 13.83227,… 80 | ## 5 23.2 11.3 50.7 0.406 0.625 2.83 ((8.685274 13.63952, 8.677577 13.72221,… 81 | ## 6 28.8 16.0 26.1 0.563 0.254 3.78 ((9.401384 13.5504, 9.434411 13.69427, … 82 | ## 7 75 8.44 0.178 0 2.40 2.74 ((8.037741 13.60752, 8.062716 13.60452,… 83 | ## 8 37.1 11.3 38.4 3.48 2.74 2.89 ((8.247527 13.58651, 8.2795 13.5965, 8.… 84 | ## 9 52.6 17.6 30.5 0.527 0.891 3.17 ((9.333297 13.27242, 9.671007 13.27361,… 85 | ## 10 96.4 13.6 34.0 1.55 0.558 4.33 ((10.08251 13.03377, 10.0925 13.05275, … 86 | ## # ℹ 39 more rows 87 | ``` 88 | 89 | Detect spatial causality among the variables *inc*, *crime*, and 
*hoval* : 90 | 91 | 92 | ``` r 93 | # house value and crime 94 | sc.test(columbus, "hoval", "crime", k = 15) 95 | ## spatial causality test 96 | ## hoval -> crime: statistic = 1.114, p value = 0.549 97 | ## crime -> hoval: statistic = 1.555, p value = 0.008 98 | 99 | # household income and crime 100 | sc.test(columbus, "inc", "crime", k = 15) 101 | ## spatial causality test 102 | ## inc -> crime: statistic = 0.739, p value = 0.99 103 | ## crime -> inc: statistic = 0.946, p value = 0.762 104 | 105 | # household income and house value 106 | sc.test(columbus, "inc", "hoval", k = 15) 107 | ## spatial causality test 108 | ## inc -> hoval: statistic = 1.211, p value = 0.311 109 | ## hoval -> inc: statistic = 0.992, p value = 0.702 110 | ``` 111 | 112 | ### Example of spatial raster data 113 | 114 | Load the `spEDM` package and its farmland NPP data: 115 | 116 | 117 | ``` r 118 | library(spEDM) 119 | 120 | npp = terra::rast(system.file("case/npp.tif", package = "spEDM")) 121 | # To save the computation time, we will aggregate the data by 3 times 122 | npp = terra::aggregate(npp, fact = 3, na.rm = TRUE) 123 | npp 124 | ## class : SpatRaster 125 | ## size : 135, 161, 5 (nrow, ncol, nlyr) 126 | ## resolution : 30000, 30000 (x, y) 127 | ## extent : -2625763, 2204237, 1867078, 5917078 (xmin, xmax, ymin, ymax) 128 | ## coord. ref. 
: CGCS2000_Albers 129 | ## source(s) : memory 130 | ## names : npp, pre, tem, elev, hfp 131 | ## min values : 187.50, 390.3351, -47.8194, -110.1494, 0.04434316 132 | ## max values : 15381.89, 23734.5330, 262.8576, 5217.6431, 42.68803711 133 | ``` 134 | 135 | Detect spatial causality among the variables *pre*, *tem*, and *npp* : 136 | 137 | 138 | ``` r 139 | # precipitation and npp 140 | sc.test(npp,"pre","npp",k = 30) 141 | ## spatial causality test 142 | ## pre -> npp: statistic = 0.227, p value = 0 143 | ## npp -> pre: statistic = 0.16, p value = 0 144 | 145 | # temperature and npp 146 | sc.test(npp,"tem","npp",k = 30) 147 | ## spatial causality test 148 | ## tem -> npp: statistic = 0.168, p value = 0 149 | ## npp -> tem: statistic = 0.169, p value = 0 150 | 151 | # precipitation and temperature 152 | sc.test(npp,"pre","tem",k = 30) 153 | ## spatial causality test 154 | ## pre -> tem: statistic = 0.19, p value = 0 155 | ## tem -> pre: statistic = 0.147, p value = 0 156 | ``` 157 | -------------------------------------------------------------------------------- /src/FalseNearestNeighbors.h: -------------------------------------------------------------------------------- 1 | #ifndef FalseNearestNeighbors_H 2 | #define FalseNearestNeighbors_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include "NumericUtils.h" 10 | #include "CppStats.h" 11 | #include "CppDistances.h" 12 | #include 13 | 14 | /* 15 | * Compute the False Nearest Neighbors (FNN) ratio for spatial cross-sectional data. 16 | * 17 | * This function determines whether nearest neighbors identified in a lower-dimensional 18 | * embedded space (E1) remain close in a higher-dimensional space (E2). 19 | * If not, the neighbor is considered a "false" neighbor, indicating the need for 20 | * a higher embedding dimension to accurately capture spatial proximity. 
21 | * 22 | * The FNN test is computed in two modes: 23 | * - parallel_level = 0: each prediction is processed in parallel using RcppThread. 24 | * - parallel_level = 1: all pairwise distances are precomputed once in advance 25 | * (better for repeated queries or small prediction sets). 26 | * 27 | * Parameters: 28 | * - embedding: A matrix (vector of vectors) representing the spatial embedding, 29 | * where each row corresponds to a spatial unit's attributes. 30 | * Must contain at least E2 columns. 31 | * - lib: Library index vector (1-based in R, converted to 0-based). 32 | * - pred: Prediction index vector (1-based in R, converted to 0-based). 33 | * - E1: The base embedding dimension used to identify the nearest neighbor (E1 < E2). 34 | * - E2: The full embedding dimension used to test false neighbors (usually E1 + 1). 35 | * - threads: Number of threads used when parallel_level = 0. 36 | * - parallel_level: 0 for per-pred parallelism (default), 1 for precomputed full distance matrix. 37 | * - Rtol: Relative threshold (default 10.0). 38 | * - Atol: Absolute threshold (default 2.0). 39 | * - L1norm: Whether to use Manhattan (L1) distance instead of Euclidean (L2). 40 | * 41 | * Returns: 42 | * - A double value indicating the proportion of false nearest neighbors (0–1). 43 | * If no valid pairs are found, returns NaN. 44 | */ 45 | double CppSingleFNN(const std::vector>& embedding, 46 | const std::vector& lib, 47 | const std::vector& pred, 48 | size_t E1, 49 | size_t E2, 50 | size_t threads, 51 | int parallel_level = 0, 52 | double Rtol = 10.0, 53 | double Atol = 2.0, 54 | bool L1norm = false); 55 | 56 | /* 57 | * Compute the False Nearest Neighbors (FNN) ratio for 3D embeddings. 58 | * 59 | * Embedding structure: 60 | * embedding[e][unit][lag] 61 | * e = embedding level 62 | * unit = spatial index 63 | * lag = lagged coordinate 64 | * 65 | * Distance definitions: 66 | * Dist_E1 = mean_{e = 0 .. 
E1-1} distance( embedding[e][pred], embedding[e][lib] ) 67 | * 68 | * Dist_E2 = mean_{lag = 0 .. embedding[E2-1][pred].size()-1} abs( embedding[E2-1][pred][lag] - embedding[E2-1][lib][lag] ) 69 | * 70 | * A false neighbor is flagged if: 71 | * Dist_E2 / Dist_E1 > Rtol OR Dist_E2 > Atol 72 | * 73 | * Supports two computation modes: 74 | * parallel_level = 0 → per-pred parallel computation 75 | * parallel_level = 1 → precompute distance tables to reuse for repeated queries 76 | * 77 | * Returns: 78 | * proportion of false nearest neighbors in [0,1], or NaN if none are valid. 79 | */ 80 | double CppSingleFNN(const std::vector>>& embedding, 81 | const std::vector& lib, 82 | const std::vector& pred, 83 | size_t E1, 84 | size_t E2, 85 | size_t threads, 86 | int parallel_level = 0, 87 | double Rtol = 10.0, 88 | double Atol = 2.0, 89 | bool L1norm = false); 90 | 91 | /* 92 | * Compute False Nearest Neighbor (FNN) ratios across multiple embedding dimensions 93 | * for spatial cross-sectional data. 94 | * 95 | * For a given embedding matrix (with each row representing a spatial unit and 96 | * each column an embedding dimension), this function evaluates the proportion 97 | * of false nearest neighbors (FNN) as the embedding dimension increases. 98 | * 99 | * It iteratively calls `CppSingleFNN` for each embedding dimension pair (E1, E2), 100 | * where E1 ranges from 1 to D - 1 (D = number of columns), and E2 = E1 + 1. 101 | * The FNN ratio measures how often a nearest neighbor in dimension E1 becomes 102 | * distant in dimension E2, suggesting that E1 is insufficient for reconstructing 103 | * the system. 104 | * 105 | * If `parallel_level == 0`, the function executes in serial; 106 | * otherwise, it uses multithreading to compute FNN ratios for each (E1, E2) pair 107 | * in parallel. 108 | * 109 | * Parameters: 110 | * - embedding: A vector of vectors where each row is a spatial unit’s embedding. 111 | * Must have at least 2 columns (dimensions). 
112 | * - lib: A vector of indices indicating the library set (0-based). 113 | * - pred: A vector of indices indicating the prediction set (0-based). 114 | * - Rtol: A vector of relative distance thresholds (one per E1). 115 | * - Atol: A vector of absolute distance thresholds (one per E1). 116 | * - L1norm: If true, use L1 (Manhattan) distance; otherwise, use L2 (Euclidean). 117 | * - threads: Number of threads to use for parallel computation. 118 | * - parallel_level: 0 for serial loop over E1, >0 for parallel loop over E1. 119 | * 120 | * Returns: 121 | * - A vector of FNN ratios corresponding to each E1 from 1 to D - 1. 122 | * If not computable for a given E1, NaN is returned at that position. 123 | */ 124 | std::vector CppFNN(const std::vector>& embedding, 125 | const std::vector& lib, 126 | const std::vector& pred, 127 | const std::vector& Rtol, 128 | const std::vector& Atol, 129 | bool L1norm = false, 130 | int threads = 8, 131 | int parallel_level = 0); 132 | 133 | /* 134 | * Compute FNN ratios for 3D embeddings across all embedding dimensions E1 = 1 .. D-1 135 | * 136 | * Returns: std::vector of size D-1 137 | */ 138 | std::vector CppFNN(const std::vector>>& embedding, 139 | const std::vector& lib, 140 | const std::vector& pred, 141 | const std::vector& Rtol, 142 | const std::vector& Atol, 143 | bool L1norm = false, 144 | int threads = 8, 145 | int parallel_level = 0); 146 | 147 | #endif // FalseNearestNeighbors_H 148 | -------------------------------------------------------------------------------- /src/SGC4Grid.h: -------------------------------------------------------------------------------- 1 | #ifndef SGC4Grid_H 2 | #define SGC4Grid_H 3 | 4 | #include 5 | #include 6 | #include "CppGridUtils.h" 7 | #include "Entropy.h" 8 | #include "SpatialBlockBootstrap.h" 9 | #include 10 | 11 | /** 12 | * @brief Compute directional spatial granger causality for 2D grid data using symbolic entropy measures. 
13 | * 14 | * This function estimates the bidirectional spatial granger causality between two spatial variables 15 | * `x` and `y` observed on a 2D lattice/grid. It is based on a symbolic approximation of spatial transfer entropy, 16 | * which evaluates whether the spatial neighborhood of one variable improves the predictive information of the other. 17 | * 18 | * The procedure includes the following key steps: 19 | * 20 | * - **Spatial embedding construction**: Generate spatial lag vectors (`wx` and `wy`) for both variables using a neighborhood window. 21 | * - **Optional symbolization**: Convert raw values into discrete symbols to enhance robustness in the presence of spatial autocorrelation. 22 | * - **Entropy computation**: Estimate joint and marginal entropies of the original variables and their spatial embeddings. 23 | * - **Directional causality estimation**: Compute granger-like causal influence based on the information gain from adding the other variable's lag: 24 | * 25 | * - Causality from X to Y: 26 | * \f[ 27 | * SC_{x→y} = [H(y, wy) − H(wy)] − [H(y, wy, wx) − H(wy, wx)] 28 | * \f] 29 | * 30 | * - Causality from Y to X: 31 | * \f[ 32 | * SC_{y→x} = [H(x, wx) − H(wx)] − [H(x, wx, wy) − H(wx, wy)] 33 | * \f] 34 | * 35 | * - **Optional normalization**: If `normalize = true`, each causality score is scaled by its baseline information gain 36 | * (i.e., the first term in each direction). This yields values bounded between -1 and 1, making them comparable across variables and scales. 37 | * 38 | * @param x 2D grid (matrix) representing variable X. 39 | * @param y 2D grid (matrix) representing variable Y. 40 | * @param lib A vector of pairs representing the indices (row, column) of spatial units to be the library. 41 | * @param pred A vector of pairs representing the indices (row, column) of spatial units to be predicted. 42 | * @param k Embedding neighborhood radius (e.g., k = 1 means 3×3 window). 
43 | * @param base Logarithm base used in entropy computation (default is 2, for bits). 44 | * @param symbolize Whether to discretize the data via symbolic transformation before entropy computation. 45 | * @param normalize Whether to normalize the causality scores to lie within [-1, 1] (default is false). 46 | * 47 | * @return A std::vector of two values: 48 | * - sc_x_to_y: Estimated spatial granger causality from x to y (normalized if specified). 49 | * - sc_y_to_x: Estimated spatial granger causality from y to x (normalized if specified). 50 | */ 51 | std::vector SGCSingle4Grid( 52 | const std::vector>& x, 53 | const std::vector>& y, 54 | const std::vector>& lib, 55 | const std::vector>& pred, 56 | size_t k, 57 | double base = 2, 58 | bool symbolize = true, 59 | bool normalize = false 60 | ); 61 | 62 | /** 63 | * @brief Compute spatial granger causality for gridded data using spatial block bootstrap. 64 | * 65 | * This function estimates the directional spatial granger causality between two gridded variables `x` and `y`, 66 | * by applying a symbolic entropy-based method, and assesses the statistical significance of the causality using 67 | * spatial block bootstrap techniques. It calculates the causality in both directions: X → Y and Y → X. 68 | * Additionally, the function evaluates the significance of the estimated causality statistics by comparing them 69 | * to bootstrap realizations of the causality. 70 | * 71 | * The method involves the following steps: 72 | * - **Computation of true causality**: The function first calculates the spatial Granger causality statistic 73 | * using the original data grids `x` and `y`. 74 | * - **Spatial block bootstrap resampling**: The grid values are resampled with spatial block bootstrapping. 75 | * Each resample preserves local spatial structure and generates new bootstrap realizations of the causality statistic. 
76 | * - **Estimation of causality for bootstrapped samples**: The causality statistic is estimated for each of the 77 | * bootstrapped realizations, which involves calculating the symbolic entropy measures and their differences. 78 | * - **Empirical p-values**: The final p-values for both directional causality estimates (X → Y and Y → X) are 79 | * derived by comparing the bootstrapped statistics with the true causality statistics. 80 | * 81 | * This approach takes into account spatial autocorrelation and allows the use of parallel processing for faster 82 | * bootstrap estimation. The spatial bootstrap method involves reshuffling grid cells into spatial blocks, 83 | * preserving local dependencies, and calculating causality for each realization. 84 | * 85 | * @param x 2D grid (matrix) of variable X. 86 | * @param y 2D grid (matrix) of variable Y, same size as x. 87 | * @param lib A vector of pairs representing the indices (row, column) of spatial units to be the library. 88 | * @param pred A vector of pairs representing the indices (row, column) of spatial units to be predicted. 89 | * @param block Vector assigning each grid cell to a spatial block for bootstrapping. 90 | * @param k Neighborhood window size used for symbolization (typically 3 or 5). 91 | * @param threads Number of threads to use for parallel bootstrap estimation. 92 | * @param boot Number of bootstrap iterations (default: 399). 93 | * @param base Base of the logarithm used in entropy computation (default: 2 for bits). 94 | * @param seed Seed for the random number generator to ensure reproducibility (default: 42). 95 | * @param symbolize Whether to use symbolic transformation for the grids (default: true). 96 | * @param normalize Whether to normalize entropy values (optional, default: false). 97 | * @param progressbar Whether to show a progress bar during bootstrap computation (default: true). 
98 | * 99 | * @return A vector of four values: 100 | * - sc_x_to_y: Estimated spatial granger causality from x to y. 101 | * - p_x_to_y: Empirical p-value for x → y based on bootstrap distribution. 102 | * - sc_y_to_x: Estimated spatial granger causality from y to x. 103 | * - p_y_to_x: Empirical p-value for y → x based on bootstrap distribution. 104 | */ 105 | std::vector SGC4Grid( 106 | const std::vector>& x, 107 | const std::vector>& y, 108 | const std::vector>& lib, 109 | const std::vector>& pred, 110 | const std::vector& block, 111 | int k, 112 | int threads, 113 | int boot = 399, 114 | double base = 2, 115 | unsigned long long seed = 42, 116 | bool symbolize = true, 117 | bool normalize = false, 118 | bool progressbar = true 119 | ); 120 | 121 | #endif // SGC4Grid_H 122 | -------------------------------------------------------------------------------- /src/SGC4Lattice.h: -------------------------------------------------------------------------------- 1 | #ifndef SGC4Lattice_H 2 | #define SGC4Lattice_H 3 | 4 | #include 5 | #include 6 | #include "CppLatticeUtils.h" 7 | #include "Entropy.h" 8 | #include "SpatialBlockBootstrap.h" 9 | #include 10 | 11 | /** 12 | * @brief Computes directional spatial granger causality between two spatial variables 13 | * on a spatial lattice using spatial neighbor embeddings and quantized entropy measures. 14 | * 15 | * This function quantifies the asymmetric spatial granger causality between two 16 | * spatial variables `x` and `y`, both defined over a spatial lattice structure. 17 | * It adopts an information-theoretic framework based on symbolic (or continuous) 18 | * entropy estimation, incorporating spatial embedding through neighboring structures. 19 | * 20 | * Method Overview: 21 | * 1. Lattice-based Embedding: 22 | * - For each spatial unit, embedding vectors `wx` and `wy` are generated using 23 | * one-level spatial neighbors defined by the `nb` adjacency list. 24 | * 25 | * 2. 
Symbolization (Optional): 26 | * - If `symbolize = true`, the inputs (`x`, `y`, `wx`, `wy`) are discretized into 27 | * `k` symbolic categories prior to entropy computation. 28 | * Otherwise, entropy is estimated directly from continuous values using kernel methods. 29 | * 30 | * 3. Entropy Computation: 31 | * - The function calculates marginal and joint entropies including: 32 | * H(x, wx), H(y, wy), H(wx), H(wy), H(wx, wy), H(wx, wy, x), H(wx, wy, y). 33 | * 34 | * 4. Directional Causality Strengths: 35 | * - From x to y: 36 | * sc_x_to_y = ((H(y, wy) - H(wy)) - (H(wx, wy, y) - H(wx, wy))) 37 | * - From y to x: 38 | * sc_y_to_x = ((H(x, wx) - H(wx)) - (H(wx, wy, x) - H(wx, wy))) 39 | * 40 | * 5. Normalization (Optional): 41 | * - If `normalize = true`, the raw causality values are scaled by their respective 42 | * baseline entropy gains to fall within the range [-1, 1]. This normalization 43 | * enhances interpretability and comparability across different variable pairs 44 | * or spatial datasets. 45 | * 46 | * Parameters: 47 | * - x: Input spatial variable `x` (vector of doubles). 48 | * - y: Input spatial variable `y` (same size as `x`). 49 | * - nb: Neighborhood list defining spatial adjacency (e.g., rook or queen contiguity). 50 | * - lib: A vector of indices representing valid neighbors to consider for each spatial unit. 51 | * - pred: A vector of indices specifying the elements for which to compute the spatial Granger causality. 52 | * - k: Number of discrete bins used for symbolization or KDE estimation. 53 | * - base: Logarithm base for entropy (default = 2, for bits). 54 | * - symbolize: Whether to apply symbolization for symbolic entropy (default = true). 55 | * - normalize: Whether to normalize causality values to the range [-1, 1] (default = false). 
56 | * 57 | * Returns: 58 | * A `std::vector` of size 2: 59 | * - [0] Estimated spatial granger causality from x to y 60 | * - [1] Estimated spatial granger causality from y to x 61 | * If `normalize = true`, both values are scaled to the range [-1, 1]. 62 | */ 63 | std::vector SGCSingle4Lattice( 64 | const std::vector& x, 65 | const std::vector& y, 66 | const std::vector>& nb, 67 | const std::vector& lib, 68 | const std::vector& pred, 69 | size_t k, 70 | double base = 2, 71 | bool symbolize = true, 72 | bool normalize = false 73 | ); 74 | 75 | /** 76 | * @brief Compute spatial Granger causality for lattice data using spatial block bootstrap. 77 | * 78 | * This function estimates the directional spatial Granger causality between two lattice variables `x` and `y`, 79 | * by applying a symbolic entropy-based method, and assesses the statistical significance of the causality using 80 | * spatial block bootstrap techniques. It calculates the causality in both directions: X → Y and Y → X. 81 | * Additionally, the function evaluates the significance of the estimated causality statistics by comparing them 82 | * to bootstrap realizations of the causality. 83 | * 84 | * The method involves the following steps: 85 | * - **Computation of true causality**: The function first calculates the spatial Granger causality statistic 86 | * using the original lattice data `x` and `y`. 87 | * - **Spatial block bootstrap resampling**: The lattice values are resampled with spatial block bootstrapping. 88 | * Each resample preserves local spatial structure and generates new bootstrap realizations of the causality statistic. 89 | * - **Estimation of causality for bootstrapped samples**: The causality statistic is estimated for each of the 90 | * bootstrapped realizations, which involves calculating the symbolic entropy measures and their differences. 
91 | * - **Empirical p-values**: The final p-values for both directional causality estimates (X → Y and Y → X) are 92 | * derived by comparing the bootstrapped statistics with the true causality statistics. 93 | * 94 | * This approach accounts for spatial autocorrelation and allows the use of parallel processing for faster 95 | * bootstrap estimation. The spatial bootstrap method involves reshuffling lattice cells into spatial blocks, 96 | * preserving local dependencies, and calculating causality for each realization. 97 | * 98 | * @param x Input vector for spatial variable x. 99 | * @param y Input vector for spatial variable y (same length as x). 100 | * @param nb Neighborhood list (e.g., queen or rook adjacency), used for embedding. 101 | * @param lib A vector of indices representing valid neighbors to consider for each spatial unit. 102 | * @param pred A vector of indices specifying the elements for which to compute the spatial Granger causality. 103 | * @param block Vector indicating block assignments for spatial block bootstrapping. 104 | * @param k Number of discrete bins used for symbolization or KDE estimation. 105 | * @param threads Number of threads to use for parallel bootstrapping. 106 | * @param boot Number of bootstrap iterations (default: 399). 107 | * @param base Logarithmic base for entropy (default: 2, i.e., bits). 108 | * @param seed Random seed for reproducibility (default: 42). 109 | * @param symbolize Whether to apply symbolization before entropy computation (default: true). 110 | * @param normalize Whether to normalize entropy values (default: false). 111 | * @param progressbar Whether to display a progress bar during bootstrapping (default: true). 112 | * 113 | * @return A vector of four values: 114 | * - sc_x_to_y: Estimated spatial Granger causality from x to y. 115 | * - p_x_to_y: Empirical p-value for x → y based on bootstrap distribution. 116 | * - sc_y_to_x: Estimated spatial Granger causality from y to x. 
117 | * - p_y_to_x: Empirical p-value for y → x based on bootstrap distribution. 118 | */ 119 | std::vector SGC4Lattice( 120 | const std::vector& x, 121 | const std::vector& y, 122 | const std::vector>& nb, 123 | const std::vector& lib, 124 | const std::vector& pred, 125 | const std::vector& block, 126 | int k, 127 | int threads, 128 | int boot = 399, 129 | double base = 2, 130 | unsigned long long seed = 42, 131 | bool symbolize = true, 132 | bool normalize = false, 133 | bool progressbar = true 134 | ); 135 | 136 | #endif // SGC4Lattice_H 137 | -------------------------------------------------------------------------------- /src/DataStruct.h: -------------------------------------------------------------------------------- 1 | #ifndef DataStruct_H 2 | #define DataStruct_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include // for std::move 9 | 10 | struct PartialCorRes { 11 | int first; 12 | double second; 13 | double third; 14 | 15 | /// Default constructor: initializes all members to zero. 16 | PartialCorRes() : first(0), second(0.0), third(0.0) {} 17 | 18 | /// Parameterized constructor: initializes all members. 19 | PartialCorRes(int f, double s, double t) : first(f), second(s), third(t) {} 20 | 21 | /// Defaulted copy/move constructors and assignment operators. 22 | PartialCorRes(const PartialCorRes&) = default; 23 | PartialCorRes& operator=(const PartialCorRes&) = default; 24 | PartialCorRes(PartialCorRes&&) noexcept = default; 25 | PartialCorRes& operator=(PartialCorRes&&) noexcept = default; 26 | }; 27 | 28 | struct CMCRes { 29 | std::vector cross_mapping; ///< Cross mapping values. 30 | std::vector> causal_strength; ///< Causal strength matrix. 31 | 32 | /// Default constructor. 33 | CMCRes() = default; 34 | 35 | /// Value constructor (for lvalues): copies both arguments into the members. 
36 | CMCRes(const std::vector& cross_mapping, 37 | const std::vector>& causal_strength) 38 | : cross_mapping(cross_mapping), causal_strength(causal_strength) {} 39 | 40 | /// Value constructor (for rvalues): moves both arguments into the members. 41 | CMCRes(std::vector&& cross_mapping, 42 | std::vector>&& causal_strength) noexcept 43 | : cross_mapping(std::move(cross_mapping)), causal_strength(std::move(causal_strength)) {} 44 | 45 | /// Defaulted copy/move constructors and assignment operators. 46 | CMCRes(const CMCRes&) = default; 47 | CMCRes& operator=(const CMCRes&) = default; 48 | CMCRes(CMCRes&&) noexcept = default; 49 | CMCRes& operator=(CMCRes&&) noexcept = default; 50 | }; 51 | 52 | /** 53 | * @brief Represents the result of an intersection calculation. 54 | * 55 | * This structure stores: 56 | * - `libsize`: The library size. 57 | * - `Intersection`: The intersection values as a vector of doubles. 58 | * 59 | */ 60 | struct IntersectionRes { 61 | size_t libsize; ///< Library size. 62 | std::vector Intersection; ///< Intersection values. 63 | 64 | /// Default constructor: initializes libsize to 0 and empty Intersection vector. 65 | IntersectionRes() : libsize(0), Intersection() {} 66 | 67 | /// Value constructor (for lvalues): sets libsize and copies the vector into Intersection. 68 | IntersectionRes(size_t t, const std::vector& x) 69 | : libsize(t), Intersection(x) {} 70 | 71 | /// Value constructor (for rvalues): sets libsize and moves the vector into Intersection. 72 | IntersectionRes(size_t t, std::vector&& x) noexcept 73 | : libsize(t), Intersection(std::move(x)) {} 74 | 75 | /// Defaulted copy/move constructors and assignment operators. 
76 | IntersectionRes(const IntersectionRes&) = default; 77 | IntersectionRes& operator=(const IntersectionRes&) = default; 78 | IntersectionRes(IntersectionRes&&) noexcept = default; 79 | IntersectionRes& operator=(IntersectionRes&&) noexcept = default; 80 | }; 81 | 82 | /** 83 | * @brief Holds the results of DeLong placement calculations. 84 | * 85 | * This structure stores: 86 | * - `theta`: The AUC or placement statistic. 
87 | * - `X`: Placement values for the positive class. 88 | * - `Y`: Placement values for the negative class. 89 | * 90 | * The move constructor and move assignment are marked `noexcept` 91 | * to enable efficient transfers when stored in standard containers 92 | * like `std::vector`. 93 | */ 94 | struct DeLongPlacementsRes { 95 | double theta; ///< The AUC or placement statistic. 96 | std::vector X; ///< Placement values for the positive class. 97 | std::vector Y; ///< Placement values for the negative class. 98 | 99 | /** 100 | * @brief Default constructor. 101 | * Initializes theta = 0.0 and empty vectors. 102 | */ 103 | DeLongPlacementsRes() : theta(0.0), X(), Y() {} 104 | 105 | /** 106 | * @brief Value constructor (for lvalues). 107 | * Performs deep copies of input vectors. 108 | * 109 | * @param t The statistic value (e.g., theta). 110 | * @param x The placement values for the positive class. 111 | * @param y The placement values for the negative class. 112 | */ 113 | DeLongPlacementsRes(double t, const std::vector& x, const std::vector& y) 114 | : theta(t), X(x), Y(y) {} 115 | 116 | /** 117 | * @brief Value constructor (for rvalues). 118 | * Transfers ownership of vectors without copying. 119 | */ 120 | DeLongPlacementsRes(double t, std::vector&& x, std::vector&& y) noexcept 121 | : theta(t), X(std::move(x)), Y(std::move(y)) {} 122 | 123 | /// Defaulted copy/move constructors and assignment operators. 124 | DeLongPlacementsRes(const DeLongPlacementsRes&) = default; 125 | DeLongPlacementsRes& operator=(const DeLongPlacementsRes&) = default; 126 | DeLongPlacementsRes(DeLongPlacementsRes&&) noexcept = default; 127 | DeLongPlacementsRes& operator=(DeLongPlacementsRes&&) noexcept = default; 128 | }; 129 | 130 | /** 131 | * @brief Represents the result of causal pattern analysis between two signature spaces. 132 | * 133 | * This structure stores both per-sample and aggregated causality measures derived 134 | * from pattern-space interactions. 
It is designed to capture symbolic dynamics 135 | * relationships between real and predicted signals. 136 | * 137 | * Fields include: 138 | * - **NoCausality, PositiveCausality, NegativeCausality, DarkCausality**: 139 | * Per-sample causal strengths categorized by type. 140 | * - **RealLoop**: Index list of valid samples actually processed. 141 | * - **PatternTypes**: Encoded causal type for each sample (0=no, 1=pos, 2=neg, 3=dark); **PatternStrings**: encoded diff strings (e.g. "123", "312"). 142 | * - **matrice**: Final averaged causality heatmap. 143 | * - **TotalPos, TotalNeg, TotalDark**: Global mean strengths for each category (default-initialized to NaN). 144 | */ 145 | struct PatternCausalityRes { 146 | std::vector NoCausality; ///< Strengths classified as "no causality". 147 | std::vector PositiveCausality; ///< Strengths classified as "positive causality". 148 | std::vector NegativeCausality; ///< Strengths classified as "negative causality". 149 | std::vector DarkCausality; ///< Strengths classified as "dark causality". 150 | std::vector RealLoop; ///< Indices of valid samples actually processed. 151 | std::vector PatternTypes; ///< Encoded pattern types (0–3). 152 | std::vector PatternStrings; ///< Encoded diff strings ("123","312",etc). 153 | std::vector> matrice;///< Averaged heatmap (pattern × pattern). 154 | double TotalPos = std::numeric_limits::quiet_NaN(); ///< Global mean of diagonal elements. 155 | double TotalNeg = std::numeric_limits::quiet_NaN(); ///< Global mean of anti-diagonal elements. 156 | double TotalDark = std::numeric_limits::quiet_NaN();///< Global mean of off-diagonal elements. 157 | 158 | /// Default constructor: initializes empty result with NaN totals. 159 | PatternCausalityRes() = default; 160 | 161 | /// Copy constructor. 162 | PatternCausalityRes(const PatternCausalityRes&) = default; 163 | 164 | /// Move constructor (noexcept for efficiency in containers). 165 | PatternCausalityRes(PatternCausalityRes&&) noexcept = default; 166 | 167 | /// Copy assignment. 
168 | PatternCausalityRes& operator=(const PatternCausalityRes&) = default; 169 | 170 | /// Move assignment. 171 | PatternCausalityRes& operator=(PatternCausalityRes&&) noexcept = default; 172 | }; 173 | 174 | #endif // DataStruct_H 175 | --------------------------------------------------------------------------------