├── .Rbuildignore ├── .gitignore ├── CITATION.cff ├── DESCRIPTION ├── LICENSE ├── NAMESPACE ├── NEWS.md ├── R ├── EDM.R ├── EDM_AuxFuncs.R ├── Examples.R ├── apps │ ├── CCM-Multiprocess.R │ └── Embedding.R └── zzz.R ├── README.md ├── data ├── EvergladesFlow.RData ├── Lorenz5D.RData ├── TentMap.RData ├── TentMapNoise.RData ├── Thrips.RData ├── block_3sp.RData ├── circle.RData ├── paramecium_didinium.RData └── sardine_anchovy_sst.RData ├── doc ├── rEDM-tutorial.html ├── rEDM-tutorial.pdf └── rEDM-tutorial.tex ├── man ├── CCM.Rd ├── ComputeError.Rd ├── Embed.Rd ├── EmbedDimension.Rd ├── EvergladesFlow.Rd ├── Lorenz5D.Rd ├── MakeBlock.Rd ├── Multiview.Rd ├── PredictInterval.Rd ├── PredictNonlinear.Rd ├── SMap.Rd ├── Simplex.Rd ├── SurrogateData.Rd ├── TentMap.Rd ├── TentMapNoise.Rd ├── Thrips.Rd ├── block_3sp.Rd ├── circle.Rd ├── figures │ ├── optimal-E-1.png │ ├── simplex-projection-1.png │ ├── sunspots-1.png │ └── unnamed-chunk-3-1.png ├── paramecium_didinium.Rd ├── rEDM.Rd └── sardine_anchovy_sst.Rd ├── src ├── CCM.cpp ├── ComputeError.cpp ├── DataFrame.cpp ├── Embed.cpp ├── EmbedDim.cpp ├── Makevars ├── Makevars.win ├── Multiview.cpp ├── ParameterList.cpp ├── PredictInterval.cpp ├── PredictNL.cpp ├── RcppEDMCommon.cpp ├── RcppEDMCommon.h ├── RcppExports.cpp ├── SMap.cpp ├── Simplex.cpp └── cppEDM │ ├── lib │ ├── .gitignore │ └── ignore.h │ └── src │ ├── API.cc │ ├── API.h │ ├── CCM.cc │ ├── CCM.h │ ├── Common.cc │ ├── Common.h │ ├── DataFrame.h │ ├── DateTime.cc │ ├── DateTime.h │ ├── EDM.cc │ ├── EDM.h │ ├── EDM_Formatting.cc │ ├── EDM_Neighbors.cc │ ├── EDM_Neighbors.h │ ├── Eval.cc │ ├── Multiview.cc │ ├── Multiview.h │ ├── Parameter.cc │ ├── Parameter.h │ ├── SMap.cc │ ├── SMap.h │ ├── Simplex.cc │ ├── Simplex.h │ ├── Version.h │ ├── makefile │ ├── makefile.mingw │ └── makefile.windows ├── tests ├── testthat.R └── testthat │ ├── test-1-Simplex.R │ ├── test-2-SMap.R │ ├── test-3-CCM.R │ ├── test-4-Multiview.R │ ├── test-5-EmbedDimension.R │ ├── test-6-PredictInterval.R │ └── test-7-PredictNonlinear.R └── vignettes ├── .gitignore ├── CrossMap.png ├── CrossMap.svg ├── Lorenz_Projection.png ├── Lorenz_Projection.svg ├── Lorenz_Reconstruct.png ├── Lorenz_Reconstruct.svg ├── ParameterTable.csv ├── References.bib ├── rEDM-algorithms.ltx ├── rEDM-algorithms.pdf ├── rEDM-tutorial.Rmd ├── rEDM-tutorial.html └── rEDM-tutorial.pdf /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ..Rcheck 2 | ^README.Rmd 3 | ^tests 4 | ^cppEDM/lib 5 | ^.*\.o 6 | ^.*\.a 7 | ^doc$ 8 | ^Meta$ 9 | ^vignettes/rEDM-tutorial_cache$ 10 | ^vignettes/vignette_figs/ 11 | ^CITATION.cff 12 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | Meta 2 | /doc/ 3 | /Meta/ 4 | -------------------------------------------------------------------------------- /CITATION.cff: -------------------------------------------------------------------------------- 1 | cff-version: 1.2.0 2 | title: rEDM 3 | message: >- 4 | If you use this software please cite. Licensed by the 5 | University of California for educational, research and 6 | non-profit purposes. 7 | type: software 8 | authors: 9 | - given-names: Joseph 10 | family-names: Park 11 | email: josephpark@ieee.org 12 | orcid: 'https://orcid.org/0000-0001-5411-1409' 13 | - given-names: Cameron 14 | family-names: Smith 15 | orcid: 'https://orcid.org/0000-0003-0020-5607' 16 | email: omid.smith.cameron@gmail.com 17 | - given-names: George 18 | family-names: Sugihara 19 | orcid: 'https://orcid.org/0000-0002-2863-6946' 20 | - given-names: Ethan 21 | family-names: Deyle 22 | orcid: 'https://orcid.org/0000-0001-8704-8434' 23 | - given-names: Erik 24 | family-names: Saberski 25 | orcid: 'https://orcid.org/0000-0002-6475-6187' 26 | - given-names: Hao 27 | family-names: Ye 28 | orcid: 'https://orcid.org/0000-0002-8630-1458' 29 | - {} 30 | identifiers: 31 | - type: url 32 | value: 'https://github.com/SugiharaLab/rEDM' 33 | repository-code: 'https://github.com/SugiharaLab/rEDM' 34 | url: 'https://github.com/SugiharaLab/rEDM#readme' 35 | repository: 'https://CRAN.R-project.org/package=rEDM' 36 | abstract: >- 37 | R wrapper of C++ EDM tools. UCSD Scripps Institution of 38 | Oceanography, Sugihara Lab. 39 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: rEDM 2 | Type: Package 3 | Title: Empirical Dynamic Modeling ('EDM') 4 | Version: 1.15.4 5 | Date: 2024-04-05 6 | Authors@R: c( person("Joseph", "Park", role = c("aut", "cre"), 7 | email = "JosephPark@IEEE.org", 8 | comment = c(ORCID = "0000-0001-5411-1409")), 9 | person("Cameron", "Smith", role = c("aut"), 10 | email = "cos008@ucsd.edu", 11 | comment = c(ORCID = "0000-0003-0020-5607")), 12 | person("George", "Sugihara", role = c("aut", "ccp"), 13 | comment = c(ORCID = "0000-0002-2863-6946")), 14 | person("Ethan", "Deyle", role = c("aut"), 15 | comment = c(ORCID = "0000-0001-8704-8434")), 16 | person("Erik", "Saberski", role = c("ctb"), 17 | comment = c(ORCID = "0000-0002-6475-6187")), 18 | person("Hao", "Ye", role = c("ctb"), 19 | comment = c(ORCID = "0000-0002-8630-1458")), 20 | person("The Regents of the University of California", 21 | role = c("cph") ) ) 22 | Maintainer: Joseph Park 23 | Description: An implementation of 'EDM' algorithms based on research software developed for internal use at the Sugihara Lab ('UCSD/SIO'). The package is implemented with 'Rcpp' wrappers around the 'cppEDM' library. It implements the 'simplex' projection method from Sugihara & May (1990) , the 'S-map' algorithm from Sugihara (1994) , convergent cross mapping described in Sugihara et al. (2012) , and, 'multiview embedding' described in Ye & Sugihara (2016) . 24 | License: BSD_2_clause + file LICENSE 25 | LazyData: true 26 | LazyLoad: yes 27 | Imports: methods, Rcpp (>= 1.0.1) 28 | LinkingTo: Rcpp, RcppThread 29 | Suggests: knitr, rmarkdown, formatR 30 | VignetteBuilder: knitr 31 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | YEAR:2019 2 | COPYRIGHT HOLDER:The Regents of the University of California. 3 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | useDynLib( rEDM, .registration = TRUE ) 2 | 3 | export( Simplex ) 4 | export( SMap ) 5 | export( CCM ) 6 | export( Multiview ) 7 | export( Embed ) 8 | export( MakeBlock ) 9 | export( ComputeError ) 10 | export( EmbedDimension ) 11 | export( PredictInterval ) 12 | export( PredictNonlinear ) 13 | export( SurrogateData ) 14 | 15 | # Legacy functions 16 | # export( ccm ) 17 | # export( block_lnlp ) 18 | # export( s_map ) 19 | # export( simplex ) 20 | # export( multiview ) 21 | # export( make_block ) 22 | # export( compute_stats ) 23 | # export( make_surrogate_data ) 24 | 25 | importFrom("grDevices", "dev.cur", "dev.new", "dev.list") 26 | importFrom("graphics", "abline", "legend", "lines", "mtext", "par", "plot") 27 | importFrom("utils", "data", "read.csv") 28 | importFrom("stats", "fft", "predict", "rnorm", "runif", "sd", "smooth.spline", "cov", "pnorm") 29 | import( methods ) 30 | import( Rcpp ) 31 | -------------------------------------------------------------------------------- /NEWS.md: -------------------------------------------------------------------------------- 1 | #### rEDM NEWS 2 | 3 | 2024-04-05 version 1.15.4 4 | 5 | --- 6 | 7 | ##### NOTES: 8 | - It is required as of version 1.15 to use functions: `Simplex`, `SMap`, `CCM`, `Embed`, `Multiview`, `EmbedDimension`, `PredictInterval`, `PredictNonlinear`, `ComputeError` instead of the legacy version 0.7 signatures. See Version 1.3 notes. 9 | - [Rcpp](https://CRAN.R-project.org/package=Rcpp) imposes a 20 parameter limit on functions. The rEDM wrapper of [cppEDM](https://github.com/SugiharaLab/cppEDM#empirical-dynamic-modeling-edm) therefore does not invoke the full cppEDM API. Users requiring the full API are referred to the [pyEDM](https://pypi.org/project/pyEDM/) wrapper. 10 | - `SMap` linear system solver regularization: The R [glmnet](https://CRAN.R-project.org/package=glmnet) package does not seperate the model from the data. This prevents integration in rEDM. Users requiring `SMap` regularization are referred to the [pyEDM](https://pypi.org/project/pyEDM/) wrapper. 11 | 12 | --- 13 | 14 | ##### Version 1.15 15 | - `SMap()` `ignoreNan` parameter added. If `ignoreNan` is `TRUE` (default) the library is redefined to ignore embedding vectors with nan. If `ignoreNan` is `FALSE` no change is made, the user can manually specify library segments in `lib`. 16 | - `SMap()` return list includes data.frame of SVD singular values. 17 | - `noTime` parameter added, default `FALSE`. If `noTime` is `TRUE` the first column of the data is not required to be a time vector. A row index vector will be inserted and passed to cppEDM. 18 | - `const_pred` parameter removed due to Rcpp 20 parameter limit. 19 | - `CCM()` `replacement` parameter removed. 20 | - Legacy overload functions removed. 21 | - Version 1.15.1 `ignoreNan` added in `PredictNonlinear()`. Replace unicode in pLot labels with mathplot expression. cppEDM initialize `nanFound` in DataFrame.h for UBSAN. Sync with cppEDM 1.15.1. 22 | - Version 1.15.2 Allow `columns` names with spaces. If the `columns` argument is a string use the "," delimiter to separate names. Remove `SMap` warning for disjoint library. 23 | - Version 1.15.3 Allow `columns` and `target` names with spaces in CCM. 24 | - Version 1.15.4 Move `SMap` warning for NaN into `verbose` mode. cppEDM `ComputeError` NaN removal improved, require more than 5 observation : prediction pairs. 25 | 26 | ##### Version 1.14 27 | - cppEDM core added `generateLibrary` parameter to `Simplex()` and `SMap()`. If `TRUE` the state-space library has newly generated points added. Not available due to Rcpp 20 parameter limit. 28 | - Version 1.14.2 Remove CCM multivariable warning, positive tau warning. Add Embedding application. 29 | 30 | ##### Version 1.13 31 | - Adds `embedded` and multivariate embedding to `CCM()`. 32 | - Parameters `pathOut`, `predictFile` are removed from `CCM` to accomodate the Rcpp 20 parameter limit. 33 | - Version 1.13.1 cppEDM DateTime H:M:S fix. Allow first column data.frame characters. Set target to columns[0] if empty. 34 | 35 | ##### Version 1.12 36 | - Adds `exclusionRadius` and `validLib` to `EmbedDimension()`, `PredictInterval()` and `PredictNonlinear()`. 37 | - Version 1.12.2 Multiview return data.frame, correct SMap coefficient labels. 38 | - Version 1.12.2.1 Rcpp character encoding workaround on Windows for DataFrame column names. 39 | - Version 1.12.3 cppEDM DateTime regex removed to avoid UTF-8 gcc issue in Windows. 40 | 41 | ##### Version 1.11 42 | - Removes `nan` from `SMap` `columns` and `target`. Warning generated. 43 | 44 | ##### Version 1.10 45 | - Adds the `generateSteps` parameter to `Simplex` and `SMap` implementing generative feedback prediction. 46 | - Adds the `parameterList` argument to `Simplex`, `SMap`, `CCM` and `Multiview`. 47 | - Parameters `pathOut`, `predictFile` are removed from `SMap`, `Multiview` to accomodate the Rcpp 20 parameter limit. 48 | - Version 1.10.1 converts `parameterList` values to numerics. 49 | - Version 1.10.2 is a bug fix for `Tp < 1` in generative mode. 50 | - Version 1.10.3 `SMap` `dgelss` error message. `CCM` `libSize` limits `Tp < 0`. 51 | 52 | ##### Version 1.9 53 | - Adds the `validLib` parameter to `Simplex` and `SMap`. `validLib` is a boolean vector with the same number of elements as input data rows. For `validLib` elements that are `false`, the correspoding data row will not be included in the state-space library. 54 | - Version 1.9.1 Requires .csv dataFiles to have column names. 55 | - Version 1.9.2 is a bug fix for `CCM` parameter validation with `tau > 0`. 56 | - Version 1.9.3 is a bug fix for `CCM` parameter validation with `Tp < -1`. 57 | 58 | ##### Version 1.8 59 | - Removes the deletion of partial embedding data rows. 60 | - Adds the `deletePartial` argument to `MakeBlock`. 61 | - Bug fix in disjoint library indexing. 62 | 63 | ##### Version 1.7 64 | - Updates nearest neighbors to better align results with legacy code. 65 | - Bug fixes in `SMap`, `CMM` `includeData`, and, the use of disjoint libraries. 66 | 67 | ##### Version 1.6 68 | - Attempts to label `SMap` coefficients with names from the `columns` and `target` parameters. 69 | - Adds exclusionRadius to `CCM`. 70 | 71 | ##### Version 1.5 72 | - Implemented an object oriented design in the core cppEDM. 73 | 74 | ##### Version 1.3 75 | - A major rewrite of the 'rEDM' package as an Rcpp wrapper for the [cppEDM](https://github.com/SugiharaLab/cppEDM#empirical-dynamic-modeling-edm) library providing a unified computation engine for EDM algorithms across C++, Python and R implementations. The revised package provides improved alignment between observed and forecast data rows, handling of date time vectors, and, strict exclusion of partial data vectors. 76 | 77 | - To align with cppEDM and pyEDM, function names and signatures have changed from versions 0.7 and earlier. **It is recommended to use the new functions: `Simplex`, `SMap`, `CCM`, `Embed`, `Multiview`, `EmbedDimension`, `PredictInterval`, `PredictNonlinear`, `ComputeError`.** See [EDM Documentation](https://sugiharalab.github.io/EDM_Documentation/) or the package documentation. 78 | 79 | - A legacy function interface is provided to emulate function signatures of rEDM 0.7, *but does not have complete coverage*. It also has slightly different return values since nested data.frames are not returned. Return values are either a data.frame, or, a named list of data.frames, as noted in the man pages. Implemented functions' include: `simplex`, `s_map`, `block_lnlp`, `ccm`, `multiview`, `make_block`, `compute_stats` and `make_surrogate_data`. Functions `ccm_means`, `tde_gp`, `block_gp` and `test_nonlinearity` are deprecated. 80 | -------------------------------------------------------------------------------- /R/Examples.R: -------------------------------------------------------------------------------- 1 | 2 | #------------------------------------------------------------------------ 3 | # 4 | #------------------------------------------------------------------------ 5 | Examples = function() { 6 | 7 | library( rEDM ) 8 | 9 | # make sure data is loaded 10 | tryCatch( 11 | expr = { 12 | data( TentMap, envir = environment() ) 13 | data( TentMapNoise, envir = environment() ) 14 | data( block_3sp, envir = environment() ) 15 | data( circle, envir = environment() ) 16 | data( sardine_anchovy_sst, envir = environment() ) 17 | }, 18 | error = function( err ) { 19 | print( err ) 20 | stop("Examples(): Failed to load package data.") 21 | } 22 | ) 23 | 24 | if ( is.null( dev.list() ) ) { 25 | newPlot( mfrow = c( 4, 2 ) ) 26 | } 27 | else { 28 | par( mfrow = c( 4, 2 ) ) 29 | } 30 | 31 | # EmbedDimension() 32 | cmd = paste0('EmbedDimension( dataFrame = TentMap,', 33 | ' lib = "1 100", pred = "201 500",', 34 | ' columns = "TentMap", target = "TentMap")' ) 35 | df = eval( parse( text = cmd ) ) 36 | 37 | # PredictInterval() 38 | cmd = paste0('PredictInterval( dataFrame = TentMap,', 39 | ' lib = "1 100", pred = "201 500", E = 2,', 40 | ' columns = "TentMap", target = "TentMap") ') 41 | df = eval( parse( text = cmd ) ) 42 | 43 | # PredictNonlinear() 44 | cmd = paste0('PredictNonlinear( dataFrame = TentMapNoise,', 45 | ' E = 2,lib = "1 100", pred = "201 500", ', 46 | ' columns = "TentMap",target = "TentMap") ') 47 | df = eval( parse( text = cmd ) ) 48 | 49 | # Simplex() 50 | # Tent map : specify multivariable columns embedded = TRUE 51 | cmd = paste0('Simplex( dataFrame = block_3sp,', 52 | ' lib = "1 99", pred = "100 195", ', 53 | ' E = 3, embedded = TRUE, showPlot = TRUE,', 54 | ' columns = "x_t y_t z_t", target = "x_t") ') 55 | df = eval( parse( text = cmd ) ) 56 | 57 | # Simplex() 58 | # Tent map : Embed column x_t to E = 3, embedded = False 59 | cmd = paste0('Simplex( dataFrame = block_3sp,', 60 | ' lib = "1 99", pred = "105 190", ', 61 | ' E = 3, showPlot = TRUE,', 62 | ' columns = "x_t", target = "x_t") ') 63 | df = eval( parse( text = cmd ) ) 64 | 65 | # Multiview() 66 | cmd = paste0('Multiview( dataFrame = block_3sp,', 67 | ' lib = "1 99", pred = "105 190", ', 68 | ' E = 3, columns = "x_t y_t z_t", target = "x_t",', 69 | ' showPlot = TRUE) ') 70 | df = eval( parse( text = cmd ) ) 71 | 72 | # CCM demo 73 | cmd = paste0('CCM( dataFrame = sardine_anchovy_sst,', 74 | ' E = 3, Tp = 0, columns = "anchovy", target = "np_sst",', 75 | ' libSizes = "10 70 10", sample = 100, verbose = TRUE, ', 76 | ' showPlot = TRUE) ') 77 | df = eval( parse( text = cmd ) ) 78 | 79 | par( ask = TRUE ) 80 | # SMap circle : specify multivariable columns embedded = TRUE 81 | cmd = paste0('SMap( dataFrame = circle,', 82 | ' lib = "1 100", pred = "110 190", theta = 4, E = 2,', 83 | ' verbose = TRUE, showPlot = TRUE, embedded = TRUE,', 84 | ' columns = "x y", target = "x") ') 85 | df = eval( parse( text = cmd ) ) 86 | par( ask = FALSE ) 87 | } 88 | 89 | #------------------------------------------------------------------------ 90 | # 91 | #------------------------------------------------------------------------ 92 | newPlot = function( 93 | mar = c( 4, 4, 1, 1 ), 94 | mgp = c( 2.3, 0.8, 0 ), 95 | cex = 1.5, 96 | cex.axis = 1.3, 97 | cex.lab = 1.3, 98 | mfrow = c( 1, 1 ) 99 | ) { 100 | dev.new() 101 | par( mar = mar ) 102 | par( mgp = mgp ) 103 | par( cex = cex ) 104 | par( cex.axis = cex.axis ) 105 | par( cex.lab = cex.lab ) 106 | par( mfrow = mfrow ) 107 | invisible( dev.cur() ) 108 | } 109 | -------------------------------------------------------------------------------- /R/apps/CCM-Multiprocess.R: -------------------------------------------------------------------------------- 1 | 2 | library( rEDM ) 3 | library( foreach ) 4 | library( doParallel ) 5 | 6 | #------------------------------------------------------------------- 7 | # CCM for all dataFrame columns against target using foreach %dopar% 8 | # Presumes first column is time/index, not processed 9 | #------------------------------------------------------------------- 10 | CCM_MP_Columns = function( 11 | dataFrame = NULL, 12 | target = 'V5', 13 | libSizes = '20 920 100', 14 | sample = 10, 15 | E = 5, 16 | Tp = 0, 17 | cores = 4 # CCM uses 2 cores, max is detectCores()/2 - 2 18 | ) { 19 | 20 | if ( is.null( dataFrame ) ) { dataFrame = Lorenz5D } 21 | 22 | registerDoParallel( cores = cores ) 23 | 24 | dataCols = names( dataFrame )[ 2 : ncol( dataFrame ) ] # Skip first column 25 | 26 | # Parallel process columns using foreach ... %dopar% 27 | L = foreach ( col = iter( dataCols ) ) %dopar% { 28 | 29 | CCM( dataFrame = dataFrame, 30 | E = E, 31 | Tp = Tp, 32 | columns = col, 33 | target = target, 34 | libSizes = libSizes, 35 | sample = sample ) 36 | } 37 | 38 | stopImplicitCluster() 39 | 40 | # Get names for the returned list L from the CCM data.frame 41 | keys = c() 42 | for ( cmap in L ) { 43 | keys = c( keys, names( cmap )[3] ) 44 | } 45 | names( L ) = keys 46 | 47 | invisible( L ) 48 | } 49 | 50 | #--------------------------------------------------------------------- 51 | # CCM for single columns : target over a set of libSizes using foreach %dopar% 52 | # libSizeList is a list of partioned libSizes 53 | # libSizesList elements can be any libSizes format used by CCM 54 | #--------------------------------------------------------------------- 55 | CCM_MP_LibSizes = function( 56 | dataFrame = NULL, 57 | columns = 'V1', 58 | target = 'V5', 59 | libSizesList = c( '20 50 70 100', '150 200 250 300', '400 500 600 700 900' ), 60 | sample = 10, 61 | E = 5, 62 | Tp = 0, 63 | cores = 4 # CCM uses 2 cores, max is detectCores()/2 - 2 64 | ) { 65 | 66 | if ( is.null( dataFrame ) ) { dataFrame = Lorenz5D } 67 | 68 | registerDoParallel( cores = cores ) 69 | 70 | # Parallel process libSizesList using foreach ... %dopar% 71 | L = foreach ( libSize = iter( libSizesList ) ) %dopar% { 72 | 73 | CCM( dataFrame = dataFrame, 74 | E = E, 75 | Tp = Tp, 76 | columns = columns, 77 | target = target, 78 | libSizes = libSize, 79 | sample = sample ) 80 | } 81 | 82 | stopImplicitCluster() 83 | 84 | # Set names 85 | names( L ) = libSizesList 86 | 87 | invisible( L ) 88 | } 89 | 90 | #--------------------------------------------------------------------- 91 | # CCM for single columns : target over a set of libSizes using clusterApply 92 | # libSizeList is a list of partioned libSizes 93 | # libSizesList elements can be any libSizes format used by CCM 94 | # 95 | # DO NOT USE mclapply 96 | # From ?mclapply: 97 | # It is _strongly discouraged_ to use these functions with 98 | # multi-threaded libraries or packages (see ‘mcfork’ for more 99 | # details). If in doubt, it is safer to use a non-FORK cluster 100 | # (see ‘makeCluster’, ‘clusterApply’). 101 | #--------------------------------------------------------------------- 102 | CCM_MP_LibSizes_cluster = function( 103 | dataFrame = NULL, 104 | columns = 'V1', 105 | target = 'V5', 106 | libSizesList = c( '20 30 40 50 60 70 80 90 100', 107 | '120 150 200 250 300', 108 | '400 500 600 700 900' ), 109 | sample = 20, 110 | E = 5, 111 | Tp = 0, 112 | cores = 4 113 | 114 | ) { 115 | 116 | if ( is.null( dataFrame ) ) { dataFrame = Lorenz5D } 117 | 118 | cl = makeCluster( cores ) 119 | 120 | clusterExport( cl, list("CCM") ) 121 | 122 | cmap = clusterApply( cl = cl, x = libSizesList, fun = CrossMapFunc, 123 | dataFrame, E, Tp, columns, target, sample ) 124 | 125 | stopCluster( cl ) 126 | 127 | invisible( cmap ) 128 | } 129 | 130 | #--------------------------------------------------------------------- 131 | # Call rEDM CCM on behalf of CCM_MP_LibSizes_cluster() clusterApply() 132 | #--------------------------------------------------------------------- 133 | CrossMapFunc = function( 134 | libSizes, # First argument : from clusterApply( x = libSizesList ) 135 | dataFrame, E, Tp, columns, target, sample 136 | ) { 137 | 138 | cm = CCM( dataFrame = dataFrame, 139 | E = E, 140 | columns = columns, 141 | target = target, 142 | libSizes = libSizes, 143 | sample = sample ) 144 | } 145 | -------------------------------------------------------------------------------- /R/apps/Embedding.R: -------------------------------------------------------------------------------- 1 | 2 | library( rEDM ) 3 | 4 | #------------------------------------------------------------------- 5 | # EDM Embed wrapper 6 | # Create time-delay embedding with time column for EDM. 7 | # Useful to create mixed multivariate embeddings for SMap and 8 | # embeddings with time-advanced vectors. 9 | # Rename V(t-0), V(t+0) to V. Add Time column. 10 | # If columns is NULL, embedd all except the first (time) column. 11 | # If plusminus create time-advanced & time-delayed columns. 12 | #------------------------------------------------------------------- 13 | Embedding = function( 14 | dataFrame = NULL, 15 | dataFile = NULL, 16 | outFile = NULL, 17 | plusminus = FALSE, 18 | columns = NULL, 19 | E = 2, 20 | tau = -1, 21 | verbose = FALSE 22 | ) { 23 | 24 | if ( is.null( dataFrame ) & is.null( dataFile ) ) { 25 | stop( 'dataFrame and dataFile are empty, specify one.' ) 26 | } 27 | if ( tau > 0 & plusminus ) { 28 | # Convert to negative 29 | tau = -tau 30 | } 31 | 32 | if ( is.null( dataFrame ) ) { 33 | # Load from dataFile 34 | data = read.csv( dataFile ) 35 | } 36 | else { 37 | data = dataFrame 38 | } 39 | 40 | # Presume time is first column 41 | timeName = colnames( data )[1] 42 | timeSeries = data[ , timeName ] 43 | 44 | # If no columns specified, use all except first 45 | if ( is.null( columns ) ) { 46 | columns = colnames( data )[ 2 : ncol( data ) ] 47 | } 48 | 49 | if ( verbose ) { 50 | print( paste( "Time column: ", timeName ) ) 51 | print( "Embed columns: " ); print( columns ) 52 | } 53 | 54 | # Create embeddings of columns 55 | # There will be redundancies vis V1(t-0), V1(t+0) 56 | if ( plusminus ) { 57 | embed_minus = Embed( dataFrame = data, E = E, tau = tau, columns = columns ) 58 | embed_plus = Embed( dataFrame = data, E = E, tau = abs( tau ), 59 | columns = columns ) 60 | embed = cbind( timeSeries, embed_minus, embed_plus, stringsAsFactors=FALSE ) 61 | 62 | # TRUE / FALSE vector 63 | cols_tplus0 = grepl( '(t+0)', colnames( embed ), fixed = TRUE ) 64 | # Remove *(t+0) : redunant with *(t-0) 65 | embed = embed[ , !cols_tplus0 ] 66 | } 67 | else { 68 | embed_ = Embed( dataFrame = data, E = E, tau = tau, columns = columns ) 69 | embed = cbind( timeSeries, embed_, stringsAsFactors = FALSE ) 70 | } 71 | 72 | # Rename *(t-0) to original column names 73 | columnNames = colnames( embed ) 74 | for ( i in 1:length( columnNames ) ) { 75 | if ( grepl( '(t-0)', columnNames[i], fixed = TRUE ) ) { 76 | columnNames[i] = sub( '(t-0)', '', columnNames[i], fixed = TRUE ) 77 | } 78 | } 79 | 80 | # Rename *(t+0) to original column names 81 | for ( i in 1:length( columnNames ) ) { 82 | if ( grepl( '(t+0)', columnNames[i], fixed = TRUE ) ) { 83 | columnNames[i] = sub( '(t+0)', '', columnNames[i], fixed = TRUE ) 84 | } 85 | } 86 | 87 | # Rename first column to original time column name 88 | columnNames[ 1 ] = timeName 89 | colnames( embed ) = columnNames 90 | 91 | if ( verbose ) { 92 | print( head( embed, 4 ) ) 93 | print( tail( embed, 4 ) ) 94 | } 95 | 96 | if ( ! is.null( outFile ) ) { 97 | write.csv( embed, file = outFile, row.names = FALSE ) 98 | } 99 | 100 | return( embed ) 101 | } 102 | -------------------------------------------------------------------------------- /R/zzz.R: -------------------------------------------------------------------------------- 1 | loadModule("EDMInternal", TRUE) 2 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | rEDM 2 | ==== 3 | 4 | Overview 5 | -------- 6 | 7 | The `rEDM` package is a collection of methods for Empirical Dynamic 8 | Modeling (EDM). EDM is based on the mathematical theory of 9 | reconstructing attractor manifolds from time series data, with 10 | applications to forecasting, causal inference, and more. It is based on 11 | research software developed for the [Sugihara Lab](https://deepeco.ucsd.edu/) 12 | (University of California San Diego, Scripps Institution of Oceanography). 13 | 14 | Empirical Dynamic Modeling (EDM) 15 | ------------------------------------- 16 | 17 | This package implements an R wrapper of 18 | [EDM](https://deepeco.ucsd.edu/nonlinear-dynamics-research/edm/) tools from 19 | the [cppEDM](https://github.com/SugiharaLab/cppEDM/#empirical-dynamic-modeling-edm) library. 20 | Introduction and documentation are are avilable 21 | [online](https://sugiharalab.github.io/EDM_Documentation/ "EDM Docs"), or 22 | in the package [tutorial](https://github.com/SugiharaLab/rEDM/blob/d5aafe06573be73f603488f6ee4ae68a73da5e12/doc/rEDM-tutorial.pdf "rEDM turorial"). 23 | 24 | Functionality includes: 25 | 26 | * Simplex projection (Sugihara and May 1990) 27 | * Sequential Locally Weighted Global Linear Maps (S-map) (Sugihara 1994) 28 | * Multivariate embeddings (Dixon et. al. 1999) 29 | * Convergent cross mapping (Sugihara et. al. 2012) 30 | * Multiview embedding (Ye and Sugihara 2016) 31 | 32 | Installation 33 | ------------ 34 | To install from CRAN [rEDM](https://CRAN.R-project.org/package=rEDM): 35 | 36 | install.packages(rEDM) 37 | 38 | Using R devtools for latest development version: 39 | 40 | install.packages("devtools") 41 | devtools::install_github("SugiharaLab/rEDM") 42 | 43 | Building from source: 44 | 45 | git clone https://github.com/SugiharaLab/rEDM.git 46 | cd rEDM 47 | R CMD INSTALL . 48 | 49 | Example 50 | ------- 51 | 52 | We begin by looking at annual time series of sunspots: 53 | 54 | df = data.frame(yr = as.numeric(time(sunspot.year)), 55 | sunspot_count = as.numeric(sunspot.year)) 56 | 57 | plot(df$yr, df$sunspot_count, type = "l", 58 | xlab = "year", ylab = "sunspots") 59 | 60 | ![](man/figures/sunspots-1.png) 61 | 62 | First, we use `EmbedDimension()` to determine the optimal embedding 63 | dimension, E: 64 | 65 | library(rEDM) # load the package 66 | # If you're new to the rEDM package, please consult the tutorial: 67 | # vignette("rEDM-tutorial") 68 | 69 | E.opt = EmbedDimension( dataFrame = df, # input data 70 | lib = "1 280", # portion of data to train 71 | pred = "1 280", # portion of data to predict 72 | columns = "sunspot_count", 73 | target = "sunspot_count" ) 74 | 75 | ![](man/figures/optimal-E-1.png) 76 | 77 | E.opt 78 | # E rho 79 | # 1 1 0.7397 80 | # 2 2 0.8930 81 | # 3 3 0.9126 82 | # 4 4 0.9133 83 | # 5 5 0.9179 84 | # 6 6 0.9146 85 | # 7 7 0.9098 86 | # 8 8 0.9065 87 | # 9 9 0.8878 88 | # 10 10 0.8773 89 | 90 | Highest predictive skill is found between `E = 3` and `E = 6`. Since we 91 | generally want a simpler model, if possible, we use `E = 3` to forecast 92 | the last 1/3 of data based on training (attractor reconstruction) from 93 | the first 2/3. 94 | 95 | simplex = Simplex( dataFrame = df, 96 | lib = "1 190", # portion of data to train 97 | pred = "191 287", # portion of data to predict 98 | columns = "sunspot_count", 99 | target = "sunspot_count", 100 | E = 3 ) 101 | 102 | plot( df$yr, df$sunspot_count, type = "l", lwd = 2, 103 | xlab = "year", ylab = "sunspots") 104 | lines( simplex$yr, simplex$Predictions, col = "red", lwd = 2) 105 | legend( 'topleft', legend = c( "Observed", "Predicted (year + 1)" ), 106 | fill = c( 'black', 'red' ), bty = 'n', cex = 1.3 ) 107 | 108 | ![](man/figures/simplex-projection-1.png) 109 | 110 | Further Examples 111 | ---------------- 112 | 113 | Please see the package vignettes for more details: 114 | 115 | browseVignettes("rEDM") 116 | 117 | ### References 118 | 119 | Sugihara G. and May R. 1990. Nonlinear forecasting as a way of 120 | distinguishing chaos from measurement error in time series. Nature, 121 | 344:734–741. 122 | 123 | Sugihara G. 1994. Nonlinear forecasting for the classification of 124 | natural time series. Philosophical Transactions: Physical Sciences and 125 | Engineering, 348 (1688) : 477–495. 126 | 127 | Dixon, P. A., M. Milicich, and G. Sugihara, 1999. Episodic fluctuations 128 | in larval supply. Science 283:1528–1530. 129 | 130 | Sugihara G., May R., Ye H., Hsieh C., Deyle E., Fogarty M., Munch S., 131 | 2012. Detecting Causality in Complex Ecosystems. Science 338:496-500. 132 | 133 | Ye H., and G. Sugihara, 2016. Information leverage in interconnected 134 | ecosystems: Overcoming the curse of dimensionality. Science 353:922–925. 135 | 136 | -------------------------------------------------------------------------------- /data/EvergladesFlow.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SugiharaLab/rEDM/aa8204c60a5b9c95d7901b879e3c347c71cfdf7c/data/EvergladesFlow.RData -------------------------------------------------------------------------------- /data/Lorenz5D.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SugiharaLab/rEDM/aa8204c60a5b9c95d7901b879e3c347c71cfdf7c/data/Lorenz5D.RData -------------------------------------------------------------------------------- /data/TentMap.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SugiharaLab/rEDM/aa8204c60a5b9c95d7901b879e3c347c71cfdf7c/data/TentMap.RData -------------------------------------------------------------------------------- /data/TentMapNoise.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SugiharaLab/rEDM/aa8204c60a5b9c95d7901b879e3c347c71cfdf7c/data/TentMapNoise.RData -------------------------------------------------------------------------------- /data/Thrips.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SugiharaLab/rEDM/aa8204c60a5b9c95d7901b879e3c347c71cfdf7c/data/Thrips.RData -------------------------------------------------------------------------------- /data/block_3sp.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SugiharaLab/rEDM/aa8204c60a5b9c95d7901b879e3c347c71cfdf7c/data/block_3sp.RData -------------------------------------------------------------------------------- /data/circle.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SugiharaLab/rEDM/aa8204c60a5b9c95d7901b879e3c347c71cfdf7c/data/circle.RData -------------------------------------------------------------------------------- /data/paramecium_didinium.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SugiharaLab/rEDM/aa8204c60a5b9c95d7901b879e3c347c71cfdf7c/data/paramecium_didinium.RData -------------------------------------------------------------------------------- /data/sardine_anchovy_sst.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SugiharaLab/rEDM/aa8204c60a5b9c95d7901b879e3c347c71cfdf7c/data/sardine_anchovy_sst.RData -------------------------------------------------------------------------------- /doc/rEDM-tutorial.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SugiharaLab/rEDM/aa8204c60a5b9c95d7901b879e3c347c71cfdf7c/doc/rEDM-tutorial.pdf -------------------------------------------------------------------------------- /man/CCM.Rd: -------------------------------------------------------------------------------- 1 | \name{CCM} 2 | \alias{CCM} 3 | \title{Convergent cross mapping using simplex projection} 4 | \usage{ 5 | CCM(pathIn = "./", dataFile = "", dataFrame = NULL, 6 | E = 0, Tp = 0, knn = 0, tau = -1, 7 | exclusionRadius = 0, columns = "", target = "", libSizes = "", 8 | sample = 0, random = TRUE, seed = 0, 9 | embedded = FALSE, includeData = FALSE, parameterList = FALSE, 10 | verbose = FALSE, showPlot = FALSE, noTime = FALSE) 11 | } 12 | \arguments{ 13 | \item{pathIn}{path to \code{dataFile}.} 14 | 15 | \item{dataFile}{.csv format data file name. The first column must be a time 16 | index or time values unless noTime is TRUE. The first row must be column names.} 17 | 18 | \item{dataFrame}{input data.frame. The first column must be a time 19 | index or time values unless noTime is TRUE. The columns must be named.} 20 | 21 | \item{E}{embedding dimension.} 22 | 23 | \item{Tp}{prediction horizon (number of time column rows).} 24 | 25 | \item{knn}{number of nearest neighbors. If knn=0, knn is set to E+1.} 26 | 27 | \item{tau}{lag of time delay embedding specified as number of 28 | time column rows.} 29 | 30 | \item{exclusionRadius}{excludes vectors from the search space of nearest 31 | neighbors if their relative time index is within exclusionRadius.} 32 | 33 | \item{columns}{string of whitespace separated column name(s), or vector 34 | of column names used to create the library. If individual column names 35 | contain whitespace place names in a vector, or, append ',' to the name.} 36 | 37 | \item{target}{column name used for prediction.} 38 | 39 | \item{libSizes}{string of 3 whitespace separated integer values 40 | specifying the intial library size, the final library size, 41 | and the library size increment. Can also be a list of strictly 42 | increasing library sizes.} 43 | 44 | \item{sample}{integer specifying the number of random samples to draw at 45 | each library size evaluation.} 46 | 47 | \item{random}{logical to specify random (\code{TRUE}) or sequential 48 | library sampling. Note \code{random = FALSE} is not convergent 49 | cross mapping.} 50 | 51 | \item{seed}{integer specifying the random sampler seed. If 52 | \code{seed=0} then a random seed is generated.} 53 | 54 | \item{embedded}{logical specifying if the input data are embedded.} 55 | 56 | \item{includeData}{logical to include statistics and predictions for 57 | every prediction in the ensemble.} 58 | 59 | \item{parameterList}{logical to add list of invoked parameters.} 60 | 61 | \item{verbose}{logical to produce additional console reporting.} 62 | 63 | \item{showPlot}{logical to plot results.} 64 | 65 | \item{noTime}{logical to allow input data with no time column.} 66 | } 67 | 68 | \value{ 69 | A data.frame with 3 columns. The first column is \code{LibSize} 70 | specifying the subsampled library size. Columns 2 and 3 report 71 | Pearson correlation coefficients for the prediction of X from Y, and 72 | Y from X. 73 | 74 | if \code{includeData = TRUE} a named list with the following data.frames 75 | data.frame \code{Combo_rho} columns: 76 | \tabular{ll}{ 77 | LibMeans\tab CCM mean correlations for each library size\cr 78 | CCM1_PredictStat\tab Forward cross map prediction statistics\cr 79 | CCM1_Predictions\tab Forward cross map prediction values\cr 80 | CCM2_PredictStat\tab Reverse cross map prediction statistics\cr 81 | CCM2_Predictions\tab Reverse cross map prediction values\cr 82 | } 83 | 84 | If \code{includeData = TRUE} and \code{parameterList = TRUE} a 85 | named list "parameters" is added. 86 | } 87 | 88 | \references{Sugihara G., May R., Ye H., Hsieh C., Deyle E., Fogarty M., Munch S., 2012. Detecting Causality in Complex Ecosystems. Science 338:496-500. 89 | } 90 | 91 | \description{ 92 | The state-space of a multivariate dynamical system (not a purely 93 | stochastic one) encodes coherent phase-space variable trajectories. If 94 | enough information is available, one can infer the presence or absence 95 | of cross-variable interactions associated with causal links between 96 | variables. \code{\link{CCM}} measures the extent to which states of 97 | variable Y can reliably estimate states of variable X. This can happen 98 | if X is causally influencing Y. 99 | 100 | If cross-variable state predictability converges as more state-space 101 | information is provided, this indicates a causal link. \code{\link{CCM}} 102 | performs this cross-variable mapping using Simplex, with convergence 103 | assessed across a range of observational library sizes as described in 104 | \cite{Sugihara et al. 2012}. 105 | } 106 | 107 | \details{ 108 | \code{\link{CCM}} computes the X:Y and Y:X cross-mappings in parallel 109 | using threads. 110 | } 111 | 112 | \examples{ 113 | data(sardine_anchovy_sst) 114 | df = CCM( dataFrame = sardine_anchovy_sst, E = 3, Tp = 0, columns = "anchovy", 115 | target = "np_sst", libSizes = "10 70 10", sample = 100 ) 116 | 117 | } 118 | -------------------------------------------------------------------------------- /man/ComputeError.Rd: -------------------------------------------------------------------------------- 1 | \name{ComputeError} 2 | \alias{ComputeError} 3 | \title{Compute error} 4 | \usage{ 5 | ComputeError(obs, pred) 6 | } 7 | \arguments{ 8 | \item{obs}{vector of observations.} 9 | 10 | \item{pred}{vector of predictions.} 11 | } 12 | 13 | \value{ 14 | A name list with components: 15 | \tabular{ll}{ 16 | rho \tab Pearson correlation\cr 17 | MAE \tab mean absolute error\cr 18 | RMSE \tab root mean square error\cr 19 | } 20 | } 21 | 22 | \description{ 23 | \code{\link{ComputeError}} evaluates the Pearson correlation 24 | coefficient, mean absolute error and root mean square error between two 25 | numeric vectors. 26 | } 27 | 28 | \examples{ 29 | data(block_3sp) 30 | smplx <- Simplex( dataFrame=block_3sp, lib="1 99", pred="105 190", E=3, 31 | columns="x_t", target="x_t") 32 | err <- ComputeError( smplx$Observations, smplx$Predictions ) 33 | } 34 | -------------------------------------------------------------------------------- /man/Embed.Rd: -------------------------------------------------------------------------------- 1 | \name{Embed} 2 | \alias{Embed} 3 | \title{Embed data with time lags} 4 | 5 | \usage{ 6 | Embed(path = "./", dataFile = "", dataFrame = NULL, E = 0, tau = -1, 7 | columns = "", verbose = FALSE) 8 | } 9 | 10 | \arguments{ 11 | \item{path}{path to \code{dataFile}.} 12 | 13 | \item{dataFile}{.csv format data file name. The first column must be a time 14 | index or time values. The first row must be column names. One of 15 | \code{dataFile} or \code{dataFrame} are required.} 16 | 17 | \item{dataFrame}{input data.frame. The first column must be a time 18 | index or time values. The columns must be named. One of 19 | \code{dataFile} or \code{dataFrame} are required.} 20 | 21 | \item{E}{embedding dimension.} 22 | 23 | \item{tau}{integer time delay embedding lag specified as number of 24 | time column rows.} 25 | 26 | \item{columns}{string of whitespace separated column name(s), or vector 27 | of column names used to create the library. If individual column names 28 | contain whitespace place names in a vector, or, append ',' to the name.} 29 | 30 | \item{verbose}{logical to produce additional console reporting.} 31 | } 32 | 33 | \description{ 34 | \code{\link{Embed}} performs Takens time-delay embedding on \code{columns}. 35 | } 36 | 37 | \value{ 38 | A data.frame with lagged columns. E columns for each variable specified 39 | in \code{columns}. 40 | } 41 | 42 | \details{ 43 | Each \code{columns} item will have E-1 time-lagged vectors created. 44 | The column name is appended with \code{(t-n)}. For example, data 45 | columns X, Y, with E = 2 will have columns named 46 | \code{X(t-0) X(t-1) Y(t-0) Y(t-1)}. 47 | 48 | The returned data.frame does not have a time column. The returned 49 | data.frame is truncated by tau * (E-1) rows to remove state vectors 50 | with partial data (NaN elements). 51 | } 52 | 53 | \examples{ 54 | data(circle) 55 | embed <- Embed( dataFrame = circle, E = 2, tau = -1, columns = "x y" ) 56 | } 57 | -------------------------------------------------------------------------------- /man/EmbedDimension.Rd: -------------------------------------------------------------------------------- 1 | \name{EmbedDimension} 2 | \alias{EmbedDimension} 3 | \title{Optimal embedding dimension} 4 | \usage{ 5 | EmbedDimension(pathIn = "./", dataFile = "", dataFrame = NULL, pathOut = "", 6 | predictFile = "", lib = "", pred = "", maxE = 10, Tp = 1, tau = -1, 7 | exclusionRadius = 0, columns = "", target = "", embedded = FALSE, 8 | verbose = FALSE, validLib = vector(), numThreads = 4, showPlot = TRUE, 9 | noTime = FALSE) 10 | } 11 | \arguments{ 12 | \item{pathIn}{path to \code{dataFile}.} 13 | 14 | \item{dataFile}{.csv format data file name. The first column must be a time 15 | index or time values unless noTime is TRUE. The first row must be column names.} 16 | 17 | \item{dataFrame}{input data.frame. The first column must be a time 18 | index or time values unless noTime is TRUE. The columns must be named.} 19 | 20 | \item{pathOut}{path for \code{predictFile} containing output predictions.} 21 | 22 | \item{predictFile}{output file name.} 23 | 24 | \item{lib}{string or vector with start and stop indices of input data 25 | rows used to create the library from observations. Mulitple row index 26 | pairs can be specified with each pair defining the first and last 27 | rows of time series observation segments used to create the library.} 28 | 29 | \item{pred}{string with start and stop indices of input data rows used for 30 | predictions. A single contiguous range is supported.} 31 | 32 | \item{maxE}{maximum value of E to evalulate.} 33 | 34 | \item{Tp}{prediction horizon (number of time column rows).} 35 | 36 | \item{tau}{lag of time delay embedding specified as number of 37 | time column rows.} 38 | 39 | \item{exclusionRadius}{excludes vectors from the search space of nearest 40 | neighbors if their relative time index is within exclusionRadius.} 41 | 42 | \item{columns}{string of whitespace separated column name(s), or vector 43 | of column names used to create the library. If individual column names 44 | contain whitespace place names in a vector, or, append ',' to the name.} 45 | 46 | \item{target}{column name used for prediction.} 47 | 48 | \item{embedded}{logical specifying if the input data are embedded.} 49 | 50 | \item{verbose}{logical to produce additional console reporting.} 51 | 52 | \item{validLib}{logical vector the same length as the number of data 53 | rows. Any data row represented in this vector as FALSE, will not be 54 | included in the library.} 55 | 56 | \item{numThreads}{number of parallel threads for computation.} 57 | 58 | \item{showPlot}{logical to plot results.} 59 | 60 | \item{noTime}{logical to allow input data with no time column.} 61 | } 62 | 63 | \value{ 64 | A data.frame with columns \code{E, rho}. 65 | } 66 | 67 | \description{ 68 | \code{\link{EmbedDimension}} uses \code{\link{Simplex}} to evaluate 69 | prediction accuracy as a function of embedding dimension. 70 | } 71 | 72 | %\details{ 73 | %} 74 | 75 | \examples{ 76 | data(TentMap) 77 | E.rho = EmbedDimension( dataFrame = TentMap, lib = "1 100", pred = "201 500", 78 | columns = "TentMap", target = "TentMap", showPlot = FALSE ) 79 | } 80 | -------------------------------------------------------------------------------- /man/EvergladesFlow.Rd: -------------------------------------------------------------------------------- 1 | \docType{data} 2 | \name{EvergladesFlow} 3 | \alias{EvergladesFlow} 4 | \title{Water flow to NE Everglades} 5 | \format{A data frame with 1379 rows and 2 columns: 6 | \describe{ 7 | \item{\code{Date}}{Date.} 8 | \item{\code{S12CD_S333_CFS}}{Cumulative weekly flow (CFS).} 9 | } 10 | } 11 | \usage{ 12 | EvergladesFlow 13 | } 14 | \description{ 15 | Cumulative weekly water flow into northeast Everglades from water 16 | control structures S12C, S12D and S333 from 1980 through 2005. 17 | } 18 | \keyword{datasets} 19 | -------------------------------------------------------------------------------- /man/Lorenz5D.Rd: -------------------------------------------------------------------------------- 1 | \docType{data} 2 | \name{Lorenz5D} 3 | \alias{Lorenz5D} 4 | \title{5-D Lorenz'96} 5 | \format{Data frame with 1000 rows and 6 columns 6 | \describe{ 7 | \item{\code{Time}}{Time.} 8 | \item{\code{V1}}{variable 1.} 9 | \item{\code{V2}}{variable 2.} 10 | \item{\code{V3}}{variable 3.} 11 | \item{\code{V4}}{variable 4.} 12 | \item{\code{V5}}{variable 5.} 13 | } 14 | } 15 | \usage{ 16 | Lorenz5D 17 | } 18 | \description{5-D Lorenz'96 timeseries with F = 8. 19 | } 20 | \references{ 21 | Lorenz, Edward (1996). Predictability - A problem partly solved, 22 | Seminar on Predictability, Vol. I, ECMWF. 23 | } 24 | \keyword{datasets} 25 | -------------------------------------------------------------------------------- /man/MakeBlock.Rd: -------------------------------------------------------------------------------- 1 | \name{MakeBlock} 2 | \alias{MakeBlock} 3 | \title{Make embedded data block} 4 | 5 | \usage{ 6 | MakeBlock(dataFrame, E = 0, tau = -1, columns = "", deletePartial = FALSE) 7 | } 8 | 9 | \arguments{ 10 | \item{dataFrame}{input data.frame. The first column must be a time 11 | index or time values. The columns must be named.} 12 | 13 | \item{E}{embedding dimension.} 14 | 15 | \item{tau}{integer time delay embedding lag specified as number of 16 | time column rows.} 17 | 18 | \item{columns}{string of whitespace separated column name(s) in the 19 | input data to be embedded.} 20 | 21 | \item{deletePartial}{boolean to delete rows with partial data.} 22 | } 23 | 24 | \description{ 25 | \code{\link{MakeBlock}} performs Takens time-delay embedding on 26 | \code{columns}. It is an internal function called by \code{\link{Embed}} 27 | that does not perform input error checking or validation. 28 | } 29 | 30 | \value{ 31 | A data.frame with lagged columns. E columns for each variable specified 32 | in \code{columns}. 33 | } 34 | 35 | \details{ 36 | Each \code{columns} item will have E-1 time-lagged vectors created. 37 | The column name is appended with \code{(t-n)}. For example, data 38 | columns X, Y, with E = 2 will have columns named 39 | \code{X(t-0) X(t-1) Y(t-0) Y(t-1)}. 40 | 41 | The returned data.frame does not have a time column. 42 | 43 | If \code{deletePartial} is \code{TRUE}, the returned 44 | data.frame is truncated by tau * (E-1) rows to remove state vectors 45 | with partial data (NaN elements). 46 | } 47 | 48 | \examples{ 49 | data(TentMap) 50 | embed <- MakeBlock(TentMap, 3, 1, "TentMap") 51 | } 52 | -------------------------------------------------------------------------------- /man/Multiview.Rd: -------------------------------------------------------------------------------- 1 | \name{Multiview} 2 | \alias{Multiview} 3 | \title{Forecasting using multiview embedding} 4 | \usage{ 5 | Multiview(pathIn = "./", dataFile = "", dataFrame = NULL, 6 | lib = "", pred = "", D = 0, E = 1, Tp = 1, knn = 0, 7 | tau = -1, columns = "", target = "", multiview = 0, exclusionRadius = 0, 8 | trainLib = TRUE, excludeTarget = FALSE, parameterList = FALSE, 9 | verbose = FALSE, numThreads = 4, showPlot = FALSE, noTime = FALSE) 10 | } 11 | \arguments{ 12 | \item{pathIn}{path to \code{dataFile}.} 13 | 14 | \item{dataFile}{.csv format data file name. The first column must be a time 15 | index or time values. The first row must be column names unless noTime is TRUE.} 16 | 17 | \item{dataFrame}{input data.frame. The first column must be a time 18 | index or time values unless noTime is TRUE. The columns must be named.} 19 | 20 | \item{lib}{a 2-column matrix, data.frame, 2-element vector or string of 21 | row indice pairs, where each pair specifies the first and last *rows* of 22 | the time series to create the library.} 23 | 24 | \item{pred}{(same format as lib), but specifying the sections of the time 25 | series to forecast.} 26 | 27 | \item{D}{multivariate dimension.} 28 | 29 | \item{E}{embedding dimension.} 30 | 31 | \item{Tp}{prediction horizon (number of time column rows).} 32 | 33 | \item{knn}{number of nearest neighbors. If knn=0, knn is set to E+1.} 34 | 35 | \item{tau}{lag of time delay embedding specified as number of 36 | time column rows.} 37 | 38 | \item{columns}{string of whitespace separated column name(s), or vector 39 | of column names used to create the library. If individual column names 40 | contain whitespace place names in a vector, or, append ',' to the name.} 41 | 42 | \item{target}{column name used for prediction.} 43 | 44 | \item{multiview}{number of multiview ensembles to average for the final 45 | prediction estimate.} 46 | 47 | \item{exclusionRadius}{number of adjacent observation vector rows to exclude as 48 | nearest neighbors in prediction.} 49 | 50 | \item{trainLib}{logical to use in-sample (lib=pred) projections for the 51 | ranking of column combinations.} 52 | 53 | \item{excludeTarget}{logical to exclude embedded target column from combinations.} 54 | 55 | \item{parameterList}{logical to add list of invoked parameters.} 56 | 57 | \item{verbose}{logical to produce additional console reporting.} 58 | 59 | \item{numThreads}{number of CPU threads to use in multiview processing.} 60 | 61 | \item{showPlot}{logical to plot results.} 62 | 63 | \item{noTime}{logical to allow input data with no time column.} 64 | } 65 | 66 | \value{ 67 | Named list with data.frames \code{[[View, Predictions]]}. 68 | 69 | data.frame \code{View} columns: 70 | \tabular{ll}{ 71 | Col_1 \tab column index\cr 72 | ... \tab column index\cr 73 | Col_D \tab column index\cr 74 | rho \tab Pearson correlation\cr 75 | MAE \tab mean absolute error\cr 76 | RMSE \tab root mean square error\cr 77 | name_1 \tab column name\cr 78 | ... \tab column name\cr 79 | name_D \tab column name\cr 80 | } 81 | 82 | If \code{parameterList = TRUE} a named list "parameters" is added. 83 | } 84 | 85 | \references{Ye H., and G. Sugihara, 2016. Information leverage in 86 | interconnected ecosystems: Overcoming the curse of dimensionality. 87 | Science 353:922-925. 88 | } 89 | 90 | \description{ 91 | \code{\link{Multiview}} applies the method of \cite{Ye & Sugihara} 92 | to find optimal combinations of variables that best represent the 93 | dynamics. 94 | } 95 | 96 | \details{Multiview embedding is a method to identify variables in a 97 | multivariate dynamical system that are most likely to contribute to 98 | the observed dynamics. It is a multistep algorithm with these general 99 | steps: 100 | \enumerate{ 101 | \item Compute D-dimensional variable combination forecasts. 102 | \item Rank forecasts. 103 | \item Compute predictions of top combinations. 104 | \item Compute multiview averaged prediction. 105 | } 106 | If \code{E>1}, all variables are embedded to dimension E. 107 | If \code{trainLib} is \code{TRUE} initial forecasts and ranking are 108 | done in-sample (\code{lib=pred}) and predictions using the top ranked 109 | combinations use the specified \code{lib} and \code{pred}. 110 | If \code{trainLib} is \code{FALSE} initial forecasts and ranking use 111 | the specified \code{lib} and \code{pred}, the step of computing 112 | predictions of the top combinations is skipped. 113 | } 114 | 115 | \examples{ 116 | data(block_3sp) 117 | L = Multiview( dataFrame = block_3sp, lib = "1 100", pred = "101 190", 118 | E = 2, columns = "x_t y_t z_t", target = "x_t" ) 119 | } 120 | -------------------------------------------------------------------------------- /man/PredictInterval.Rd: -------------------------------------------------------------------------------- 1 | \name{PredictInterval} 2 | \alias{PredictInterval} 3 | \title{Forecast interval accuracy} 4 | \usage{ 5 | PredictInterval(pathIn = "./", dataFile = "", dataFrame = NULL, pathOut = "./", 6 | predictFile = "", lib = "", pred = "", maxTp = 10, E = 1, tau = -1, 7 | exclusionRadius = 0, columns = "", target = "", embedded = FALSE, 8 | verbose = FALSE, validLib = vector(), numThreads = 4, showPlot = TRUE, 9 | noTime = FALSE) 10 | } 11 | \arguments{ 12 | \item{pathIn}{path to \code{dataFile}.} 13 | 14 | \item{dataFile}{.csv format data file name. The first column must be a time 15 | index or time values unless noTime is TRUE. The first row must be column names.} 16 | 17 | \item{dataFrame}{input data.frame. The first column must be a time 18 | index or time values unless noTime is TRUE. The columns must be named.} 19 | 20 | \item{pathOut}{path for \code{predictFile} containing output predictions.} 21 | 22 | \item{predictFile}{output file name.} 23 | 24 | \item{lib}{string or vector with start and stop indices of input data 25 | rows used to create the library from observations. Mulitple row index 26 | pairs can be specified with each pair defining the first and last 27 | rows of time series observation segments used to create the library.} 28 | 29 | \item{pred}{string with start and stop indices of input data rows used for 30 | predictions. A single contiguous range is supported.} 31 | 32 | \item{maxTp}{maximum value of Tp to evalulate.} 33 | 34 | \item{E}{embedding dimension.} 35 | 36 | \item{tau}{lag of time delay embedding specified as number of 37 | time column rows.} 38 | 39 | \item{exclusionRadius}{excludes vectors from the search space of nearest 40 | neighbors if their relative time index is within exclusionRadius.} 41 | 42 | \item{columns}{string of whitespace separated column name(s), or vector 43 | of column names used to create the library. If individual column names 44 | contain whitespace place names in a vector, or, append ',' to the name.} 45 | 46 | \item{target}{column name used for prediction.} 47 | 48 | \item{embedded}{logical specifying if the input data are embedded.} 49 | 50 | \item{verbose}{logical to produce additional console reporting.} 51 | 52 | \item{validLib}{logical vector the same length as the number of data 53 | rows. Any data row represented in this vector as FALSE, will not be 54 | included in the library.} 55 | 56 | \item{numThreads}{number of parallel threads for computation.} 57 | 58 | \item{showPlot}{logical to plot results.} 59 | 60 | \item{noTime}{logical to allow input data with no time column.} 61 | } 62 | 63 | \value{ 64 | A data.frame with columns \code{Tp, rho}. 65 | } 66 | 67 | \description{ 68 | \code{\link{PredictInterval}} uses \code{\link{Simplex}} to evaluate 69 | prediction accuracy as a function of forecast interval Tp. 70 | } 71 | 72 | %\details{ 73 | %} 74 | 75 | \examples{ 76 | data(TentMap) 77 | Tp.rho = PredictInterval( dataFrame = TentMap, lib = "1 100", 78 | pred = "201 500", E = 2, columns = "TentMap", target = "TentMap", 79 | showPlot = FALSE ) 80 | } 81 | -------------------------------------------------------------------------------- /man/PredictNonlinear.Rd: -------------------------------------------------------------------------------- 1 | \name{PredictNonlinear} 2 | \alias{PredictNonlinear} 3 | \title{Test for nonlinear dynamics} 4 | \usage{ 5 | PredictNonlinear(pathIn = "./", dataFile = "", dataFrame = NULL, 6 | pathOut = "./", predictFile = "", lib = "", pred = "", theta = "", 7 | E = 1, Tp = 1, knn = 0, tau = -1, exclusionRadius = 0, 8 | columns = "", target = "", embedded = FALSE, verbose = FALSE, 9 | validLib = vector(), ignoreNan = TRUE, numThreads = 4, 10 | showPlot = TRUE, noTime = FALSE ) 11 | } 12 | \arguments{ 13 | \item{pathIn}{path to \code{dataFile}.} 14 | 15 | \item{dataFile}{.csv format data file name. The first column must be a time 16 | index or time values unless noTime is TRUE. The first row must be column names.} 17 | 18 | \item{dataFrame}{input data.frame. The first column must be a time 19 | index or time values unless noTime is TRUE. The columns must be named.} 20 | 21 | \item{pathOut}{path for \code{predictFile} containing output predictions.} 22 | 23 | \item{predictFile}{output file name.} 24 | 25 | \item{lib}{string or vector with start and stop indices of input data 26 | rows used to create the library from observations. Mulitple row index 27 | pairs can be specified with each pair defining the first and last 28 | rows of time series observation segments used to create the library.} 29 | 30 | \item{pred}{string with start and stop indices of input data rows used for 31 | predictions. A single contiguous range is supported.} 32 | 33 | \item{theta}{A whitespace delimeted string with values of the S-map 34 | localisation parameter. An empty string will use default values of 35 | \code{[0.01 0.1 0.3 0.5 0.75 1 1.5 2 3 4 5 6 7 8 9]}.} 36 | 37 | \item{E}{embedding dimension.} 38 | 39 | \item{Tp}{prediction horizon (number of time column rows).} 40 | 41 | \item{knn}{number of nearest neighbors. If knn=0, knn is set to the 42 | library size.} 43 | 44 | \item{tau}{lag of time delay embedding specified as number of 45 | time column rows.} 46 | 47 | \item{exclusionRadius}{excludes vectors from the search space of nearest 48 | neighbors if their relative time index is within exclusionRadius.} 49 | 50 | \item{columns}{string of whitespace separated column name(s), or vector 51 | of column names used to create the library. If individual column names 52 | contain whitespace place names in a vector, or, append ',' to the name.} 53 | 54 | \item{target}{column name used for prediction.} 55 | 56 | \item{embedded}{logical specifying if the input data are embedded.} 57 | 58 | \item{verbose}{logical to produce additional console reporting.} 59 | 60 | \item{validLib}{logical vector the same length as the number of data 61 | rows. Any data row represented in this vector as FALSE, will not be 62 | included in the library.} 63 | 64 | \item{ignoreNan}{logical to internally redefine library to avoid nan.} 65 | 66 | \item{numThreads}{number of parallel threads for computation.} 67 | 68 | \item{showPlot}{logical to plot results.} 69 | 70 | \item{noTime}{logical to allow input data with no time column.} 71 | } 72 | 73 | \value{ 74 | A data.frame with columns \code{Theta, rho}. 75 | } 76 | 77 | \description{ 78 | \code{\link{PredictNonlinear}} uses \code{\link{SMap}} to evaluate 79 | prediction accuracy as a function of the localisation parameter 80 | \code{theta}. 81 | } 82 | 83 | \details{The localisation parameter \code{theta} weights nearest 84 | neighbors according to exp( (-theta D / D_avg) ) where D is the 85 | distance between the observation vector and neighbor, D_avg the mean 86 | distance. If theta = 0, weights are uniformally unity corresponding 87 | to a global autoregressive model. As theta increases, neighbors in 88 | closer proximity to the observation are considered. 89 | } 90 | 91 | \examples{ 92 | data(TentMapNoise) 93 | theta.rho = PredictNonlinear( dataFrame = TentMapNoise, E = 2, 94 | lib = "1 100", pred = "201 500", columns = "TentMap", 95 | target = "TentMap", showPlot = FALSE ) 96 | } 97 | -------------------------------------------------------------------------------- /man/SMap.Rd: -------------------------------------------------------------------------------- 1 | \name{SMap} 2 | \alias{SMap} 3 | \title{SMap forecasting} 4 | \usage{ 5 | SMap(pathIn = "./", dataFile = "", dataFrame = NULL, 6 | lib = "", pred = "", E = 0, Tp = 1, knn = 0, tau = -1, 7 | theta = 0, exclusionRadius = 0, columns = "", target = "", 8 | embedded = FALSE, verbose = FALSE, 9 | validLib = vector(), ignoreNan = TRUE, 10 | generateSteps = 0, parameterList = FALSE, 11 | showPlot = FALSE, noTime = FALSE) 12 | } 13 | \arguments{ 14 | \item{pathIn}{path to \code{dataFile}.} 15 | 16 | \item{dataFile}{.csv format data file name. The first column must be a time 17 | index or time values unless noTime is TRUE. The first row must be column names.} 18 | 19 | \item{dataFrame}{input data.frame. The first column must be a time 20 | index or time values unless noTime is TRUE. The columns must be named.} 21 | 22 | \item{lib}{string or vector with start and stop indices of input data 23 | rows used to create the library from observations. Mulitple row index 24 | pairs can be specified with each pair defining the first and last 25 | rows of time series observation segments used to create the library.} 26 | 27 | \item{pred}{string with start and stop indices of input data rows used for 28 | predictions. A single contiguous range is supported.} 29 | 30 | \item{E}{embedding dimension.} 31 | 32 | \item{Tp}{prediction horizon (number of time column rows).} 33 | 34 | \item{knn}{number of nearest neighbors. If knn=0, knn is set to the 35 | library size.} 36 | 37 | \item{tau}{lag of time delay embedding specified as number of 38 | time column rows.} 39 | 40 | \item{theta}{neighbor localisation exponent.} 41 | 42 | \item{exclusionRadius}{excludes vectors from the search space of nearest 43 | neighbors if their relative time index is within exclusionRadius.} 44 | 45 | \item{columns}{string of whitespace separated column name(s), or vector 46 | of column names used to create the library. If individual column names 47 | contain whitespace place names in a vector, or, append ',' to the name.} 48 | 49 | \item{target}{column name used for prediction.} 50 | 51 | \item{embedded}{logical specifying if the input data are embedded.} 52 | 53 | \item{verbose}{logical to produce additional console reporting.} 54 | 55 | \item{validLib}{logical vector the same length as the number of data 56 | rows. Any data row represented in this vector as FALSE, will not be 57 | included in the library.} 58 | 59 | \item{ignoreNan}{logical to internally redefine library to avoid nan.} 60 | 61 | \item{generateSteps}{number of predictive feedback generative steps.} 62 | 63 | \item{parameterList}{logical to add list of invoked parameters.} 64 | 65 | \item{showPlot}{logical to plot results.} 66 | 67 | \item{noTime}{logical to allow input data with no time column.} 68 | } 69 | 70 | \value{ 71 | A named list with three data.frames 72 | \code{[[predictions, coefficients, singularValues]]}. 73 | \code{predictions} has columns \code{Observations, Predictions}. 74 | The first column contains time or index values. 75 | 76 | \code{coefficients} data.frame has time or index values in the first column. 77 | Columns 2 through E+2 (E+1 columns) are the SMap coefficients. 78 | 79 | \code{singularValues} data.frame has time or index values in the first column. 80 | Columns 2 through E+2 (E+1 columns) are the SVD singularValues. The 81 | first value corresponds to the SVD bias (intercept) term. 82 | 83 | If \code{parameterList = TRUE} a named list "parameters" is added. 84 | } 85 | 86 | \references{Sugihara G. 1994. Nonlinear forecasting for the classification of natural time series. Philosophical Transactions: Physical Sciences and Engineering, 348 (1688):477-495.} 87 | 88 | \description{ 89 | \code{\link{SMap}} performs time series forecasting based on localised 90 | (or global) nearest neighbor projection in the time series phase space as 91 | described in \cite{Sugihara 1994}. 92 | } 93 | 94 | \details{ 95 | If \code{embedded} is \code{FALSE}, the data \code{column(s)} are embedded 96 | to dimension \code{E} with time lag \code{tau}. This embedding forms an 97 | n-columns * E-dimensional phase space for the \code{\link{SMap}} projection. 98 | If embedded is \code{TRUE}, the data are assumed to contain an 99 | E-dimensional embedding with E equal to the number of \code{columns}. 100 | See the Note below for proper use of multivariate data (number of 101 | \code{columns} > 1). 102 | 103 | If \code{ignoreNan} is \code{TRUE}, the library (\code{lib}) is 104 | internally redefined to exclude nan embedding vectors. If 105 | \code{ignoreNan} is \code{FALSE} no library adjustment is made. The 106 | (\code{lib}) can be explicitly specified to exclude nan library vectors. 107 | 108 | Predictions are made using leave-one-out cross-validation, i.e. 109 | observation rows are excluded from the prediction regression. 110 | 111 | In contrast to \code{\link{Simplex}}, \code{\link{SMap}} uses all 112 | available neighbors and weights them with an exponential decay 113 | in phase space distance with exponent \code{theta}. \code{theta}=0 114 | uses all neighbors corresponding to a global autoregressive model. 115 | As \code{theta} increases, neighbors closer in vicinity to the 116 | observation are considered. 117 | } 118 | 119 | \note{ 120 | \code{\link{SMap}} should be called with columns explicitly corresponding to 121 | dimensions E. In the univariate case (number of \code{columns} = 1) with 122 | default \code{embedded = FALSE}, the time series will be time-delay 123 | embedded to dimension E, SMap coefficients correspond to each dimension. 124 | 125 | If a multivariate data set is used (number of \code{columns} > 1) it 126 | must use \code{embedded = TRUE} with E equal to the number of columns. 127 | This prevents the function from internally time-delay embedding the 128 | multiple columns to dimension E. If the internal time-delay embedding 129 | is performed, then state-space columns will not correspond to the 130 | intended dimensions in the matrix inversion, coefficient assignment, 131 | and prediction. In the multivariate case, the user should first prepare 132 | the embedding (using \code{\link{Embed}} for time-delay embedding), then 133 | pass this embedding to \code{\link{SMap}} with appropriately specified 134 | \code{columns}, \code{E}, and \code{embedded = TRUE}. 135 | } 136 | 137 | \examples{ 138 | data(circle) 139 | L = SMap( dataFrame = circle, lib="1 100", pred="110 190", theta = 4, 140 | E = 2, embedded = TRUE, columns = "x y", target = "x" ) 141 | } 142 | -------------------------------------------------------------------------------- /man/Simplex.Rd: -------------------------------------------------------------------------------- 1 | \name{Simplex} 2 | \alias{Simplex} 3 | \title{Simplex forecasting} 4 | \usage{ 5 | Simplex(pathIn = "./", dataFile = "", dataFrame = NULL, pathOut = "./", 6 | predictFile = "", lib = "", pred = "", E = 0, Tp = 1, knn = 0, tau = -1, 7 | exclusionRadius = 0, columns = "", target = "", embedded = FALSE, 8 | verbose = FALSE, validLib = vector(), generateSteps = 0, 9 | parameterList = FALSE, showPlot = FALSE, noTime = FALSE) 10 | } 11 | \arguments{ 12 | \item{pathIn}{path to \code{dataFile}.} 13 | 14 | \item{dataFile}{.csv format data file name. The first column must be a time 15 | index or time values unless noTime is TRUE. The first row must be column names.} 16 | 17 | \item{dataFrame}{input data.frame. The first column must be a time 18 | index or time values unless noTime is TRUE. The columns must be named.} 19 | 20 | \item{pathOut}{path for \code{predictFile} containing output predictions.} 21 | 22 | \item{predictFile}{output file name.} 23 | 24 | \item{lib}{string or vector with start and stop indices of input data 25 | rows used to create the library from observations. Mulitple row index 26 | pairs can be specified with each pair defining the first and last 27 | rows of time series observation segments used to create the library.} 28 | 29 | \item{pred}{string with start and stop indices of input data rows used for 30 | predictions. A single contiguous range is supported.} 31 | 32 | \item{E}{embedding dimension.} 33 | 34 | \item{Tp}{prediction horizon (number of time column rows).} 35 | 36 | \item{knn}{number of nearest neighbors. If knn=0, knn is set to E+1.} 37 | 38 | \item{tau}{lag of time delay embedding specified as number of 39 | time column rows.} 40 | 41 | \item{exclusionRadius}{excludes vectors from the search space of nearest 42 | neighbors if their relative time index is within exclusionRadius.} 43 | 44 | \item{columns}{string of whitespace separated column name(s), or vector 45 | of column names used to create the library. If individual column names 46 | contain whitespace place names in a vector, or, append ',' to the name.} 47 | 48 | \item{target}{column name used for prediction.} 49 | 50 | \item{embedded}{logical specifying if the input data are embedded.} 51 | 52 | \item{verbose}{logical to produce additional console reporting.} 53 | 54 | \item{validLib}{logical vector the same length as the number of data 55 | rows. Any data row represented in this vector as FALSE, will not be 56 | included in the library.} 57 | 58 | \item{generateSteps}{number of predictive feedback generative steps.} 59 | 60 | \item{parameterList}{logical to add list of invoked parameters.} 61 | 62 | \item{showPlot}{logical to plot results.} 63 | 64 | \item{noTime}{logical to allow input data with no time column.} 65 | } 66 | 67 | \value{ 68 | A data.frame with columns \code{Observations, Predictions}. 69 | The first column contains the time values. 70 | 71 | If \code{parameterList = TRUE}, a named list with "predictions" holding the 72 | data.frame, "parameters" with a named list of invoked parameters. 73 | } 74 | 75 | \references{Sugihara G. and May R. 1990. Nonlinear forecasting as a way 76 | of distinguishing chaos from measurement error in time series. 77 | Nature, 344:734-741. 78 | } 79 | 80 | \description{ 81 | \code{\link{Simplex}} performs time series forecasting based on 82 | weighted nearest neighbors projection in the time series phase space as 83 | described in \cite{Sugihara and May}. 84 | } 85 | 86 | \details{ 87 | If embedded is \code{FALSE}, the data \code{column(s)} are embedded to 88 | dimension \code{E} with time lag \code{tau}. This embedding forms an 89 | E-dimensional phase space for the \code{\link{Simplex}} projection. 90 | If embedded is \code{TRUE}, the data are assumed to contain an 91 | E-dimensional embedding with E equal to the number of \code{columns}. 92 | Predictions are made using leave-one-out cross-validation, i.e. 93 | observation vectors are excluded from the prediction simplex. 94 | 95 | To assess an optimal embedding dimension \code{\link{EmbedDimension}} 96 | can be applied. Accuracy statistics can be estimated by 97 | \code{\link{ComputeError}}. 98 | } 99 | \examples{ 100 | data( block_3sp ) 101 | smplx = Simplex( dataFrame = block_3sp, lib = "1 100", pred = "101 190", 102 | E = 3, columns = "x_t", target = "x_t" ) 103 | ComputeError( smplx $ Predictions, smplx $ Observations ) 104 | } 105 | -------------------------------------------------------------------------------- /man/SurrogateData.Rd: -------------------------------------------------------------------------------- 1 | \name{SurrogateData} 2 | \alias{SurrogateData} 3 | \title{Generate surrogate data for permutation/randomization tests} 4 | \usage{ 5 | SurrogateData( ts, method = c("random_shuffle", "ebisuzaki", 6 | "seasonal"), num_surr = 100, T_period = 1, alpha = 0 ) 7 | } 8 | 9 | \arguments{ 10 | \item{ts}{the original time series} 11 | 12 | \item{method}{which algorithm to use to generate surrogate data} 13 | 14 | \item{num_surr}{the number of null surrogates to generate} 15 | 16 | \item{T_period}{the period of seasonality for seasonal surrogates 17 | (ignored for other methods)} 18 | 19 | \item{alpha}{additive noise factor: N(0,alpha)} 20 | } 21 | 22 | \value{ 23 | A matrix where each column is a separate surrogate with the same 24 | length as \code{ts}. 25 | } 26 | 27 | \description{ 28 | \code{SurrogateData} generates surrogate data under several different 29 | null models. 30 | } 31 | 32 | \details{ 33 | Method "random_shuffle" creates surrogates by randomly permuting the values 34 | of the original time series. 35 | 36 | Method "Ebisuzaki" creates surrogates by randomizing the phases of a Fourier 37 | transform, preserving the power spectra of the null surrogates. 38 | 39 | Method "seasonal" creates surrogates by computing a mean seasonal trend of 40 | the specified period and shuffling the residuals. It is presumed that 41 | the seasonal trend can be exracted with a smoothing spline. Additive 42 | Gaussian noise is included according to N(0,alpha). 43 | } 44 | 45 | \examples{ 46 | data("block_3sp") 47 | ts <- block_3sp$x_t 48 | SurrogateData(ts, method = "ebisuzaki") 49 | } 50 | -------------------------------------------------------------------------------- /man/TentMap.Rd: -------------------------------------------------------------------------------- 1 | \docType{data} 2 | \name{TentMap} 3 | \alias{TentMap} 4 | \title{Time series for a tent map with mu = 2.} 5 | \format{Data frame with 999 rows and 2 columns 6 | \describe{ 7 | \item{\code{Time}}{time index.} 8 | \item{\code{TentMap}}{tent map values.} 9 | } 10 | } 11 | \usage{ 12 | TentMap 13 | } 14 | \description{ 15 | First-differenced time series generated from the tent map 16 | recurrence relation with mu = 2. 17 | } 18 | \keyword{datasets} 19 | -------------------------------------------------------------------------------- /man/TentMapNoise.Rd: -------------------------------------------------------------------------------- 1 | \docType{data} 2 | \name{TentMapNoise} 3 | \alias{TentMapNoise} 4 | \title{Time series of tent map plus noise.} 5 | \format{Data frame with 999 rows and 2 columns 6 | \describe{ 7 | \item{\code{Time}}{time index.} 8 | \item{\code{TentMap}}{tent map values.} 9 | } 10 | } 11 | \usage{ 12 | TentMapNoise 13 | } 14 | \description{ 15 | First-differenced time series generated from the tent map 16 | recurrence relation with mu = 2 and random noise. 17 | } 18 | \keyword{datasets} 19 | -------------------------------------------------------------------------------- /man/Thrips.Rd: -------------------------------------------------------------------------------- 1 | \docType{data} 2 | \name{Thrips} 3 | \alias{Thrips} 4 | \title{Apple-blossom Thrips time series} 5 | \description{ 6 | Seasonal outbreaks of Thrips imaginis. 7 | } 8 | \references{ 9 | Davidson and Andrewartha, Annual trends in a natural population of 10 | Thrips imaginis \emph{Thysanoptera}, Journal of Animal Ecology, 17, 11 | 193-199, 1948. 12 | } 13 | -------------------------------------------------------------------------------- /man/block_3sp.Rd: -------------------------------------------------------------------------------- 1 | \docType{data} 2 | \name{block_3sp} 3 | \alias{block_3sp} 4 | \title{Time series for a three-species coupled model.} 5 | \format{A data frame with 198 rows and 10 columns: 6 | \describe{ 7 | \item{\code{time}}{time index (# of generations)} 8 | \item{\code{x_t} }{abundance of simulated species x at time t} 9 | \item{\code{x_t-1}}{abundance of simulated species x at time t-1} 10 | \item{\code{x_t-2}}{abundance of simulated species x at time t-2} 11 | \item{\code{y_t} }{abundance of simulated species y at time t} 12 | \item{\code{y_t-1}}{abundance of simulated species y at time t-1} 13 | \item{\code{y_t-2}}{abundance of simulated species y at time t-2} 14 | \item{\code{z_t} }{abundance of simulated species z at time t} 15 | \item{\code{z_t-1}}{abundance of simulated species z at time t-1} 16 | \item{\code{z_t-2}}{abundance of simulated species z at time t-2} 17 | }} 18 | \usage{ 19 | block_3sp 20 | } 21 | \description{ 22 | Time series generated from a discrete-time coupled 23 | Lotka-Volterra model exhibiting chaotic dynamics. 24 | } 25 | \keyword{datasets} 26 | -------------------------------------------------------------------------------- /man/circle.Rd: -------------------------------------------------------------------------------- 1 | \docType{data} 2 | \name{circle} 3 | \alias{circle} 4 | \title{2-D timeseries of a circle.} 5 | \format{A data frame with 200 rows and 3 columns: 6 | \describe{ 7 | \item{\code{Time}}{time index.} 8 | \item{\code{x}}{sin component.} 9 | \item{\code{y}}{cos component.} 10 | }} 11 | \usage{ 12 | circle 13 | } 14 | \description{ 15 | Time series of of circle in 2-D (sin and cos). 16 | } 17 | \keyword{datasets} 18 | -------------------------------------------------------------------------------- /man/figures/optimal-E-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SugiharaLab/rEDM/aa8204c60a5b9c95d7901b879e3c347c71cfdf7c/man/figures/optimal-E-1.png -------------------------------------------------------------------------------- /man/figures/simplex-projection-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SugiharaLab/rEDM/aa8204c60a5b9c95d7901b879e3c347c71cfdf7c/man/figures/simplex-projection-1.png -------------------------------------------------------------------------------- /man/figures/sunspots-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SugiharaLab/rEDM/aa8204c60a5b9c95d7901b879e3c347c71cfdf7c/man/figures/sunspots-1.png -------------------------------------------------------------------------------- /man/figures/unnamed-chunk-3-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SugiharaLab/rEDM/aa8204c60a5b9c95d7901b879e3c347c71cfdf7c/man/figures/unnamed-chunk-3-1.png -------------------------------------------------------------------------------- /man/paramecium_didinium.Rd: -------------------------------------------------------------------------------- 1 | \docType{data} 2 | \name{paramecium_didinium} 3 | \alias{paramecium_didinium} 4 | \title{Time series for the Paramecium-Didinium laboratory experiment} 5 | \usage{ 6 | paramecium_didinium 7 | } 8 | \description{ 9 | Time series of Paramecium and Didinium abundances (#/mL) from 10 | an experiment by Veilleux (1979) 11 | } 12 | \keyword{datasets} 13 | -------------------------------------------------------------------------------- /man/rEDM.Rd: -------------------------------------------------------------------------------- 1 | \docType{package} 2 | \name{rEDM} 3 | \alias{EDM} 4 | \alias{rEDM-package} 5 | \title{Empirical dynamic modeling} 6 | \description{ 7 | \pkg{rEDM} provides tools for data-driven time series analyses. It is 8 | based on reconstructing multivariate state space 9 | representations from uni or multivariate time series, then projecting 10 | state changes using various metrics applied to nearest neighbors. 11 | 12 | \pkg{rEDM} is a \pkg{Rcpp} interface to the 13 | \href{https://github.com/SugiharaLab/cppEDM}{cppEDM} library of 14 | Empirical Dynamic Modeling tools. Functionality includes: 15 | \itemize{ 16 | \item Simplex projection (Sugihara and May 1990) 17 | \item Sequential Locally Weighted Global Linear Maps (S-map) (Sugihara 1994) 18 | \item Multivariate embeddings (Dixon et. al. 1999) 19 | \item Convergent cross mapping (Sugihara et. al. 2012) 20 | \item Multiview embedding (Ye and Sugihara 2016) 21 | } 22 | } 23 | \references{ 24 | Sugihara G. and May R. 1990. Nonlinear forecasting as a way of 25 | distinguishing chaos from measurement error in time series. 26 | Nature, 344:734-741. 27 | 28 | Sugihara G. 1994. Nonlinear forecasting for the classification of 29 | natural time series. Philosophical Transactions: Physical Sciences 30 | and Engineering, 348 (1688) : 477-495. 31 | 32 | Dixon, P. A., M. Milicich, and G. Sugihara, 1999. Episodic 33 | fluctuations in larval supply. Science 283:1528-1530. 34 | 35 | Sugihara G., May R., Ye H., Hsieh C., Deyle E., Fogarty M., 36 | Munch S., 2012. Detecting Causality in Complex Ecosystems. 37 | Science 338:496-500. 38 | 39 | Ye H., and G. Sugihara, 2016. Information leverage in 40 | interconnected ecosystems: Overcoming the curse of dimensionality. 41 | Science 353:922-925. 42 | } 43 | \details{ 44 | 45 | \strong{Main Functions}: 46 | \itemize{ 47 | \item \code{\link{Simplex}} - simplex projection 48 | \item \code{\link{SMap}} - S-map projection 49 | \item \code{\link{CCM}} - convergent cross mapping 50 | \item \code{\link{Multiview}} - multiview forecasting 51 | } 52 | \strong{Helper Functions}: 53 | \itemize{ 54 | \item \code{\link{Embed}} - time delay embedding 55 | \item \code{\link{ComputeError}} - forecast skill metrics 56 | \item \code{\link{EmbedDimension}} - optimal embedding dimension 57 | \item \code{\link{PredictInterval}} - optimal prediction interval 58 | \item \code{\link{PredictNonlinear}} - evaluate nonlinearity 59 | } 60 | } 61 | \author{ 62 | \strong{Maintainer}: Joseph Park 63 | 64 | \strong{Authors}: Joseph Park, Cameron Smith, Ethan Deyle, Erik 65 | Saberski, George Sugihara 66 | 67 | % \strong{Contributors}: 68 | } 69 | \keyword{package} 70 | -------------------------------------------------------------------------------- /man/sardine_anchovy_sst.Rd: -------------------------------------------------------------------------------- 1 | \docType{data} 2 | \name{sardine_anchovy_sst} 3 | \alias{sardine_anchovy_sst} 4 | \title{Time series for the California Current Anchovy-Sardine-SST system} 5 | \format{\describe{ 6 | \item{\code{year}}{year of measurement} 7 | \item{\code{anchovy}}{anchovy landings, scaled to mean = 0, sd = 1} 8 | \item{\code{sardine}}{sardine landings, scaled to mean = 0, sd = 1} 9 | \item{\code{sio_sst}}{3-year running average of sea surface temperature at 10 | SIO pier, scaled to mean = 0, sd = 1} 11 | \item{\code{np_sst}}{3-year running average of sea surface temperature at 12 | Newport pier, scaled to mean = 0, sd = 1} 13 | }} 14 | \usage{ 15 | sardine_anchovy_sst 16 | } 17 | \description{ 18 | Time series of Pacific sardine landings (CA), Northern anchovy 19 | landings (CA), and sea-surface temperature (3-year average) at the SIO 20 | pier and Newport pier 21 | } 22 | \keyword{datasets} 23 | -------------------------------------------------------------------------------- /src/CCM.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include "RcppEDMCommon.h" 3 | 4 | //----------------------------------------------------------- 5 | // 6 | //----------------------------------------------------------- 7 | Rcpp::List CCM_rcpp( std::string pathIn, 8 | std::string dataFile, 9 | r::DataFrame dataFrame, 10 | // std::string pathOut, // Rcpp 20 param limit 11 | // std::string predictFile, // Rcpp 20 param limit 12 | int E, 13 | int Tp, 14 | int knn, 15 | int tau, 16 | int exclusionRadius, 17 | std::string columns, 18 | std::string target, 19 | std::string libSizes, 20 | int sample, 21 | bool random, 22 | // bool replacement, // Rcpp 20 param limit 23 | unsigned seed, 24 | bool embedded, 25 | bool includeData, 26 | bool parameterList, 27 | bool verbose ) { 28 | 29 | CCMValues ccmValues; 30 | 31 | bool replacement = false; // Rcpp 20 param limit 32 | 33 | if ( dataFile.size() ) { 34 | // dataFile specified, dispatch overloaded CCM, ignore dataFrame 35 | ccmValues = CCM( pathIn, 36 | dataFile, 37 | "./", // pathOut, // Rcpp 20 param limit 38 | "", // predictFile, // Rcpp 20 param limit 39 | E, 40 | Tp, 41 | knn, 42 | tau, 43 | exclusionRadius, 44 | columns, 45 | target, 46 | libSizes, 47 | sample, 48 | random, 49 | replacement, 50 | seed, 51 | embedded, 52 | includeData, 53 | parameterList, 54 | verbose ); 55 | } 56 | else if ( dataFrame.size() ) { 57 | DataFrame< double > dataFrame_ = DFToDataFrame( dataFrame ); 58 | 59 | ccmValues = CCM( dataFrame_, 60 | "./", // pathOut, // Rcpp 20 param limit 61 | "", // predictFile, // Rcpp 20 param limit 62 | E, 63 | Tp, 64 | knn, 65 | tau, 66 | exclusionRadius, 67 | columns, 68 | target, 69 | libSizes, 70 | sample, 71 | random, 72 | replacement, 73 | seed, 74 | embedded, 75 | includeData, 76 | parameterList, 77 | verbose ); 78 | } 79 | else { 80 | Rcpp::warning( "CCM_rcpp(): No dataFile or dataFrame.\n" ); 81 | } 82 | 83 | // Ouput Rcpp DataFrames 84 | r::DataFrame allLibStat = DataFrameToDF( ccmValues.AllLibStats ); 85 | 86 | r::List output; 87 | if ( includeData ) { 88 | // Have to unroll and convert CCMValues.Predictions forward_list 89 | // to Rcpp::DataFrame for output. 90 | r::List PredictionsList1; 91 | for ( auto pi = ccmValues.CrossMap1.Predictions.begin(); 92 | pi != ccmValues.CrossMap1.Predictions.end(); ++pi ) { 93 | PredictionsList1.push_back( DataFrameToDF( *pi ) ); 94 | } 95 | r::List PredictionsList2; 96 | for ( auto pi = ccmValues.CrossMap2.Predictions.begin(); 97 | pi != ccmValues.CrossMap2.Predictions.end(); ++pi ) { 98 | PredictionsList2.push_back( DataFrameToDF( *pi ) ); 99 | } 100 | 101 | r::DataFrame cm1_PredStat = 102 | DataFrameToDF( ccmValues.CrossMap1.PredictStats ); 103 | r::DataFrame cm2_PredStat = 104 | DataFrameToDF( ccmValues.CrossMap2.PredictStats ); 105 | 106 | output = 107 | r::List::create(r::Named( "LibMeans" ) = allLibStat, 108 | r::Named( "CCM1_PredictStat" ) = cm1_PredStat, 109 | r::Named( "CCM1_Predictions" ) = PredictionsList1, 110 | r::Named( "CCM2_PredictStat" ) = cm2_PredStat, 111 | r::Named( "CCM2_Predictions" ) = PredictionsList2); 112 | 113 | if ( parameterList ) { 114 | r::List paramList = ParamMaptoList( ccmValues.parameterMap ); 115 | output["parameters"] = paramList; 116 | } 117 | } 118 | else { 119 | output = r::List::create( r::Named( "LibMeans" ) = allLibStat); 120 | } 121 | return output; 122 | } 123 | -------------------------------------------------------------------------------- /src/ComputeError.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include "RcppEDMCommon.h" 3 | 4 | //---------------------------------------------------------------- 5 | // Compute Error Wrapper method 6 | // @param vec1 : the first vector to compare 7 | // @param vec2 : the second vector to compare 8 | // @return : map/dictionary with the rho, mae, rmse 9 | //---------------------------------------------------------------- 10 | r::List ComputeError_rcpp ( std::vector vec1, 11 | std::vector vec2 ) { 12 | 13 | std::valarray val1 ( vec1.data(), vec1.size() ); 14 | std::valarray val2 ( vec2.data(), vec2.size() ); 15 | 16 | VectorError vecErr = ComputeError( val1, val2 ); 17 | 18 | // Setup as map instead of vecErr struct 19 | return r::List::create( r::Named( "MAE" ) = vecErr.MAE, 20 | r::Named( "rho" ) = vecErr.rho, 21 | r::Named( "RMSE" ) = vecErr.RMSE ); 22 | } 23 | -------------------------------------------------------------------------------- /src/DataFrame.cpp: -------------------------------------------------------------------------------- 1 | #include "RcppEDMCommon.h" 2 | 3 | //----------------------------------------------------------------------- 4 | // Convert R DataFrame to cppEDM DataFrame 5 | //----------------------------------------------------------------------- 6 | DataFrame< double > DFToDataFrame ( Rcpp::DataFrame df ) { 7 | 8 | // Get number of valarray rows from first pair 9 | size_t numRows = df.nrow(); 10 | 11 | // ensure that we have > 1 column for reading 12 | if ( df.ncol() == 1 ) { 13 | std::string err = "DFToDataFrame(): Input must have > 1 column, " 14 | "first column is interpreted as a time vector.\n"; 15 | throw Rcpp::exception( err.c_str() ); 16 | } 17 | 18 | // Get column names 19 | // JP: Are df.names() ensured to be in order as accessed by index? 20 | // If not, this will give incorrect results. 21 | std::vector< std::string > colNames; 22 | r::CharacterVector tmp_colNames = df.names(); 23 | 24 | for ( int idx = 1; idx < tmp_colNames.size(); idx++ ) { 25 | colNames.push_back( r::as( tmp_colNames[idx] ) ); 26 | } 27 | 28 | // Create cpp DataFrame 29 | DataFrame< double > dataFrame ( numRows, df.ncol()-1, colNames ); 30 | 31 | // Setup time column and time name for dataframe 32 | // It is assumed that the first column is a time vector !!! 33 | r::CharacterVector tmp = r::as( df[0] ); 34 | dataFrame.Time() = r::as< std::vector >( tmp ); 35 | dataFrame.TimeName() = r::as( 36 | ((r::CharacterVector)df.names())[0] ); 37 | 38 | // read in data columns to the cppEDM DF 39 | // JP: Are df.names() ensured to be in order as accessed by index? 40 | // If not, this will give incorrect results. 41 | for ( int idx = 1; idx < df.ncol(); idx++ ) { 42 | // unfortunately we can't convert numeric vec to valarray 43 | std::vector tmp = r::as>( df[idx] ); 44 | std::valarray col ( tmp.data(), tmp.size() ); 45 | dataFrame.WriteColumn( idx-1, col ); 46 | } 47 | 48 | return dataFrame; 49 | } 50 | 51 | //--------------------------------------------------------------- 52 | // Convert cppEDM DataFrame to R DataFrame 53 | //--------------------------------------------------------------- 54 | r::DataFrame DataFrameToDF ( DataFrame< double > dataFrame ) { 55 | 56 | r::List columnList; // List of columns to create new R data.frame 57 | 58 | // NOTE: cppEDM DataFrame columnNames are data only, not time 59 | std::vector columnNamesIn = dataFrame.ColumnNames(); 60 | 61 | std::vector columnNames; 62 | 63 | bool hasTime = false; 64 | 65 | // If dataFrame has time vector and timeName, add to columnList 66 | if ( dataFrame.Time().size() ) { 67 | hasTime = true; // Skip time column in dataFrame.VectorColumnName() 68 | 69 | columnNames.push_back( dataFrame.TimeName() ); 70 | 71 | // Probe dataFrame.Time() to see if we can convert it to 72 | // a numeric, Date, or Datetime... 73 | std::string firstTime = dataFrame.Time()[0]; 74 | 75 | // Is firstTime purely numeric characters (not Date or DateTime)? 76 | // We presume time is not negative, or exponential 77 | bool numericTime = strspn( firstTime.c_str(), 78 | ".0123456789" ) == firstTime.size(); 79 | 80 | // Does firstTime have two hyphens as in "%Y-%m-%d" Date format? 81 | size_t nHyphen = std::count(firstTime.begin(), firstTime.end(), '-'); 82 | bool dateTime = nHyphen == 2 ? true : false; 83 | 84 | // Does firstTime have two '-' and two ':' as in DateTime format? 85 | size_t nColon = std::count(firstTime.begin(), firstTime.end(), ':'); 86 | bool dateTimeTime = dateTime and nColon == 2 ? true : false; 87 | if ( dateTimeTime ) { dateTime = false; } 88 | 89 | if ( numericTime and not dateTime and not dateTimeTime ) { 90 | // Convert the dataFrame.Time() vector to numeric/double 91 | r::NumericVector timeVec( dataFrame.Time().size() ); 92 | 93 | char *pEnd; 94 | for ( size_t i = 0; i < dataFrame.Time().size(); i++ ) { 95 | timeVec[ i ] = strtod( dataFrame.Time().at( i ).c_str(), &pEnd ); 96 | // JP: check pEnd? 97 | } 98 | columnList.push_back( timeVec ); 99 | } 100 | 101 | if ( not numericTime and dateTime and not dateTimeTime ) { 102 | // Convert to Date 103 | r::DateVector dateVec( dataFrame.Time().size() ); 104 | 105 | for ( size_t i = 0; i < dataFrame.Time().size(); i++ ) { 106 | dateVec[ i ] = r::Date( dataFrame.Time().at( i ), 107 | "%Y-%m-%d" ); 108 | } 109 | columnList.push_back( dateVec ); 110 | } 111 | 112 | if ( not numericTime and not dateTime and dateTimeTime ) { 113 | // Convert to Datetime 114 | r::DatetimeVector datetimeVec( dataFrame.Time().size() ); 115 | 116 | for ( size_t i = 0; i < dataFrame.Time().size(); i++ ) { 117 | datetimeVec[ i ] = r::Datetime( dataFrame.Time().at( i ), 118 | "%Y-%m-%d %H:%M:%OS" ); 119 | } 120 | columnList.push_back( datetimeVec ); 121 | } 122 | 123 | if ( not numericTime and not dateTime and not dateTimeTime ) { 124 | // Couldn't convert dataFrame.Time(), just push it as-is 125 | // R will see it as a vector of factors... why not strings? 126 | columnList.push_back( dataFrame.Time() ); 127 | } 128 | } // if ( dataFrame.Time().size() ) 129 | 130 | // Copy data: NOTE in cppEDM data and time vector are separate 131 | // data are in a row-major valarray with NColumns(). 132 | for ( auto ci = columnNamesIn.begin(); ci != columnNamesIn.end(); ci++ ) { 133 | if ( hasTime and (*ci).compare( dataFrame.TimeName() ) == 0 ) { 134 | continue; // skip time. It's a vector< std::string > 135 | } 136 | 137 | // Unfortunately we have to copy to vector first 138 | std::valarray col_val = dataFrame.VectorColumnName( *ci ); 139 | std::vector col_vec(std::begin(col_val), std::end(col_val)); 140 | columnList.push_back( col_vec ); 141 | columnNames.push_back( *ci ); 142 | } 143 | 144 | r::DataFrame df ( columnList ); 145 | df.attr("names") = columnNames; 146 | 147 | return df; 148 | } 149 | 150 | //--------------------------------------------------------------- 151 | // Load path/file into cppEDM DataFrame, convert to Python 152 | // dict{ column : array } 153 | //--------------------------------------------------------------- 154 | r::DataFrame ReadDataFrame ( std::string path, std::string file ) { 155 | return DataFrameToDF( DataFrame< double >( path, file ) ); 156 | } 157 | -------------------------------------------------------------------------------- /src/Embed.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include "RcppEDMCommon.h" 3 | #include "API.h" 4 | 5 | //--------------------------------------------------------------- 6 | // 7 | //--------------------------------------------------------------- 8 | r::DataFrame Embed_rcpp( std::string path, 9 | std::string dataFile, 10 | r::DataFrame dataFrame, 11 | int E, 12 | int tau, 13 | std::string columns, 14 | bool verbose ) { 15 | 16 | DataFrame< double > embedded; 17 | 18 | if ( dataFile.size() ) { 19 | // dataFile specified, dispatch overloaded Embed, ignore dataFrame 20 | embedded = Embed( path, 21 | dataFile, 22 | E, 23 | tau, 24 | columns, 25 | verbose ); 26 | } 27 | else if ( dataFrame.ncol() ) { 28 | DataFrame< double > dataFrame_ = DFToDataFrame( dataFrame ); 29 | 30 | embedded = Embed( dataFrame_, 31 | E, 32 | tau, 33 | columns, 34 | verbose ); 35 | } 36 | else { 37 | Rcpp::warning( "Embed_rcpp(): Invalid input.\n" ); 38 | } 39 | 40 | return DataFrameToDF( embedded ); 41 | } 42 | 43 | //--------------------------------------------------------------- 44 | // 45 | //--------------------------------------------------------------- 46 | r::DataFrame MakeBlock_rcpp( r::DataFrame dataFrame, 47 | int E, 48 | int tau, 49 | std::vector columnNames, 50 | bool deletePartial ) { 51 | 52 | DataFrame< double > dataFrame_ = DFToDataFrame( dataFrame ); 53 | 54 | DataFrame< double > block = MakeBlock( dataFrame_, 55 | E, 56 | tau, 57 | columnNames, 58 | deletePartial ); 59 | 60 | return DataFrameToDF( block ); 61 | } 62 | -------------------------------------------------------------------------------- /src/EmbedDim.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include "RcppEDMCommon.h" 3 | 4 | //--------------------------------------------------------------- 5 | // 6 | //--------------------------------------------------------------- 7 | r::DataFrame EmbedDimension_rcpp( std::string pathIn, 8 | std::string dataFile, 9 | r::DataFrame dataFrame, 10 | std::string pathOut, 11 | std::string predictFile, 12 | std::string lib, 13 | std::string pred, 14 | int maxE, 15 | int Tp, 16 | int tau, 17 | int exclusionRadius, 18 | std::string columns, 19 | std::string target, 20 | bool embedded, 21 | bool verbose, 22 | std::vector validLib, 23 | unsigned numThreads ) { 24 | 25 | DataFrame< double > EmbedDimDF; 26 | 27 | if ( dataFile.size() ) { 28 | // dataFile specified, dispatch overloaded EmbedDimension, 29 | // ignore dataFrame 30 | EmbedDimDF = EmbedDimension( pathIn, 31 | dataFile, 32 | pathOut, 33 | predictFile, 34 | lib, 35 | pred, 36 | maxE, 37 | Tp, 38 | tau, 39 | exclusionRadius, 40 | columns, 41 | target, 42 | embedded, 43 | verbose, 44 | validLib, 45 | numThreads ); 46 | } 47 | else if ( dataFrame.size() ) { 48 | DataFrame< double > dataFrame_ = DFToDataFrame( dataFrame ); 49 | 50 | EmbedDimDF = EmbedDimension( dataFrame_, 51 | pathOut, 52 | predictFile, 53 | lib, 54 | pred, 55 | maxE, 56 | Tp, 57 | tau, 58 | exclusionRadius, 59 | columns, 60 | target, 61 | embedded, 62 | verbose, 63 | validLib, 64 | numThreads ); 65 | } 66 | else { 67 | Rcpp::warning( "EmbedDimension_rcpp(): Invalid input.\n" ); 68 | } 69 | 70 | return DataFrameToDF( EmbedDimDF ); 71 | } 72 | -------------------------------------------------------------------------------- /src/Makevars: -------------------------------------------------------------------------------- 1 | # NOTE : Do not put comments on a non-comment line. 2 | LIB_PATH = ./cppEDM/lib/ 3 | CPPEDM_SRC_PATH = ./cppEDM/src/ 4 | LIBEDM = $(LIB_PATH)/libEDM.a 5 | 6 | PKG_CPPFLAGS = -I $(CPPEDM_SRC_PATH) 7 | PKG_LIBS = -L $(LIB_PATH) -lEDM $(LAPACK_LIBS) $(BLAS_LIBS) $(FLIBS) 8 | 9 | # CXX_STD required for RcppThread on GCC ASAN/UBSAN ? 10 | # R 4.3 and above uses default CXX17. Will change with R version? 11 | CXX_STD = CXX17 12 | 13 | .PHONY: all $(LIBEDM) 14 | 15 | all : $(SHLIB) 16 | 17 | $(SHLIB): $(LIBEDM) 18 | 19 | # Pass R compiler variables CXX17... to cppEDM makefile 20 | $(LIBEDM): 21 | @(cd $(CPPEDM_SRC_PATH) && $(MAKE) clean && $(MAKE) \ 22 | CXX="$(CXX17) $(CXX17STD)" CXXFLAGS="$(CXX17FLAGS) $(CXX17PICFLAGS)" \ 23 | AR="$(AR)" RANLIB="$(RANLIB)") 24 | -------------------------------------------------------------------------------- /src/Makevars.win: -------------------------------------------------------------------------------- 1 | ## It seems that cppEDM/lib is not created used...? 2 | ## Leave libEDM.a in cppEDM/src and link from there. 3 | 4 | ## Include paths are preprocessor options, not compiler options, 5 | ## and must be set in PKG_CPPFLAGS 6 | ## Set flags for the linker, for example -l and -L options, via PKG_LIBS 7 | ## Do not set variables such as CPPFLAGS, CFLAGS etc. 8 | 9 | PKG_CPPFLAGS = -I./cppEDM/src -I../ 10 | PKG_LIBS = -L./cppEDM/src/ -lEDM $(LAPACK_LIBS) 11 | ## CXX_STD = CXX11 12 | 13 | $(SHLIB): cppEDM/src/libEDM.a 14 | 15 | cppEDM/src/libEDM.a: 16 | @(cd cppEDM/src && $(MAKE) -f makefile.mingw \ 17 | CC="$(CC)" CFLAGS="-DCCM_THREADED -DUSING_R -I../ $(CPICFLAGS)" AR="$(AR)" RANLIB="$(RANLIB)") 18 | -------------------------------------------------------------------------------- /src/Multiview.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include "RcppEDMCommon.h" 3 | 4 | //-------------------------------------------------------------- 5 | // 6 | //-------------------------------------------------------------- 7 | r::List Multiview_rcpp ( std::string pathIn, 8 | std::string dataFile, 9 | r::DataFrame dataFrame, 10 | //std::string pathOut, // Rcpp 20 arg limit 11 | //std::string predictFile, // Rcpp 20 arg limit 12 | std::string lib, 13 | std::string pred, 14 | int D, 15 | int E, 16 | int Tp, 17 | int knn, 18 | int tau, 19 | std::string columns, 20 | std::string target, 21 | int multiview, 22 | int exclusionRadius, 23 | bool trainLib, 24 | bool excludeTarget, 25 | bool parameterList, 26 | bool verbose, 27 | unsigned int numThreads ) { 28 | 29 | MultiviewValues MV; 30 | 31 | std::string pathOut("./"); // Rcpp has 20 arg limit 32 | std::string predictFile(""); // Rcpp has 20 arg limit 33 | 34 | if ( dataFile.size() ) { 35 | // dataFile specified, dispatch overloaded Multiview, ignore dataFrame 36 | 37 | MV = Multiview( pathIn, 38 | dataFile, 39 | pathOut, 40 | predictFile, 41 | lib, 42 | pred, 43 | D, 44 | E, 45 | Tp, 46 | knn, 47 | tau, 48 | columns, 49 | target, 50 | multiview, 51 | exclusionRadius, 52 | trainLib, 53 | excludeTarget, 54 | parameterList, 55 | verbose, 56 | numThreads ); 57 | } 58 | else if ( dataFrame.size() ) { 59 | DataFrame< double > dataFrame_ = DFToDataFrame( dataFrame ); 60 | 61 | MV = Multiview( dataFrame_, 62 | pathOut, 63 | predictFile, 64 | lib, 65 | pred, 66 | D, 67 | E, 68 | Tp, 69 | knn, 70 | tau, 71 | columns, 72 | target, 73 | multiview, 74 | exclusionRadius, 75 | trainLib, 76 | excludeTarget, 77 | parameterList, 78 | verbose, 79 | numThreads ); 80 | } 81 | else { 82 | Rcpp::warning( "Multiview_rcpp(): Invalid input.\n" ); 83 | } 84 | 85 | r::DataFrame comboRho = DataFrameToDF( MV.ComboRho ); 86 | r::DataFrame predictions = DataFrameToDF( MV.Predictions ); 87 | 88 | // ColumnNames are: map< string, vector >, convert to List 89 | r::List columnNames; 90 | for ( auto cni = MV.ColumnNames.begin(); 91 | cni != MV.ColumnNames.end(); cni++ ) { 92 | r::StringVector strVec; 93 | std::vector< std::string > names = cni->second; 94 | for ( auto ni = names.begin(); ni != names.end(); ni++ ) { 95 | strVec.push_back( *ni ); 96 | } 97 | columnNames[ cni->first ] = strVec; 98 | } 99 | 100 | r::List output = r::List::create( 101 | r::Named("ComboRho") = comboRho, 102 | r::Named("ColumnNames") = columnNames, 103 | r::Named("Predictions") = predictions ); 104 | 105 | if ( parameterList ) { 106 | r::List paramList = ParamMaptoList( MV.parameterMap ); 107 | output["parameters"] = paramList; 108 | } 109 | 110 | // Multiview.R in EDM.R will convert comboLines into an R data.frame 111 | return output; 112 | } 113 | -------------------------------------------------------------------------------- /src/ParameterList.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include "RcppEDMCommon.h" 3 | 4 | //---------------------------------------------------------------- 5 | // 6 | //---------------------------------------------------------------- 7 | r::List ParamMaptoList( std::map< std::string, std::string > m ) { 8 | 9 | r::List L; 10 | 11 | for ( auto pi = m.begin(); pi != m.end(); pi++ ) { 12 | // string types 13 | if ( pi->first == "version" or 14 | pi->first == "method" or pi->first == "columns" or 15 | pi->first == "target" or pi->first == "pathIn" or 16 | pi->first == "dataFile" or pi->first == "pathOut" or 17 | pi->first == "predictOutputFile" or 18 | pi->first == "SmapOutputFile" or 19 | pi->first == "blockOutputFile" ) { 20 | 21 | L[ pi->first ] = pi->second; 22 | } 23 | // int types 24 | else if ( pi->first == "E" or pi->first == "Tp" or 25 | pi->first == "knn" or pi->first == "tau" or 26 | pi->first == "exclusionRadius" or 27 | pi->first == "seed" or 28 | pi->first == "subSamples" or 29 | pi->first == "multiviewEnsemble" or 30 | pi->first == "multiviewD" or 31 | pi->first == "generateSteps" ) { 32 | 33 | L[ pi->first ] = std::stoi( pi->second ); 34 | } 35 | // boolean types 36 | else if ( pi->first == "randomLib" or 37 | pi->first == "replacement" or 38 | pi->first == "includeData" or 39 | pi->first == "multiviewTrainLib" or 40 | pi->first == "multiviewExcludeTarget" or 41 | pi->first == "embedded" or 42 | pi->first == "const_predict" or 43 | pi->first == "parameterList" or 44 | pi->first == "verbose" ) { 45 | 46 | if ( pi->second == "0" ) { 47 | L[ pi->first ] = false; 48 | } 49 | if ( pi->second == "1" ) { 50 | L[ pi->first ] = true; 51 | } 52 | } 53 | // vector of int 54 | else if ( pi->first == "lib" or pi->first == "pred" or 55 | pi->first == "libSizes" or pi->first == "validLib" ) { 56 | std::stringstream iss( pi->second ); 57 | std::vector< int > intVector; 58 | int value; 59 | 60 | while ( iss >> value ) { 61 | intVector.push_back( value ); 62 | } 63 | 64 | L[ pi->first ] = intVector; 65 | } 66 | // float type 67 | else if ( pi->first == "theta" ) { 68 | L[ pi->first ] = std::stof( pi->second ); 69 | } 70 | } 71 | 72 | return L; 73 | } 74 | -------------------------------------------------------------------------------- /src/PredictInterval.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include "RcppEDMCommon.h" 3 | 4 | //--------------------------------------------------------------- 5 | // Input data path and file 6 | //--------------------------------------------------------------- 7 | r::DataFrame PredictInterval_rcpp( std::string pathIn, 8 | std::string dataFile, 9 | r::DataFrame dataFrame, 10 | std::string pathOut, 11 | std::string predictFile, 12 | std::string lib, 13 | std::string pred, 14 | int maxTp, 15 | int E, 16 | int tau, 17 | int exclusionRadius, 18 | std::string columns, 19 | std::string target, 20 | bool embedded, 21 | bool verbose, 22 | std::vector validLib, 23 | unsigned numThreads ) { 24 | 25 | DataFrame< double > PredictDF; 26 | 27 | if ( dataFile.size() ) { 28 | // dataFile specified, dispatch overloaded PredictInterval, 29 | // ignore dataFrame 30 | PredictDF = PredictInterval( pathIn, 31 | dataFile, 32 | pathOut, 33 | predictFile, 34 | lib, 35 | pred, 36 | maxTp, 37 | E, 38 | tau, 39 | exclusionRadius, 40 | columns, 41 | target, 42 | embedded, 43 | verbose, 44 | validLib, 45 | numThreads ); 46 | } 47 | else if ( dataFrame.size() ) { 48 | DataFrame< double > dataFrame_ = DFToDataFrame( dataFrame ); 49 | 50 | PredictDF = PredictInterval( dataFrame_, 51 | pathOut, 52 | predictFile, 53 | lib, 54 | pred, 55 | maxTp, 56 | E, 57 | tau, 58 | exclusionRadius, 59 | columns, 60 | target, 61 | embedded, 62 | verbose, 63 | validLib, 64 | numThreads ); 65 | } 66 | else { 67 | Rcpp::warning("PredictInterval_rcpp(): Invalid input.\n"); 68 | } 69 | 70 | return DataFrameToDF( PredictDF ); 71 | } 72 | -------------------------------------------------------------------------------- /src/PredictNL.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include "RcppEDMCommon.h" 3 | 4 | //--------------------------------------------------------------- 5 | // Input data path and file 6 | //--------------------------------------------------------------- 7 | r::DataFrame PredictNonlinear_rcpp( std::string pathIn, 8 | std::string dataFile, 9 | r::DataFrame dataFrame, 10 | std::string pathOut, 11 | std::string predictFile, 12 | std::string lib, 13 | std::string pred, 14 | std::string theta, 15 | int E, 16 | int Tp, 17 | int knn, 18 | int tau, 19 | int exclusionRadius, 20 | std::string columns, 21 | std::string target, 22 | bool embedded, 23 | bool verbose, 24 | std::vector validLib, 25 | bool ignoreNan, 26 | unsigned numThreads ) { 27 | 28 | DataFrame< double > PredictDF; 29 | 30 | if ( dataFile.size() ) { 31 | // dataFile specified, dispatch overloaded PredictNonlinear, 32 | // ignore dataFrame 33 | PredictDF = PredictNonlinear( pathIn, 34 | dataFile, 35 | pathOut, 36 | predictFile, 37 | lib, 38 | pred, 39 | theta, 40 | E, 41 | Tp, 42 | knn, 43 | tau, 44 | exclusionRadius, 45 | columns, 46 | target, 47 | embedded, 48 | verbose, 49 | validLib, 50 | ignoreNan, 51 | numThreads ); 52 | } 53 | else if ( dataFrame.size() ) { 54 | DataFrame< double > dataFrame_ = DFToDataFrame( dataFrame ); 55 | 56 | PredictDF = PredictNonlinear( dataFrame_, 57 | pathOut, 58 | predictFile, 59 | lib, 60 | pred, 61 | theta, 62 | E, 63 | Tp, 64 | knn, 65 | tau, 66 | exclusionRadius, 67 | columns, 68 | target, 69 | embedded, 70 | verbose, 71 | validLib, 72 | ignoreNan, 73 | numThreads ); 74 | } 75 | else { 76 | Rcpp::warning("PredictNonlinear_rcpp(): Invalid input.\n"); 77 | } 78 | 79 | return DataFrameToDF( PredictDF ); 80 | } 81 | -------------------------------------------------------------------------------- /src/RcppEDMCommon.cpp: -------------------------------------------------------------------------------- 1 | // Expose and map cpp wrapper functions to EDM module via Rcpp 2 | // See RCPP_MODULE() at end of file. 3 | // 4 | // Requirements for a function to be exposed to R via Rcpp modules are: 5 | // The function takes between 0 and 65 parameters. 6 | // The type of each input parameter must be manageable by Rcpp::as template. 7 | // The return type of the function must be either void or any type that can 8 | // be managed by the Rcpp::wrap template. 9 | // The function name itself has to be unique in the module. In other words, 10 | // no two functions with the same name but different signatures are allowed. 11 | // C++ allows overloading functions. This might be added in future versions 12 | // of modules. 13 | 14 | #include "RcppEDMCommon.h" 15 | 16 | //------------------------------------------------------------------------- 17 | // Definitions of formal arguments and default params of the R functions 18 | // that encapsulate the C++ functions in an Rcpp::List. 19 | //------------------------------------------------------------------------- 20 | auto ReadDataFrameArgs = r::List::create( r::_["path"] = "", 21 | r::_["file"] = "" ); 22 | 23 | auto MakeBlockArgs = r::List::create( 24 | r::_["dataFrame"] = r::DataFrame(), 25 | r::_["E"] = 0, 26 | r::_["tau"] = -1, 27 | r::_["columnNames"] = std::vector(), 28 | r::_["deletePartial"] = false ); 29 | 30 | auto EmbedArgs = r::List::create( 31 | r::_["path"] = std::string(""), 32 | r::_["dataFile"] = std::string(""), 33 | r::_["dataFrame"] = r::DataFrame(), 34 | r::_["E"] = 0, 35 | r::_["tau"] = -1, 36 | r::_["columns"] = std::string(""), 37 | r::_["verbose"] = false ); 38 | 39 | auto SimplexArgs = r::List::create( 40 | r::_["pathIn"] = std::string("./"), 41 | r::_["dataFile"] = std::string(""), 42 | r::_["dataFrame"] = r::DataFrame(), 43 | r::_["pathOut"] = std::string("./"), 44 | r::_["predictFile"] = std::string(""), 45 | r::_["lib"] = std::string(""), 46 | r::_["pred"] = std::string(""), 47 | r::_["E"] = 0, 48 | r::_["Tp"] = 1, 49 | r::_["knn"] = 0, 50 | r::_["tau"] = -1, 51 | r::_["exclusionRadius"] = 0, 52 | r::_["columns"] = std::string(""), 53 | r::_["target"] = std::string(""), 54 | r::_["embedded"] = false, 55 | //r::_["const_predict"] = false, // Rcpp 20 arg limit 56 | r::_["verbose"] = false, 57 | r::_["validLib"] = std::vector(), 58 | r::_["generateSteps"] = 0, 59 | //r::_["generateLibrary"] = false, // Rcpp 20 arg limit 60 | r::_["parameterList"] = false ); 61 | 62 | auto SMapArgs = r::List::create( 63 | r::_["pathIn"] = std::string("./"), 64 | r::_["dataFile"] = std::string(""), 65 | r::_["dataFrame"] = r::DataFrame(), 66 | //r::_["pathOut"] = std::string("./"), // Rcpp 20 arg limit 67 | //r::_["predictFile"] = std::string(""), // Rcpp 20 arg limit 68 | r::_["lib"] = std::string(""), 69 | r::_["pred"] = std::string(""), 70 | r::_["E"] = 0, 71 | r::_["Tp"] = 1, 72 | r::_["knn"] = 0, 73 | r::_["tau"] = -1, 74 | r::_["theta"] = 0, 75 | r::_["exclusionRadius"] = 0, 76 | r::_["columns"] = std::string(""), 77 | r::_["target"] = std::string(""), 78 | //r::_["smapCoefFile"] = std::string(""), // Rcpp 20 arg limit 79 | //r::_["smapSVFile"] = std::string(""), // Rcpp 20 arg limit 80 | //r::_["solver"] = std::string(""), // Not supported by glmnet 81 | r::_["embedded"] = false, 82 | //r::_["const_predict"] = false, // Rcpp 20 arg limit 83 | r::_["verbose"] = false, 84 | r::_["validLib"] = std::vector(), 85 | r::_["ignoreNan"] = true, 86 | r::_["generateSteps"] = 0, 87 | //r::_["generateLibrary"] = false, // Rcpp 20 arg limit 88 | r::_["parameterList"] = false ); 89 | 90 | auto MultiviewArgs = r::List::create( 91 | r::_["pathIn"] = std::string("./"), 92 | r::_["dataFile"] = std::string(""), 93 | r::_["dataFrame"] = r::DataFrame(), 94 | //r::_["pathOut"] = std::string("./"), // Rcpp 20 arg limit 95 | //r::_["predictFile"] = std::string(""), // Rcpp 20 arg limit 96 | r::_["lib"] = std::string(""), 97 | r::_["pred"] = std::string(""), 98 | r::_["D"] = 0, 99 | r::_["E"] = 1, 100 | r::_["Tp"] = 1, 101 | r::_["knn"] = 0, 102 | r::_["tau"] = -1, 103 | r::_["columns"] = std::string(""), 104 | r::_["target"] = std::string(""), 105 | r::_["multiview"] = 0, 106 | r::_["exlcusionRadius"] = 0, 107 | r::_["trainLib"] = true, 108 | r::_["excludeTarget"] = false, 109 | r::_["parameterList"] = false, 110 | r::_["verbose"] = false, 111 | r::_["numThreads"] = 4 ); 112 | 113 | auto CCMArgs = r::List::create( 114 | r::_["pathIn"] = std::string("./"), 115 | r::_["dataFile"] = std::string(""), 116 | r::_["dataFrame"] = r::DataFrame(), 117 | //r::_["pathOut"] = std::string("./"), // Rcpp 20 arg limit 118 | //r::_["predictFile"] = std::string(""), // Rcpp 20 arg limit 119 | r::_["E"] = 0, 120 | r::_["Tp"] = 0, 121 | r::_["knn"] = 0, 122 | r::_["tau"] = -1, 123 | r::_["exlcusionRadius"] = 0, 124 | r::_["columns"] = std::string(""), 125 | r::_["target"] = std::string(""), 126 | r::_["libSizes"] = std::string(""), 127 | r::_["sample"] = 0, 128 | r::_["random"] = true, 129 | //r::_["replacement"] = false, // Rcpp 20 arg limit 130 | r::_["seed"] = 0, 131 | r::_["embedded"] = false, 132 | r::_["includeData"] = false, 133 | r::_["parameterList"] = false, 134 | r::_["verbose"] = false ); 135 | 136 | auto EmbedDimensionArgs = r::List::create( 137 | r::_["pathIn"] = std::string("./"), 138 | r::_["dataFile"] = std::string(""), 139 | r::_["dataFrame"] = r::DataFrame(), 140 | r::_["pathOut"] = std::string("./"), 141 | r::_["predictFile"] = std::string(""), 142 | r::_["lib"] = std::string(""), 143 | r::_["pred"] = std::string(""), 144 | r::_["maxE"] = 10, 145 | r::_["Tp"] = 1, 146 | r::_["tau"] = -1, 147 | r::_["exclusionRadius"] = 0, 148 | r::_["columns"] = std::string(""), 149 | r::_["target"] = std::string(""), 150 | r::_["embedded"] = false, 151 | r::_["verbose"] = false, 152 | r::_["validLib"] = std::vector(), 153 | r::_["numThreads"] = 4 ); 154 | 155 | auto PredictIntervalArgs = r::List::create( 156 | r::_["pathIn"] = std::string("./"), 157 | r::_["dataFile"] = std::string(""), 158 | r::_["dataFrame"] = r::DataFrame(), 159 | r::_["pathOut"] = std::string("./"), 160 | r::_["predictFile"] = std::string(""), 161 | r::_["lib"] = std::string(""), 162 | r::_["pred"] = std::string(""), 163 | r::_["maxTp"] = 10, 164 | r::_["E"] = 0, 165 | r::_["tau"] = -1, 166 | r::_["exclusionRadius"] = 0, 167 | r::_["columns"] = std::string(""), 168 | r::_["target"] = std::string(""), 169 | r::_["embedded"] = false, 170 | r::_["verbose"] = false, 171 | r::_["validLib"] = std::vector(), 172 | r::_["numThreads"] = 4 ); 173 | 174 | auto PredictNonlinearArgs = r::List::create( 175 | r::_["pathIn"] = std::string("./"), 176 | r::_["dataFile"] = std::string(""), 177 | r::_["dataFrame"] = r::DataFrame(), 178 | r::_["pathOut"] = std::string("./"), 179 | r::_["predictFile"] = std::string(""), 180 | r::_["lib"] = std::string(""), 181 | r::_["pred"] = std::string(""), 182 | r::_["theta"] = std::string(""), 183 | r::_["E"] = 0, 184 | r::_["Tp"] = 1, 185 | r::_["knn"] = 0, 186 | r::_["tau"] = -1, 187 | r::_["exclusionRadius"] = 0, 188 | r::_["columns"] = std::string(""), 189 | r::_["target"] = std::string(""), 190 | r::_["embedded"] = false, 191 | r::_["verbose"] = false, 192 | r::_["validLib"] = std::vector(), 193 | r::_["ignoreNan"] = true, 194 | r::_["numThreads"] = 4 ); 195 | 196 | //------------------------------------------------------------------------- 197 | // Export / map the functions 198 | // First argument: R function name, see ../R/EDM.R 199 | // Second argument: pointer to Rcpp interface function 200 | // Third argument: arguments of the R function that encapsulates the 201 | // C++ function in a Rcpp::List 202 | //------------------------------------------------------------------------- 203 | RCPP_MODULE(EDMInternal) { 204 | r::function( "RtoCpp_ComputeError", &ComputeError_rcpp ); 205 | r::function( "RtoCpp_ReadDataFrame", &ReadDataFrame, ReadDataFrameArgs ); 206 | r::function( "RtoCpp_MakeBlock", &MakeBlock_rcpp, MakeBlockArgs ); 207 | r::function( "RtoCpp_Embed", &Embed_rcpp, EmbedArgs ); 208 | r::function( "RtoCpp_Simplex", &Simplex_rcpp, SimplexArgs ); 209 | r::function( "RtoCpp_SMap", &SMap_rcpp, SMapArgs ); 210 | r::function( "RtoCpp_Multiview", &Multiview_rcpp, MultiviewArgs ); 211 | r::function( "RtoCpp_CCM", &CCM_rcpp, CCMArgs ); 212 | r::function( "RtoCpp_EmbedDimension", &EmbedDimension_rcpp, 213 | EmbedDimensionArgs ); 214 | r::function( "RtoCpp_PredictInterval", &PredictInterval_rcpp, 215 | PredictIntervalArgs ); 216 | r::function( "RtoCpp_PredictNonlinear", &PredictNonlinear_rcpp, 217 | PredictNonlinearArgs ); 218 | } 219 | -------------------------------------------------------------------------------- /src/RcppEDMCommon.h: -------------------------------------------------------------------------------- 1 | 2 | // R to C++ interface using Rcpp 3 | // Functional flow: R func calls Rcpp func calls C++ func. 4 | 5 | #ifndef RCPPEDMCOMMON 6 | #define RCPPEDMCOMMON 7 | 8 | #define RCPPTHREAD_OVERRIDE_COUT 1 // std::cout override 9 | 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include "API.h" 15 | 16 | namespace r = Rcpp; 17 | 18 | // Forward declarations 19 | DataFrame< double > DFToDataFrame ( Rcpp::DataFrame df ); 20 | 21 | r::DataFrame DataFrameToDF ( DataFrame< double > dataFrame ); 22 | 23 | r::DataFrame ReadDataFrame ( std::string path, std::string file ); 24 | 25 | r::List ParamMaptoList( std::map< std::string, std::string > m ); 26 | 27 | r::List Simplex_rcpp( std::string pathIn, 28 | std::string dataFile, 29 | r::DataFrame dataList, 30 | std::string pathOut, 31 | std::string predictFile, 32 | std::string lib, 33 | std::string pred, 34 | int E, 35 | int Tp, 36 | int knn, 37 | int tau, 38 | int exclusionRadius, 39 | std::string columns, 40 | std::string target, 41 | bool embedded, 42 | // bool const_predict, // Rcpp 20 arg limit 43 | bool verbose, 44 | std::vector validLib, 45 | int generateSteps, 46 | // bool generateLibrary, // Rcpp 20 arg limit 47 | bool parameterList ); 48 | 49 | r::List SMap_rcpp( std::string pathIn, 50 | std::string dataFile, 51 | r::DataFrame dataList, 52 | //std::string pathOut, // Rcpp 20 arg limit 53 | //std::string predictFile, // Rcpp 20 arg limit 54 | std::string lib, 55 | std::string pred, 56 | int E, 57 | int Tp, 58 | int knn, 59 | int tau, 60 | double theta, 61 | int exclusionRadius, 62 | std::string columns, 63 | std::string target, 64 | //std::string smapCoefFile, // Rcpp 20 arg limit 65 | //std::string smapSVFile, // Rcpp 20 arg limit 66 | //SVDValues (*solver) // Not supported by glmnet 67 | bool embedded, 68 | //bool const_predict, // Rcpp 20 arg limit 69 | bool verbose, 70 | std::vector validLib, 71 | bool ignoreNan, 72 | int generateSteps, 73 | //bool generateLibrary, // Rcpp 20 arg limit 74 | bool parameterList ); 75 | 76 | r::List CCM_rcpp( std::string pathIn, 77 | std::string dataFile, 78 | r::DataFrame dataList, 79 | //std::string pathOut, // Rcpp 20 arg limit 80 | //std::string predictFile, // Rcpp 20 arg limit 81 | int E, 82 | int Tp, 83 | int knn, 84 | int tau, 85 | int exclusionRadius, 86 | std::string columns, 87 | std::string target, 88 | std::string libSizes, 89 | int sample, 90 | bool random, 91 | // bool replacement, // Rcpp 20 arg limit 92 | unsigned seed, 93 | bool embedded, 94 | bool includeData, 95 | bool parameterList, 96 | bool verbose ); 97 | 98 | r::List Multiview_rcpp ( std::string pathIn, 99 | std::string dataFile, 100 | r::DataFrame dataList, 101 | //std::string pathOut, // Rcpp 20 arg limit 102 | //std::string predictFile, // Rcpp 20 arg limit 103 | std::string lib, 104 | std::string pred, 105 | int D, 106 | int E, 107 | int Tp, 108 | int knn, 109 | int tau, 110 | std::string columns, 111 | std::string target, 112 | int multiview, 113 | int exlcusionRadius, 114 | bool trainLib, 115 | bool excludeTarget, 116 | bool parameterList, 117 | bool verbose, 118 | unsigned int numThreads ); 119 | 120 | r::DataFrame PredictNonlinear_rcpp( std::string pathIn, 121 | std::string dataFile, 122 | r::DataFrame dataList, 123 | std::string pathOut, 124 | std::string predictFile, 125 | std::string lib, 126 | std::string pred, 127 | std::string theta, 128 | int E, 129 | int Tp, 130 | int knn, 131 | int tau, 132 | int exclusionRadius, 133 | std::string columns, 134 | std::string target, 135 | bool embedded, 136 | bool verbose, 137 | std::vector validLib, 138 | bool ignoreNan, 139 | unsigned numThreads ); 140 | 141 | r::DataFrame PredictInterval_rcpp( std::string pathIn, 142 | std::string dataFile, 143 | r::DataFrame dataList, 144 | std::string pathOut, 145 | std::string predictFile, 146 | std::string lib, 147 | std::string pred, 148 | int maxTp, 149 | int E, 150 | int tau, 151 | int exclusionRadius, 152 | std::string columns, 153 | std::string target, 154 | bool embedded, 155 | bool verbose, 156 | std::vector validLib, 157 | unsigned numThreads ); 158 | 159 | r::DataFrame EmbedDimension_rcpp( std::string pathIn, 160 | std::string dataFile, 161 | r::DataFrame dataList, 162 | std::string pathOut, 163 | std::string predictFile, 164 | std::string lib, 165 | std::string pred, 166 | int maxE, 167 | int Tp, 168 | int tau, 169 | int exclusionRadius, 170 | std::string columns, 171 | std::string target, 172 | bool embedded, 173 | bool verbose, 174 | std::vector validLib, 175 | unsigned numThreads ); 176 | 177 | r::DataFrame Embed_rcpp( std::string path, 178 | std::string dataFile, 179 | r::DataFrame df, 180 | int E, 181 | int tau, 182 | std::string columns, 183 | bool verbose ); 184 | 185 | r::DataFrame MakeBlock_rcpp( r::DataFrame dataList, 186 | int E, 187 | int tau, 188 | std::vector columnNames, 189 | bool deletePartial ); 190 | 191 | r::List ComputeError_rcpp ( std::vector vec1, 192 | std::vector vec2 ); 193 | #endif 194 | -------------------------------------------------------------------------------- /src/RcppExports.cpp: -------------------------------------------------------------------------------- 1 | // Generated by using Rcpp::compileAttributes() -> do not edit by hand 2 | // Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 3 | 4 | #include 5 | 6 | using namespace Rcpp; 7 | 8 | 9 | RcppExport SEXP _rcpp_module_boot_EDMInternal(); 10 | 11 | static const R_CallMethodDef CallEntries[] = { 12 | {"_rcpp_module_boot_EDMInternal", (DL_FUNC) &_rcpp_module_boot_EDMInternal, 0}, 13 | {NULL, NULL, 0} 14 | }; 15 | 16 | RcppExport void R_init_rEDM(DllInfo *dll) { 17 | R_registerRoutines(dll, NULL, CallEntries, NULL, NULL); 18 | R_useDynamicSymbols(dll, FALSE); 19 | } 20 | -------------------------------------------------------------------------------- /src/SMap.cpp: -------------------------------------------------------------------------------- 1 | #include "RcppEDMCommon.h" 2 | 3 | //---------------------------------------------------------- 4 | // 5 | //---------------------------------------------------------- 6 | r::List SMap_rcpp( std::string pathIn, 7 | std::string dataFile, 8 | r::DataFrame dataFrame, 9 | //std::string pathOut, // Rcpp 20 arg limit 10 | //std::string predictFile, // Rcpp 20 arg limit 11 | std::string lib, 12 | std::string pred, 13 | int E, 14 | int Tp, 15 | int knn, 16 | int tau, 17 | double theta, 18 | int exlusionRadius, 19 | std::string columns, 20 | std::string target, 21 | //std::string smapCoefFile, // Rcpp 20 arg limit 22 | //std::string smapSVFile, // Rcpp 20 arg limit 23 | bool embedded, 24 | //bool const_predict, // Rcpp 20 arg limit 25 | bool verbose, 26 | std::vector validLib, 27 | bool ignoreNan, 28 | int generateSteps, 29 | //bool generateLibrary, // Rcpp 20 arg limit 30 | bool parameterList ) { 31 | 32 | SMapValues SM; 33 | 34 | std::string pathOut("./"); // Rcpp 20 arg limit 35 | std::string predictFile(""); // Rcpp 20 arg limit 36 | std::string smapCoefFile(""); // Rcpp 20 arg limit 37 | std::string smapSVFile(""); // Rcpp 20 arg limit 38 | bool generateLibrary = false; // Rcpp 20 arg limit 39 | bool const_predict = false; // Rcpp 20 arg limit 40 | 41 | if ( dataFile.size() ) { 42 | // dataFile specified, dispatch overloaded SMap, ignore dataFrame 43 | 44 | SM = SMap( pathIn, 45 | dataFile, 46 | pathOut, 47 | predictFile, 48 | lib, 49 | pred, 50 | E, 51 | Tp, 52 | knn, 53 | tau, 54 | theta, 55 | exlusionRadius, 56 | columns, 57 | target, 58 | smapCoefFile, 59 | smapSVFile, 60 | embedded, 61 | const_predict, 62 | verbose, 63 | validLib, 64 | ignoreNan, 65 | generateSteps, 66 | generateLibrary, 67 | parameterList ); 68 | } 69 | else if ( dataFrame.size() ) { 70 | DataFrame< double > dataFrame_ = DFToDataFrame( dataFrame ); 71 | 72 | SM = SMap( dataFrame_, 73 | pathOut, 74 | predictFile, 75 | lib, 76 | pred, 77 | E, 78 | Tp, 79 | knn, 80 | tau, 81 | theta, 82 | exlusionRadius, 83 | columns, 84 | target, 85 | smapCoefFile, 86 | smapSVFile, 87 | embedded, 88 | const_predict, 89 | verbose, 90 | validLib, 91 | ignoreNan, 92 | generateSteps, 93 | generateLibrary, 94 | parameterList ); 95 | } 96 | else { 97 | Rcpp::warning( "SMap_rcpp(): Invalid input.\n" ); 98 | } 99 | 100 | r::DataFrame df_pred = DataFrameToDF( SM.predictions ); 101 | r::DataFrame df_coef = DataFrameToDF( SM.coefficients ); 102 | r::DataFrame df_SV = DataFrameToDF( SM.singularValues ); 103 | r::List output = r::List::create( r::Named("predictions") = df_pred, 104 | r::Named("coefficients") = df_coef, 105 | r::Named("singularValues") = df_SV ); 106 | 107 | if ( parameterList ) { 108 | r::List paramList = ParamMaptoList( SM.parameterMap ); 109 | output["parameters"] = paramList; 110 | } 111 | 112 | return output; 113 | } 114 | -------------------------------------------------------------------------------- /src/Simplex.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include "RcppEDMCommon.h" 3 | 4 | //------------------------------------------------------------- 5 | // 6 | //------------------------------------------------------------- 7 | r::List Simplex_rcpp( std::string pathIn, 8 | std::string dataFile, 9 | r::DataFrame dataFrame, 10 | std::string pathOut, 11 | std::string predictFile, 12 | std::string lib, 13 | std::string pred, 14 | int E, 15 | int Tp, 16 | int knn, 17 | int tau, 18 | int exclusionRadius, 19 | std::string columns, 20 | std::string target, 21 | bool embedded, 22 | //bool const_predict, // Rcpp 20 arg limit 23 | bool verbose, 24 | std::vector validLib, 25 | int generateSteps, 26 | //bool generateLibrary, // Rcpp 20 arg limit 27 | bool parameterList ) { 28 | 29 | SimplexValues S; 30 | 31 | bool const_predict = false; // Rcpp has 20 arg limit 32 | bool generateLibrary = false; // Rcpp has 20 arg limit 33 | 34 | if ( dataFile.size() ) { 35 | // dataFile specified, dispatch overloaded Simplex, ignore dataFrame 36 | S = Simplex( pathIn, 37 | dataFile, 38 | pathOut, 39 | predictFile, 40 | lib, 41 | pred, 42 | E, 43 | Tp, 44 | knn, 45 | tau, 46 | exclusionRadius, 47 | columns, 48 | target, 49 | embedded, 50 | const_predict, 51 | verbose, 52 | validLib, 53 | generateSteps, 54 | generateLibrary, 55 | parameterList ); 56 | } 57 | else if ( dataFrame.size() ) { 58 | DataFrame< double > dataFrame_ = DFToDataFrame( dataFrame ); 59 | 60 | S = Simplex( dataFrame_, 61 | pathOut, 62 | predictFile, 63 | lib, 64 | pred, 65 | E, 66 | Tp, 67 | knn, 68 | tau, 69 | exclusionRadius, 70 | columns, 71 | target, 72 | embedded, 73 | const_predict, 74 | verbose, 75 | validLib, 76 | generateSteps, 77 | generateLibrary, 78 | parameterList ); 79 | } 80 | else { 81 | Rcpp::warning( "Simplex_rcpp(): Invalid input.\n" ); 82 | } 83 | 84 | r::DataFrame df_pred = DataFrameToDF( S.predictions ); 85 | r::List output = r::List::create( r::Named("predictions") = df_pred ); 86 | 87 | if ( parameterList ) { 88 | r::List paramList = ParamMaptoList( S.parameterMap ); 89 | output["parameters"] = paramList; 90 | } 91 | 92 | return output; 93 | } 94 | -------------------------------------------------------------------------------- /src/cppEDM/lib/.gitignore: -------------------------------------------------------------------------------- 1 | !* 2 | -------------------------------------------------------------------------------- /src/cppEDM/lib/ignore.h: -------------------------------------------------------------------------------- 1 | // R devtools deletes empty dirs 2 | -------------------------------------------------------------------------------- /src/cppEDM/src/CCM.h: -------------------------------------------------------------------------------- 1 | 2 | #ifndef EDM_CCM_H 3 | #define EDM_CCM_H 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | #include "EDM.h" 13 | #include "Simplex.h" 14 | 15 | //---------------------------------------------------------------- 16 | // CCM class inherits from Simplex class and defines 17 | // CCM-specific projection methods 18 | //---------------------------------------------------------------- 19 | class CCMClass : public SimplexClass { 20 | public: 21 | // CCM implements two Simplex objects for cross mapping 22 | SimplexClass colToTarget; 23 | SimplexClass targetToCol; 24 | 25 | // Cross mapping results are stored here 26 | DataFrame< double > allLibStats; // CCM unified libsize, rho, RMSE, MAE 27 | CrossMapValues colToTargetValues; // CCM CrossMap() thread results 28 | CrossMapValues targetToColValues; // CCM CrossMap() thread results 29 | 30 | // Constructor 31 | CCMClass ( DataFrame< double > & data, 32 | Parameters & parameters ); 33 | 34 | // Method declarations 35 | void Project(); 36 | void SetupParameters(); 37 | void CCM(); 38 | void FormatOutput(); 39 | void WriteOutput(); 40 | }; 41 | #endif 42 | -------------------------------------------------------------------------------- /src/cppEDM/src/Common.cc: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | #include 4 | 5 | #include "Common.h" 6 | 7 | //--------------------------------------------------------------- 8 | // Binary sort function for FindNeighbors() & CCMNeighbors() 9 | //--------------------------------------------------------------- 10 | bool DistanceCompare( const std::pair & x, 11 | const std::pair & y ) { 12 | return x.first < y.first; 13 | } 14 | 15 | //---------------------------------------------------------------- 16 | // 17 | //---------------------------------------------------------------- 18 | std::string ToLower( std::string str ) { 19 | 20 | std::string lowerStr( str ); 21 | std::transform( lowerStr.begin(), lowerStr.end(), 22 | lowerStr.begin(), ::tolower ); 23 | 24 | return lowerStr; 25 | } 26 | 27 | //---------------------------------------------------------------- 28 | // SplitString 29 | // 30 | // Purpose: like Python string.split() 31 | // 32 | // Arguments: inString : string to be split 33 | // delimeters : string of delimeters 34 | // 35 | // Note: A typical delimeter string: delimeters = " \t,\n;" 36 | // 37 | // Return: vector of tokens 38 | //---------------------------------------------------------------- 39 | std::vector SplitString( std::string inString, 40 | std::string delimeters, 41 | bool removeWhitespace ) { 42 | size_t pos = 0; 43 | size_t eos = 0; 44 | size_t wordStart = 0; 45 | size_t wordEnd = 0; 46 | 47 | bool foundStart = false; 48 | bool foundEnd = false; 49 | 50 | std::vector splitString; 51 | 52 | std::string word; 53 | 54 | eos = inString.length(); 55 | 56 | while ( pos <= eos ) { 57 | if ( not foundStart ) { 58 | if ( delimeters.find( inString[pos] ) == delimeters.npos ) { 59 | // this char (inString[pos]) is not a delimeter 60 | wordStart = pos; 61 | foundStart = true; 62 | pos++; 63 | continue; 64 | } 65 | } 66 | if ( foundStart and not foundEnd ) { 67 | if ( delimeters.find( inString[pos] ) != delimeters.npos 68 | or pos == eos ) { 69 | // this char (inString[pos]) is a delimeter or 70 | // at the end of the string 71 | wordEnd = pos; 72 | foundEnd = true; 73 | } 74 | } 75 | if ( foundStart and foundEnd ) { 76 | foundStart = false; 77 | foundEnd = false; 78 | 79 | word = inString.substr( wordStart, wordEnd - wordStart ); 80 | 81 | if ( removeWhitespace ) { 82 | word.erase( std::remove_if( word.begin(), word.end(), ::isspace ), 83 | word.end() ); 84 | } 85 | 86 | splitString.push_back( word ); 87 | } 88 | if ( pos == eos ) { 89 | break; 90 | } 91 | pos++; 92 | } 93 | 94 | return splitString; 95 | } 96 | 97 | //---------------------------------------------------------------- 98 | // 99 | //---------------------------------------------------------------- 100 | VectorError ComputeError( std::valarray< double > obsIn, 101 | std::valarray< double > predIn ) { 102 | 103 | if ( obsIn.size() != predIn.size() ) { 104 | std::stringstream errMsg; 105 | errMsg << "ComputeError(): Observation size " 106 | << obsIn.size() << " is not equal to prediction size " 107 | << predIn.size(); 108 | throw std::runtime_error( errMsg.str() ); 109 | } 110 | 111 | // JP does find work on nan? Since nan != nan, probably not... 112 | // Use a slice to extract the overlapping subset of obsIn, PredIn 113 | // We need to find the appropriate slice parameters 114 | 115 | // To try and be efficient, we first scan for nans, if none: stats 116 | // If there are nans, copy from the overlapping values 117 | bool nanObs = false; 118 | bool nanPred = false; 119 | 120 | for ( auto o : obsIn ) { if ( std::isnan( o ) ) { nanObs = true; break; } } 121 | for ( auto p : predIn ) { if ( std::isnan( p ) ) { nanPred= true; break; } } 122 | 123 | // vectors to hold data with no nans: reassigned below 124 | std::valarray< double > obs; 125 | std::valarray< double > pred; 126 | size_t Nin = obsIn.size(); 127 | 128 | if ( not nanObs and not nanPred ) { 129 | obs = std::valarray< double >( obsIn ); 130 | pred = std::valarray< double >( predIn ); 131 | } 132 | else { 133 | // Handle nans 134 | // Build concurrent vector of bool pairs : isnan on obsIn, predIn 135 | std::vector< std::pair< bool, bool > > nanIndexPairs( Nin ); 136 | for ( size_t i = 0; i < Nin; i++ ) { 137 | nanIndexPairs[ i ] = std::make_pair( std::isnan( obsIn[i] ), 138 | std::isnan( predIn[i] ) ); 139 | } 140 | // Find overlapping subset indices or use set::intersection 141 | // Condense pairs into one boolean value in nonNanOverlap 142 | size_t Nout = 0; 143 | std::vector< bool > nonNanOverlap( Nin ); 144 | for ( size_t i = 0; i < Nin; i++ ) { 145 | if ( not nanIndexPairs[ i ].first and 146 | not nanIndexPairs[ i ].second ) { 147 | nonNanOverlap[ i ] = true; // Both are not nan, valid index 148 | Nout++; 149 | } 150 | else { 151 | nonNanOverlap[ i ] = false; 152 | } 153 | } 154 | 155 | if ( Nout < 6 ) { 156 | std::stringstream msg; 157 | msg << "WARNING: ComputeError(): nan found. Not enough data" 158 | << " to compute error." << std::endl; 159 | std::cout << msg.str(); 160 | 161 | obs = std::valarray< double >( 0., 1 ); // vector [0.] N = 1 162 | pred = std::valarray< double >( 0., 1 ); // vector [0.] N = 1 163 | } 164 | else { 165 | // Allocate the output arrays and fill with slices 166 | obs = std::valarray< double >( Nout ); 167 | pred = std::valarray< double >( Nout ); 168 | 169 | // Copy valid values into obs & pred 170 | size_t n = 0; 171 | for ( size_t i = 0; i < nonNanOverlap.size(); i++ ) { 172 | if ( nonNanOverlap[ i ] ) { 173 | obs [ n ] = obsIn [ i ]; 174 | pred[ n ] = predIn[ i ]; 175 | n++; 176 | } 177 | } 178 | } 179 | } 180 | 181 | size_t N = std::max( 1, (int) pred.size() ); 182 | std::valarray< double > two( 2, N ); // Vector of 2's for squaring 183 | 184 | double sumPred = pred.sum(); 185 | double sumObs = obs.sum(); 186 | double meanPred = sumPred / N; 187 | double meanObs = sumObs / N; 188 | double sumSqrPred = pow( pred, two ).sum(); 189 | double sumSqrObs = pow( obs, two ).sum(); 190 | double sumErr = abs( obs - pred ).sum(); 191 | double sumSqrErr = pow( obs - pred, two ).sum(); 192 | double sumProd = ( obs * pred ).sum(); 193 | 194 | double rho; // Pearson correlation coefficient 195 | 196 | double denom = ( std::sqrt( ( sumSqrObs - N * pow( meanObs, 2 ) ) ) * 197 | std::sqrt( ( sumSqrPred - N * pow( meanPred, 2 ) ) ) ); 198 | 199 | if ( denom == 0 or std::isnan( denom ) ) { 200 | rho = 0; 201 | } 202 | else { 203 | rho = ( sumProd - N * meanObs * meanPred ) / denom; 204 | } 205 | 206 | VectorError vectorError = VectorError(); 207 | 208 | vectorError.RMSE = sqrt( sumSqrErr / N ); 209 | vectorError.MAE = sumErr / N; 210 | vectorError.rho = rho; 211 | 212 | return vectorError; 213 | } 214 | -------------------------------------------------------------------------------- /src/cppEDM/src/Common.h: -------------------------------------------------------------------------------- 1 | #ifndef EDM_COMMON_H 2 | #define EDM_COMMON_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include // std::ref 13 | 14 | #ifdef _MSC_VER 15 | #include // macro constants for MSVC C++ operators not in ISO646 16 | #endif 17 | 18 | // Enumerations 19 | enum class Method { None, Embed, Simplex, SMap, CCM, Multiview }; 20 | enum class DistanceMetric { Euclidean, Manhattan }; 21 | 22 | #include "DataFrame.h" 23 | 24 | //--------------------------------------------------------- 25 | // Data structs 26 | //--------------------------------------------------------- 27 | struct VectorError { 28 | double rho; 29 | double RMSE; 30 | double MAE; 31 | }; 32 | 33 | struct SimplexValues { 34 | DataFrame< double > predictions; 35 | std::map< std::string, std::string > parameterMap; 36 | }; 37 | 38 | struct SMapValues { 39 | DataFrame< double > predictions; 40 | DataFrame< double > coefficients; 41 | DataFrame< double > singularValues; 42 | std::map< std::string, std::string > parameterMap; 43 | }; 44 | 45 | struct SVDValues { 46 | std::valarray< double > coefficients; 47 | std::valarray< double > singularValues; 48 | }; 49 | 50 | // Return object for CrossMap() worker function 51 | struct CrossMapValues { 52 | DataFrame< double > LibStats; // mean libsize, rho, RMSE, MAE 53 | DataFrame< double > PredictStats; // each predict libsize, rho, RMSE, MAE 54 | std::forward_list< DataFrame< double > > Predictions; 55 | }; 56 | 57 | // Return object for CCM() with two CrossMapValues 58 | struct CCMValues { 59 | DataFrame< double > AllLibStats; // unified mean libsize, rho, RMSE, MAE 60 | CrossMapValues CrossMap1; 61 | CrossMapValues CrossMap2; 62 | std::map< std::string, std::string > parameterMap; 63 | }; 64 | 65 | struct MultiviewValues { 66 | DataFrame< double > ComboRho; // col_i..., rho, MAE, RMSE 67 | DataFrame< double > Predictions; 68 | // Vectors of column names <- col_i 69 | std::map< std::string, std::vector< std::string > > ColumnNames; 70 | std::map< std::string, std::string > parameterMap; 71 | }; 72 | 73 | //------------------------------------------------------------- 74 | // Prototypes 75 | //------------------------------------------------------------- 76 | std::string ToLower( std::string str ); 77 | 78 | std::vector SplitString( std::string inString, 79 | std::string delimeters, 80 | bool removeWhitespace ); 81 | 82 | VectorError ComputeError( std::valarray< double > obs, 83 | std::valarray< double > pred ); 84 | 85 | std::string increment_datetime_str( std::string datetime1, 86 | std::string datetime2, 87 | int tp ); 88 | #endif 89 | -------------------------------------------------------------------------------- /src/cppEDM/src/DateTime.cc: -------------------------------------------------------------------------------- 1 | 2 | #include "DateTime.h" 3 | 4 | //--------------------------------------------------------------------- 5 | // Provide some utility for parsing datetime std::strings 6 | // to add some tp increment to a datetime std::string past the 7 | // given range of the time column. 8 | // 9 | // Fractional seconds not supported by strpftime or IncrementDatetime 10 | // 11 | // TIME FORMATS supported: 12 | // YYYY-MM-DD 13 | // HH:MM:SS 14 | // YYYY-MM-DD HH:MM:SS (2019-06-30 10:26:10) 15 | // YYYY-MM-DDTHH:MM:SS (2019-06-30T10:26:10) 16 | // 17 | //--------------------------------------------------------------------- 18 | // Time formats 19 | std::string YMD ("%Y-%m-%d"); 20 | std::string HMS ("%H:%M:%S"); 21 | std::string YMD_HMS ("%Y-%m-%d %H:%M:%S"); 22 | std::string YMD_T_HMS ("%Y-%m-%dT%H:%M:%S"); 23 | 24 | //---------------------------------------------------------------------- 25 | // Parse a date or time string into a tm obj 26 | // tm : tm object to populate 27 | // datetime_str : date or time string 28 | // isDate : true if this is a date object 29 | //---------------------------------------------------------------------- 30 | void ParseDatetimeString( struct tm & tmStruct, 31 | std::string datetime, 32 | bool isDate ) { 33 | // parsing delimeter is '-' for date, ':' for time 34 | char delimeter = isDate ? '-' : ':'; 35 | 36 | // parse datetime into tokens 37 | std::stringstream ss( datetime ); 38 | std::string token; 39 | std::vector tokens; 40 | 41 | while( getline( ss, token, delimeter ) ) { 42 | tokens.push_back( token ); 43 | } 44 | 45 | // populate the tmStruct 46 | if ( isDate ) { 47 | tmStruct.tm_mday = stod(tokens[2]); 48 | tmStruct.tm_mon = stod(tokens[1]) - ISO_StartMonth; 49 | tmStruct.tm_year = stod(tokens[0]) - ISO_StartYear; 50 | } 51 | else { 52 | tmStruct.tm_sec = stod(tokens[2]); 53 | tmStruct.tm_min = stod(tokens[1]); 54 | tmStruct.tm_hour = stod(tokens[0]); 55 | } 56 | 57 | int err = mktime( &tmStruct ); 58 | 59 | if ( err < 0 ) { 60 | std::stringstream errMsg; 61 | errMsg << "ParseDatetimeString() mktime failed on " << datetime 62 | << " err = " << err << std::endl; 63 | throw std::runtime_error( errMsg.str() ); 64 | } 65 | } 66 | 67 | //---------------------------------------------------------------------- 68 | // Parse the datetime into a DatetimeInfo struct 69 | // datetime : datetime to parse 70 | // return : DatetimeInfo struct 71 | //---------------------------------------------------------------------- 72 | DatetimeInfo ParseDatetime( std::string datetime ) { 73 | 74 | DatetimeInfo output; 75 | 76 | // Detecting the format is based on delimeters to avoid regex: 77 | // [ '-' and '-' ] YMD 78 | // [ ':' and ':' ] HMS 79 | // [ '-' and '-' and ':' and ':' ] YMD_HMS 80 | // [ '-' and '-' and ':' and ':' and 'T' ] YMD_T_HMS 81 | 82 | size_t NHyphen = std::count( datetime.begin(), datetime.end(), '-' ); 83 | size_t nColon = std::count( datetime.begin(), datetime.end(), ':' ); 84 | size_t nT = std::count( datetime.begin(), datetime.end(), 'T' ); 85 | 86 | if ( NHyphen == 2 and nColon == 0 ) { 87 | output.format = YMD; 88 | ParseDatetimeString( output.time, datetime, true ); 89 | } 90 | else if ( NHyphen == 0 and nColon == 2 ) { 91 | output.format = HMS; 92 | ParseDatetimeString( output.time, datetime, false ); 93 | } 94 | else if ( NHyphen == 2 and nColon == 2 and nT == 0 ) { 95 | output.format = YMD_HMS; 96 | // split by " ", then split first by - second by : 97 | int delim_pos = datetime.find(' '); 98 | std::string date = datetime.substr(0, delim_pos); 99 | std::string time = datetime.substr(delim_pos+1, datetime.size()); 100 | ParseDatetimeString( output.time, date, true ); 101 | ParseDatetimeString( output.time, time, false ); 102 | } 103 | else if ( NHyphen == 2 and nColon == 2 and nT == 1 ) { 104 | output.format = YMD_T_HMS; 105 | // split by T, then split first by - second by : 106 | int delim_pos = datetime.find('T'); 107 | std::string date = datetime.substr(0, delim_pos); 108 | std::string time = datetime.substr(delim_pos+1, datetime.size()); 109 | ParseDatetimeString( output.time, date, true ); 110 | ParseDatetimeString( output.time, time, false ); 111 | } 112 | else { 113 | output.unrecognized = true; 114 | } 115 | return output; 116 | } 117 | 118 | //---------------------------------------------------------------------- 119 | // Generate a new datetime + delta past the range of given 120 | //---------------------------------------------------------------------- 121 | // 122 | // @params datetime1/2 : the two last time std::strings 123 | // to compute the delta unit 124 | // we increment from datetime2 125 | // @param tp : the amount to increment the time diff by 126 | // @return : the new incremented timestd::string 127 | //---------------------------------------------------------------------- 128 | std::string IncrementDatetime( std::string datetime1, 129 | std::string datetime2, int tp ) { 130 | // parse datetimes 131 | DatetimeInfo dtinfo1 = ParseDatetime( datetime1 ); 132 | DatetimeInfo dtinfo2 = ParseDatetime( datetime2 ); 133 | 134 | if ( dtinfo1.unrecognized or dtinfo2.unrecognized ) { 135 | // return empty string 136 | return std::string(); 137 | } 138 | 139 | // get the delta unit between two datetimes in the time col 140 | size_t seconds_diff = difftime( mktime( &dtinfo2.time ), 141 | mktime( &dtinfo1.time ) ); 142 | 143 | if ( seconds_diff == 0 ) { 144 | seconds_diff = 1; //if millisec, want some update 145 | } 146 | 147 | // increment the time and format 148 | dtinfo2.time.tm_sec += tp * seconds_diff; 149 | 150 | int err = mktime( &dtinfo2.time ); 151 | 152 | if ( err < 0 ) { 153 | std::stringstream errMsg; 154 | errMsg << "increment_datetime_str() mktime failed on " 155 | << datetime2; 156 | throw( errMsg.str() ); 157 | } 158 | 159 | // format incremented time 160 | char tmp_buffer [ BUFSIZ ]; 161 | 162 | size_t n_char = strftime( tmp_buffer, BUFSIZ, 163 | dtinfo2.format.c_str(), &dtinfo2.time ); 164 | if ( n_char == 0 ) { 165 | std::stringstream errMsg; 166 | errMsg << "increment_datetime_str(): Failed on " 167 | << datetime1 << ", " << datetime2 << " tp = " << tp; 168 | throw( errMsg.str() ); 169 | } 170 | 171 | return std::string( tmp_buffer ); 172 | } 173 | -------------------------------------------------------------------------------- /src/cppEDM/src/DateTime.h: -------------------------------------------------------------------------------- 1 | #ifndef DATETIMEUTIL_H 2 | #define DATETIMEUTIL_H 3 | 4 | #include 5 | #include 6 | #include // std::count 7 | #include // mktime 8 | 9 | const int ISO_StartYear = 1900; 10 | const int ISO_StartMonth = 1; 11 | 12 | struct DatetimeInfo { 13 | struct tm time = {}; 14 | std::string format; 15 | bool unrecognized = false; 16 | 17 | // Constructor : setup time struct 18 | DatetimeInfo() { 19 | time.tm_sec = 0; 20 | time.tm_min = 0; 21 | time.tm_hour = 0; 22 | time.tm_mday = 1; 23 | time.tm_mon = 0; 24 | time.tm_year = 70; // Minimal valid Unix time 1900 + 70 25 | time.tm_wday = 0; 26 | time.tm_yday = 0; 27 | time.tm_isdst = 0; 28 | } 29 | }; 30 | 31 | // Prototypes 32 | void ParseDatetimeString( struct tm & tmStruct, 33 | std::string datetime, 34 | bool isDate ); 35 | 36 | DatetimeInfo ParseDatetime( std::string datetime ); 37 | 38 | std::string IncrementDatetime( std::string datetime1, 39 | std::string datetime2, 40 | int tp ); 41 | #endif 42 | -------------------------------------------------------------------------------- /src/cppEDM/src/EDM.cc: -------------------------------------------------------------------------------- 1 | 2 | #include "EDM.h" 3 | 4 | // Declared in API.h 5 | extern DataFrame< double > MakeBlock( DataFrame< double > &, int, int, 6 | std::vector< std::string >, bool ); 7 | 8 | //---------------------------------------------------------------- 9 | // Constructors 10 | //---------------------------------------------------------------- 11 | EDM::EDM ( DataFrame< double > & data, 12 | Parameters & parameters ) : 13 | data( data ), anyTies( false ), parameters( parameters ) {} 14 | 15 | //---------------------------------------------------------------- 16 | // Project : Implemented in sub-class 17 | //---------------------------------------------------------------- 18 | void EDM::Project () {} 19 | 20 | //---------------------------------------------------------------- 21 | // Generate : Implemented in sub-class 22 | //---------------------------------------------------------------- 23 | void EDM::Generate () {} 24 | 25 | //---------------------------------------------------------------- 26 | // Set target (library) vector 27 | //---------------------------------------------------------------- 28 | void EDM::GetTarget() { 29 | if ( parameters.targetNames.size() ) { 30 | target = data.VectorColumnName( parameters.targetNames.front() ); 31 | } 32 | else { 33 | // Default to first column 34 | target = data.Column( 0 ); 35 | } 36 | } 37 | 38 | //---------------------------------------------------------------- 39 | // Implemented as a wrapper for API MakeBlock() 40 | // Note: dataFrame must have the columnNameToIndex map 41 | // 42 | // NOTE: The returned data block does NOT have the time column 43 | //---------------------------------------------------------------- 44 | void EDM::EmbedData() { 45 | 46 | if ( data.ColumnNameToIndex().empty() ) { 47 | throw std::runtime_error("EDM::Embed(): columnNameIndex empty.\n"); 48 | } 49 | 50 | // If columns provided, validate they are in dataFrameIn 51 | for ( auto colName : parameters.columnNames ) { 52 | auto ci = find( data.ColumnNames().begin(), 53 | data.ColumnNames().end(), colName ); 54 | 55 | if ( ci == data.ColumnNames().end() ) { 56 | std::stringstream errMsg; 57 | errMsg << "EDM::Embed(): Failed to find column " 58 | << colName << " in dataFrame with columns: [ "; 59 | for ( auto col : data.ColumnNames() ) { 60 | errMsg << col << " "; 61 | } errMsg << " ]\n"; 62 | throw std::runtime_error( errMsg.str() ); 63 | } 64 | } 65 | 66 | // Get column names for MakeBlock 67 | std::vector< std::string > colNames; 68 | if ( parameters.columnNames.size() ) { 69 | // column names are strings 70 | colNames = parameters.columnNames; 71 | } 72 | else { 73 | throw std::runtime_error( "EDM::Embed(): columnNames are empty.\n" ); 74 | } 75 | 76 | // Extract the specified columns (sub)DataFrame from dataFrameIn 77 | DataFrame< double > dataFrame = 78 | data.DataFrameFromColumnNames( parameters.columnNames ); 79 | 80 | // deletePartial = false 81 | embedding = MakeBlock( std::ref( dataFrame ), parameters.E, 82 | parameters.tau, colNames, false ); 83 | } 84 | -------------------------------------------------------------------------------- /src/cppEDM/src/EDM.h: -------------------------------------------------------------------------------- 1 | 2 | #ifndef EDM_H 3 | #define EDM_H 4 | 5 | #include 6 | #include "Common.h" 7 | #include "Parameter.h" 8 | 9 | //--------------------------------------------------------------------- 10 | // EDM Class 11 | // Central data object and base class for EDM algorithms. 12 | // Specific algorithm projection methods defined in sub-classes. 13 | // 14 | // NOTE JP: Tony recommends to explicitly define special members: 15 | // http://www.cplusplus.com/doc/tutorial/classes2/ 16 | //--------------------------------------------------------------------- 17 | class EDM { 18 | 19 | public: // No need for private or protected 20 | DataFrame< double > data; 21 | DataFrame< double > embedding; 22 | 23 | DataFrame< size_t > knn_neighbors; // N pred rows, knn columns; sorted 24 | DataFrame< double > knn_distances; // N pred rows, knn columns; sorted 25 | 26 | DataFrame< size_t > allLibRows; // 1 row, N lib columns 27 | DataFrame< double > allDistances; // N pred rows N lib columns 28 | 29 | DataFrame< double > projection; // Simplex & SMap Output 30 | DataFrame< double > coefficients; // SMap Output 31 | DataFrame< double > singularValues; // SMap Output 32 | 33 | // Project() vectors to populate projection DataFrame in FormatData() 34 | // JP Can we do away with these and write directly to projection (+Tp)? 35 | std::valarray< double > predictions; 36 | std::valarray< double > const_predictions; 37 | std::valarray< double > variance; 38 | 39 | // Simplex :: Prediction row accounting of library neighbor ties 40 | bool anyTies; 41 | std::vector< bool > ties; // true/false each prediction row 42 | std::vector< size_t > tieFirstIndex; // index in knn of first tie 43 | std::vector< std::vector< std::pair< double, size_t > > > tiePairs; 44 | 45 | // SMap :: Each prediction row can have variable knn 46 | std::vector< size_t > knnSmap; 47 | 48 | std::valarray< double > target; // entire record 49 | std::vector< std::string > allTime; // entire record 50 | 51 | Parameters parameters; 52 | 53 | // Constructor declaration 54 | EDM ( DataFrame< double > & data, Parameters & parameters ); 55 | 56 | // Method declarations 57 | // EDM.cc 58 | void GetTarget(); 59 | void EmbedData(); 60 | void Project(); // Simplex.cc : SMap.cc : CCM.cc : Multiview.cc 61 | void Generate(); // Simplex.cc : SMap.cc 62 | 63 | // EDM_Neighbors.cc 64 | void PrepareEmbedding( bool checkDataRows = true ); 65 | void Distances(); 66 | void FindNeighbors(); 67 | 68 | // EDM_Formatting.cc 69 | void CheckDataRows( std::string call ); 70 | void CheckValidLib( std::string call ); 71 | void FormatOutput(); 72 | void FillTimes( std::vector< std::string > & timeOut ); 73 | 74 | void PrintDataFrameIn(); // EDM_Neighbors.cc #ifdef DEBUG_ALL 75 | void PrintNeighbors(); // EDM_Neighbors.cc #ifdef DEBUG_ALL 76 | }; 77 | #endif 78 | -------------------------------------------------------------------------------- /src/cppEDM/src/EDM_Neighbors.h: -------------------------------------------------------------------------------- 1 | #ifndef EDM_NEIGHBORS_H 2 | #define EDM_NEIGHBORS_H 3 | 4 | #include "EDM.h" 5 | 6 | namespace EDM_Distance { 7 | // Define the initial maximum distance for neigbors 8 | // DBL_MAX is a Macro equivalent to: std::numeric_limits::max() 9 | double DistanceMax = std::numeric_limits::max(); 10 | } 11 | 12 | // Prototypes 13 | double Distance( const std::valarray &v1, 14 | const std::valarray &v2, 15 | DistanceMetric metric ); 16 | 17 | bool DistanceCompare( const std::pair &x, 18 | const std::pair &y ); 19 | #endif 20 | -------------------------------------------------------------------------------- /src/cppEDM/src/Multiview.h: -------------------------------------------------------------------------------- 1 | 2 | #ifndef EDM_MULTIVIEW_H 3 | #define EDM_MULTIVIEW_H 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | #include "EDM.h" 11 | #include "Simplex.h" 12 | 13 | //---------------------------------------------------------------- 14 | // Multiview class inherits from Simplex class and defines 15 | // CCM-specific projection methods 16 | //---------------------------------------------------------------- 17 | class MultiviewClass : public SimplexClass { 18 | public: 19 | std::string predictOutputFileIn; // copy from parameters 20 | std::vector predictionIn; // copy from parameters 21 | 22 | struct MultiviewValues MVvalues; // output structure 23 | 24 | // Constructor 25 | MultiviewClass ( DataFrame< double > & data, 26 | Parameters & parameters ); 27 | 28 | // Method declarations 29 | void Project( unsigned maxThreads ); 30 | void CheckParameters(); 31 | void SetupParameters(); 32 | void Multiview( unsigned maxThreads ); 33 | }; 34 | #endif 35 | -------------------------------------------------------------------------------- /src/cppEDM/src/Parameter.h: -------------------------------------------------------------------------------- 1 | #ifndef PARAMETER_H 2 | #define PARAMETER_H 3 | 4 | #include 5 | #include 6 | 7 | #include "Common.h" 8 | #include "Version.h" 9 | 10 | class ParameterContainer; // forward declaration 11 | 12 | //------------------------------------------------------------ 13 | // 14 | //------------------------------------------------------------ 15 | class Parameters { 16 | 17 | public: // No need for protected or private 18 | Method method; // Simplex or SMap enum class 19 | 20 | std::string pathIn; // path for input dataFile 21 | std::string dataFile; // input dataFile (assumed .csv) 22 | std::string pathOut; // path for output files 23 | std::string predictOutputFile; // path for output file 24 | 25 | std::string lib_str; // multi argument parameters for library 26 | std::string pred_str; // multi argument parameters for prediction 27 | 28 | std::vector library; // library row indices 29 | std::vector prediction; // prediction row indices 30 | 31 | int E; // dimension 32 | int Tp; // prediction interval 33 | int knn; // k nearest neighbors 34 | int tau; // embedding delay 35 | double theta; // S-Map localization 36 | int exclusionRadius; // temporal rows to ignore in predict 37 | 38 | std::string columns_str; // multi argument parameters 39 | std::string target_str; // argument parameter(s) 40 | std::vector< std::string > columnNames; // state-space column name(s) 41 | std::vector< std::string > targetNames; // target column name(s) 42 | 43 | bool embedded; // true if data is already embedded 44 | bool const_predict; // true to compute non "predictor" stats 45 | bool verbose; 46 | 47 | std::vector validLib; // maps row to valid library flag 48 | bool ignoreNan; // SMap create new library to ignore nan 49 | 50 | int generateSteps; // Number of timesteps to feedback generate 51 | bool generateLibrary; // Increment library with generated data 52 | 53 | bool parameterList; // Add parameter list to output 54 | 55 | std::string SmapCoefFile; // path for output file 56 | std::string SmapSVFile; // path for output file 57 | std::string blockOutputFile; // Embed() output file 58 | 59 | int multiviewEnsemble; // Number of ensembles in multiview 60 | int multiviewD; // Multiview state-space dimension 61 | bool multiviewTrainLib; // Use prediction as training library 62 | bool multiviewExcludeTarget; // Exclude target from eval combos 63 | 64 | std::string libSizes_str; 65 | std::vector< size_t > librarySizes;// CCM library sizes to evaluate 66 | int subSamples; // CCM number of samples to draw 67 | bool randomLib; // CCM randomly select subsets if true 68 | bool replacement; // CCM random select with replacement if true 69 | unsigned seed; // CCM random selection RNG seed 70 | bool includeData; // CCM include all simplex projection results 71 | 72 | bool validated; 73 | 74 | Version version; // Version object, instantiated in constructor 75 | 76 | std::map< std::string, std::string > Map; 77 | 78 | friend std::ostream& operator<<( std::ostream & os, Parameters & params ); 79 | 80 | // Constructor declaration and default arguments 81 | Parameters( 82 | Method method = Method::None, 83 | std::string pathIn = "./", 84 | std::string dataFile = "", 85 | std::string pathOut = "./", 86 | std::string predictOutputFile = "", 87 | 88 | std::string lib_str = "", 89 | std::string pred_str = "", 90 | 91 | int E = 0, 92 | int Tp = 0, 93 | int knn = 0, 94 | int tau = -1, 95 | double theta = 0, 96 | int exclusionRadius = 0, 97 | 98 | std::string columns_str = "", 99 | std::string target_str = "", 100 | 101 | bool embedded = false, 102 | bool const_predict = false, 103 | bool verbose = false, 104 | 105 | std::vector validLib = std::vector(), 106 | bool ignoreNan = true, 107 | 108 | int generateSteps = 0, 109 | bool generateLibrary = false, 110 | bool parameterList = false, 111 | 112 | std::string SmapCoefFile = "", 113 | std::string SmapSVFile = "", 114 | std::string blockOutputFile = "", 115 | 116 | int multiviewEnsemble = 0, 117 | int multiviewD = 0, 118 | bool multiviewTrainLib = true, 119 | bool multiviewExcludeTarget = false, 120 | 121 | std::string libSizes_str = "", 122 | int subSamples = 0, 123 | bool randomLib = true, 124 | bool replacement = false, 125 | unsigned seed = 0, // 0: Generate random seed in CCM 126 | bool includeData = false 127 | ); 128 | 129 | ~Parameters(); 130 | 131 | void Validate(); // Parameter validation and index offsets 132 | void AdjustLibPred(); // Adjust for embedding 133 | void FillMap(); 134 | void PrintIndices( std::vector< size_t > library, 135 | std::vector< size_t > prediction ); 136 | }; 137 | #endif 138 | -------------------------------------------------------------------------------- /src/cppEDM/src/SMap.h: -------------------------------------------------------------------------------- 1 | 2 | #ifndef EDM_SMAP_H 3 | #define EDM_SMAP_H 4 | 5 | #include "EDM.h" 6 | 7 | // Prototype & alias of solver function pointer 8 | using Solver = SVDValues (*) ( DataFrame < double >, 9 | std::valarray < double > ); 10 | 11 | // Prototype declaration of general functions 12 | SVDValues SVD( DataFrame < double > A, std::valarray< double > B ); 13 | 14 | //>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> 15 | // Do not use LAPACK on Windog: use scikit-learn LinearRegression 16 | //>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> 17 | #if !defined _WIN32 || defined USING_R 18 | SVDValues Lapack_SVD( int m, // number of rows in matrix 19 | int n, // number of columns in matrix 20 | double *a, // pointer to top-left corner 21 | double *b, 22 | double rcond ); 23 | #endif 24 | 25 | //---------------------------------------------------------------- 26 | // SMap class inherits from EDM class and defines 27 | // SMap-specific projection & output methods 28 | //---------------------------------------------------------------- 29 | class SMapClass : public EDM { 30 | 31 | public: 32 | // Constructor 33 | SMapClass ( DataFrame & data, 34 | Parameters & parameters ); 35 | 36 | // Method declarations 37 | void Generate( Solver ); 38 | void Project ( Solver ); 39 | void SMap ( Solver ); 40 | void RecordNan( size_t row, size_t N_SingularValues ); 41 | void WriteOutput(); 42 | }; 43 | #endif 44 | -------------------------------------------------------------------------------- /src/cppEDM/src/Simplex.h: -------------------------------------------------------------------------------- 1 | 2 | #ifndef EDM_SIMPLEX_H 3 | #define EDM_SIMPLEX_H 4 | 5 | #include "EDM.h" 6 | 7 | //---------------------------------------------------------------- 8 | // Simplex class inherits from EDM class and defines 9 | // Simplex-specific projection methods 10 | //---------------------------------------------------------------- 11 | class SimplexClass : public EDM { 12 | public: 13 | // Constructor 14 | SimplexClass ( DataFrame & data, 15 | Parameters & parameters ); 16 | 17 | // Method declarations 18 | void Generate(); 19 | void Project(); 20 | void Simplex(); 21 | void WriteOutput(); 22 | }; 23 | #endif 24 | -------------------------------------------------------------------------------- /src/cppEDM/src/Version.h: -------------------------------------------------------------------------------- 1 | #ifndef VERSION_H 2 | #define VERSION_H 3 | 4 | #include 5 | #include 6 | 7 | //------------------------------------------------------------ 8 | // Instantiated in Parameters() constructor 9 | //------------------------------------------------------------ 10 | class Version { 11 | public: 12 | int Major; 13 | int Minor; 14 | int Micro; 15 | std::string Date; 16 | 17 | Version( int Major, int Minor, int Micro, std::string Date ) : 18 | Major( Major ), Minor( Minor ), Micro( Micro ), Date ( Date ) {}; 19 | 20 | void ShowVersion() { 21 | std::cout << "cppEDM Version " << Major << "." 22 | << Minor << "." << Micro << " " << Date << std::endl; 23 | } 24 | }; 25 | #endif 26 | -------------------------------------------------------------------------------- /src/cppEDM/src/makefile: -------------------------------------------------------------------------------- 1 | 2 | .PHONY: all clean distclean depend 3 | 4 | ## CXX defined in Makevars from R environment variables, don't define CC 5 | ## CC = g++ 6 | 7 | ## CXXFLAGS are defined in Makevars from R environment variables 8 | ## JP: Temporary (?) hack for R clang-UBSAN issue in EDM_Neighbors 9 | ## to not initialise size_t knnLibRows with nanl(), is to define 10 | ## USING_R. Note: USING_R is an R-defined macro. 11 | CFLAGS = $(CXXFLAGS) -DCCM_THREADED -DUSING_R 12 | 13 | HEADERS = API.h CCM.h Common.h DataFrame.h DateTime.h EDM.h EDM_Neighbors.h\ 14 | Multiview.h Parameter.h Simplex.h SMap.h Version.h 15 | 16 | SRCS = API.cc CCM.cc Common.cc DateTime.cc EDM.cc EDM_Formatting.cc\ 17 | EDM_Neighbors.cc Eval.cc Multiview.cc Parameter.cc Simplex.cc SMap.cc 18 | 19 | OBJ = $(SRCS:%.cc=%.o) 20 | 21 | LIB = libEDM.a 22 | 23 | all: $(LIB) 24 | cp $(LIB) ../lib/ 25 | 26 | clean: 27 | rm -f $(OBJ) $(LIB) 28 | 29 | distclean: 30 | rm -f $(OBJ) $(LIB) ../lib/$(LIB) *~ *.bak *.csv 31 | 32 | $(LIB): $(OBJ) 33 | $(AR) -rcs $(LIB) $(OBJ) # AR passed from Makevars 34 | 35 | %.o : %.cc 36 | $(CXX) $(CFLAGS) -c $< 37 | 38 | depend: 39 | @echo ${SRCS} 40 | makedepend -Y $(SRCS) 41 | # DO NOT DELETE 42 | 43 | API.o: API.h Common.h DataFrame.h Parameter.h Version.h Simplex.h EDM.h 44 | API.o: SMap.h CCM.h Multiview.h 45 | CCM.o: CCM.h EDM.h Common.h DataFrame.h Parameter.h Version.h Simplex.h 46 | Common.o: Common.h DataFrame.h 47 | DateTime.o: DateTime.h 48 | EDM.o: EDM.h Common.h DataFrame.h Parameter.h Version.h 49 | EDM_Formatting.o: EDM.h Common.h DataFrame.h Parameter.h Version.h DateTime.h 50 | EDM_Neighbors.o: EDM_Neighbors.h EDM.h Common.h DataFrame.h Parameter.h 51 | EDM_Neighbors.o: Version.h 52 | Eval.o: API.h Common.h DataFrame.h Parameter.h Version.h Simplex.h EDM.h 53 | Eval.o: SMap.h CCM.h Multiview.h 54 | Multiview.o: Multiview.h EDM.h Common.h DataFrame.h Parameter.h Version.h 55 | Multiview.o: Simplex.h 56 | Parameter.o: Parameter.h Common.h DataFrame.h Version.h 57 | Simplex.o: Simplex.h EDM.h Common.h DataFrame.h Parameter.h Version.h 58 | SMap.o: SMap.h EDM.h Common.h DataFrame.h Parameter.h Version.h 59 | -------------------------------------------------------------------------------- /src/cppEDM/src/makefile.mingw: -------------------------------------------------------------------------------- 1 | 2 | .PHONY: all clean distclean depend 3 | 4 | HEADERS = API.h CCM.h Common.h DataFrame.h DateTime.h EDM.h EDM_Neighbors.h\ 5 | Multiview.h Parameter.h Simplex.h SMap.h Version.h 6 | 7 | SRCS = API.cc CCM.cc Common.cc DateTime.cc EDM.cc EDM_Formatting.cc\ 8 | EDM_Neighbors.cc Eval.cc Multiview.cc Parameter.cc Simplex.cc SMap.cc 9 | 10 | OBJ = $(SRCS:%.cc=%.o) 11 | 12 | LIB = libEDM.a 13 | 14 | CFLAGS += -std=c++11 -O3 15 | CFLAGS += -DCCM_THREADED 16 | CFLAGS += -fPIC 17 | # CFLAGS += -g # -DDEBUG_ALL 18 | 19 | all: $(LIB) 20 | cp $(LIB) ../lib/ 21 | 22 | clean: 23 | rm -f $(OBJ) $(LIB) 24 | 25 | distclean: 26 | rm -f $(OBJ) $(LIB) ../lib/$(LIB) *~ *.bak *.csv 27 | 28 | $(LIB): $(OBJ) 29 | ar -rcs $(LIB) $(OBJ) 30 | 31 | %.o : %.cc 32 | $(CC) $(CFLAGS) -c $< 33 | 34 | depend: 35 | @echo ${SRCS} 36 | makedepend -Y $(SRCS) 37 | # DO NOT DELETE 38 | 39 | API.o: API.h Common.h DataFrame.h Parameter.h Version.h Simplex.h EDM.h 40 | API.o: SMap.h CCM.h Multiview.h 41 | CCM.o: CCM.h EDM.h Common.h DataFrame.h Parameter.h Version.h Simplex.h 42 | Common.o: Common.h DataFrame.h 43 | DateTime.o: DateTime.h 44 | EDM.o: EDM.h Common.h DataFrame.h Parameter.h Version.h 45 | EDM_Formatting.o: EDM.h Common.h DataFrame.h Parameter.h Version.h DateTime.h 46 | EDM_Neighbors.o: EDM_Neighbors.h EDM.h Common.h DataFrame.h Parameter.h 47 | EDM_Neighbors.o: Version.h 48 | Eval.o: API.h Common.h DataFrame.h Parameter.h Version.h Simplex.h EDM.h 49 | Eval.o: SMap.h CCM.h Multiview.h 50 | Multiview.o: Multiview.h EDM.h Common.h DataFrame.h Parameter.h Version.h 51 | Multiview.o: Simplex.h 52 | Parameter.o: Parameter.h Common.h DataFrame.h Version.h 53 | Simplex.o: Simplex.h EDM.h Common.h DataFrame.h Parameter.h Version.h 54 | SMap.o: SMap.h EDM.h Common.h DataFrame.h Parameter.h Version.h 55 | -------------------------------------------------------------------------------- /src/cppEDM/src/makefile.windows: -------------------------------------------------------------------------------- 1 | 2 | CC = cl 3 | OBJ = API.obj CCM.obj Common.obj DateTime.obj EDM.obj EDM_Formatting.obj\ 4 | EDM_Neighbors.obj Eval.obj Multiview.obj Parameter.obj Simplex.obj\ 5 | SMap.obj 6 | 7 | LIB = EDM.lib 8 | 9 | CFLAGS = -DCCM_THREADED /EHsc /MD # /MT -DDEBUG -DDEBUG_ALL 10 | 11 | all: $(LIB) 12 | lib /NODEFAULTLIB:LIBCMT /NODEFAULTLIB:library /OUT:$(LIB) $(OBJ) 13 | cp $(LIB) ..\lib 14 | 15 | clean: 16 | del -f $(OBJ) $(LIB) 17 | 18 | distclean: 19 | del -f $(OBJ) $(LIB) ../lib/$(LIB) *~ *.bak *.csv 20 | 21 | $(LIB): $(OBJ) 22 | 23 | API.obj: API.cc 24 | $(CC) /c API.cc $(CFLAGS) 25 | 26 | CCM.obj: CCM.cc 27 | $(CC) /c CCM.cc $(CFLAGS) 28 | 29 | Common.obj: Common.cc 30 | $(CC) /c Common.cc $(CFLAGS) 31 | 32 | DateTime.obj: DateTime.cc 33 | $(CC) /c DateTime.cc $(CFLAGS) 34 | 35 | EDM.obj: EDM.cc 36 | $(CC) /c EDM.cc $(CFLAGS) 37 | 38 | EDM_Formatting.obj: EDM_Formatting.cc 39 | $(CC) /c EDM_Formatting.cc $(CFLAGS) 40 | 41 | EDM_Neighbors.obj: EDM_Neighbors.cc 42 | $(CC) /c EDM_Neighbors.cc $(CFLAGS) 43 | 44 | Eval.obj: Eval.cc 45 | $(CC) /c Eval.cc $(CFLAGS) 46 | 47 | Multiview.obj: Multiview.cc 48 | $(CC) /c Multiview.cc $(CFLAGS) 49 | 50 | Parameter.obj: Parameter.cc 51 | $(CC) /c Parameter.cc $(CFLAGS) 52 | 53 | Simplex.obj: Simplex.cc 54 | $(CC) /c Simplex.cc $(CFLAGS) 55 | 56 | SMap.obj: SMap.cc 57 | $(CC) /c SMap.cc $(CFLAGS) 58 | 59 | # Depedencies from makedepend on Linux 60 | API.obj: API.h Common.h DataFrame.h Parameter.h Version.h Simplex.h EDM.h 61 | API.obj: SMap.h CCM.h Multiview.h 62 | CCM.obj: CCM.h EDM.h Common.h DataFrame.h Parameter.h Version.h Simplex.h 63 | Common.obj: Common.h DataFrame.h 64 | DateTime.obj: DateTime.h 65 | EDM.obj: EDM.h Common.h DataFrame.h Parameter.h Version.h 66 | EDM_Formatting.obj: EDM.h Common.h DataFrame.h Parameter.h Version.h DateTime.h 67 | EDM_Neighbors.obj: EDM_Neighbors.h EDM.h Common.h DataFrame.h Parameter.h 68 | EDM_Neighbors.obj: Version.h 69 | Eval.obj: API.h Common.h DataFrame.h Parameter.h Version.h Simplex.h EDM.h 70 | Eval.obj: SMap.h CCM.h Multiview.h 71 | Multiview.obj: Multiview.h EDM.h Common.h DataFrame.h Parameter.h Version.h 72 | Multiview.obj: Simplex.h 73 | Parameter.obj: Parameter.h Common.h DataFrame.h Version.h 74 | Simplex.obj: Simplex.h EDM.h Common.h DataFrame.h Parameter.h Version.h 75 | SMap.obj: SMap.h EDM.h Common.h DataFrame.h Parameter.h Version.h 76 | -------------------------------------------------------------------------------- /tests/testthat.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(rEDM) 3 | 4 | test_check("rEDM") 5 | -------------------------------------------------------------------------------- /tests/testthat/test-1-Simplex.R: -------------------------------------------------------------------------------- 1 | # NOTE: Numerical tests are performed in cppEDM unit tests 2 | 3 | context("Simplex test") 4 | 5 | data( block_3sp ) 6 | 7 | test_that("Simplex embedded works", { 8 | S.df <- Simplex( dataFrame = block_3sp, 9 | lib = "1 99", pred = "100 195", 10 | E = 3, embedded = TRUE, showPlot = FALSE, 11 | columns = "x_t y_t z_t", target = "x_t" ) 12 | expect_s3_class(S.df, "data.frame") 13 | expect_true("time" %in% names(S.df)) 14 | expect_true("Observations" %in% names(S.df)) 15 | expect_true("Predictions" %in% names(S.df)) 16 | expect_equal( dim(S.df), c(97,4) ) 17 | Err <- ComputeError( S.df $ Observations, S.df $ Predictions ) 18 | expect_true("MAE" %in% names(Err)) 19 | expect_true("rho" %in% names(Err)) 20 | expect_true("RMSE" %in% names(Err)) 21 | }) 22 | 23 | test_that("Simplex embedding works", { 24 | S.df <- Simplex( dataFrame = block_3sp, 25 | lib = "1 99", pred = "100 195", 26 | E = 3, embedded = FALSE, showPlot = FALSE, 27 | columns = "x_t", target = "x_t" ) 28 | expect_s3_class(S.df, "data.frame") 29 | expect_true("time" %in% names(S.df)) 30 | expect_true("Observations" %in% names(S.df)) 31 | expect_true("Predictions" %in% names(S.df)) 32 | expect_equal( dim(S.df), c(97,4) ) 33 | }) 34 | 35 | test_that("Simplex errors", { 36 | expect_error( Simplex() ) 37 | expect_error( Simplex( dataFrame = block_3sp ) ) 38 | expect_error( Simplex( dataFrame = block_3sp, 39 | lib = "1 99", pred = "100 195", 40 | E = 3, columns = "x_t y_t z_t", target = "None" ) ) 41 | expect_error( Simplex( dataFrame = block_3sp, 42 | lib = "1 99", pred = "100 195", 43 | E = 3, columns = "None", target = "x_t" ) ) 44 | expect_error( Simplex( dataFrame = block_3sp, 45 | lib = "1 99", pred = "100 200", 46 | E = 3, columns = "x_t y_t z_t", target = "x_t" ) ) 47 | }) 48 | -------------------------------------------------------------------------------- /tests/testthat/test-2-SMap.R: -------------------------------------------------------------------------------- 1 | # NOTE: Numerical tests are performed in cppEDM unit tests 2 | 3 | context("SMap test") 4 | 5 | data( circle ) 6 | 7 | test_that("SMap works", { 8 | S.List = SMap( dataFrame = circle, 9 | lib = "1 100", pred = "110 190", theta = 4, E = 2, 10 | embedded = TRUE, columns = "x y", target = "x" ) 11 | expect_type(S.List, "list") 12 | expect_true("predictions" %in% names(S.List)) 13 | expect_true("coefficients" %in% names(S.List)) 14 | expect_equal( dim(S.List $ predictions ), c(82,4) ) 15 | expect_equal( dim(S.List $ coefficients ), c(82,4) ) 16 | }) 17 | 18 | test_that("SMap errors", { 19 | expect_error( SMap() ) 20 | expect_error( SMap( dataFrame = circle, 21 | lib = "1 100", pred = "110 190", theta = 4, E = 2, 22 | embedded = TRUE, columns = "x y", target = "None" ) ) 23 | expect_error( SMap( dataFrame = circle, 24 | lib = "1 100", pred = "110 190", theta = 4, E = 2, 25 | embedded = TRUE, columns = "None", target = "x" ) ) 26 | expect_error( SMap( dataFrame = circle, 27 | lib = "1 100", pred = "110 201", theta = 4, E = 2, 28 | embedded = TRUE, columns = "x y", target = "x" ) ) 29 | }) 30 | -------------------------------------------------------------------------------- /tests/testthat/test-3-CCM.R: -------------------------------------------------------------------------------- 1 | # NOTE: Numerical tests are performed in cppEDM unit tests 2 | 3 | context("CCM test") 4 | 5 | data( sardine_anchovy_sst ) 6 | 7 | test_that("CCM works", { 8 | C.df = CCM( dataFrame = sardine_anchovy_sst, 9 | E = 3, Tp = 0, columns = "anchovy", target = "np_sst", 10 | libSizes = "10 70 10", sample = 100 ) 11 | expect_s3_class(C.df, "data.frame") 12 | expect_true("LibSize" %in% names(C.df)) 13 | expect_true("anchovy:np_sst" %in% names(C.df)) 14 | expect_true("np_sst:anchovy" %in% names(C.df)) 15 | expect_equal( dim(C.df), c(7,3) ) 16 | }) 17 | 18 | test_that("CCM errors", { 19 | expect_error( CCM() ) 20 | expect_error( CCM( dataFrame = sardine_anchovy_sst, 21 | E = 3, Tp = 0, columns = "", target = "np_sst", 22 | libSizes = "10 70 10", sample = 100 ) ) 23 | expect_error( CCM( dataFrame = sardine_anchovy_sst, 24 | E = 3, Tp = 0, columns = "anchovy", target = "X", 25 | libSizes = "10 70 10", sample = 100 ) ) 26 | expect_error( CCM( dataFrame = sardine_anchovy_sst, 27 | E = 3, Tp = 0, columns = "X", target = "np_sst", 28 | libSizes = "10 70 10", sample = 100 ) ) 29 | expect_error( CCM( dataFrame = sardine_anchovy_sst, 30 | E = 3, Tp = 0, columns = "anchovy", target = "np_sst", 31 | libSizes = "10 70 80", sample = 100 ) ) 32 | }) 33 | -------------------------------------------------------------------------------- /tests/testthat/test-4-Multiview.R: -------------------------------------------------------------------------------- 1 | # NOTE: Numerical tests are performed in cppEDM unit tests 2 | 3 | context("Multiview test") 4 | 5 | data( block_3sp ) 6 | 7 | test_that("Multiview works", { 8 | M.List = Multiview( dataFrame = block_3sp, 9 | lib = "1 99", pred = "105 190", 10 | E = 3, columns = "x_t y_t z_t", target = "x_t" ) 11 | 12 | expect_type(M.List, "list") 13 | expect_true("View" %in% names(M.List)) 14 | expect_true("Predictions" %in% names(M.List)) 15 | expect_equal( dim(M.List $ View), c(9,9) ) 16 | expect_equal( dim(M.List $ Predictions), c(87,3) ) 17 | }) 18 | 19 | test_that("Multiview errors", { 20 | expect_error( Multiview() ) 21 | expect_error( Multiview( dataFrame = block_3sp, 22 | lib = "1 99", pred = "105 190", 23 | E = 3, columns = "x_t y_t z_t", target = "None" ) ) 24 | expect_error( Multiview( dataFrame = block_3sp, 25 | lib = "1 99", pred = "105 190", 26 | E = 3, columns = "None", target = "x_t" ) ) 27 | expect_error( Multiview( dataFrame = block_3sp, 28 | lib = "1 99", pred = "105 201", 29 | E = 3, columns = "None", target = "x_t" ) ) 30 | }) 31 | -------------------------------------------------------------------------------- /tests/testthat/test-5-EmbedDimension.R: -------------------------------------------------------------------------------- 1 | # NOTE: Numerical tests are performed in cppEDM unit tests 2 | 3 | context("Embed Dimension test") 4 | 5 | data( TentMap ) 6 | 7 | test_that("EmbedDimension works", { 8 | df <- EmbedDimension( dataFrame = TentMap, lib = "1 100", pred = "201 500", 9 | columns = "TentMap", target = "TentMap", 10 | showPlot = FALSE ) 11 | expect_s3_class(df, "data.frame") 12 | expect_true("E" %in% names(df)) 13 | expect_true("rho" %in% names(df)) 14 | expect_equal( dim(df), c(10,2) ) 15 | }) 16 | 17 | test_that("EmbedDimension errors", { 18 | expect_error( EmbedDimension() ) 19 | expect_error( EmbedDimension( dataFrame = TentMap, 20 | lib = "1 100", pred = "201 500", 21 | columns = "TentMap", target = "None", 22 | showPlot = FALSE ) ) 23 | }) 24 | -------------------------------------------------------------------------------- /tests/testthat/test-6-PredictInterval.R: -------------------------------------------------------------------------------- 1 | # NOTE: Numerical tests are performed in cppEDM unit tests 2 | 3 | context("Predict Interval test") 4 | 5 | data( TentMap ) 6 | 7 | test_that("PredictInterval works", { 8 | df <- PredictInterval( dataFrame = TentMap, 9 | lib = "1 100", pred = "201 500", E = 2, 10 | columns = "TentMap", target = "TentMap", 11 | showPlot = FALSE ) 12 | expect_s3_class(df, "data.frame") 13 | expect_true("Tp" %in% names(df)) 14 | expect_true("rho" %in% names(df)) 15 | expect_equal( dim(df), c(10,2) ) 16 | }) 17 | 18 | test_that("PredictInterval errors", { 19 | expect_error( PredictInterval() ) 20 | expect_error( PredictInterval( dataFrame = TentMap, 21 | lib = "1 100", pred = "201 500", E = 2, 22 | columns = "", target = "TentMap", 23 | showPlot = FALSE ) ) 24 | }) 25 | -------------------------------------------------------------------------------- /tests/testthat/test-7-PredictNonlinear.R: -------------------------------------------------------------------------------- 1 | # NOTE: Numerical tests are performed in cppEDM unit tests 2 | 3 | context("Predict Nonlinear test") 4 | 5 | data( TentMapNoise ) 6 | 7 | test_that("PredictNonlinear works", { 8 | df <- PredictNonlinear( dataFrame = TentMapNoise, 9 | E = 2, lib = "1 100", pred = "201 500", 10 | columns = "TentMap", target = "TentMap", 11 | showPlot = FALSE ) 12 | expect_s3_class(df, "data.frame") 13 | expect_true("Theta" %in% names(df)) 14 | expect_true("rho" %in% names(df)) 15 | expect_equal( dim(df), c(15,2) ) 16 | }) 17 | 18 | test_that("PredictNonlinear errors", { 19 | expect_error( PredictNonlinear() ) 20 | expect_error( PredictNonlinear( dataFrame = TentMapNoise, 21 | E = 2, lib = "1 100", pred = "201 500", 22 | columns = "", target = "TentMap", 23 | showPlot = FALSE ) ) 24 | }) 25 | -------------------------------------------------------------------------------- /vignettes/.gitignore: -------------------------------------------------------------------------------- 1 | rEDM-tutorial_cache 2 | rEDM-tutorial_files -------------------------------------------------------------------------------- /vignettes/CrossMap.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SugiharaLab/rEDM/aa8204c60a5b9c95d7901b879e3c347c71cfdf7c/vignettes/CrossMap.png -------------------------------------------------------------------------------- /vignettes/Lorenz_Projection.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SugiharaLab/rEDM/aa8204c60a5b9c95d7901b879e3c347c71cfdf7c/vignettes/Lorenz_Projection.png -------------------------------------------------------------------------------- /vignettes/Lorenz_Reconstruct.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SugiharaLab/rEDM/aa8204c60a5b9c95d7901b879e3c347c71cfdf7c/vignettes/Lorenz_Reconstruct.png -------------------------------------------------------------------------------- /vignettes/ParameterTable.csv: -------------------------------------------------------------------------------- 1 | Parameter,Description 2 | pathIn,Filesystem path to input ‘dataFile’. CSV format. 3 | , 4 | dataFile,CSV format data file name. The first column must be a timeindex or time values. 5 | ,The first row must be column names. 6 | , 7 | dataFrame,Input data.frame. The first column must be a time index or time values. 8 | ,The columns must be named. 9 | , 10 | pathOut,Filesystem path for ‘predictFile’ containing output predictions. 11 | , 12 | predictFile,Observation and Prediction output file name. CSV format. 13 | , 14 | smapCoefFile,Output file containing S-map coefficients. 15 | , 16 | lib,String with start and stop indices of input data rows used to create the library of observations. 17 | ,A single contiguous range is supported. 18 | , 19 | pred,String with start and stop indices of input data rows used for predictions. 20 | , A single contiguous range is supported. 21 | , 22 | D,Multiview dimension. 23 | , 24 | E,Embedding dimension. 25 | , 26 | Tp,Prediction horizon (number of time column rows). 27 | , 28 | knn,Number of nearest neighbors. If knn=0; knn is set to E+1 for Simplex(); set to number of data rows for SMap(). 29 | , 30 | tau,Lag of time delay embedding specified as number of time column rows. 31 | , 32 | theta,In Smap: S-Map neighbor localisation exponent. Single numeric. 33 | , 34 | theta,In PredictNonlinear: A whitespace delimeted string with values of S-map localisation parameters to be evaluated. 35 | , 36 | exclusionRadius,Excludes vectors from the search space of nearest neighbors if their relative time index is within exclusionRadius. 37 | , 38 | columns,String of whitespace separated column name(s) in the input data used to create the library. 39 | , 40 | target,String of column name in the input data used for prediction. 41 | , 42 | embedded,Logical specifying if the input data are embedded. 43 | , 44 | validLib,Conditional embedding. Boolean vector identifying time series rows to use in state-space library. 45 | , 46 | noTIme,Default False. Set True to not require first column of data to be time. 47 | , 48 | ignoreNan,SMap: default True. Redefine lib to ignore nan in data and embedding. 49 | , 50 | generateSteps,Generative feedback predictions in Simplex or SMap. 51 | , 52 | parameterList,Add parameter dictionary to return objects in Simplex; SMap; CCM; Multiview. 53 | , 54 | libSizes,String of 3 whitespace separated integer values specifying the intial library size; the final library size; and the library size increment for CCM. 55 | , 56 | sample,Integer specifying the number of random samples to draw at each library size evaluation for CCM. 57 | , 58 | random,Logical to specify random (‘TRUE’) or sequential library sampling in CCM. 59 | , 60 | includeData,Logical to return all CCM projection data frames. 61 | , 62 | seed,Integer specifying the random sampler seed in CCM. If ‘seed=0’ a random seed is generated. 63 | , 64 | multiview,Number of multiview ensembles to average for the final prediction estimate in Multiview. 65 | , 66 | trainLib,Use in-sample (lib=pred) prediction for multiview ranking. 67 | , 68 | excludeTarget,Exclude target variable from multiviews. 69 | , 70 | maxE,Maximum value of E to evalulate in EmbedDimension. 71 | , 72 | maxTp,Maximum value of Tp to evalulate in PredictInterval. 73 | , 74 | numThreads,Number of parallel threads for computation in EmbedDimension; PredictInterval and PredictNonlinear. 75 | , 76 | verbose,Logical to produce additional console reporting. 77 | , 78 | const_pred,Logical to add a _constant predictor_ column to the output. The constant predictor is X(t+1) = X(t). 79 | , 80 | showPlot,Logical to plot results. 81 | -------------------------------------------------------------------------------- /vignettes/References.bib: -------------------------------------------------------------------------------- 1 | @article{Casdagli_1991, 2 | Author = {Casdagli and Eubank and Farmer and Gibson}, 3 | Journal = {Physica D: Nonlinear Phenomena}, 4 | Number = {1-3}, 5 | Pages = {52-98}, 6 | Rating = {0}, 7 | Title = {State space reconstruction in the presence of noise}, 8 | Volume = {51}, 9 | Year = {1991}} 10 | 11 | @article{Davidson_1948, 12 | Author = {Davidson and Andrewartha}, 13 | Journal = {Journal of Animal Ecology}, 14 | Pages = {193-199}, 15 | Title = {Annual trends in a natural population of \emph{{Thrips} 16 | imaginis} ({Thysanoptera})}, 17 | Volume = {17}, 18 | Year = {1948}} 19 | 20 | @article{Davidson_1948a, 21 | Author = {Davidson and Andrewartha}, 22 | Journal = {Journal of Animal Ecology}, 23 | Pages = {200-222}, 24 | Title = {The influence of rainfall, evaporation and atmospheric 25 | temperature on fluctuations in the size of a natural population 26 | of \emph{{Thrips} imaginis} ({Thysanoptera})}, 27 | Volume = {17}, 28 | Year = {1948}} 29 | 30 | @article{Deyle_2013, 31 | Author = {Deyle and Fogarty and Hsieh, 32 | Chih-Hao and Kaufman, Les and MacCall, Alec D and Munch, 33 | Stephan B and Perretti, Charles T and Ye, Hao and Sugihara, George}, 34 | Journal = {Proceedings of the National Academy of Sciences}, 35 | Journal-Full = {Proceedings of the National Academy of Sciences 36 | of the United States of America}, 37 | Number = {16}, 38 | Pages = {6430-6435}, 39 | Pmid = {23536299}, 40 | Pst = {ppublish}, 41 | Title = {Predicting climate effects on {Pacific} sardine}, 42 | Volume = {110}, 43 | Year = {2013}} 44 | 45 | @article{Deyle_2011, 46 | Author = {Deyle and Sugihara}, 47 | Journal = {PLoS ONE}, 48 | Pages = {e18295}, 49 | Title = {Generalized theorems for nonlinear state space reconstruction}, 50 | Volume = {6}, 51 | Year = {2011}} 52 | 53 | @article{Deyle_2016, 54 | Author = {Deyle and May and Munch and Sugihara}, 55 | Journal = {Proceedings of the Royal Society of London B}, 56 | Title = {Tracking and forecasting ecosystem interactions in real time}, 57 | Volume = {283}, 58 | Year = {2016}} 59 | 60 | @article{Dixon_1999, 61 | Author = {Dixon and Milicich and Sugihara}, 62 | Journal = {Science}, 63 | Pages = {1528-1530}, 64 | Rating = {0}, 65 | Title = {Episodic fluctuations in larval supply}, 66 | Volume = {283}, 67 | Year = {1999}} 68 | 69 | @article{Fisher_1915, 70 | Author = {Fisher}, 71 | Journal = {Biometrika}, 72 | Number = {4}, 73 | Pages = {507-521}, 74 | Title = {Frequency distribution of the values of the 75 | correlation coefficient in samples from an indefinitely 76 | large population}, 77 | Volume = {10}, 78 | Year = {1915}} 79 | 80 | @article{Granger_1969, 81 | Author = {Granger}, 82 | Journal = {Econometrica}, 83 | Number = {3}, 84 | Pages = {424-438}, 85 | Rating = {0}, 86 | Title = {Investigating causal relations by econometric 87 | models and cross-spectral methods}, 88 | Volume = {37}, 89 | Year = {1969}} 90 | 91 | @article{Lorenz_1996, 92 | Author = {Lorenz}, 93 | Journal = {ECMWF Seminar on Predictability}, 94 | Title = {Predictability – A problem partly solved}, 95 | Volume = {I}, 96 | Year = {1996}} 97 | 98 | @article{Lorenz_1963, 99 | Author = {Lorenz}, 100 | Journal = {Journal of the Atmospheric Sciences}, 101 | Number = {2}, 102 | Pages = {130-141}, 103 | Title = {Deterministic nonperiodic flow}, 104 | Volume = {20}, 105 | Year = {1963}} 106 | 107 | @article{Moran_1953, 108 | Author = {Moran}, 109 | Journal = {Australian Journal of Zoology}, 110 | Pages = {291-298}, 111 | Title = {The statistical analysis of the Canadian Lynx cycle II. 112 | synchronization and meteorology}, 113 | Year = {1953}} 114 | 115 | @misc{NERC-Centre-for-Population-Biology_2010, 116 | Author = {NERC Centre for Population Biology, Imperial College}, 117 | Title = {The Global Population Dynamics Database Version 2}, 118 | Year = {2010}, 119 | Bdsk-Url-1 = {http://www.sw.ic.ac.uk/cpb/cpb/gpdd.html}} 120 | 121 | @article{Sauer_1991, 122 | Author = {Sauer and Yorke and Casdagli}, 123 | Journal = {Journal of Statistical Physics}, 124 | Number = {3-4}, 125 | Pages = {579-616}, 126 | Title = {Embedology}, 127 | Volume = {65}, 128 | Year = {1991}} 129 | 130 | @article{Sugihara_1990, 131 | Author = {Sugihara and May}, 132 | Journal = {Nature}, 133 | Pages = {734-741}, 134 | Rating = {0}, 135 | Title = {Nonlinear forecasting as a way of distinguishing 136 | chaos from measurement error in time series}, 137 | Volume = {344}, 138 | Year = {1990}} 139 | 140 | @article{Sugihara_1994, 141 | Author = {Sugihara}, 142 | Journal={Philosophical Transactions: Physical Sciences and Engineering}, 143 | Number = {1688}, 144 | Pages = {477--495}, 145 | Rating = {0}, 146 | Title = {Nonlinear forecasting for the classification of 147 | natural time series}, 148 | Volume = {348}, 149 | Year = {1994}} 150 | 151 | @article{Sugihara_2012, 152 | Author = {Sugihara and May and Ye and 153 | Hsieh and Deyle and Fogarty and Munch}, 154 | Journal = {Science}, 155 | Pages = {496-500}, 156 | Title = {Detecting causality in complex ecosystems}, 157 | Volume = {338}, 158 | Year = {2012}} 159 | 160 | @article{Takens_1981, 161 | Author = {Takens}, 162 | Journal = {Dynamical Systems and Turbulence, Lecture Notes 163 | in Mathematics}, 164 | Pages = {366--381}, 165 | Rating = {0}, 166 | Read = {Yes}, 167 | Title = {Detecting strange attractors in turbulence}, 168 | Volume = {898}, 169 | Year = {1981}} 170 | 171 | @article{Ye_2015a, 172 | Author = {Hao and Deyle and Gilarranz J. 173 | and Sugihara, George}, 174 | Journal = {Scientific Reports}, 175 | Pages = {14750}, 176 | Title = {Distinguishing time-delayed causal interactions 177 | using convergent cross mapping}, 178 | Volume = {5}, 179 | Year = {2015}} 180 | 181 | @article{Ye_2016, 182 | Author = {Ye and Sugihara}, 183 | Journal = {Science}, 184 | Number = {6302}, 185 | Pages = {922-925}, 186 | Title = {Information leverage in interconnected ecosystems: 187 | Overcoming the curse of dimensionality}, 188 | Volume = {353}, 189 | Year = {2016}} 190 | -------------------------------------------------------------------------------- /vignettes/rEDM-algorithms.ltx: -------------------------------------------------------------------------------- 1 | %\VignetteIndexEntry{Simplex and S-map Algorithms} 2 | %\VignetteEngine{R.rsp::tex} 3 | %\VignetteKeyword{R} 4 | %\VignetteKeyword{package} 5 | %\VignetteKeyword{vignette} 6 | %\VignetteKeyword{LaTeX} 7 | 8 | \documentclass{article} 9 | 10 | \usepackage[T1]{fontenc} % Use 8-bit encoding that has 256 glyphs 11 | \usepackage[english]{babel} % English language/hyphenation 12 | \usepackage{amsmath, amsfonts, amsthm} % Math packages 13 | \usepackage{cite} 14 | %\usepackage[sort&compress,square,comma,authoryear]{natbib} 15 | 16 | % makes color citations 17 | %% \usepackage[ 18 | %% %dvips,dvipdfm, 19 | %% colorlinks=true,urlcolor=blue,citecolor=red,linkcolor=red,bookmarks=true]{hyperref} 20 | 21 | \usepackage{color} 22 | \usepackage{pgfplots} 23 | \usepackage{tikz} 24 | %\pgfplotsset{compat=1.9} 25 | %\usepackage{hyperref} 26 | 27 | \usepackage{algorithm} 28 | \usepackage[noend]{algpseudocode} 29 | 30 | \usepackage{graphicx} 31 | %\usepackage{wrapfig} 32 | \usepackage{paralist} 33 | \usepackage{graphics} %% add this and next lines if pictures should be in esp format 34 | \usepackage{epsfig} %For pictures: screened artwork should be set up with an 85 or 100 line screen 35 | 36 | \usepackage{epstopdf} 37 | \usepackage[colorlinks=true]{hyperref} 38 | \hypersetup{urlcolor=blue, citecolor=red} 39 | %\usepackage{showkeys} 40 | 41 | \newtheorem{theorem}{Theorem}[section] 42 | \newtheorem{corollary}{Corollary} 43 | \newtheorem*{main}{Main Theorem} 44 | \newtheorem{lemma}[theorem]{Lemma} 45 | \newtheorem{proposition}{Proposition} 46 | \newtheorem{conjecture}{Conjecture} 47 | \newtheorem*{problem}{Problem} 48 | \theoremstyle{definition} 49 | \newtheorem{definition}[theorem]{Definition} 50 | \newtheorem{remark}{Remark} 51 | \newtheorem*{notation}{Notation} 52 | \newcommand{\ep}{\varepsilon} 53 | \newcommand{\eps}[1]{{#1}_{\varepsilon}} 54 | \newcommand{\bs}{\boldsymbol} 55 | \allowdisplaybreaks[3] 56 | 57 | 58 | % new commands %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 59 | \newcommand{\R}{\mathbb{R}} 60 | \newcommand{\diag}{\text{diag}} 61 | \DeclareMathOperator*{\argmin}{arg\,min} 62 | 63 | \makeatletter 64 | \def\BState{\State\hskip-\ALG@thistlm} 65 | \makeatother 66 | 67 | %\newcommand\numberthis{\addtocounter{equation}{1}\tag{\theequation}} 68 | \begin{document} 69 | \title{Simplex and S-map Algorithms} 70 | 71 | \author{Yair Daon} 72 | 73 | \maketitle 74 | 75 | {\footnotesize 76 | \centerline{Courant Institute, New York University} 77 | \centerline{New York, NY 10012, USA} 78 | } 79 | 80 | \bigskip 81 | 82 | \begin{abstract} 83 | Pseudo-code for the simplex projection algorithm 84 | \cite{Sugihara_1990} and the S-map algorithm \cite{Sugihara_1994}. 85 | Algorithms are presented for the simple case of predicting one 86 | variable using its own time series. 87 | \end{abstract} 88 | 89 | \section{Notation}\label{section:notation} 90 | \begin{itemize} 91 | \item $E$ denotes the embedding dimension. 92 | \item $k$ denotes the number of nearest neighbors we use. For 93 | the simplex method, the default is $k = E+1$ but for the S-map method it 94 | can be much larger. 95 | \item $T_p$ denotes how many time-steps into the future we are trying 96 | to predict. 97 | \item $X\in \R$ denotes a (potentially long) time series. 98 | \item $y\in \R^{E}$ is a vector of lagged observations for which we 99 | want to make a prediction --- in the simplest case where all components 100 | of the vector are single time step lags, $y_1$ represents the current value, 101 | $y_2$ is the value one time step prior and $y_{E}$ is the value $E-1$ time 102 | steps prior. 103 | \item $\theta \geq 0$ is the tuning parameter in the S-map method. 104 | \item $X_t^E = (X_t, X_{t-1},\dots, X_{t-E+1} )' \in \R^E$ denotes the 105 | lagged embedding vectors. 106 | \item $\| v \|$ is an unspecified norm of $v$. We do not specify 107 | which norm to use and that choice is left to the user / reader. 108 | \item $\| v \|_2^2 = \sum_i v_i^2$ is the squared L2-norm (squared Euclidean distances). 109 | \item Entries of matrices and vectors are indexed in the standard 110 | linear algebraic fashion, starting at $1$ (like the R standard) and 111 | not at $0$ (like the C/C++ and python standard). 112 | \end{itemize} 113 | 114 | 115 | \section{Helper Methods} 116 | 117 | \subsection{Nearest neighbors}\label{subsec:NN} 118 | I will not write implementation of the nearst neighbors method, just 119 | present its description. The method will be used with the signature 120 | presented in algorithm \ref{alg:NN}. 121 | 122 | The input variables $X,y$ and $k$ are defined in section 123 | \ref{section:notation}. The method returns a list of indices $N = 124 | \{N_1,\dots,N_k\}$ such that 125 | \begin{equation*} 126 | \| X_{N_i}^{E} - y\| \leq \| X_{N_j}^{E} - y\| 127 | \text{ if } 1 \leq i \leq j \leq k, 128 | \end{equation*} 129 | % 130 | \begin{algorithm} 131 | \caption{Find Nearest neighbors}\label{alg:NN} 132 | \begin{algorithmic}[1] 133 | \Procedure{Nearneighbor}{$y, X, k$} 134 | \EndProcedure 135 | \end{algorithmic} 136 | \end{algorithm} 137 | 138 | \subsection{Least Squares} 139 | A least squares method finds $x$ that minimizes the error in the 140 | solution of an over-determined linear system (more equations than 141 | variables). Below, $A \in \R^{p \times q},p > q$ and $b\in \R^p$ and 142 | the least squares problem is to find 143 | \begin{equation*} 144 | \hat{x} := \argmin_{x\in \R^q} \|Ax-b\|_2^2. 145 | \end{equation*} 146 | This problem can be solved using a Singular Value Decompostion (SVD), 147 | as outlined in algorithm \ref{alg:SVDLS}. 148 | \begin{algorithm} 149 | \caption{Least Squares via SVD}\label{alg:SVDLS} 150 | \begin{algorithmic}[1] 151 | \Procedure{LeastSquares}{$A,b$} 152 | \Comment{Assume $A\in \R^{p \times q}, p > q$.} 153 | \State $U, S, V \gets \text{SVD}(A)$ \Comment{Thus, $A = U S V'$} 154 | \State $S^{inv} \gets \textsc{zeros}( q,p )$ 155 | \Comment{The zero matrix in $\R^{q \times p}$} 156 | \For{ $i =1,\dots,q$ } 157 | \If{ $S_{ii} > 10^{-5}S_{11}$ } \Comment{Note that $10^{-5}$ is arbitrary} 158 | \State $S^{inv}_{ii} \gets \frac{1}{S_{ii}}$ 159 | \EndIf 160 | \EndFor 161 | \State $x \gets V S^{inv} U'b$ 162 | \State \Return $x$ 163 | \EndProcedure 164 | \end{algorithmic} 165 | \end{algorithm} 166 | 167 | \section{Simplex Projection}\label{section:simplex} 168 | Ignoring ties in distances, minimal distances, minimal weights and 169 | other potential hazards, the following algorithm performs Simplex 170 | projection to predict $T_p$ time-steps ahead. 171 | \begin{algorithm} 172 | \caption{Simplex Projection \cite{Sugihara_1990}}\label{alg:simplex} 173 | \begin{algorithmic}[1] 174 | \Procedure{SimplexPrediction}{$y, X, E, k, T_p$} 175 | 176 | \State $N \gets$ \textsc{Nearneighbor}($y, X, k$) 177 | \Comment{Find $k$ nearest neighbors.} 178 | \State $d \gets \| X_{N_1}^{E} - y\|$ \Comment{Define the distance scale.} 179 | 180 | \For{$i=1,\dots,k$} 181 | \State $w_i \gets \exp (-\| X_{N_i}^{E} - y\| / d )$ 182 | \Comment{Compute weights.} 183 | \EndFor 184 | 185 | \State $\hat{y} \gets \sum_{i = 1}^{k} \left(w_iX_{N_i+T_p}\right) / 186 | \sum_{i = 1}^{k} w_i$ 187 | \Comment{prediction = average of predicitions.} 188 | 189 | \State \Return $\hat{y}$ 190 | \EndProcedure 191 | \end{algorithmic} 192 | \end{algorithm} 193 | 194 | \section{S-map} 195 | Ignoring ties in distances, minimal distances, minimal weights and 196 | other potential hazards, the following algorithm uses the S-map method 197 | to predict $T_p$ time-steps ahead. 198 | % 199 | \begin{algorithm} 200 | \caption{S-map \cite{Sugihara_1994}}\label{alg:smap} 201 | \begin{algorithmic}[1] 202 | \Procedure{SmapPrediction}{$y, X, E, k, T_p, \theta$ } 203 | \State $N \gets$ \textsc{Nearneighbor}($y, X, k$) 204 | \Comment{Find NN to use for prediciton.} 205 | \State $d \gets \frac{1}{k} \sum_{i=1}^k \| X_{N_i}^{E} - y\|$ 206 | \Comment{Sum of distances.} 207 | \For {$i=1,\dots,k$} 208 | \State $w_i \gets \exp (-\theta \| X_{N_i}^{E} - y\| / d )$ 209 | \Comment{Compute weights.} 210 | \EndFor 211 | \State $W \gets \diag(w_i)$ \Comment{Reweighting matrix.} 212 | \State $A \gets 213 | \begin{bmatrix} 214 | 1 & X_{N_1} & X_{N_1- 1} & \dots & X_{N_1 - E + 1} \\ 215 | 1 & X_{N_2} & X_{N_2- 1} & \dots & X_{N_2 - E + 1} \\ 216 | \vdots & \vdots & \vdots & \ddots & \vdots \\ 217 | 1 & X_{N_k} & X_{N_k- 1} & \dots & X_{N_k - E + 1} 218 | \end{bmatrix} $ 219 | \Comment{Design matrix.} 220 | 221 | \State $A \gets WA$ \Comment{Weighted design matrix.} 222 | \State $b \gets 223 | \begin{bmatrix} 224 | X_{N_1 + T_p} \\ 225 | X_{N_2 + T_p} \\ 226 | \vdots \\ 227 | X_{N_k + T_p} 228 | \end{bmatrix} $ 229 | \Comment{Response vector.} 230 | \State $b \gets Wb$ \Comment{Weighted response vector.} 231 | \State $\hat{c} \gets \argmin_{c} \| Ac - b \|_2^2$ 232 | \Comment{Least squares, can be solved via algorithm \ref{alg:SVDLS}.} 233 | \State $\hat{y} \gets \hat{c}_0 + \sum_{i=1}^E\hat{c}_iy_i$ 234 | \Comment{Using the local linear model $\hat{c}$ for prediction.} 235 | \State \Return $\hat{y}$ 236 | \EndProcedure 237 | \end{algorithmic} 238 | \end{algorithm} 239 | 240 | Note that $k$, the number of nearest neighbors used for prediciton, 241 | can be very large compared to the embedding dimension $E$. Since $A 242 | \in \R^{k \times (1+E)}$, this means that $A$ is ``tall and skinny'' 243 | and the system $Ac = b$ is \emph{over-determined} (it has more 244 | equations than variables). This means (typically) that there does not 245 | exist any unique $c$ that solves said system. This is why we seek a 246 | least-squares solution instead. 247 | 248 | 249 | \bibliographystyle{unsrt} 250 | \bibliography{refs} 251 | 252 | \end{document} 253 | -------------------------------------------------------------------------------- /vignettes/rEDM-algorithms.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SugiharaLab/rEDM/aa8204c60a5b9c95d7901b879e3c347c71cfdf7c/vignettes/rEDM-algorithms.pdf -------------------------------------------------------------------------------- /vignettes/rEDM-tutorial.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SugiharaLab/rEDM/aa8204c60a5b9c95d7901b879e3c347c71cfdf7c/vignettes/rEDM-tutorial.pdf --------------------------------------------------------------------------------