├── .Rbuildignore
├── .gitignore
├── CITATION.cff
├── DESCRIPTION
├── LICENSE
├── NAMESPACE
├── NEWS.md
├── R
    ├── EDM.R
    ├── EDM_AuxFuncs.R
    ├── Examples.R
    ├── apps
    │   ├── CCM-Multiprocess.R
    │   └── Embedding.R
    └── zzz.R
├── README.md
├── data
    ├── EvergladesFlow.RData
    ├── Lorenz5D.RData
    ├── TentMap.RData
    ├── TentMapNoise.RData
    ├── Thrips.RData
    ├── block_3sp.RData
    ├── circle.RData
    ├── paramecium_didinium.RData
    └── sardine_anchovy_sst.RData
├── doc
    ├── rEDM-tutorial.html
    ├── rEDM-tutorial.pdf
    └── rEDM-tutorial.tex
├── man
    ├── CCM.Rd
    ├── ComputeError.Rd
    ├── Embed.Rd
    ├── EmbedDimension.Rd
    ├── EvergladesFlow.Rd
    ├── Lorenz5D.Rd
    ├── MakeBlock.Rd
    ├── Multiview.Rd
    ├── PredictInterval.Rd
    ├── PredictNonlinear.Rd
    ├── SMap.Rd
    ├── Simplex.Rd
    ├── SurrogateData.Rd
    ├── TentMap.Rd
    ├── TentMapNoise.Rd
    ├── Thrips.Rd
    ├── block_3sp.Rd
    ├── circle.Rd
    ├── figures
    │   ├── optimal-E-1.png
    │   ├── simplex-projection-1.png
    │   ├── sunspots-1.png
    │   └── unnamed-chunk-3-1.png
    ├── paramecium_didinium.Rd
    ├── rEDM.Rd
    └── sardine_anchovy_sst.Rd
├── src
    ├── CCM.cpp
    ├── ComputeError.cpp
    ├── DataFrame.cpp
    ├── Embed.cpp
    ├── EmbedDim.cpp
    ├── Makevars
    ├── Makevars.win
    ├── Multiview.cpp
    ├── ParameterList.cpp
    ├── PredictInterval.cpp
    ├── PredictNL.cpp
    ├── RcppEDMCommon.cpp
    ├── RcppEDMCommon.h
    ├── RcppExports.cpp
    ├── SMap.cpp
    ├── Simplex.cpp
    └── cppEDM
    │   ├── lib
    │       ├── .gitignore
    │       └── ignore.h
    │   └── src
    │       ├── API.cc
    │       ├── API.h
    │       ├── CCM.cc
    │       ├── CCM.h
    │       ├── Common.cc
    │       ├── Common.h
    │       ├── DataFrame.h
    │       ├── DateTime.cc
    │       ├── DateTime.h
    │       ├── EDM.cc
    │       ├── EDM.h
    │       ├── EDM_Formatting.cc
    │       ├── EDM_Neighbors.cc
    │       ├── EDM_Neighbors.h
    │       ├── Eval.cc
    │       ├── Multiview.cc
    │       ├── Multiview.h
    │       ├── Parameter.cc
    │       ├── Parameter.h
    │       ├── SMap.cc
    │       ├── SMap.h
    │       ├── Simplex.cc
    │       ├── Simplex.h
    │       ├── Version.h
    │       ├── makefile
    │       ├── makefile.mingw
    │       └── makefile.windows
├── tests
    ├── testthat.R
    └── testthat
    │   ├── test-1-Simplex.R
    │   ├── test-2-SMap.R
    │   ├── test-3-CCM.R
    │   ├── test-4-Multiview.R
    │   ├── test-5-EmbedDimension.R
    │   ├── test-6-PredictInterval.R
    │   └── test-7-PredictNonlinear.R
└── vignettes
    ├── .gitignore
    ├── CrossMap.png
    ├── CrossMap.svg
    ├── Lorenz_Projection.png
    ├── Lorenz_Projection.svg
    ├── Lorenz_Reconstruct.png
    ├── Lorenz_Reconstruct.svg
    ├── ParameterTable.csv
    ├── References.bib
    ├── rEDM-algorithms.ltx
    ├── rEDM-algorithms.pdf
    ├── rEDM-tutorial.Rmd
    ├── rEDM-tutorial.html
    └── rEDM-tutorial.pdf


/.Rbuildignore:
--------------------------------------------------------------------------------
 1 | ..Rcheck
 2 | ^README.Rmd
 3 | ^tests
 4 | ^cppEDM/lib
 5 | ^.*\.o
 6 | ^.*\.a
 7 | ^doc$
 8 | ^Meta$
 9 | ^vignettes/rEDM-tutorial_cache$
10 | ^vignettes/vignette_figs/
11 | ^CITATION.cff
12 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | Meta
2 | /doc/
3 | /Meta/
4 | 


--------------------------------------------------------------------------------
/CITATION.cff:
--------------------------------------------------------------------------------
 1 | cff-version: 1.2.0
 2 | title: rEDM
 3 | message: >-
 4 |   If you use this software please cite. Licensed by the
 5 |   University of California for educational, research and
 6 |   non-profit purposes.
 7 | type: software
 8 | authors:
 9 |   - given-names: Joseph
10 |     family-names: Park
11 |     email: josephpark@ieee.org
12 |     orcid: 'https://orcid.org/0000-0001-5411-1409'
13 |   - given-names: Cameron
14 |     family-names: Smith
15 |     orcid: 'https://orcid.org/0000-0003-0020-5607'
16 |     email: omid.smith.cameron@gmail.com
17 |   - given-names: George
18 |     family-names: Sugihara
19 |     orcid: 'https://orcid.org/0000-0002-2863-6946'
20 |   - given-names: Ethan
21 |     family-names: Deyle
22 |     orcid: 'https://orcid.org/0000-0001-8704-8434'
23 |   - given-names: Erik
24 |     family-names: Saberski
25 |     orcid: 'https://orcid.org/0000-0002-6475-6187'
26 |   - given-names: Hao
27 |     family-names: Ye
28 |     orcid: 'https://orcid.org/0000-0002-8630-1458'
29 |   - {}
30 | identifiers:
31 |   - type: url
32 |     value: 'https://github.com/SugiharaLab/rEDM'
33 | repository-code: 'https://github.com/SugiharaLab/rEDM'
34 | url: 'https://github.com/SugiharaLab/rEDM#readme'
35 | repository: 'https://CRAN.R-project.org/package=rEDM'
36 | abstract: >-
37 |   R wrapper of C++ EDM tools. UCSD Scripps Institution of
38 |   Oceanography, Sugihara Lab.
39 | 


--------------------------------------------------------------------------------
/DESCRIPTION:
--------------------------------------------------------------------------------
 1 | Package: rEDM
 2 | Type:    Package
 3 | Title:   Empirical Dynamic Modeling ('EDM')
 4 | Version: 1.15.4
 5 | Date:    2024-04-05
 6 | Authors@R: c( person("Joseph", "Park", role = c("aut", "cre"),
 7 |                      email = "JosephPark@IEEE.org",
 8 |                      comment = c(ORCID = "0000-0001-5411-1409")),
 9 |               person("Cameron", "Smith", role = c("aut"),
10 |                      email = "cos008@ucsd.edu",
11 |                      comment = c(ORCID = "0000-0003-0020-5607")),
12 |               person("George", "Sugihara", role = c("aut", "ccp"), 
13 |                      comment = c(ORCID = "0000-0002-2863-6946")),
14 |               person("Ethan", "Deyle", role = c("aut"),
15 |                      comment = c(ORCID = "0000-0001-8704-8434")),
16 |               person("Erik", "Saberski", role = c("ctb"),
17 |                      comment = c(ORCID = "0000-0002-6475-6187")),
18 |               person("Hao", "Ye", role = c("ctb"), 
19 |                      comment = c(ORCID = "0000-0002-8630-1458")),
20 |               person("The Regents of the University of California",
21 |                      role = c("cph") ) )
22 | Maintainer: Joseph Park <JosephPark@IEEE.org>
23 | Description: An implementation of 'EDM' algorithms based on research software developed for internal use at the Sugihara Lab ('UCSD/SIO').  The package is implemented with 'Rcpp' wrappers around the 'cppEDM' library.  It implements the 'simplex' projection method from Sugihara & May (1990) <doi:10.1038/344734a0>, the 'S-map' algorithm from Sugihara (1994) <doi:10.1098/rsta.1994.0106>, convergent cross mapping described in Sugihara et al. (2012) <doi:10.1126/science.1227079>, and, 'multiview embedding' described in Ye & Sugihara (2016) <doi:10.1126/science.aag0863>.
24 | License: BSD_2_clause + file LICENSE
25 | LazyData: true
26 | LazyLoad: yes
27 | Imports: methods, Rcpp (>= 1.0.1)
28 | LinkingTo: Rcpp, RcppThread
29 | Suggests: knitr, rmarkdown, formatR
30 | VignetteBuilder: knitr
31 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | YEAR:2019
2 | COPYRIGHT HOLDER:The Regents of the University of California.
3 | 


--------------------------------------------------------------------------------
/NAMESPACE:
--------------------------------------------------------------------------------
 1 | useDynLib( rEDM, .registration = TRUE )
 2 | 
 3 | export( Simplex   )
 4 | export( SMap      )
 5 | export( CCM       )
 6 | export( Multiview )
 7 | export( Embed     )
 8 | export( MakeBlock )
 9 | export( ComputeError     )
10 | export( EmbedDimension   )
11 | export( PredictInterval  )
12 | export( PredictNonlinear )
13 | export( SurrogateData    )
14 | 
15 | # Legacy functions
16 | # export( ccm           )
17 | # export( block_lnlp    )
18 | # export( s_map         )
19 | # export( simplex       )
20 | # export( multiview     )
21 | # export( make_block    )
22 | # export( compute_stats )
23 | # export( make_surrogate_data )
24 | 
25 | importFrom("grDevices", "dev.cur", "dev.new", "dev.list")
26 | importFrom("graphics", "abline", "legend", "lines", "mtext", "par", "plot")
27 | importFrom("utils", "data", "read.csv")
28 | importFrom("stats", "fft", "predict", "rnorm", "runif", "sd", "smooth.spline", "cov", "pnorm")
29 | import( methods )
30 | import( Rcpp )
31 | 


--------------------------------------------------------------------------------
/NEWS.md:
--------------------------------------------------------------------------------
 1 | #### rEDM NEWS
 2 | 
 3 | 2024-04-05 version 1.15.4 <JosephPark@IEEE.org>
 4 | 
 5 | ---
 6 | 
 7 | ##### NOTES:
 8 | - It is required as of version 1.15 to use functions: `Simplex`, `SMap`, `CCM`, `Embed`, `Multiview`, `EmbedDimension`, `PredictInterval`, `PredictNonlinear`, `ComputeError` instead of the legacy version 0.7 signatures. See Version 1.3 notes.
 9 | - [Rcpp](https://CRAN.R-project.org/package=Rcpp) imposes a 20 parameter limit on functions. The rEDM wrapper of [cppEDM](https://github.com/SugiharaLab/cppEDM#empirical-dynamic-modeling-edm) therefore does not invoke the full cppEDM API. Users requiring the full API are referred to the [pyEDM](https://pypi.org/project/pyEDM/) wrapper.
10 | - `SMap` linear system solver regularization: The R [glmnet](https://CRAN.R-project.org/package=glmnet) package does not separate the model from the data. This prevents integration in rEDM. Users requiring `SMap` regularization are referred to the [pyEDM](https://pypi.org/project/pyEDM/) wrapper.
11 | 
12 | ---
13 | 
14 | ##### Version 1.15
15 | - `SMap()` `ignoreNan` parameter added. If `ignoreNan` is `TRUE` (default) the library is redefined to ignore embedding vectors with nan. If `ignoreNan` is `FALSE` no change is made, the user can manually specify library segments in `lib`.
16 | - `SMap()` return list includes data.frame of SVD singular values.
17 | - `noTime` parameter added, default `FALSE`. If `noTime` is `TRUE` the first column of the data is not required to be a time vector. A row index vector will be inserted and passed to cppEDM. 
18 | - `const_pred` parameter removed due to Rcpp 20 parameter limit.
19 | - `CCM()` `replacement` parameter removed.
20 | - Legacy overload functions removed.
21 | - Version 1.15.1 `ignoreNan` added in `PredictNonlinear()`. Replace unicode in plot labels with plotmath expression. cppEDM initialize `nanFound` in DataFrame.h for UBSAN. Sync with cppEDM 1.15.1.
22 | - Version 1.15.2 Allow `columns` names with spaces. If the `columns` argument is a string use the "," delimiter to separate names. Remove `SMap` warning for disjoint library.
23 | - Version 1.15.3 Allow `columns` and `target` names with spaces in CCM.
24 | - Version 1.15.4 Move `SMap` warning for NaN into `verbose` mode. cppEDM `ComputeError` NaN removal improved, require more than 5 observation : prediction pairs.
25 | 
26 | ##### Version 1.14
27 | - cppEDM core added `generateLibrary` parameter to `Simplex()` and `SMap()`.  If `TRUE` the state-space library has newly generated points added. Not available due to Rcpp 20 parameter limit. 
28 | - Version 1.14.2 Remove CCM multivariable warning, positive tau warning. Add Embedding application. 
29 | 
30 | ##### Version 1.13
31 | - Adds `embedded` and multivariate embedding to `CCM()`.
32 | - Parameters `pathOut`, `predictFile` are removed from `CCM` to accommodate the Rcpp 20 parameter limit.
33 | - Version 1.13.1 cppEDM DateTime H:M:S fix. Allow first column data.frame characters. Set target to columns[0] if empty.
34 | 
35 | ##### Version 1.12
36 | - Adds `exclusionRadius` and `validLib` to `EmbedDimension()`, `PredictInterval()` and `PredictNonlinear()`. 
37 | - Version 1.12.2 Multiview return data.frame, correct SMap coefficient labels. 
38 | - Version 1.12.2.1 Rcpp character encoding workaround on Windows for DataFrame column names.
39 | - Version 1.12.3 cppEDM DateTime regex removed to avoid UTF-8 gcc issue in Windows.
40 | 
41 | ##### Version 1.11
42 | - Removes `nan` from `SMap` `columns` and `target`. Warning generated.
43 | 
44 | ##### Version 1.10
45 | - Adds the `generateSteps` parameter to `Simplex` and `SMap` implementing generative feedback prediction.
46 | - Adds the `parameterList` argument to `Simplex`, `SMap`, `CCM` and `Multiview`.
47 | - Parameters `pathOut`, `predictFile` are removed from `SMap`, `Multiview` to accommodate the Rcpp 20 parameter limit.
48 | - Version 1.10.1 converts `parameterList` values to numerics.
49 | - Version 1.10.2 is a bug fix for `Tp < 1` in generative mode.
50 | - Version 1.10.3 `SMap` `dgelss` error message. `CCM` `libSize` limits `Tp < 0`.
51 | 
52 | ##### Version 1.9
53 | - Adds the `validLib` parameter to `Simplex` and `SMap`. `validLib` is a boolean vector with the same number of elements as input data rows.  For `validLib` elements that are `false`, the corresponding data row will not be included in the state-space library.
54 | - Version 1.9.1 Requires .csv dataFiles to have column names.
55 | - Version 1.9.2 is a bug fix for `CCM` parameter validation with `tau > 0`.
56 | - Version 1.9.3 is a bug fix for `CCM` parameter validation with `Tp < -1`.
57 | 
58 | ##### Version 1.8
59 | - Removes the deletion of partial embedding data rows.
60 | - Adds the `deletePartial` argument to `MakeBlock`.
61 | - Bug fix in disjoint library indexing.
62 | 
63 | ##### Version 1.7
64 | - Updates nearest neighbors to better align results with legacy code.
65 | - Bug fixes in `SMap`, `CCM` `includeData`, and, the use of disjoint libraries.
66 | 
67 | ##### Version 1.6
68 | - Attempts to label `SMap` coefficients with names from the `columns` and `target` parameters.
69 | - Adds exclusionRadius to `CCM`.
70 | 
71 | ##### Version 1.5
72 | - Implemented an object oriented design in the core cppEDM.
73 | 
74 | ##### Version 1.3
75 | - A major rewrite of the 'rEDM' package as an Rcpp wrapper for the [cppEDM](https://github.com/SugiharaLab/cppEDM#empirical-dynamic-modeling-edm) library providing a unified computation engine for EDM algorithms across C++, Python and R implementations.  The revised package provides improved alignment between observed and forecast data rows, handling of date time vectors, and, strict exclusion of partial data vectors.
76 | 
77 | - To align with cppEDM and pyEDM, function names and signatures have changed from versions 0.7 and earlier. **It is recommended to use the new functions: `Simplex`, `SMap`, `CCM`, `Embed`, `Multiview`, `EmbedDimension`, `PredictInterval`, `PredictNonlinear`, `ComputeError`.** See [EDM Documentation](https://sugiharalab.github.io/EDM_Documentation/) or the package documentation.
78 | 
79 | - A legacy function interface is provided to emulate function signatures of rEDM 0.7, *but does not have complete coverage*.  It also has slightly different return values since nested data.frames are not returned.  Return values are either a data.frame, or, a named list of data.frames, as noted in the man pages.  Implemented functions include: `simplex`, `s_map`, `block_lnlp`, `ccm`, `multiview`, `make_block`, `compute_stats` and `make_surrogate_data`.  Functions `ccm_means`, `tde_gp`, `block_gp` and `test_nonlinearity` are deprecated.
80 | 


--------------------------------------------------------------------------------
/R/Examples.R:
--------------------------------------------------------------------------------
  1 | 
  2 | #------------------------------------------------------------------------
  3 | # Examples(): run the main rEDM functions once each on the package data sets
  4 | #------------------------------------------------------------------------
  5 | Examples = function() {
  6 | 
  7 |   library( rEDM )
  8 |   
  9 |   # Load the example data sets into this function's environment
 10 |   tryCatch(
 11 |     expr = {
 12 |       data( TentMap,             envir = environment() )
 13 |       data( TentMapNoise,        envir = environment() )
 14 |       data( block_3sp,           envir = environment() )
 15 |       data( circle,              envir = environment() )
 16 |       data( sardine_anchovy_sst, envir = environment() )
 17 |     },
 18 |     error = function( err ) {
 19 |       print( err )
 20 |       stop("Examples(): Failed to load package data.")
 21 |     }
 22 |   )
 23 |   # Set a 4 x 2 panel layout: on a new device if none is open, else the current
 24 |   if ( is.null( dev.list() ) ) {
 25 |     newPlot( mfrow = c( 4, 2 ) )
 26 |   }
 27 |   else {
 28 |     par( mfrow = c( 4, 2 ) )
 29 |   }
 30 |   # Each call below is built as a string, then run with eval( parse() )
 31 |   # EmbedDimension() on TentMap
 32 |   cmd = paste0('EmbedDimension( dataFrame = TentMap,',
 33 |                ' lib = "1 100", pred = "201 500",',
 34 |                ' columns = "TentMap", target = "TentMap")' )
 35 |   df = eval( parse( text = cmd ) )
 36 |   
 37 |   # PredictInterval() on TentMap with E = 2
 38 |   cmd = paste0('PredictInterval( dataFrame = TentMap,',
 39 |                ' lib = "1 100", pred = "201 500", E = 2,',
 40 |                ' columns = "TentMap", target = "TentMap") ')
 41 |   df = eval( parse( text = cmd ) )
 42 |   
 43 |   # PredictNonlinear() on TentMapNoise with E = 2
 44 |   cmd = paste0('PredictNonlinear( dataFrame = TentMapNoise,',
 45 |                ' E = 2,lib = "1 100", pred = "201 500", ',
 46 |                ' columns = "TentMap",target = "TentMap") ')
 47 |   df = eval( parse( text = cmd ) )
 48 | 
 49 |   # Simplex() 
 50 |   # block_3sp : multivariable columns x_t y_t z_t, embedded = TRUE
 51 |   cmd = paste0('Simplex( dataFrame = block_3sp,',
 52 |                ' lib = "1 99", pred = "100 195", ',
 53 |                ' E = 3, embedded = TRUE, showPlot = TRUE,',
 54 |                ' columns = "x_t y_t z_t", target = "x_t") ')
 55 |   df = eval( parse( text = cmd ) )
 56 | 
 57 |   # Simplex() 
 58 |   # block_3sp : single column x_t with E = 3, embedded argument not passed
 59 |   cmd = paste0('Simplex( dataFrame = block_3sp,',
 60 |                ' lib = "1 99", pred = "105 190", ',
 61 |                ' E = 3, showPlot = TRUE,',
 62 |                ' columns = "x_t", target = "x_t") ')
 63 |   df = eval( parse( text = cmd ) )
 64 | 
 65 |   # Multiview() on block_3sp
 66 |   cmd = paste0('Multiview( dataFrame = block_3sp,',
 67 |                ' lib = "1 99", pred = "105 190", ',
 68 |                ' E = 3, columns = "x_t y_t z_t", target = "x_t",',
 69 |                ' showPlot = TRUE) ')
 70 |   df = eval( parse( text = cmd ) )
 71 | 
 72 |   # CCM() on sardine_anchovy_sst : anchovy against np_sst
 73 |   cmd = paste0('CCM( dataFrame = sardine_anchovy_sst,',
 74 |                ' E = 3, Tp = 0, columns = "anchovy", target = "np_sst",',
 75 |                ' libSizes = "10 70 10", sample = 100, verbose = TRUE, ',
 76 |                ' showPlot = TRUE) ')
 77 |   df = eval( parse( text = cmd ) )
 78 | 
 79 |   par( ask = TRUE )
 80 |   # SMap() on circle : multivariable columns x y, embedded = TRUE
 81 |   cmd = paste0('SMap( dataFrame = circle,',
 82 |                ' lib = "1 100", pred = "110 190", theta = 4, E = 2,',
 83 |                ' verbose = TRUE, showPlot = TRUE, embedded = TRUE,',
 84 |                ' columns = "x y", target = "x") ')
 85 |   df = eval( parse( text = cmd ) )
 86 |   par( ask = FALSE )
 87 | }
 88 | 
 89 | #------------------------------------------------------------------------
 90 | # newPlot(): open a new graphics device, set par() values, return its id
 91 | #------------------------------------------------------------------------
 92 | newPlot = function(
 93 |   mar      = c( 4, 4, 1, 1 ),
 94 |   mgp      = c( 2.3, 0.8, 0 ),
 95 |   cex      = 1.5, 
 96 |   cex.axis = 1.3,
 97 |   cex.lab  = 1.3,
 98 |   mfrow    = c( 1, 1 )
 99 | ) {
100 |   # NOTE(review): mfrow is set last; ?par says that resets cex - confirm intent
101 |   dev.new()
102 |   par( mar      = mar, mgp = mgp,
103 |        cex      = cex,
104 |        cex.axis = cex.axis,
105 |        cex.lab  = cex.lab,
106 |        mfrow    = mfrow )
107 |   invisible( dev.cur() )
108 | }
109 | 


--------------------------------------------------------------------------------
/R/apps/CCM-Multiprocess.R:
--------------------------------------------------------------------------------
  1 | 
  2 | library( rEDM )
  3 | library( foreach )
  4 | library( doParallel )
  5 | 
  6 | #-------------------------------------------------------------------
  7 | # CCM for all dataFrame columns against target using foreach %dopar%
  8 | # Presumes first column is time/index, not processed
  9 | #-------------------------------------------------------------------
 10 | CCM_MP_Columns = function(
 11 |   dataFrame = NULL,
 12 |   target    = 'V5',
 13 |   libSizes  = '20 920 100',
 14 |   sample    = 10,
 15 |   E         = 5,
 16 |   Tp        = 0,
 17 |   cores     = 4  # CCM uses 2 cores, max is detectCores()/2 - 2
 18 | ) {
 19 |   # Returns (invisibly) a named list with one CCM result per data column
 20 |   if ( is.null( dataFrame ) ) { dataFrame = Lorenz5D }
 21 | 
 22 |   registerDoParallel( cores = cores )
 23 |   on.exit( stopImplicitCluster(), add = TRUE ) # Also runs if a CCM call fails
 24 |   dataCols = names( dataFrame )[ -1 ] # Skip first column; empty if it is alone
 25 | 
 26 |   # Parallel process columns using foreach ... %dopar%
 27 |   L = foreach ( col = iter( dataCols ) ) %dopar% {
 28 | 
 29 |     CCM( dataFrame = dataFrame,
 30 |          E         = E,
 31 |          Tp        = Tp,
 32 |          columns   = col,
 33 |          target    = target,
 34 |          libSizes  = libSizes,
 35 |          sample    = sample )
 36 |   }
 37 | 
 38 |   # The implicit cluster is stopped by the on.exit() handler registered above
 39 | 
 40 |   # Name each element of L after the third column of its CCM data.frame
 41 |   keys = c()
 42 |   for ( cmap in L ) {
 43 |     keys = c( keys, names( cmap )[3] )
 44 |   }
 45 |   names( L ) = keys
 46 | 
 47 |   invisible( L )
 48 | }
 49 | 
 50 | #---------------------------------------------------------------------
 51 | # CCM for single columns : target over a set of libSizes using foreach %dopar%
 52 | # libSizesList is a list of partitioned libSizes
 53 | # libSizesList elements can be any libSizes format used by CCM
 54 | #---------------------------------------------------------------------
 55 | CCM_MP_LibSizes = function(
 56 |   dataFrame    = NULL,
 57 |   columns      = 'V1',
 58 |   target       = 'V5',
 59 |   libSizesList = c( '20 50 70 100', '150 200 250 300', '400 500 600 700 900' ),
 60 |   sample       = 10,
 61 |   E            = 5,
 62 |   Tp           = 0,
 63 |   cores        = 4  # CCM uses 2 cores, max is detectCores()/2 - 2
 64 |  ) {
 65 |   # Returns (invisibly) a list with one CCM result per libSizesList element
 66 |   if ( is.null( dataFrame ) ) { dataFrame = Lorenz5D }
 67 | 
 68 |   registerDoParallel( cores = cores )
 69 |   on.exit( stopImplicitCluster(), add = TRUE ) # Also runs if a CCM call fails
 70 |   # Parallel process libSizesList using foreach ... %dopar%
 71 |   L = foreach ( libSize = iter( libSizesList ) ) %dopar% {
 72 | 
 73 |     CCM( dataFrame = dataFrame,
 74 |          E         = E,
 75 |          Tp        = Tp,
 76 |          columns   = columns,
 77 |          target    = target,
 78 |          libSizes  = libSize,
 79 |          sample    = sample )
 80 |   }
 81 | 
 82 |   # The implicit cluster is stopped by the on.exit() handler registered above
 83 | 
 84 |   # Label each result with the libSizes string that produced it
 85 |   names( L ) = libSizesList
 86 | 
 87 |   invisible( L )
 88 | }
 89 | 
 90 | #---------------------------------------------------------------------
 91 | # CCM for single columns : target over a set of libSizes using clusterApply
 92 | # libSizesList is a list of partitioned libSizes
 93 | # libSizesList elements can be any libSizes format used by CCM
 94 | #
 95 | # DO NOT USE mclapply
 96 | #    From ?mclapply:
 97 | #    It is _strongly discouraged_ to use these functions with
 98 | #    multi-threaded libraries or packages (see ‘mcfork’ for more
 99 | #    details).  If in doubt, it is safer to use a non-FORK cluster
100 | #    (see ‘makeCluster’, ‘clusterApply’).
101 | #---------------------------------------------------------------------
102 | CCM_MP_LibSizes_cluster = function(
103 |   dataFrame    = NULL,
104 |   columns      = 'V1',
105 |   target       = 'V5',
106 |   libSizesList = c( '20 30 40 50 60 70 80 90 100',
107 |                     '120 150 200 250 300',
108 |                     '400 500 600 700 900' ),
109 |   sample       = 20,
110 |   E            = 5,
111 |   Tp           = 0,
112 |   cores        = 4
113 | 
114 | ) {
115 |   # Returns (invisibly) the clusterApply() list: one result per libSizesList
116 |   if ( is.null( dataFrame ) ) { dataFrame = Lorenz5D }
117 | 
118 |   cl = makeCluster( cores )
119 |   on.exit( stopCluster( cl ), add = TRUE ) # Stop workers even if a call fails
120 |   clusterExport( cl, list("CCM") )
121 | 
122 |   cmap = clusterApply( cl = cl, x = libSizesList, fun = CrossMapFunc,
123 |                        dataFrame, E, Tp, columns, target, sample )
124 | 
125 |   # The cluster is stopped by the on.exit() handler registered above
126 | 
127 |   invisible( cmap )
128 | }
129 | 
130 | #---------------------------------------------------------------------
131 | # Call rEDM CCM on behalf of CCM_MP_LibSizes_cluster() clusterApply()
132 | #---------------------------------------------------------------------
133 | CrossMapFunc = function(
134 |   libSizes, # First argument : from clusterApply( x = libSizesList )
135 |   dataFrame, E, Tp, columns, target, sample
136 | ) {
137 |   # Forward Tp as well: it was accepted but previously never passed to CCM
138 |   cm = CCM( dataFrame = dataFrame,
139 |             E         = E,
140 |             Tp        = Tp,
141 |             columns   = columns,
142 |             target    = target,
143 |             libSizes  = libSizes, sample = sample )
144 | }
145 | 


--------------------------------------------------------------------------------
/R/apps/Embedding.R:
--------------------------------------------------------------------------------
  1 | 
  2 | library( rEDM )
  3 | 
  4 | #-------------------------------------------------------------------
  5 | # EDM Embed wrapper
  6 | # Create time-delay embedding with time column for EDM. 
  7 | # Useful to create mixed multivariate embeddings for SMap and
  8 | # embeddings with time-advanced vectors.  
  9 | # Rename V(t-0), V(t+0) to V. Add Time column.
 10 | # If columns is NULL, embed all except the first (time) column.
 11 | # If plusminus create time-advanced & time-delayed columns.
 12 | #-------------------------------------------------------------------
 13 | Embedding = function(
 14 |   dataFrame = NULL,
 15 |   dataFile  = NULL,
 16 |   outFile   = NULL,
 17 |   plusminus = FALSE,
 18 |   columns   = NULL,
 19 |   E         = 2,
 20 |   tau       = -1,
 21 |   verbose   = FALSE
 22 | ) {
 23 | 
 24 |   # Arguments
 25 |   #   dataFrame : input data; its first column is taken as the time column
 26 |   #   dataFile  : .csv file read with read.csv() when dataFrame is NULL
 27 |   #   outFile   : if not NULL, the embedding is written here with write.csv()
 28 |   #   plusminus : if TRUE, embed with both tau and abs( tau )
 29 |   #   columns   : columns to embed; NULL means all except the first
 30 |   #   E, tau    : passed to Embed()
 31 |   #   verbose   : if TRUE, print the column selection and a preview
 32 |   # Returns the embedding with the time column first.
 33 | 
 34 |   if ( is.null( dataFrame ) & is.null( dataFile ) ) {
 35 |     stop( 'dataFrame and dataFile are empty, specify one.' )
 36 |   }
 37 | 
 38 |   # With plusminus, make tau negative so tau / abs( tau ) give both signs
 39 |   if ( tau > 0 & plusminus ) {
 40 |     tau = -tau
 41 |   }
 42 | 
 43 |   # Prefer dataFrame; otherwise load from dataFile
 44 |   data = if ( is.null( dataFrame ) ) {
 45 |     read.csv( dataFile )
 46 |   } else {
 47 |     dataFrame
 48 |   }
 49 | 
 50 |   # The first column is presumed to hold time
 51 |   timeName   = colnames( data )[1]
 52 |   timeSeries = data[ , timeName ]
 53 | 
 54 |   # Default selection: every column after the first
 55 |   if ( is.null( columns ) ) {
 56 |     columns = colnames( data )[ 2 : ncol( data ) ]
 57 |   }
 58 | 
 59 |   if ( verbose ) {
 60 |     print( paste( "Time column: ", timeName ) )
 61 |     print( "Embed columns: " )
 62 |     print( columns )
 63 |   }
 64 | 
 65 |   if ( plusminus ) {
 66 |     # Two embeddings : tau and abs( tau ). V(t-0) and V(t+0) are redundant
 67 |     embed_minus = Embed( dataFrame = data, E = E, tau = tau, columns = columns )
 68 |     embed_plus  = Embed( dataFrame = data, E = E, tau = abs( tau ),
 69 |                          columns = columns )
 70 |     embed = cbind( timeSeries, embed_minus, embed_plus, stringsAsFactors=FALSE )
 71 | 
 72 |     # Drop the *(t+0) columns : redundant with *(t-0)
 73 |     is_tplus0 = grepl( '(t+0)', colnames( embed ), fixed = TRUE )
 74 |     embed     = embed[ , !is_tplus0 ]
 75 |   }
 76 |   else {
 77 |     embed_ = Embed( dataFrame = data, E = E, tau = tau, columns = columns )
 78 |     embed  = cbind( timeSeries, embed_, stringsAsFactors = FALSE )
 79 |   }
 80 | 
 81 |   # Strip the (t-0) / (t+0) suffixes so those columns carry the original
 82 |   # names; sub() is vectorised and leaves non-matching names unchanged
 83 |   columnNames = colnames( embed )
 84 |   columnNames = sub( '(t-0)', '', columnNames, fixed = TRUE )
 85 |   columnNames = sub( '(t+0)', '', columnNames, fixed = TRUE )
 86 | 
 87 |   # The first column gets the original time column name back
 88 |   columnNames[ 1 ]  = timeName
 89 |   colnames( embed ) = columnNames
 90 | 
 91 |   if ( verbose ) {
 92 |     print( head( embed, 4 ) )
 93 |     print( tail( embed, 4 ) )
 94 |   }
 95 | 
 96 |   if ( ! is.null( outFile ) ) {
 97 |     write.csv( embed, file = outFile, row.names = FALSE )
 98 |   }
 99 | 
100 |   return( embed )
101 | }
102 | 


--------------------------------------------------------------------------------
/R/zzz.R:
--------------------------------------------------------------------------------
1 | # Load the "EDMInternal" module (TRUE: presumably all its objects - see ?Rcpp::loadModule)
2 | loadModule("EDMInternal", TRUE)


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | rEDM
  2 | ====
  3 | 
  4 | Overview
  5 | --------
  6 | 
  7 | The `rEDM` package is a collection of methods for Empirical Dynamic
  8 | Modeling (EDM). EDM is based on the mathematical theory of
  9 | reconstructing attractor manifolds from time series data, with
 10 | applications to forecasting, causal inference, and more. It is based on
 11 | research software developed for the [Sugihara Lab](https://deepeco.ucsd.edu/)
 12 | (University of California San Diego, Scripps Institution of Oceanography).
 13 | 
 14 | Empirical Dynamic Modeling (EDM)
 15 | -------------------------------------
 16 | 
 17 | This package implements an R wrapper of
 18 | [EDM](https://deepeco.ucsd.edu/nonlinear-dynamics-research/edm/) tools from
 19 | the [cppEDM](https://github.com/SugiharaLab/cppEDM/#empirical-dynamic-modeling-edm) library.
 20 | Introduction and documentation are available
 21 | [online](https://sugiharalab.github.io/EDM_Documentation/ "EDM Docs"), or
 22 | in the package [tutorial](https://github.com/SugiharaLab/rEDM/blob/d5aafe06573be73f603488f6ee4ae68a73da5e12/doc/rEDM-tutorial.pdf "rEDM tutorial").
 23 | 
 24 | Functionality includes:
 25 | 
 26 | * Simplex projection (Sugihara and May 1990)
 27 | * Sequential Locally Weighted Global Linear Maps (S-map) (Sugihara 1994)
 28 | * Multivariate embeddings (Dixon et al. 1999)
 29 | * Convergent cross mapping (Sugihara et al. 2012)
 30 | * Multiview embedding (Ye and Sugihara 2016)
 31 | 
 32 | Installation
 33 | ------------
 34 | To install from CRAN [rEDM](https://CRAN.R-project.org/package=rEDM):
 35 | 
 36 |     install.packages("rEDM")
 37 | 
 38 | Using R devtools for latest development version:
 39 | 
 40 |     install.packages("devtools")
 41 |     devtools::install_github("SugiharaLab/rEDM")
 42 | 
 43 | Building from source:
 44 | 
 45 |     git clone https://github.com/SugiharaLab/rEDM.git
 46 |     cd rEDM
 47 |     R CMD INSTALL .
 48 | 
 49 | Example
 50 | -------
 51 | 
 52 | We begin by looking at annual time series of sunspots:
 53 | 
 54 |     df = data.frame(yr = as.numeric(time(sunspot.year)), 
 55 |                      sunspot_count = as.numeric(sunspot.year))
 56 | 
 57 |     plot(df$yr, df$sunspot_count, type = "l", 
 58 |          xlab = "year", ylab = "sunspots")
 59 | 
 60 | ![](man/figures/sunspots-1.png)
 61 | 
 62 | First, we use `EmbedDimension()` to determine the optimal embedding
 63 | dimension, E:
 64 | 
 65 |     library(rEDM)   # load the package
 66 |     # If you're new to the rEDM package, please consult the tutorial:
 67 |     # vignette("rEDM-tutorial")
 68 | 
 69 |     E.opt = EmbedDimension( dataFrame = df,    # input data
 70 |                             lib     = "1 280", # portion of data to train
 71 |                             pred    = "1 280", # portion of data to predict
 72 |                             columns = "sunspot_count",
 73 |                             target  = "sunspot_count" )
 74 | 
 75 | ![](man/figures/optimal-E-1.png)
 76 | 
 77 |     E.opt
 78 |     #     E    rho
 79 |     # 1   1 0.7397
 80 |     # 2   2 0.8930
 81 |     # 3   3 0.9126
 82 |     # 4   4 0.9133
 83 |     # 5   5 0.9179
 84 |     # 6   6 0.9146
 85 |     # 7   7 0.9098
 86 |     # 8   8 0.9065
 87 |     # 9   9 0.8878
 88 |     # 10 10 0.8773
 89 | 
 90 | Highest predictive skill is found between `E = 3` and `E = 6`. Since we
 91 | generally want a simpler model, if possible, we use `E = 3` to forecast
 92 | the last 1/3 of data based on training (attractor reconstruction) from
 93 | the first 2/3.
 94 | 
 95 |     simplex = Simplex( dataFrame = df, 
 96 |                        lib     = "1   190", # portion of data to train
 97 |                        pred    = "191 287", # portion of data to predict
 98 |                        columns = "sunspot_count",
 99 |                        target  = "sunspot_count",
100 |                        E       = 3 )
101 | 
102 |     plot( df$yr, df$sunspot_count, type = "l", lwd = 2,
103 |           xlab = "year", ylab = "sunspots")
104 |     lines( simplex$yr, simplex$Predictions, col = "red", lwd = 2)
105 |     legend( 'topleft', legend = c( "Observed", "Predicted (year + 1)" ),
106 |             fill = c( 'black', 'red' ), bty = 'n', cex = 1.3 )
107 | 
108 | ![](man/figures/simplex-projection-1.png)
109 | 
110 | Further Examples
111 | ----------------
112 | 
113 | Please see the package vignettes for more details:
114 | 
115 |     browseVignettes("rEDM")
116 | 
117 | ### References
118 | 
119 | Sugihara G. and May R. 1990. Nonlinear forecasting as a way of
120 | distinguishing chaos from measurement error in time series. Nature,
121 | 344:734–741.
122 | 
123 | Sugihara G. 1994. Nonlinear forecasting for the classification of
124 | natural time series. Philosophical Transactions: Physical Sciences and
125 | Engineering, 348 (1688) : 477–495.
126 | 
127 | Dixon, P. A., M. Milicich, and G. Sugihara, 1999. Episodic fluctuations
128 | in larval supply. Science 283:1528–1530.
129 | 
130 | Sugihara G., May R., Ye H., Hsieh C., Deyle E., Fogarty M., Munch S.,
131 | 2012. Detecting Causality in Complex Ecosystems. Science 338:496-500.
132 | 
133 | Ye H., and G. Sugihara, 2016. Information leverage in interconnected 
134 | ecosystems: Overcoming the curse of dimensionality. Science 353:922–925.
135 | 
136 | 


--------------------------------------------------------------------------------
/data/EvergladesFlow.RData:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SugiharaLab/rEDM/aa8204c60a5b9c95d7901b879e3c347c71cfdf7c/data/EvergladesFlow.RData


--------------------------------------------------------------------------------
/data/Lorenz5D.RData:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SugiharaLab/rEDM/aa8204c60a5b9c95d7901b879e3c347c71cfdf7c/data/Lorenz5D.RData


--------------------------------------------------------------------------------
/data/TentMap.RData:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SugiharaLab/rEDM/aa8204c60a5b9c95d7901b879e3c347c71cfdf7c/data/TentMap.RData


--------------------------------------------------------------------------------
/data/TentMapNoise.RData:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SugiharaLab/rEDM/aa8204c60a5b9c95d7901b879e3c347c71cfdf7c/data/TentMapNoise.RData


--------------------------------------------------------------------------------
/data/Thrips.RData:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SugiharaLab/rEDM/aa8204c60a5b9c95d7901b879e3c347c71cfdf7c/data/Thrips.RData


--------------------------------------------------------------------------------
/data/block_3sp.RData:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SugiharaLab/rEDM/aa8204c60a5b9c95d7901b879e3c347c71cfdf7c/data/block_3sp.RData


--------------------------------------------------------------------------------
/data/circle.RData:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SugiharaLab/rEDM/aa8204c60a5b9c95d7901b879e3c347c71cfdf7c/data/circle.RData


--------------------------------------------------------------------------------
/data/paramecium_didinium.RData:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SugiharaLab/rEDM/aa8204c60a5b9c95d7901b879e3c347c71cfdf7c/data/paramecium_didinium.RData


--------------------------------------------------------------------------------
/data/sardine_anchovy_sst.RData:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SugiharaLab/rEDM/aa8204c60a5b9c95d7901b879e3c347c71cfdf7c/data/sardine_anchovy_sst.RData


--------------------------------------------------------------------------------
/doc/rEDM-tutorial.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SugiharaLab/rEDM/aa8204c60a5b9c95d7901b879e3c347c71cfdf7c/doc/rEDM-tutorial.pdf


--------------------------------------------------------------------------------
/man/CCM.Rd:
--------------------------------------------------------------------------------
  1 | \name{CCM}
  2 | \alias{CCM}
  3 | \title{Convergent cross mapping using simplex projection}
  4 | \usage{
  5 | CCM(pathIn = "./", dataFile = "", dataFrame = NULL,
  6 |   E = 0, Tp = 0, knn = 0, tau = -1,
  7 |   exclusionRadius = 0, columns = "", target = "", libSizes = "",
  8 |   sample = 0, random = TRUE, seed = 0, 
  9 |   embedded = FALSE, includeData = FALSE, parameterList = FALSE,
 10 |   verbose = FALSE, showPlot = FALSE, noTime = FALSE)
 11 | }
 12 | \arguments{
 13 | \item{pathIn}{path to \code{dataFile}.}
 14 | 
 15 | \item{dataFile}{.csv format data file name. The first column must be a time
 16 | index or time values unless noTime is TRUE. The first row must be column names.}
 17 | 
 18 | \item{dataFrame}{input data.frame. The first column must be a time
 19 | index or time values unless noTime is TRUE. The columns must be named.}
 20 | 
 21 | \item{E}{embedding dimension.}
 22 | 
 23 | \item{Tp}{prediction horizon (number of time column rows).}
 24 | 
 25 | \item{knn}{number of nearest neighbors. If knn=0, knn is set to E+1.}
 26 | 
 27 | \item{tau}{lag of time delay embedding specified as number of
 28 | time column rows.}
 29 | 
 30 | \item{exclusionRadius}{excludes vectors from the search space of nearest 
 31 | neighbors if their relative time index is within exclusionRadius.}
 32 | 
 33 | \item{columns}{string of whitespace separated column name(s), or vector
 34 | of column names used to create the library. If individual column names
 35 | contain whitespace place names in a vector, or, append ',' to the name.}
 36 | 
 37 | \item{target}{column name used for prediction.}
 38 | 
 39 | \item{libSizes}{string of 3 whitespace separated integer values
 40 |   specifying the initial library size, the final library size,
 41 |   and the library size increment. Can also be a list of strictly
 42 |   increasing library sizes.}
 43 | 
 44 | \item{sample}{integer specifying the number of random samples to draw at
 45 | each library size evaluation.}
 46 | 
 47 | \item{random}{logical to specify random (\code{TRUE}) or sequential
 48 |   library sampling. Note \code{random = FALSE} is not convergent
 49 |   cross mapping.}
 50 | 
 51 | \item{seed}{integer specifying the random sampler seed.  If
 52 |   \code{seed=0} then a random seed is generated.}
 53 | 
 54 | \item{embedded}{logical specifying if the input data are embedded.}
 55 | 
 56 | \item{includeData}{logical to include statistics and predictions for
 57 |   every prediction in the ensemble.}
 58 | 
 59 | \item{parameterList}{logical to add list of invoked parameters.}
 60 | 
 61 | \item{verbose}{logical to produce additional console reporting.}
 62 | 
 63 | \item{showPlot}{logical to plot results.}
 64 | 
 65 | \item{noTime}{logical to allow input data with no time column.}
 66 | }
 67 | 
 68 | \value{
 69 |   A data.frame with 3 columns. The first column is \code{LibSize}
 70 |   specifying the subsampled library size.  Columns 2 and 3 report
 71 |   Pearson correlation coefficients for the prediction of X from Y, and
 72 |   Y from X.
 73 | 
 74 |   If \code{includeData = TRUE} a named list with the following data.frames
 75 |   is returned:
 76 |   \tabular{ll}{
 77 |       LibMeans\tab CCM mean correlations for each library size\cr
 78 |       CCM1_PredictStat\tab Forward cross map prediction statistics\cr
 79 |       CCM1_Predictions\tab Forward cross map prediction values\cr
 80 |       CCM2_PredictStat\tab Reverse cross map prediction statistics\cr
 81 |       CCM2_Predictions\tab Reverse cross map prediction values\cr
 82 |   }
 83 | 
 84 |   If \code{includeData = TRUE} and \code{parameterList = TRUE} a
 85 |   named list "parameters" is added.
 86 | }
 87 | 
 88 | \references{Sugihara G., May R., Ye H., Hsieh C., Deyle E., Fogarty M., Munch S., 2012. Detecting Causality in Complex Ecosystems. Science 338:496-500.
 89 | }
 90 | 
 91 | \description{
 92 |   The state-space of a multivariate dynamical system (not a purely
 93 |   stochastic one) encodes coherent phase-space variable trajectories. If
 94 |   enough information is available, one can infer the presence or absence
 95 |   of cross-variable interactions associated with causal links between
 96 |   variables. \code{\link{CCM}} measures the extent to which states of
 97 |   variable Y can reliably estimate states of variable X. This can happen
 98 |   if X is causally influencing Y. 
 99 | 
100 |   If cross-variable state predictability converges as more state-space
101 |   information is provided, this indicates a causal link. \code{\link{CCM}}
102 |   performs this cross-variable mapping using Simplex, with convergence
103 |   assessed across a range of observational library sizes as described in
104 |   \cite{Sugihara et al. 2012}.
105 | }
106 | 
107 | \details{
108 |   \code{\link{CCM}} computes the X:Y and Y:X cross-mappings in parallel
109 |   using threads. 
110 | }
111 | 
112 | \examples{
113 | data(sardine_anchovy_sst)
114 | df = CCM( dataFrame = sardine_anchovy_sst, E = 3, Tp = 0, columns = "anchovy",
115 | target = "np_sst", libSizes = "10 70 10", sample = 100 )
116 | 
117 | }
118 | 


--------------------------------------------------------------------------------
/man/ComputeError.Rd:
--------------------------------------------------------------------------------
 1 | \name{ComputeError}
 2 | \alias{ComputeError}
 3 | \title{Compute error}
 4 | \usage{
 5 | ComputeError(obs, pred)  
 6 | }
 7 | \arguments{
 8 | \item{obs}{vector of observations.}
 9 | 
10 | \item{pred}{vector of predictions.}
11 | }
12 | 
13 | \value{
14 |   A named list with components:
15 |   \tabular{ll}{
16 |     rho   \tab Pearson correlation\cr
17 |     MAE   \tab mean absolute error\cr
18 |     RMSE  \tab root mean square error\cr
19 |   }
20 | }
21 | 
22 | \description{
23 | \code{\link{ComputeError}} evaluates the Pearson correlation
24 | coefficient, mean absolute error and root mean square error between two
25 | numeric vectors.
26 | }
27 | 
28 | \examples{
29 | data(block_3sp)
30 | smplx <- Simplex( dataFrame=block_3sp, lib="1 99", pred="105 190", E=3,
31 | columns="x_t", target="x_t")
32 | err <- ComputeError( smplx$Observations, smplx$Predictions )
33 | }
34 | 


--------------------------------------------------------------------------------
/man/Embed.Rd:
--------------------------------------------------------------------------------
 1 | \name{Embed}
 2 | \alias{Embed}
 3 | \title{Embed data with time lags}
 4 | 
 5 | \usage{
 6 | Embed(path = "./", dataFile = "", dataFrame = NULL, E = 0, tau = -1, 
 7 | columns = "", verbose = FALSE)  
 8 | }
 9 | 
10 | \arguments{
11 | \item{path}{path to \code{dataFile}.}
12 | 
13 | \item{dataFile}{.csv format data file name. The first column must be a time
14 | index or time values. The first row must be column names. One of
15 | \code{dataFile} or \code{dataFrame} is required.}
16 | 
17 | \item{dataFrame}{input data.frame. The first column must be a time
18 | index or time values. The columns must be named. One of
19 | \code{dataFile} or \code{dataFrame} is required.}
20 | 
21 | \item{E}{embedding dimension.}
22 | 
23 | \item{tau}{integer time delay embedding lag specified as number of
24 | time column rows.}
25 | 
26 | \item{columns}{string of whitespace separated column name(s), or vector
27 | of column names used to create the library. If individual column names
28 | contain whitespace place names in a vector, or, append ',' to the name.}
29 | 
30 | \item{verbose}{logical to produce additional console reporting.}
31 | }
32 | 
33 | \description{
34 | \code{\link{Embed}} performs Takens time-delay embedding on \code{columns}.
35 | }
36 | 
37 | \value{
38 | A data.frame with lagged columns. E columns for each variable specified
39 | in \code{columns}.
40 | }
41 | 
42 | \details{
43 |   Each \code{columns} item will have E-1 time-lagged vectors created.
44 |   The column name is appended with \code{(t-n)}.  For example, data
45 |   columns X, Y, with E = 2 will have columns named
46 |   \code{X(t-0) X(t-1) Y(t-0) Y(t-1)}.
47 | 
48 |   The returned data.frame does not have a time column.  The returned
49 |   data.frame is truncated by tau * (E-1) rows to remove state vectors
50 |   with partial data (NaN elements).
51 | }
52 | 
53 | \examples{
54 | data(circle)
55 | embed <- Embed( dataFrame = circle, E = 2, tau = -1, columns = "x y" ) 
56 | }
57 | 


--------------------------------------------------------------------------------
/man/EmbedDimension.Rd:
--------------------------------------------------------------------------------
 1 | \name{EmbedDimension}
 2 | \alias{EmbedDimension}
 3 | \title{Optimal embedding dimension}
 4 | \usage{
 5 | EmbedDimension(pathIn = "./", dataFile = "", dataFrame = NULL, pathOut = "", 
 6 |   predictFile = "", lib = "", pred = "", maxE = 10, Tp = 1, tau = -1,
 7 |   exclusionRadius = 0, columns = "", target = "", embedded = FALSE,
 8 |   verbose = FALSE, validLib = vector(), numThreads = 4, showPlot = TRUE,
 9 |   noTime = FALSE)  
10 | }
11 | \arguments{
12 | \item{pathIn}{path to \code{dataFile}.}
13 | 
14 | \item{dataFile}{.csv format data file name. The first column must be a time
15 | index or time values unless noTime is TRUE. The first row must be column names.}
16 | 
17 | \item{dataFrame}{input data.frame. The first column must be a time
18 | index or time values unless noTime is TRUE. The columns must be named.}
19 | 
20 | \item{pathOut}{path for \code{predictFile} containing output predictions.}
21 | 
22 | \item{predictFile}{output file name.}
23 | 
24 | \item{lib}{string or vector with start and stop indices of input data
25 | rows used to create the library from observations. Multiple row index
26 | pairs can be specified with each pair defining the first and last
27 | rows of time series observation segments used to create the library.}
28 | 
29 | \item{pred}{string with start and stop indices of input data rows used for
30 | predictions. A single contiguous range is supported.}
31 | 
32 | \item{maxE}{maximum value of E to evaluate.}
33 | 
34 | \item{Tp}{prediction horizon (number of time column rows).}
35 | 
36 | \item{tau}{lag of time delay embedding specified as number of
37 | time column rows.}
38 | 
39 | \item{exclusionRadius}{excludes vectors from the search space of nearest 
40 | neighbors if their relative time index is within exclusionRadius.}
41 | 
42 | \item{columns}{string of whitespace separated column name(s), or vector
43 | of column names used to create the library. If individual column names
44 | contain whitespace place names in a vector, or, append ',' to the name.}
45 | 
46 | \item{target}{column name used for prediction.}
47 | 
48 | \item{embedded}{logical specifying if the input data are embedded.}
49 | 
50 | \item{verbose}{logical to produce additional console reporting.}
51 | 
52 | \item{validLib}{logical vector the same length as the number of data
53 |  rows.  Any data row represented in this vector as FALSE, will not be
54 |  included in the library.}
55 | 
56 | \item{numThreads}{number of parallel threads for computation.}
57 | 
58 | \item{showPlot}{logical to plot results.}
59 | 
60 | \item{noTime}{logical to allow input data with no time column.}
61 | }
62 | 
63 | \value{
64 |   A data.frame with columns \code{E, rho}.
65 | }
66 | 
67 | \description{
68 | \code{\link{EmbedDimension}} uses \code{\link{Simplex}} to evaluate
69 | prediction accuracy as a function of embedding dimension.
70 | }
71 | 
72 | %\details{
73 | %}
74 | 
75 | \examples{
76 | data(TentMap)
77 | E.rho = EmbedDimension( dataFrame = TentMap, lib = "1 100", pred = "201 500",
78 | columns = "TentMap", target = "TentMap", showPlot = FALSE )
79 | }
80 | 


--------------------------------------------------------------------------------
/man/EvergladesFlow.Rd:
--------------------------------------------------------------------------------
 1 | \docType{data}
 2 | \name{EvergladesFlow}
 3 | \alias{EvergladesFlow}
 4 | \title{Water flow to NE Everglades}
 5 | \format{A data frame with 1379 rows and 2 columns: 
 6 | \describe{
 7 |   \item{\code{Date}}{Date.}
 8 |   \item{\code{S12CD_S333_CFS}}{Cumulative weekly flow (CFS).}
 9 | }
10 | }
11 | \usage{
12 | EvergladesFlow
13 | }
14 | \description{
15 |   Cumulative weekly water flow into northeast Everglades from water
16 |   control structures S12C, S12D and S333 from 1980 through 2005.
17 | }
18 | \keyword{datasets}
19 | 


--------------------------------------------------------------------------------
/man/Lorenz5D.Rd:
--------------------------------------------------------------------------------
 1 | \docType{data}
 2 | \name{Lorenz5D}
 3 | \alias{Lorenz5D}
 4 | \title{5-D Lorenz'96}
 5 | \format{Data frame with 1000 rows and 6 columns
 6 | \describe{
 7 |   \item{\code{Time}}{Time.}
 8 |   \item{\code{V1}}{variable 1.}
 9 |   \item{\code{V2}}{variable 2.}
10 |   \item{\code{V3}}{variable 3.}
11 |   \item{\code{V4}}{variable 4.}
12 |   \item{\code{V5}}{variable 5.}
13 | }
14 | }
15 | \usage{
16 | Lorenz5D
17 | }
18 | \description{5-D Lorenz'96 timeseries with F = 8.
19 | }
20 | \references{
21 | Lorenz, Edward (1996). Predictability - A problem partly solved,
22 | Seminar on Predictability, Vol. I, ECMWF.
23 | }
24 | \keyword{datasets}
25 | 


--------------------------------------------------------------------------------
/man/MakeBlock.Rd:
--------------------------------------------------------------------------------
 1 | \name{MakeBlock}
 2 | \alias{MakeBlock}
 3 | \title{Make embedded data block}
 4 | 
 5 | \usage{
 6 | MakeBlock(dataFrame, E = 0, tau = -1, columns = "", deletePartial = FALSE)  
 7 | }
 8 | 
 9 | \arguments{
10 | \item{dataFrame}{input data.frame. The first column must be a time
11 | index or time values. The columns must be named.}
12 | 
13 | \item{E}{embedding dimension.}
14 | 
15 | \item{tau}{integer time delay embedding lag specified as number of
16 | time column rows.}
17 | 
18 | \item{columns}{string of whitespace separated column name(s) in the
19 | input data to be embedded.}
20 | 
21 | \item{deletePartial}{boolean to delete rows with partial data.}
22 | }
23 | 
24 | \description{
25 | \code{\link{MakeBlock}} performs Takens time-delay embedding on
26 | \code{columns}. It is an internal function called by \code{\link{Embed}}
27 | that does not perform input error checking or validation. 
28 | }
29 | 
30 | \value{
31 | A data.frame with lagged columns. E columns for each variable specified
32 | in \code{columns}.
33 | }
34 | 
35 | \details{
36 |   Each \code{columns} item will have E-1 time-lagged vectors created.
37 |   The column name is appended with \code{(t-n)}.  For example, data
38 |   columns X, Y, with E = 2 will have columns named
39 |   \code{X(t-0) X(t-1) Y(t-0) Y(t-1)}.
40 | 
41 |   The returned data.frame does not have a time column.
42 | 
43 |   If \code{deletePartial} is \code{TRUE}, the returned
44 |   data.frame is truncated by tau * (E-1) rows to remove state vectors
45 |   with partial data (NaN elements).
46 | }
47 | 
48 | \examples{
49 | data(TentMap)
50 | embed <- MakeBlock(TentMap, 3, 1, "TentMap")
51 | }
52 | 


--------------------------------------------------------------------------------
/man/Multiview.Rd:
--------------------------------------------------------------------------------
  1 | \name{Multiview}
  2 | \alias{Multiview}
  3 | \title{Forecasting using multiview embedding}
  4 | \usage{
  5 | Multiview(pathIn = "./", dataFile = "", dataFrame = NULL,
  6 |   lib = "", pred = "", D = 0, E = 1, Tp = 1, knn = 0, 
  7 |   tau = -1, columns = "", target = "", multiview = 0, exclusionRadius = 0,
  8 |   trainLib = TRUE, excludeTarget = FALSE, parameterList = FALSE,
  9 |   verbose = FALSE, numThreads = 4, showPlot = FALSE, noTime = FALSE)
 10 | }
 11 | \arguments{
 12 | \item{pathIn}{path to \code{dataFile}.}
 13 | 
 14 | \item{dataFile}{.csv format data file name. The first column must be a time
 15 | index or time values unless noTime is TRUE. The first row must be column names.}
 16 | 
 17 | \item{dataFrame}{input data.frame. The first column must be a time
 18 | index or time values unless noTime is TRUE. The columns must be named.}
 19 | 
 20 | \item{lib}{a 2-column matrix, data.frame, 2-element vector or string of 
 21 |   row index pairs, where each pair specifies the first and last \emph{rows} of
 22 |   the time series to create the library.}
 23 | 
 24 | \item{pred}{(same format as lib), but specifying the sections of the time 
 25 | series to forecast.}
 26 | 
 27 | \item{D}{multivariate dimension.}
 28 | 
 29 | \item{E}{embedding dimension.}
 30 | 
 31 | \item{Tp}{prediction horizon (number of time column rows).}
 32 | 
 33 | \item{knn}{number of nearest neighbors. If knn=0, knn is set to E+1.}
 34 | 
 35 | \item{tau}{lag of time delay embedding specified as number of
 36 | time column rows.}
 37 | 
 38 | \item{columns}{string of whitespace separated column name(s), or vector
 39 | of column names used to create the library. If individual column names
 40 | contain whitespace place names in a vector, or, append ',' to the name.}
 41 | 
 42 | \item{target}{column name used for prediction.}
 43 | 
 44 | \item{multiview}{number of multiview ensembles to average for the final
 45 | prediction estimate.}
 46 | 
 47 | \item{exclusionRadius}{number of adjacent observation vector rows to exclude as
 48 | nearest neighbors in prediction.}
 49 | 
 50 | \item{trainLib}{logical to use in-sample (lib=pred) projections for the
 51 |   ranking of column combinations.}
 52 | 
 53 | \item{excludeTarget}{logical to exclude embedded target column from combinations.}
 54 | 
 55 | \item{parameterList}{logical to add list of invoked parameters.}
 56 | 
 57 | \item{verbose}{logical to produce additional console reporting.}
 58 | 
 59 | \item{numThreads}{number of CPU threads to use in multiview processing.}
 60 | 
 61 | \item{showPlot}{logical to plot results.}
 62 | 
 63 | \item{noTime}{logical to allow input data with no time column.}
 64 | }
 65 | 
 66 | \value{
 67 | Named list with data.frames \code{[[View, Predictions]]}.
 68 | 
 69 | data.frame \code{View} columns:
 70 | \tabular{ll}{
 71 |   Col_1 \tab column index\cr
 72 |   ...   \tab column index\cr
 73 |   Col_D \tab column index\cr
 74 |   rho   \tab Pearson correlation\cr
 75 |   MAE   \tab mean absolute error\cr
 76 |   RMSE  \tab root mean square error\cr
 77 |   name_1 \tab column name\cr
 78 |   ...    \tab column name\cr
 79 |   name_D \tab column name\cr
 80 | }
 81 | 
 82 |   If \code{parameterList = TRUE} a named list "parameters" is added.
 83 | }
 84 | 
 85 | \references{Ye H., and G. Sugihara, 2016. Information leverage in 
 86 | interconnected ecosystems: Overcoming the curse of dimensionality.
 87 | Science 353:922-925.
 88 | }
 89 | 
 90 | \description{
 91 | \code{\link{Multiview}} applies the method of \cite{Ye & Sugihara}
 92 | to find optimal combinations of variables that best represent the
 93 | dynamics.
 94 | }
 95 | 
 96 | \details{Multiview embedding is a method to identify variables in a
 97 |   multivariate dynamical system that are most likely to contribute to
 98 |   the observed dynamics.  It is a multistep algorithm with these general
 99 |   steps:
100 |   \enumerate{
101 |     \item Compute D-dimensional variable combination forecasts.
102 |     \item Rank forecasts.
103 |     \item Compute predictions of top combinations.
104 |     \item Compute multiview averaged prediction.
105 |   }
106 |   If \code{E>1}, all variables are embedded to dimension E. 
107 |   If \code{trainLib} is \code{TRUE} initial forecasts and ranking are
108 |   done in-sample (\code{lib=pred}) and predictions using the top ranked
109 |   combinations use the specified \code{lib} and \code{pred}.
110 |   If \code{trainLib} is \code{FALSE} initial forecasts and ranking use
111 |   the specified \code{lib} and \code{pred}, the step of computing
112 |   predictions of the top combinations is skipped. 
113 | }
114 | 
115 | \examples{
116 | data(block_3sp)
117 | L = Multiview( dataFrame = block_3sp, lib = "1 100", pred = "101 190",
118 | E = 2, columns = "x_t y_t z_t", target = "x_t" )
119 | }
120 | 


--------------------------------------------------------------------------------
/man/PredictInterval.Rd:
--------------------------------------------------------------------------------
 1 | \name{PredictInterval}
 2 | \alias{PredictInterval}
 3 | \title{Forecast interval accuracy}
 4 | \usage{
 5 | PredictInterval(pathIn = "./", dataFile = "", dataFrame = NULL, pathOut = "./", 
 6 |   predictFile = "", lib = "", pred = "", maxTp = 10, E = 1, tau = -1,
 7 |   exclusionRadius = 0, columns = "", target = "", embedded = FALSE,
 8 |   verbose = FALSE, validLib = vector(), numThreads = 4, showPlot = TRUE,
 9 |   noTime = FALSE)  
10 | }
11 | \arguments{
12 | \item{pathIn}{path to \code{dataFile}.}
13 | 
14 | \item{dataFile}{.csv format data file name. The first column must be a time
15 | index or time values unless noTime is TRUE. The first row must be column names.}
16 | 
17 | \item{dataFrame}{input data.frame. The first column must be a time
18 | index or time values unless noTime is TRUE. The columns must be named.}
19 | 
20 | \item{pathOut}{path for \code{predictFile} containing output predictions.}
21 | 
22 | \item{predictFile}{output file name.}
23 | 
24 | \item{lib}{string or vector with start and stop indices of input data
25 |   rows used to create the library from observations. Multiple row index
26 |   pairs can be specified with each pair defining the first and last
27 |   rows of time series observation segments used to create the library.}
28 | 
29 | \item{pred}{string with start and stop indices of input data rows used for
30 | predictions. A single contiguous range is supported.}
31 | 
32 | \item{maxTp}{maximum value of Tp to evaluate.}
33 | 
34 | \item{E}{embedding dimension.}
35 | 
36 | \item{tau}{lag of time delay embedding specified as number of
37 | time column rows.}
38 | 
39 | \item{exclusionRadius}{excludes vectors from the search space of nearest 
40 | neighbors if their relative time index is within exclusionRadius.}
41 | 
42 | \item{columns}{string of whitespace separated column name(s), or vector
43 | of column names used to create the library. If individual column names
44 | contain whitespace place names in a vector, or, append ',' to the name.}
45 | 
46 | \item{target}{column name used for prediction.}
47 | 
48 | \item{embedded}{logical specifying if the input data are embedded.}
49 | 
50 | \item{verbose}{logical to produce additional console reporting.}
51 | 
52 | \item{validLib}{logical vector the same length as the number of data
53 |  rows.  Any data row represented in this vector as FALSE, will not be
54 |  included in the library.}
55 | 
56 | \item{numThreads}{number of parallel threads for computation.}
57 | 
58 | \item{showPlot}{logical to plot results.}
59 | 
60 | \item{noTime}{logical to allow input data with no time column.}
61 | }
62 | 
63 | \value{
64 |   A data.frame with columns \code{Tp, rho}.
65 | }
66 | 
67 | \description{
68 | \code{\link{PredictInterval}} uses \code{\link{Simplex}} to evaluate
69 | prediction accuracy as a function of forecast interval Tp.
70 | }
71 | 
72 | %\details{
73 | %}
74 | 
75 | \examples{
76 | data(TentMap)
77 | Tp.rho = PredictInterval( dataFrame = TentMap, lib = "1 100",
78 | pred = "201 500", E = 2, columns = "TentMap", target = "TentMap",
79 | showPlot = FALSE )
80 | }
81 | 


--------------------------------------------------------------------------------
/man/PredictNonlinear.Rd:
--------------------------------------------------------------------------------
 1 | \name{PredictNonlinear}
 2 | \alias{PredictNonlinear}
 3 | \title{Test for nonlinear dynamics}
 4 | \usage{
 5 | PredictNonlinear(pathIn = "./", dataFile = "", dataFrame = NULL,
 6 |   pathOut = "./",  predictFile = "", lib = "", pred = "", theta = "",
 7 |   E = 1, Tp = 1, knn = 0, tau = -1, exclusionRadius = 0,
 8 |   columns = "", target = "", embedded = FALSE, verbose = FALSE,
 9 |   validLib = vector(), ignoreNan = TRUE, numThreads = 4,
10 |   showPlot = TRUE, noTime = FALSE )  
11 | }
12 | \arguments{
13 | \item{pathIn}{path to \code{dataFile}.}
14 | 
15 | \item{dataFile}{.csv format data file name. The first column must be a time
16 | index or time values unless noTime is TRUE. The first row must be column names.}
17 | 
18 | \item{dataFrame}{input data.frame. The first column must be a time
19 | index or time values unless noTime is TRUE. The columns must be named.}
20 | 
21 | \item{pathOut}{path for \code{predictFile} containing output predictions.}
22 | 
23 | \item{predictFile}{output file name.}
24 | 
25 | \item{lib}{string or vector with start and stop indices of input data
26 | rows used to create the library from observations. Multiple row index
27 | pairs can be specified with each pair defining the first and last
28 | rows of time series observation segments used to create the library.}
29 | 
30 | \item{pred}{string with start and stop indices of input data rows used for
31 | predictions. A single contiguous range is supported.}
32 | 
33 | \item{theta}{A whitespace delimited string with values of the S-map 
34 |   localisation parameter. An empty string will use default values of
35 | \code{[0.01 0.1 0.3 0.5 0.75 1 1.5 2 3 4 5 6 7 8 9]}.}
36 | 
37 | \item{E}{embedding dimension.}
38 | 
39 | \item{Tp}{prediction horizon (number of time column rows).}
40 | 
41 | \item{knn}{number of nearest neighbors. If knn=0, knn is set to the
42 | library size.} 
43 | 
44 | \item{tau}{lag of time delay embedding specified as number of
45 | time column rows.}
46 | 
47 | \item{exclusionRadius}{excludes vectors from the search space of nearest 
48 | neighbors if their relative time index is within exclusionRadius.}
49 | 
50 | \item{columns}{string of whitespace separated column name(s), or vector
51 | of column names used to create the library. If individual column names
52 | contain whitespace place names in a vector, or, append ',' to the name.}
53 | 
54 | \item{target}{column name used for prediction.}
55 | 
56 | \item{embedded}{logical specifying if the input data are embedded.}
57 | 
58 | \item{verbose}{logical to produce additional console reporting.}
59 | 
60 | \item{validLib}{logical vector the same length as the number of data
61 |  rows.  Any data row represented in this vector as FALSE, will not be
62 |  included in the library.}
63 | 
64 | \item{ignoreNan}{logical to internally redefine library to avoid nan.}
65 | 
66 | \item{numThreads}{number of parallel threads for computation.}
67 | 
68 | \item{showPlot}{logical to plot results.}
69 | 
70 | \item{noTime}{logical to allow input data with no time column.}
71 | }
72 | 
73 | \value{
74 |   A data.frame with columns \code{Theta, rho}.
75 | }
76 | 
77 | \description{
78 | \code{\link{PredictNonlinear}} uses \code{\link{SMap}} to evaluate
79 | prediction accuracy as a function of the localisation parameter
80 | \code{theta}.
81 | }
82 | 
83 | \details{The localisation parameter \code{theta} weights nearest
84 |   neighbors according to exp( (-theta D / D_avg) ) where D is the
85 |   distance between the observation vector and neighbor, D_avg the mean
86 |   distance.  If theta = 0, weights are uniformly unity corresponding
87 |   to a global autoregressive model.  As theta increases, neighbors in
88 |   closer proximity to the observation are considered. 
89 | }
90 | 
91 | \examples{
92 | data(TentMapNoise)
93 | theta.rho = PredictNonlinear( dataFrame = TentMapNoise, E = 2,
94 | lib = "1 100", pred = "201 500", columns = "TentMap",
95 | target = "TentMap", showPlot = FALSE )
96 | }
97 | 


--------------------------------------------------------------------------------
/man/SMap.Rd:
--------------------------------------------------------------------------------
  1 | \name{SMap}
  2 | \alias{SMap}
  3 | \title{SMap forecasting}
  4 | \usage{
  5 | SMap(pathIn = "./", dataFile = "", dataFrame = NULL, 
  6 |   lib = "", pred = "", E = 0, Tp = 1, knn = 0, tau = -1, 
  7 |   theta = 0, exclusionRadius = 0, columns = "", target = "", 
  8 |   embedded = FALSE, verbose = FALSE,
  9 |   validLib = vector(), ignoreNan = TRUE,
 10 |   generateSteps = 0, parameterList = FALSE,
 11 |   showPlot = FALSE, noTime = FALSE)  
 12 | }
 13 | \arguments{
 14 | \item{pathIn}{path to \code{dataFile}.}
 15 | 
 16 | \item{dataFile}{.csv format data file name. The first column must be a time
 17 | index or time values unless noTime is TRUE. The first row must be column names.}
 18 | 
 19 | \item{dataFrame}{input data.frame. The first column must be a time
 20 | index or time values unless noTime is TRUE. The columns must be named.}
 21 | 
 22 | \item{lib}{string or vector with start and stop indices of input data
 23 | rows used to create the library from observations. Multiple row index
 24 | pairs can be specified with each pair defining the first and last
 25 | rows of time series observation segments used to create the library.}
 26 | 
 27 | \item{pred}{string with start and stop indices of input data rows used for
 28 | predictions. A single contiguous range is supported.}
 29 | 
 30 | \item{E}{embedding dimension.}
 31 | 
 32 | \item{Tp}{prediction horizon (number of time column rows).}
 33 | 
 34 | \item{knn}{number of nearest neighbors. If knn=0, knn is set to the
 35 | library size.} 
 36 | 
 37 | \item{tau}{lag of time delay embedding specified as number of
 38 | time column rows.}
 39 | 
 40 | \item{theta}{neighbor localisation exponent.}
 41 | 
 42 | \item{exclusionRadius}{excludes vectors from the search space of nearest 
 43 | neighbors if their relative time index is within exclusionRadius.}
 44 | 
 45 | \item{columns}{string of whitespace separated column name(s), or vector
 46 | of column names used to create the library. If individual column names
 47 | contain whitespace place names in a vector, or, append ',' to the name.}
 48 | 
 49 | \item{target}{column name used for prediction.}
 50 | 
 51 | \item{embedded}{logical specifying if the input data are embedded.}
 52 | 
 53 | \item{verbose}{logical to produce additional console reporting.}
 54 | 
 55 | \item{validLib}{logical vector the same length as the number of data
 56 |  rows. Any data row represented in this vector as FALSE, will not be
 57 |  included in the library.}
 58 | 
 59 | \item{ignoreNan}{logical to internally redefine library to avoid nan.}
 60 | 
 61 | \item{generateSteps}{number of predictive feedback generative steps.}
 62 | 
 63 | \item{parameterList}{logical to add list of invoked parameters.}
 64 | 
 65 | \item{showPlot}{logical to plot results.}
 66 | 
 67 | \item{noTime}{logical to allow input data with no time column.}
 68 | }
 69 | 
 70 | \value{
 71 |   A named list with three data.frames
 72 |   \code{[[predictions, coefficients, singularValues]]}.
 73 |   \code{predictions} has columns \code{Observations, Predictions}.
 74 |   The first column contains time or index values.
 75 | 
 76 |   \code{coefficients} data.frame has time or index values in the first column.
 77 |   Columns 2 through E+2 (E+1 columns) are the SMap coefficients.
 78 | 
 79 |   \code{singularValues} data.frame has time or index values in the first column.
 80 |   Columns 2 through E+2 (E+1 columns) are the SVD singularValues. The
 81 |   first value corresponds to the SVD bias (intercept) term.
 82 | 
 83 |   If \code{parameterList = TRUE} a named list "parameters" is added.
 84 | }
 85 | 
 86 | \references{Sugihara G. 1994. Nonlinear forecasting for the classification of natural time series. Philosophical Transactions: Physical Sciences and Engineering, 348 (1688):477-495.}
 87 | 
 88 | \description{
 89 |   \code{\link{SMap}} performs time series forecasting based on localised
 90 |   (or global) nearest neighbor projection in the time series phase space as
 91 |   described in \cite{Sugihara 1994}. 
 92 | }
 93 | 
 94 | \details{
 95 |   If \code{embedded} is \code{FALSE}, the data \code{column(s)} are embedded
 96 |   to dimension \code{E} with time lag \code{tau}. This embedding forms an
 97 |   n-columns * E-dimensional phase space for the \code{\link{SMap}} projection.
 98 |   If embedded is \code{TRUE}, the data are assumed to contain an
 99 |   E-dimensional embedding with E equal to the number of \code{columns}.
100 |   See the Note below for proper use of multivariate data (number of
101 |   \code{columns} > 1).
102 | 
103 |   If \code{ignoreNan} is \code{TRUE}, the library (\code{lib}) is
104 |   internally redefined to exclude nan embedding vectors. If
105 |   \code{ignoreNan} is \code{FALSE} no library adjustment is made. The
106 |   (\code{lib}) can be explicitly specified to exclude nan library vectors.
107 |   
108 |   Predictions are made using leave-one-out cross-validation, i.e.
109 |   observation rows are excluded from the prediction regression.
110 | 
111 |   In contrast to \code{\link{Simplex}}, \code{\link{SMap}} uses all
112 |   available neighbors and weights them with an exponential decay
113 |   in phase space distance with exponent \code{theta}. \code{theta}=0
114 |   uses all neighbors corresponding to a global autoregressive model.
115 |   As \code{theta} increases, neighbors closer in vicinity to the
116 |   observation are considered. 
117 | }
118 | 
119 | \note{
120 | \code{\link{SMap}} should be called with columns explicitly corresponding to
121 | dimensions E. In the univariate case (number of \code{columns} = 1) with
122 | default \code{embedded = FALSE}, the time series will be time-delay
123 | embedded to dimension E, SMap coefficients correspond to each dimension. 
124 | 
125 | If a multivariate data set is used (number of \code{columns} > 1) it
126 | must use \code{embedded = TRUE} with E equal to the number of columns.
127 | This prevents the function from internally time-delay embedding the
128 | multiple columns to dimension E.  If the internal time-delay embedding
129 | is performed, then state-space columns will not correspond to the
130 | intended dimensions in the matrix inversion, coefficient assignment,
131 | and prediction. In the multivariate case, the user should first prepare
132 | the embedding (using \code{\link{Embed}} for time-delay embedding), then
133 | pass this embedding to \code{\link{SMap}} with appropriately specified
134 | \code{columns}, \code{E}, and \code{embedded = TRUE}.
135 | }
136 | 
137 | \examples{
138 | data(circle)
139 | L = SMap( dataFrame = circle, lib="1 100", pred="110 190", theta = 4,
140 | E = 2, embedded = TRUE, columns = "x y", target = "x" )
141 | }
142 | 


--------------------------------------------------------------------------------
/man/Simplex.Rd:
--------------------------------------------------------------------------------
  1 | \name{Simplex}
  2 | \alias{Simplex}
  3 | \title{Simplex forecasting}
  4 | \usage{
  5 | Simplex(pathIn = "./", dataFile = "", dataFrame = NULL, pathOut = "./", 
  6 |   predictFile = "", lib = "", pred = "", E = 0, Tp = 1, knn = 0, tau = -1, 
  7 |   exclusionRadius = 0, columns = "", target = "", embedded = FALSE,
  8 |   verbose = FALSE, validLib = vector(), generateSteps = 0,
  9 |   parameterList = FALSE, showPlot = FALSE, noTime = FALSE)
 10 | }
 11 | \arguments{
 12 | \item{pathIn}{path to \code{dataFile}.}
 13 | 
 14 | \item{dataFile}{.csv format data file name. The first column must be a time
 15 | index or time values unless noTime is TRUE. The first row must be column names.}
 16 | 
 17 | \item{dataFrame}{input data.frame. The first column must be a time
 18 | index or time values unless noTime is TRUE. The columns must be named.}
 19 | 
 20 | \item{pathOut}{path for \code{predictFile} containing output predictions.}
 21 | 
 22 | \item{predictFile}{output file name.}
 23 | 
 24 | \item{lib}{string or vector with start and stop indices of input data
 25 | rows used to create the library from observations. Mulitple row index
 26 | pairs can be specified with each pair defining the first and last
 27 | rows of time series observation segments used to create the library.}
 28 | 
 29 | \item{pred}{string with start and stop indices of input data rows used for
 30 | predictions. A single contiguous range is supported.}
 31 | 
 32 | \item{E}{embedding dimension.}
 33 | 
 34 | \item{Tp}{prediction horizon (number of time column rows).}
 35 | 
 36 | \item{knn}{number of nearest neighbors. If knn=0, knn is set to E+1.}
 37 | 
 38 | \item{tau}{lag of time delay embedding specified as number of
 39 | time column rows.}
 40 | 
 41 | \item{exclusionRadius}{excludes vectors from the search space of nearest 
 42 | neighbors if their relative time index is within exclusionRadius.}
 43 | 
 44 | \item{columns}{string of whitespace separated column name(s), or vector
 45 | of column names used to create the library. If individual column names
 46 | contain whitespace place names in a vector, or, append ',' to the name.}
 47 | 
 48 | \item{target}{column name used for prediction.}
 49 | 
 50 | \item{embedded}{logical specifying if the input data are embedded.}
 51 | 
 52 | \item{verbose}{logical to produce additional console reporting.}
 53 | 
 54 | \item{validLib}{logical vector the same length as the number of data
 55 |  rows.  Any data row represented in this vector as FALSE, will not be
 56 |  included in the library.}
 57 | 
 58 | \item{generateSteps}{number of predictive feedback generative steps.}
 59 | 
 60 | \item{parameterList}{logical to add list of invoked parameters.}
 61 | 
 62 | \item{showPlot}{logical to plot results.}
 63 | 
 64 | \item{noTime}{logical to allow input data with no time column.}
 65 | }
 66 | 
 67 | \value{
 68 | A data.frame with columns \code{Observations, Predictions}.
 69 | The first column contains the time values.
 70 | 
 71 | If \code{parameterList = TRUE}, a named list with "predictions" holding the
 72 | data.frame, "parameters" with a named list of invoked parameters.
 73 | }
 74 | 
 75 | \references{Sugihara G. and May R. 1990. Nonlinear forecasting as a way
 76 | of distinguishing chaos from measurement error in time series.
 77 | Nature, 344:734-741.
 78 | }
 79 | 
 80 | \description{
 81 | \code{\link{Simplex}} performs time series forecasting based on
 82 | weighted nearest neighbors projection in the time series phase space as
 83 | described in \cite{Sugihara and May}.
 84 | }
 85 | 
 86 | \details{
 87 |   If embedded is \code{FALSE}, the data \code{column(s)} are embedded to
 88 |   dimension \code{E} with time lag \code{tau}. This embedding forms an
 89 |   E-dimensional phase space for the \code{\link{Simplex}} projection.
 90 |   If embedded is \code{TRUE}, the data are assumed to contain an
 91 |   E-dimensional embedding with E equal to the number of \code{columns}.
 92 |   Predictions are made using leave-one-out cross-validation, i.e.
 93 |   observation vectors are excluded from the prediction simplex. 
 94 | 
 95 |   To assess an optimal embedding dimension \code{\link{EmbedDimension}}
 96 |   can be applied. Accuracy statistics can be estimated by
 97 |   \code{\link{ComputeError}}.
 98 | }
 99 | \examples{
100 | data( block_3sp )
101 | smplx = Simplex( dataFrame = block_3sp, lib = "1 100", pred = "101 190",
102 | E = 3, columns = "x_t", target = "x_t" )
103 | ComputeError( smplx $ Predictions, smplx $ Observations )
104 | }
105 | 


--------------------------------------------------------------------------------
/man/SurrogateData.Rd:
--------------------------------------------------------------------------------
 1 | \name{SurrogateData}
 2 | \alias{SurrogateData}
 3 | \title{Generate surrogate data for permutation/randomization tests}
 4 | \usage{
 5 | SurrogateData( ts, method = c("random_shuffle", "ebisuzaki",
 6 | "seasonal"), num_surr = 100, T_period = 1, alpha = 0 )
 7 | }
 8 | 
 9 | \arguments{
10 | \item{ts}{the original time series}
11 | 
12 | \item{method}{which algorithm to use to generate surrogate data}
13 | 
14 | \item{num_surr}{the number of null surrogates to generate}
15 | 
16 | \item{T_period}{the period of seasonality for seasonal surrogates
17 |   (ignored for other methods)}
18 | 
19 | \item{alpha}{additive noise factor: N(0,alpha)}
20 | }
21 | 
22 | \value{
23 |   A matrix where each column is a separate surrogate with the same
24 |   length as \code{ts}.
25 | }
26 | 
27 | \description{
28 | \code{SurrogateData} generates surrogate data under several different 
29 | null models.
30 | }
31 | 
32 | \details{
33 | Method "random_shuffle" creates surrogates by randomly permuting the values 
34 | of the original time series.
35 | 
36 | Method "ebisuzaki" creates surrogates by randomizing the phases of a Fourier 
37 | transform, preserving the power spectra of the null surrogates.
38 | 
39 | Method "seasonal" creates surrogates by computing a mean seasonal trend of 
40 | the specified period and shuffling the residuals.  It is presumed that
41 | the seasonal trend can be extracted with a smoothing spline.  Additive
42 | Gaussian noise is included according to N(0,alpha). 
43 | }
44 | 
45 | \examples{
46 | data("block_3sp")
47 | ts <- block_3sp$x_t
48 | SurrogateData(ts, method = "ebisuzaki")
49 | }
50 | 


--------------------------------------------------------------------------------
/man/TentMap.Rd:
--------------------------------------------------------------------------------
 1 | \docType{data}
 2 | \name{TentMap}
 3 | \alias{TentMap}
 4 | \title{Time series for a tent map with mu = 2.}
 5 | \format{Data frame with 999 rows and 2 columns
 6 | \describe{
 7 |   \item{\code{Time}}{time index.}
 8 |   \item{\code{TentMap}}{tent map values.}
 9 | }
10 | }
11 | \usage{
12 | TentMap
13 | }
14 | \description{
15 | First-differenced time series generated from the tent map
16 |   recurrence relation with mu = 2.
17 | }
18 | \keyword{datasets}
19 | 


--------------------------------------------------------------------------------
/man/TentMapNoise.Rd:
--------------------------------------------------------------------------------
 1 | \docType{data}
 2 | \name{TentMapNoise}
 3 | \alias{TentMapNoise}
 4 | \title{Time series of tent map plus noise.}
 5 | \format{Data frame with 999 rows and 2 columns
 6 | \describe{
 7 |   \item{\code{Time}}{time index.}
 8 |   \item{\code{TentMap}}{tent map values.}
 9 | }
10 | }
11 | \usage{
12 | TentMapNoise
13 | }
14 | \description{
15 | First-differenced time series generated from the tent map
16 |   recurrence relation with mu = 2 and random noise.
17 | }
18 | \keyword{datasets}
19 | 


--------------------------------------------------------------------------------
/man/Thrips.Rd:
--------------------------------------------------------------------------------
 1 | \docType{data}
 2 | \name{Thrips}
 3 | \alias{Thrips}
 4 | \title{Apple-blossom Thrips time series}
 5 | \description{
 6 | Seasonal outbreaks of Thrips imaginis.
 7 | }
 8 | \references{
 9 | Davidson and Andrewartha, Annual trends in a natural population of
10 | Thrips imaginis \emph{Thysanoptera}, Journal of Animal Ecology, 17,
11 | 193-199, 1948.
12 | }
13 | 


--------------------------------------------------------------------------------
/man/block_3sp.Rd:
--------------------------------------------------------------------------------
 1 | \docType{data}
 2 | \name{block_3sp}
 3 | \alias{block_3sp}
 4 | \title{Time series for a three-species coupled model.}
 5 | \format{A data frame with 198 rows and 10 columns: 
 6 | \describe{
 7 |   \item{\code{time}}{time index (# of generations)}
 8 |   \item{\code{x_t}  }{abundance of simulated species x at time t}
 9 |   \item{\code{x_t-1}}{abundance of simulated species x at time t-1}
10 |   \item{\code{x_t-2}}{abundance of simulated species x at time t-2}
11 |   \item{\code{y_t}  }{abundance of simulated species y at time t}
12 |   \item{\code{y_t-1}}{abundance of simulated species y at time t-1}
13 |   \item{\code{y_t-2}}{abundance of simulated species y at time t-2}
14 |   \item{\code{z_t}  }{abundance of simulated species z at time t}
15 |   \item{\code{z_t-1}}{abundance of simulated species z at time t-1}
16 |   \item{\code{z_t-2}}{abundance of simulated species z at time t-2}
17 | }}
18 | \usage{
19 | block_3sp
20 | }
21 | \description{
22 | Time series generated from a discrete-time coupled 
23 |   Lotka-Volterra model exhibiting chaotic dynamics.
24 | }
25 | \keyword{datasets}
26 | 


--------------------------------------------------------------------------------
/man/circle.Rd:
--------------------------------------------------------------------------------
 1 | \docType{data}
 2 | \name{circle}
 3 | \alias{circle}
 4 | \title{2-D timeseries of a circle.}
 5 | \format{A data frame with 200 rows and 3 columns: 
 6 | \describe{
 7 |   \item{\code{Time}}{time index.}
 8 |   \item{\code{x}}{sin component.}
 9 |   \item{\code{y}}{cos component.}
10 | }}
11 | \usage{
12 | circle
13 | }
14 | \description{
15 | Time series of a circle in 2-D (sin and cos).
16 | }
17 | \keyword{datasets}
18 | 


--------------------------------------------------------------------------------
/man/figures/optimal-E-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SugiharaLab/rEDM/aa8204c60a5b9c95d7901b879e3c347c71cfdf7c/man/figures/optimal-E-1.png


--------------------------------------------------------------------------------
/man/figures/simplex-projection-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SugiharaLab/rEDM/aa8204c60a5b9c95d7901b879e3c347c71cfdf7c/man/figures/simplex-projection-1.png


--------------------------------------------------------------------------------
/man/figures/sunspots-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SugiharaLab/rEDM/aa8204c60a5b9c95d7901b879e3c347c71cfdf7c/man/figures/sunspots-1.png


--------------------------------------------------------------------------------
/man/figures/unnamed-chunk-3-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SugiharaLab/rEDM/aa8204c60a5b9c95d7901b879e3c347c71cfdf7c/man/figures/unnamed-chunk-3-1.png


--------------------------------------------------------------------------------
/man/paramecium_didinium.Rd:
--------------------------------------------------------------------------------
 1 | \docType{data}
 2 | \name{paramecium_didinium}
 3 | \alias{paramecium_didinium}
 4 | \title{Time series for the Paramecium-Didinium laboratory experiment}
 5 | \usage{
 6 | paramecium_didinium
 7 | }
 8 | \description{
 9 | Time series of Paramecium and Didinium abundances (#/mL) from 
10 | an experiment by Veilleux (1979)
11 | }
12 | \keyword{datasets}
13 | 


--------------------------------------------------------------------------------
/man/rEDM.Rd:
--------------------------------------------------------------------------------
 1 | \docType{package}
 2 | \name{rEDM}
 3 | \alias{EDM}
 4 | \alias{rEDM-package}
 5 | \title{Empirical dynamic modeling}
 6 | \description{
 7 |   \pkg{rEDM} provides tools for data-driven time series analyses. It is
 8 |   based on reconstructing multivariate state space
 9 |   representations from uni or multivariate time series, then projecting
10 |   state changes using various metrics applied to nearest neighbors.
11 |   
12 |   \pkg{rEDM} is a \pkg{Rcpp} interface to the
13 |   \href{https://github.com/SugiharaLab/cppEDM}{cppEDM} library of
14 |   Empirical Dynamic Modeling tools. Functionality includes:
15 |   \itemize{
16 |     \item Simplex projection (Sugihara and May 1990)
17 |     \item Sequential Locally Weighted Global Linear Maps (S-map) (Sugihara 1994)
18 |     \item Multivariate embeddings (Dixon et al. 1999)
19 |     \item Convergent cross mapping (Sugihara et al. 2012)
20 |     \item Multiview embedding (Ye and Sugihara 2016)
21 |   }
22 | }
23 | \references{
24 |   Sugihara G. and May R. 1990. Nonlinear forecasting as a way of
25 |   distinguishing chaos from measurement error in time series.
26 |   Nature, 344:734-741.
27 | 
28 |   Sugihara G. 1994. Nonlinear forecasting for the classification of
29 |   natural time series. Philosophical Transactions: Physical Sciences
30 |   and Engineering, 348 (1688) : 477-495.
31 | 
32 |   Dixon, P. A., M. Milicich, and G. Sugihara, 1999. Episodic
33 |   fluctuations in larval supply. Science 283:1528-1530.
34 | 
35 |   Sugihara G., May R., Ye H., Hsieh C., Deyle E., Fogarty M.,
36 |   Munch S., 2012. Detecting Causality in Complex Ecosystems.
37 |   Science 338:496-500.
38 | 
39 |   Ye H., and G. Sugihara, 2016. Information leverage in
40 |   interconnected ecosystems: Overcoming the curse of dimensionality.
41 |   Science 353:922-925.
42 | }
43 | \details{
44 | 
45 | \strong{Main Functions}: 
46 |   \itemize{
47 |     \item \code{\link{Simplex}} - simplex projection
48 |     \item \code{\link{SMap}} - S-map projection
49 |     \item \code{\link{CCM}} - convergent cross mapping
50 |     \item \code{\link{Multiview}} - multiview forecasting
51 |   }
52 | \strong{Helper Functions}: 
53 |   \itemize{
54 |     \item \code{\link{Embed}} - time delay embedding
55 |     \item \code{\link{ComputeError}} - forecast skill metrics
56 |     \item \code{\link{EmbedDimension}} - optimal embedding dimension
57 |     \item \code{\link{PredictInterval}} - optimal prediction interval
58 |     \item \code{\link{PredictNonlinear}} - evaluate nonlinearity
59 |   }
60 | }
61 | \author{
62 | \strong{Maintainer}: Joseph Park
63 | 
64 | \strong{Authors}: Joseph Park, Cameron Smith, Ethan Deyle, Erik
65 | Saberski, George Sugihara
66 | 
67 | % \strong{Contributors}: 
68 | }
69 | \keyword{package}
70 | 


--------------------------------------------------------------------------------
/man/sardine_anchovy_sst.Rd:
--------------------------------------------------------------------------------
 1 | \docType{data}
 2 | \name{sardine_anchovy_sst}
 3 | \alias{sardine_anchovy_sst}
 4 | \title{Time series for the California Current Anchovy-Sardine-SST system}
 5 | \format{\describe{
 6 |   \item{\code{year}}{year of measurement}
 7 |   \item{\code{anchovy}}{anchovy landings, scaled to mean = 0, sd = 1}
 8 |   \item{\code{sardine}}{sardine landings, scaled to mean = 0, sd = 1}
 9 |   \item{\code{sio_sst}}{3-year running average of sea surface temperature at 
10 |     SIO pier, scaled to mean = 0, sd = 1}
11 |   \item{\code{np_sst}}{3-year running average of sea surface temperature at 
12 |     Newport pier, scaled to mean = 0, sd = 1}
13 | }}
14 | \usage{
15 | sardine_anchovy_sst
16 | }
17 | \description{
18 | Time series of Pacific sardine landings (CA), Northern anchovy 
19 |   landings (CA), and sea-surface temperature (3-year average) at the SIO 
20 |   pier and Newport pier
21 | }
22 | \keyword{datasets}
23 | 


--------------------------------------------------------------------------------
/src/CCM.cpp:
--------------------------------------------------------------------------------
  1 | 
  2 | #include "RcppEDMCommon.h"
  3 | 
  4 | //-----------------------------------------------------------
  5 | // 
  6 | //-----------------------------------------------------------
  7 | Rcpp::List CCM_rcpp( std::string  pathIn, 
  8 |                      std::string  dataFile,
  9 |                      r::DataFrame dataFrame,
 10 |                      // std::string  pathOut,     // Rcpp 20 param limit
 11 |                      // std::string  predictFile, // Rcpp 20 param limit
 12 |                      int          E,
 13 |                      int          Tp,
 14 |                      int          knn,
 15 |                      int          tau,
 16 |                      int          exclusionRadius,
 17 |                      std::string  columns,
 18 |                      std::string  target,
 19 |                      std::string  libSizes,
 20 |                      int          sample,
 21 |                      bool         random,
 22 |                      // bool      replacement,  // Rcpp 20 param limit
 23 |                      unsigned     seed,
 24 |                      bool         embedded,
 25 |                      bool         includeData,
 26 |                      bool         parameterList,
 27 |                      bool         verbose ) {
 28 | 
 29 |     CCMValues ccmValues;
 30 | 
 31 |     bool replacement = false; // Rcpp 20 param limit
 32 |     
 33 |     if ( dataFile.size() ) {
 34 |         // dataFile specified, dispatch overloaded CCM, ignore dataFrame
 35 |         ccmValues = CCM( pathIn,
 36 |                          dataFile,
 37 |                          "./", // pathOut,     // Rcpp 20 param limit
 38 |                          "",   // predictFile, // Rcpp 20 param limit
 39 |                          E, 
 40 |                          Tp,
 41 |                          knn,
 42 |                          tau,
 43 |                          exclusionRadius,
 44 |                          columns,
 45 |                          target, 
 46 |                          libSizes,
 47 |                          sample,
 48 |                          random,
 49 |                          replacement,
 50 |                          seed,
 51 |                          embedded,
 52 |                          includeData,
 53 |                          parameterList,
 54 |                          verbose );
 55 |     }
 56 |     else if ( dataFrame.size() ) {
 57 |         DataFrame< double > dataFrame_ = DFToDataFrame( dataFrame );
 58 | 
 59 |         ccmValues = CCM( dataFrame_,
 60 |                          "./", // pathOut,      // Rcpp 20 param limit
 61 |                          "",   // predictFile,  // Rcpp 20 param limit
 62 |                          E, 
 63 |                          Tp,
 64 |                          knn,
 65 |                          tau,
 66 |                          exclusionRadius,
 67 |                          columns,
 68 |                          target, 
 69 |                          libSizes,
 70 |                          sample,
 71 |                          random,
 72 |                          replacement,
 73 |                          seed,
 74 |                          embedded,
 75 |                          includeData,
 76 |                          parameterList,
 77 |                          verbose );
 78 |     }
 79 |     else {
 80 |         Rcpp::warning( "CCM_rcpp(): No dataFile or dataFrame.\n" );
 81 |     }
 82 | 
 83 |     // Ouput Rcpp DataFrames
 84 |     r::DataFrame allLibStat = DataFrameToDF( ccmValues.AllLibStats );
 85 | 
 86 |     r::List output;
 87 |     if ( includeData ) {
 88 |         // Have to unroll and convert CCMValues.Predictions forward_list
 89 |         // to Rcpp::DataFrame for output.
 90 |         r::List PredictionsList1;
 91 |         for ( auto pi =  ccmValues.CrossMap1.Predictions.begin();
 92 |               pi != ccmValues.CrossMap1.Predictions.end(); ++pi ) {
 93 |             PredictionsList1.push_back( DataFrameToDF( *pi ) );
 94 |         }
 95 |         r::List PredictionsList2;
 96 |         for ( auto pi =  ccmValues.CrossMap2.Predictions.begin();
 97 |               pi != ccmValues.CrossMap2.Predictions.end(); ++pi ) {
 98 |             PredictionsList2.push_back( DataFrameToDF( *pi ) );
 99 |         }
100 | 
101 |         r::DataFrame cm1_PredStat =
102 |             DataFrameToDF( ccmValues.CrossMap1.PredictStats );
103 |         r::DataFrame cm2_PredStat =
104 |             DataFrameToDF( ccmValues.CrossMap2.PredictStats );
105 | 
106 |         output =
107 |             r::List::create(r::Named( "LibMeans"         ) = allLibStat,
108 |                             r::Named( "CCM1_PredictStat" ) = cm1_PredStat,
109 |                             r::Named( "CCM1_Predictions" ) = PredictionsList1,
110 |                             r::Named( "CCM2_PredictStat" ) = cm2_PredStat,
111 |                             r::Named( "CCM2_Predictions" ) = PredictionsList2);
112 | 
113 |         if ( parameterList ) {
114 |             r::List paramList = ParamMaptoList( ccmValues.parameterMap );
115 |             output["parameters"] = paramList;
116 |         }
117 |     }
118 |     else {
119 |         output = r::List::create( r::Named( "LibMeans" ) = allLibStat);
120 |     }
121 |     return output;
122 | }
123 | 


--------------------------------------------------------------------------------
/src/ComputeError.cpp:
--------------------------------------------------------------------------------
 1 | 
 2 | #include "RcppEDMCommon.h"
 3 | 
 4 | //----------------------------------------------------------------
 5 | // Compute Error Wrapper method
 6 | // @param vec1      : the first vector to compare
 7 | // @param vec2      : the second vector to compare
 8 | // @return          : map/dictionary with the rho, mae, rmse
 9 | //----------------------------------------------------------------
10 | r::List ComputeError_rcpp ( std::vector<double> vec1, 
11 |                             std::vector<double> vec2 ) {
12 |     
13 |     std::valarray<double> val1 ( vec1.data(), vec1.size() );  
14 |     std::valarray<double> val2 ( vec2.data(), vec2.size() );  
15 |     
16 |     VectorError vecErr = ComputeError( val1, val2 );
17 |     
18 |     // Setup as map instead of vecErr struct
19 |     return r::List::create( r::Named( "MAE"  ) = vecErr.MAE,
20 |                             r::Named( "rho"  ) = vecErr.rho,
21 |                             r::Named( "RMSE" ) = vecErr.RMSE );
22 | }
23 | 


--------------------------------------------------------------------------------
/src/DataFrame.cpp:
--------------------------------------------------------------------------------
  1 | #include "RcppEDMCommon.h"
  2 | 
  3 | //-----------------------------------------------------------------------
  4 | // Convert R DataFrame to cppEDM DataFrame<double>; throws if < 2 columns
  5 | //-----------------------------------------------------------------------
  6 | DataFrame< double > DFToDataFrame ( Rcpp::DataFrame df ) {
  7 | 
  8 |     // Row count of the R data.frame sizes the cppEDM DataFrame
  9 |     size_t numRows = df.nrow();
 10 | 
 11 |     // Need a time column plus >= 1 data column; also rejects 0 columns
 12 |     if ( df.ncol() < 2 ) {
 13 |         std::string err = "DFToDataFrame(): Input must have > 1 column, "
 14 |                           "first column is interpreted as a time vector.\n";
 15 |         throw Rcpp::exception( err.c_str() );
 16 |     }
 17 | 
 18 |     // Get data column names: index 0 is the time column and is skipped
 19 |     // JP: Are df.names() ensured to be in order as accessed by index?
 20 |     //     If not, this will give incorrect results.
 21 |     std::vector< std::string > colNames;
 22 |     r::CharacterVector tmp_colNames = df.names();
 23 | 
 24 |     for ( int idx = 1; idx < tmp_colNames.size(); idx++ ) {
 25 |         colNames.push_back( r::as<std::string>( tmp_colNames[idx] ) );
 26 |     }
 27 | 
 28 |     // Create cpp DataFrame: one column per non-time column of df
 29 |     DataFrame< double > dataFrame ( numRows, df.ncol()-1, colNames ); 
 30 | 
 31 |     // Setup time column and time name for dataframe
 32 |     // It is assumed that the first column is a time vector !!!
 33 |     r::CharacterVector timeColumn = r::as<r::CharacterVector>( df[0] );
 34 |     dataFrame.Time()       = r::as< std::vector<std::string> >( timeColumn );
 35 |     dataFrame.TimeName()   = r::as<std::string>( 
 36 |                              ((r::CharacterVector)df.names())[0] );  
 37 | 
 38 |     // read in data columns to the cppEDM DF
 39 |     // JP: Are df.names() ensured to be in order as accessed by index?
 40 |     //     If not, this will give incorrect results.
 41 |     for ( int idx = 1; idx < df.ncol(); idx++ ) {
 42 |         // unfortunately we can't convert numeric vec to valarray
 43 |         std::vector<double> values = r::as<std::vector<double>>( df[idx] );
 44 |         std::valarray<double> col ( values.data(), values.size() );
 45 |         dataFrame.WriteColumn( idx-1, col ); // cppEDM columns exclude time
 46 |     }
 47 | 
 48 |     return dataFrame;
 49 | }
 50 | 
 51 | //---------------------------------------------------------------
 52 | // Convert cppEDM DataFrame<double> to R DataFrame (time column first)
 53 | //---------------------------------------------------------------
 54 | r::DataFrame DataFrameToDF ( DataFrame< double > dataFrame ) {
 55 | 
 56 |     r::List columnList; // List of columns to create new R data.frame
 57 | 
 58 |     // NOTE: cppEDM DataFrame columnNames are data only, not time
 59 |     std::vector<std::string> columnNamesIn = dataFrame.ColumnNames();
 60 | 
 61 |     std::vector<std::string> columnNames; // names of the R columns, in order
 62 | 
 63 |     bool hasTime = false;
 64 | 
 65 |     // If dataFrame has time vector and timeName, add to columnList
 66 |     if ( dataFrame.Time().size() ) {
 67 |         hasTime = true; // Skip time column in dataFrame.VectorColumnName()
 68 | 
 69 |         columnNames.push_back( dataFrame.TimeName() );
 70 | 
 71 |         // Probe dataFrame.Time() to see if we can convert it to
 72 |         // a numeric, Date, or Datetime... only the first entry is probed
 73 |         std::string firstTime = dataFrame.Time()[0];
 74 | 
 75 |         // Is firstTime purely numeric characters (not Date or DateTime)?
 76 |         // We presume time is not negative, or exponential 
 77 |         bool numericTime = strspn( firstTime.c_str(),
 78 |                                    ".0123456789" ) == firstTime.size();
 79 | 
 80 |         // Does firstTime have two hyphens as in "%Y-%m-%d" Date format?
 81 |         size_t nHyphen  = std::count(firstTime.begin(), firstTime.end(), '-');
 82 |         bool   dateTime = nHyphen == 2 ? true : false;
 83 | 
 84 |         // Does firstTime have two '-' and two ':' as in DateTime format?
 85 |         size_t nColon     = std::count(firstTime.begin(), firstTime.end(), ':');
 86 |         bool dateTimeTime = dateTime and nColon == 2 ? true : false;
 87 |         if ( dateTimeTime ) { dateTime = false; } // Datetime overrides Date
 88 | 
 89 |         if ( numericTime and not dateTime and not dateTimeTime ) {
 90 |             // Convert the dataFrame.Time() vector to numeric/double
 91 |             r::NumericVector timeVec( dataFrame.Time().size() );
 92 | 
 93 |             char *pEnd;
 94 |             for ( size_t i = 0; i < dataFrame.Time().size(); i++ ) {
 95 |                 timeVec[ i ] = strtod( dataFrame.Time().at( i ).c_str(), &pEnd );
 96 |                 // JP: check pEnd? (currently unchecked after each strtod)
 97 |             }
 98 |             columnList.push_back( timeVec );
 99 |         }
100 | 
101 |         if ( not numericTime and dateTime and not dateTimeTime ) {
102 |             // Convert to Date
103 |             r::DateVector dateVec( dataFrame.Time().size() );
104 |             
105 |             for ( size_t i = 0; i < dataFrame.Time().size(); i++ ) {
106 |                 dateVec[ i ] = r::Date( dataFrame.Time().at( i ),
107 |                                         "%Y-%m-%d" );
108 |             }
109 |             columnList.push_back( dateVec );
110 |         }
111 | 
112 |         if ( not numericTime and not dateTime and dateTimeTime )  {
113 |             // Convert to Datetime
114 |             r::DatetimeVector datetimeVec( dataFrame.Time().size() );
115 | 
116 |             for ( size_t i = 0; i < dataFrame.Time().size(); i++ ) {
117 |                 datetimeVec[ i ] = r::Datetime( dataFrame.Time().at( i ),
118 |                                                 "%Y-%m-%d %H:%M:%OS" );
119 |             }
120 |             columnList.push_back( datetimeVec );
121 |         }
122 | 
123 |         if ( not numericTime and not dateTime and not dateTimeTime )  {
124 |             // Couldn't convert dataFrame.Time(), just push it as-is
125 |             // R will see it as a vector of factors... why not strings?
126 |             columnList.push_back( dataFrame.Time() );
127 |         }
128 |     } // if ( dataFrame.Time().size() ) 
129 | 
130 |     // Copy data: NOTE in cppEDM data and time vector are separate
131 |     //            data are in a row-major valarray with NColumns().
132 |     for ( auto ci = columnNamesIn.begin(); ci != columnNamesIn.end(); ci++ ) {
133 |         if ( hasTime and (*ci).compare( dataFrame.TimeName() ) == 0 ) {
134 |             continue;  // skip time. It's a vector< std::string > 
135 |         }
136 | 
137 |         // Unfortunately we have to copy to vector first
138 |         std::valarray<double> col_val = dataFrame.VectorColumnName( *ci );
139 |         std::vector<double> col_vec(std::begin(col_val), std::end(col_val));
140 |         columnList.push_back( col_vec );
141 |         columnNames.push_back( *ci );
142 |     }
143 | 
144 |     r::DataFrame df ( columnList );
145 |     df.attr("names") = columnNames; // time name (if any), then data names
146 | 
147 |     return df;
148 | }
149 | 
150 | //---------------------------------------------------------------
151 | // Load path/file into cppEDM DataFrame, convert to an R
152 | // data.frame with DataFrameToDF()
153 | //---------------------------------------------------------------
154 | r::DataFrame ReadDataFrame ( std::string path, std::string file ) {
155 |     return DataFrameToDF( DataFrame< double >( path, file ) );
156 | }
157 | 


--------------------------------------------------------------------------------
/src/Embed.cpp:
--------------------------------------------------------------------------------
 1 | 
 2 | #include "RcppEDMCommon.h"
 3 | #include "API.h"
 4 | 
 5 | //---------------------------------------------------------------
 6 | // Embed wrapper: embeds dataFile if given, else the R dataFrame
 7 | //---------------------------------------------------------------
 8 | r::DataFrame Embed_rcpp( std::string  path,
 9 |                          std::string  dataFile,
10 |                          r::DataFrame dataFrame,
11 |                          int          E,
12 |                          int          tau,
13 |                          std::string  columns,
14 |                          bool         verbose ) {
15 | 
16 |     DataFrame< double > embedded; // left default-constructed on bad input
17 | 
18 |     if ( dataFile.size() ) {
19 |         // dataFile specified, dispatch overloaded Embed, ignore dataFrame
20 |         embedded = Embed( path,
21 |                           dataFile,
22 |                           E,
23 |                           tau,
24 |                           columns,
25 |                           verbose );
26 |     }
27 |     else if ( dataFrame.ncol() ) {
28 |         DataFrame< double > dataFrame_ = DFToDataFrame( dataFrame );
29 | 
30 |         embedded = Embed( dataFrame_,
31 |                           E,
32 |                           tau,
33 |                           columns,
34 |                           verbose );
35 |     }
36 |     else {
37 |         Rcpp::warning( "Embed_rcpp(): Invalid input.\n" ); // warn only
38 |     }
39 | 
40 |     return DataFrameToDF( embedded );
41 | }
42 | 
43 | //---------------------------------------------------------------
44 | // MakeBlock wrapper: converts the R dataFrame and calls MakeBlock()
45 | //---------------------------------------------------------------
46 | r::DataFrame MakeBlock_rcpp( r::DataFrame             dataFrame,
47 |                              int                      E,
48 |                              int                      tau,
49 |                              std::vector<std::string> columnNames,
50 |                              bool                     deletePartial ) {
51 | 
52 |     DataFrame< double > dataFrame_ = DFToDataFrame( dataFrame );
53 | 
54 |     DataFrame< double > block = MakeBlock( dataFrame_,
55 |                                            E,
56 |                                            tau,
57 |                                            columnNames,
58 |                                            deletePartial );
59 | 
60 |     return DataFrameToDF( block ); // back to an R data.frame
61 | }
62 | 


--------------------------------------------------------------------------------
/src/EmbedDim.cpp:
--------------------------------------------------------------------------------
 1 | 
 2 | #include "RcppEDMCommon.h"
 3 | 
 4 | //---------------------------------------------------------------
 5 | // EmbedDimension wrapper: uses dataFile if given, else dataFrame
 6 | //---------------------------------------------------------------
 7 | r::DataFrame EmbedDimension_rcpp( std::string  pathIn,
 8 |                                   std::string  dataFile,
 9 |                                   r::DataFrame dataFrame,
10 |                                   std::string  pathOut,
11 |                                   std::string  predictFile,
12 |                                   std::string  lib,
13 |                                   std::string  pred,
14 |                                   int          maxE,
15 |                                   int          Tp,
16 |                                   int          tau,
17 |                                   int          exclusionRadius,
18 |                                   std::string  columns,
19 |                                   std::string  target,
20 |                                   bool         embedded,
21 |                                   bool         verbose,
22 |                                   std::vector<bool> validLib,
23 |                                   unsigned     numThreads ) {
24 | 
25 |     DataFrame< double > EmbedDimDF; // left default-constructed on bad input
26 | 
27 |     if ( dataFile.size() ) {
28 |         // dataFile specified, dispatch overloaded EmbedDimension,
29 |         // ignore dataFrame
30 |         EmbedDimDF = EmbedDimension( pathIn,
31 |                                      dataFile,
32 |                                      pathOut,
33 |                                      predictFile,
34 |                                      lib,
35 |                                      pred,
36 |                                      maxE,
37 |                                      Tp,
38 |                                      tau,
39 |                                      exclusionRadius,
40 |                                      columns,
41 |                                      target,
42 |                                      embedded,
43 |                                      verbose,
44 |                                      validLib,
45 |                                      numThreads );
46 |     }
47 |     else if ( dataFrame.size() ) {
48 |         DataFrame< double > dataFrame_ = DFToDataFrame( dataFrame );
49 | 
50 |         EmbedDimDF = EmbedDimension( dataFrame_,
51 |                                      pathOut,
52 |                                      predictFile,
53 |                                      lib,
54 |                                      pred,
55 |                                      maxE,
56 |                                      Tp,
57 |                                      tau,
58 |                                      exclusionRadius,
59 |                                      columns,
60 |                                      target,
61 |                                      embedded,
62 |                                      verbose,
63 |                                      validLib,
64 |                                      numThreads );
65 |     }
66 |     else {
67 |         Rcpp::warning( "EmbedDimension_rcpp(): Invalid input.\n" ); // warn only
68 |     }
69 | 
70 |     return DataFrameToDF( EmbedDimDF );
71 | }
72 | 


--------------------------------------------------------------------------------
/src/Makevars:
--------------------------------------------------------------------------------
 1 | # NOTE : Do not put comments on a non-comment line.
 2 | LIB_PATH        = ./cppEDM/lib/
 3 | CPPEDM_SRC_PATH = ./cppEDM/src/
 4 | LIBEDM          = $(LIB_PATH)/libEDM.a
 5 | 
 6 | PKG_CPPFLAGS = -I $(CPPEDM_SRC_PATH)
 7 | PKG_LIBS     = -L $(LIB_PATH) -lEDM $(LAPACK_LIBS) $(BLAS_LIBS) $(FLIBS)
 8 | 
 9 | # CXX_STD required for RcppThread on GCC ASAN/UBSAN ?
10 | # R 4.3 and above uses default CXX17. Will change with R version?
11 | CXX_STD = CXX17
12 | 
13 | .PHONY: all $(LIBEDM)
14 | 
15 | all : $(SHLIB)
16 | 
17 | $(SHLIB): $(LIBEDM)
18 | 
19 | # Pass R compiler variables CXX17... to cppEDM makefile
20 | $(LIBEDM):
21 | 	@(cd $(CPPEDM_SRC_PATH) && $(MAKE) clean && $(MAKE) \
22 | 	CXX="$(CXX17) $(CXX17STD)" CXXFLAGS="$(CXX17FLAGS) $(CXX17PICFLAGS)" \
23 | 	AR="$(AR)" RANLIB="$(RANLIB)")
24 | 


--------------------------------------------------------------------------------
/src/Makevars.win:
--------------------------------------------------------------------------------
 1 | ## It seems that cppEDM/lib is not created or used...?
 2 | ## Leave libEDM.a in cppEDM/src and link from there.
 3 | 
 4 | ## Include paths are preprocessor options, not compiler options,
 5 | ## and must be set in PKG_CPPFLAGS
 6 | ## Set flags for the linker, for example -l and -L options, via PKG_LIBS
 7 | ## Do not set variables such as CPPFLAGS, CFLAGS etc.
 8 | 
 9 | PKG_CPPFLAGS = -I./cppEDM/src -I../
10 | PKG_LIBS     = -L./cppEDM/src/ -lEDM $(LAPACK_LIBS)
11 | ## CXX_STD   = CXX11
12 | 
13 | $(SHLIB): cppEDM/src/libEDM.a
14 | 
15 | cppEDM/src/libEDM.a:
16 | 	@(cd cppEDM/src && $(MAKE) -f makefile.mingw \
17 |           CC="$(CC)" CFLAGS="-DCCM_THREADED -DUSING_R -I../ $(CPICFLAGS)" AR="$(AR)" RANLIB="$(RANLIB)")
18 | 


--------------------------------------------------------------------------------
/src/Multiview.cpp:
--------------------------------------------------------------------------------
  1 | 
  2 | #include "RcppEDMCommon.h"
  3 | 
  4 | //--------------------------------------------------------------
  5 | // Multiview wrapper: returns list of ComboRho, ColumnNames, Predictions
  6 | //--------------------------------------------------------------
  7 | r::List Multiview_rcpp ( std::string  pathIn,
  8 |                          std::string  dataFile,
  9 |                          r::DataFrame dataFrame,
 10 |                          //std::string  pathOut,     // Rcpp 20 arg limit
 11 |                          //std::string  predictFile, // Rcpp 20 arg limit
 12 |                          std::string  lib,
 13 |                          std::string  pred,
 14 |                          int          D,
 15 |                          int          E,
 16 |                          int          Tp,
 17 |                          int          knn,
 18 |                          int          tau, 
 19 |                          std::string  columns,
 20 |                          std::string  target,
 21 |                          int          multiview,
 22 |                          int          exclusionRadius,
 23 |                          bool         trainLib,
 24 |                          bool         excludeTarget,
 25 |                          bool         parameterList,
 26 |                          bool         verbose,
 27 |                          unsigned int numThreads ) {
 28 | 
 29 |     MultiviewValues MV; // left default-constructed on bad input
 30 | 
 31 |     std::string pathOut("./");   // Rcpp has 20 arg limit
 32 |     std::string predictFile(""); // Rcpp has 20 arg limit
 33 | 
 34 |     if ( dataFile.size() ) {
 35 |         // dataFile specified, dispatch overloaded Multiview, ignore dataFrame
 36 | 
 37 |         MV = Multiview( pathIn,
 38 |                         dataFile,
 39 |                         pathOut,
 40 |                         predictFile,
 41 |                         lib,
 42 |                         pred,
 43 |                         D,
 44 |                         E,
 45 |                         Tp,
 46 |                         knn,
 47 |                         tau,
 48 |                         columns,
 49 |                         target,
 50 |                         multiview,
 51 |                         exclusionRadius,
 52 |                         trainLib,
 53 |                         excludeTarget,
 54 |                         parameterList,
 55 |                         verbose,
 56 |                         numThreads );
 57 |     }
 58 |     else if ( dataFrame.size() ) {
 59 |         DataFrame< double > dataFrame_ = DFToDataFrame( dataFrame );
 60 | 
 61 |         MV = Multiview( dataFrame_,
 62 |                         pathOut,
 63 |                         predictFile,
 64 |                         lib,
 65 |                         pred,
 66 |                         D,
 67 |                         E,
 68 |                         Tp,
 69 |                         knn,
 70 |                         tau,
 71 |                         columns,
 72 |                         target,
 73 |                         multiview,
 74 |                         exclusionRadius,
 75 |                         trainLib,
 76 |                         excludeTarget,
 77 |                         parameterList,
 78 |                         verbose,
 79 |                         numThreads );
 80 |     }
 81 |     else {
 82 |         Rcpp::warning( "Multiview_rcpp(): Invalid input.\n" ); // warn only
 83 |     }
 84 | 
 85 |     r::DataFrame comboRho    = DataFrameToDF( MV.ComboRho    );
 86 |     r::DataFrame predictions = DataFrameToDF( MV.Predictions );
 87 | 
 88 |     // ColumnNames are: map< string, vector<string> >, convert to List
 89 |     r::List columnNames;
 90 |     for ( auto cni  = MV.ColumnNames.begin();
 91 |                cni != MV.ColumnNames.end(); cni++ ) {
 92 |         r::StringVector strVec;
 93 |         std::vector< std::string > names = cni->second;
 94 |         for ( auto ni = names.begin(); ni != names.end(); ni++ ) {
 95 |             strVec.push_back( *ni );
 96 |         }
 97 |         columnNames[ cni->first ] = strVec; // list element keyed by map key
 98 |     }
 99 | 
100 |     r::List output = r::List::create(
101 |         r::Named("ComboRho")    = comboRho,
102 |         r::Named("ColumnNames") = columnNames,
103 |         r::Named("Predictions") = predictions );
104 | 
105 |     if ( parameterList ) {
106 |         r::List paramList = ParamMaptoList( MV.parameterMap );
107 |         output["parameters"] = paramList; // only present when requested
108 |     }
109 | 
110 |     // Multiview.R in EDM.R will convert comboLines into an R data.frame
111 |     return output;
112 | }
113 | 


--------------------------------------------------------------------------------
/src/ParameterList.cpp:
--------------------------------------------------------------------------------
 1 | 
 2 | #include "RcppEDMCommon.h"
 3 | 
 4 | //----------------------------------------------------------------
 5 | // Convert a parameter map (string -> string) to a typed R list
 6 | //----------------------------------------------------------------
 7 | r::List ParamMaptoList( std::map< std::string, std::string > m ) {
 8 | 
 9 |     r::List L;
10 | 
11 |     for ( auto pi = m.begin(); pi != m.end(); pi++ ) { // unlisted keys skipped
12 |         // string types
13 |         if ( pi->first == "version"  or
14 |              pi->first == "method"   or pi->first == "columns" or
15 |              pi->first == "target"   or pi->first == "pathIn"  or
16 |              pi->first == "dataFile" or pi->first == "pathOut" or
17 |              pi->first == "predictOutputFile" or
18 |              pi->first == "SmapOutputFile"    or
19 |              pi->first == "blockOutputFile" ) {
20 | 
21 |             L[ pi->first ] = pi->second;
22 |         }
23 |         // int types
24 |         else if ( pi->first == "E"   or pi->first == "Tp"  or
25 |                   pi->first == "knn" or pi->first == "tau" or
26 |                   pi->first == "exclusionRadius"   or
27 |                   pi->first == "seed"              or
28 |                   pi->first == "subSamples"        or
29 |                   pi->first == "multiviewEnsemble" or
30 |                   pi->first == "multiviewD"        or
31 |                   pi->first == "generateSteps" ) {
32 | 
33 |             L[ pi->first ] = std::stoi( pi->second );
34 |         }
35 |         // boolean types: "0" -> FALSE, "1" -> TRUE, anything else is not set
36 |         else if ( pi->first == "randomLib"   or
37 |                   pi->first == "replacement" or
38 |                   pi->first == "includeData" or
39 |                   pi->first == "multiviewTrainLib"      or
40 |                   pi->first == "multiviewExcludeTarget" or
41 |                   pi->first == "embedded"      or
42 |                   pi->first == "const_predict" or
43 |                   pi->first == "parameterList" or
44 |                   pi->first == "verbose" ) {
45 | 
46 |             if ( pi->second == "0" ) {
47 |                 L[ pi->first ] = false;
48 |             }
49 |             if ( pi->second == "1" ) {
50 |                 L[ pi->first ] = true;
51 |             }
52 |         }
53 |         // vector of int: whitespace separated integers
54 |         else if ( pi->first == "lib"      or pi->first == "pred" or
55 |                   pi->first == "libSizes" or pi->first == "validLib" ) {
56 |             std::stringstream iss( pi->second );
57 |             std::vector< int > intVector;
58 |             int                value;
59 | 
60 |             while ( iss >> value ) {
61 |                 intVector.push_back( value );
62 |             }
63 | 
64 |             L[ pi->first ] = intVector;
65 |         }
66 |         // floating point: parse as double, stof would round to float first
67 |         else if ( pi->first == "theta" ) {
68 |             L[ pi->first ] = std::stod( pi->second );
69 |         }
70 |     }
71 | 
72 |     return L;
73 | }
74 | 


--------------------------------------------------------------------------------
/src/PredictInterval.cpp:
--------------------------------------------------------------------------------
 1 | 
 2 | #include "RcppEDMCommon.h"
 3 | 
 4 | //---------------------------------------------------------------
 5 | // PredictInterval wrapper: uses dataFile if given, else dataFrame
 6 | //---------------------------------------------------------------
 7 | r::DataFrame PredictInterval_rcpp( std::string  pathIn,
 8 |                                    std::string  dataFile,
 9 |                                    r::DataFrame dataFrame,
10 |                                    std::string  pathOut,
11 |                                    std::string  predictFile,
12 |                                    std::string  lib,
13 |                                    std::string  pred,
14 |                                    int          maxTp,
15 |                                    int          E,
16 |                                    int          tau,
17 |                                    int          exclusionRadius,
18 |                                    std::string  columns,
19 |                                    std::string  target,
20 |                                    bool         embedded,
21 |                                    bool         verbose,
22 |                                    std::vector<bool> validLib,
23 |                                    unsigned     numThreads ) {
24 | 
25 |     DataFrame< double > PredictDF; // left default-constructed on bad input
26 | 
27 |     if ( dataFile.size() ) {
28 |         // dataFile specified, dispatch overloaded PredictInterval,
29 |         // ignore dataFrame
30 |         PredictDF = PredictInterval( pathIn,
31 |                                      dataFile,
32 |                                      pathOut,
33 |                                      predictFile,
34 |                                      lib,
35 |                                      pred,
36 |                                      maxTp,
37 |                                      E,
38 |                                      tau,
39 |                                      exclusionRadius,
40 |                                      columns,
41 |                                      target,
42 |                                      embedded,
43 |                                      verbose,
44 |                                      validLib,
45 |                                      numThreads );
46 |     }
47 |     else if ( dataFrame.size() ) {
48 |         DataFrame< double > dataFrame_ = DFToDataFrame( dataFrame );
49 | 
50 |         PredictDF = PredictInterval( dataFrame_,
51 |                                      pathOut,
52 |                                      predictFile,
53 |                                      lib,
54 |                                      pred,
55 |                                      maxTp,
56 |                                      E,
57 |                                      tau,
58 |                                      exclusionRadius,
59 |                                      columns,
60 |                                      target,
61 |                                      embedded,
62 |                                      verbose,
63 |                                      validLib,
64 |                                      numThreads );
65 |     }
66 |     else {
67 |         Rcpp::warning("PredictInterval_rcpp(): Invalid input.\n"); // warn only
68 |     }
69 | 
70 |     return DataFrameToDF( PredictDF );
71 | }
72 | 


--------------------------------------------------------------------------------
/src/PredictNL.cpp:
--------------------------------------------------------------------------------
 1 | 
 2 | #include "RcppEDMCommon.h"
 3 | 
 4 | //---------------------------------------------------------------
 5 | // PredictNonlinear wrapper: uses dataFile if given, else dataFrame
 6 | //---------------------------------------------------------------
 7 | r::DataFrame PredictNonlinear_rcpp( std::string  pathIn,
 8 |                                     std::string  dataFile,
 9 |                                     r::DataFrame dataFrame,
10 |                                     std::string  pathOut,
11 |                                     std::string  predictFile,
12 |                                     std::string  lib,
13 |                                     std::string  pred,
14 |                                     std::string  theta,
15 |                                     int          E,
16 |                                     int          Tp,
17 |                                     int          knn,
18 |                                     int          tau,
19 |                                     int          exclusionRadius,
20 |                                     std::string  columns,
21 |                                     std::string  target,
22 |                                     bool         embedded,
23 |                                     bool         verbose,
24 |                                     std::vector<bool> validLib,
25 |                                     bool         ignoreNan,
26 |                                     unsigned     numThreads ) {
27 | 
28 |     DataFrame< double > PredictDF; // left default-constructed on bad input
29 | 
30 |     if ( dataFile.size() ) {
31 |         // dataFile specified, dispatch overloaded PredictNonlinear,
32 |         // ignore dataFrame
33 |         PredictDF = PredictNonlinear( pathIn,
34 |                                       dataFile,
35 |                                       pathOut,
36 |                                       predictFile,
37 |                                       lib,
38 |                                       pred,
39 |                                       theta,
40 |                                       E,
41 |                                       Tp,
42 |                                       knn,
43 |                                       tau,
44 |                                       exclusionRadius,
45 |                                       columns,
46 |                                       target,
47 |                                       embedded,
48 |                                       verbose,
49 |                                       validLib,
50 |                                       ignoreNan,
51 |                                       numThreads );
52 |     }
53 |     else if ( dataFrame.size() ) {
54 |         DataFrame< double > dataFrame_ = DFToDataFrame( dataFrame );
55 | 
56 |         PredictDF = PredictNonlinear( dataFrame_,
57 |                                       pathOut,
58 |                                       predictFile,
59 |                                       lib,
60 |                                       pred,
61 |                                       theta,
62 |                                       E,
63 |                                       Tp,
64 |                                       knn,
65 |                                       tau,
66 |                                       exclusionRadius,
67 |                                       columns,
68 |                                       target,
69 |                                       embedded,
70 |                                       verbose,
71 |                                       validLib,
72 |                                       ignoreNan,
73 |                                       numThreads );
74 |     }
75 |     else {
76 |         Rcpp::warning("PredictNonlinear_rcpp(): Invalid input.\n"); // warn only
77 |     }
78 | 
79 |     return DataFrameToDF( PredictDF );
80 | }
81 | 


--------------------------------------------------------------------------------
/src/RcppEDMCommon.cpp:
--------------------------------------------------------------------------------
  1 | // Expose and map cpp wrapper functions to EDM module via Rcpp
  2 | // See RCPP_MODULE() at end of file.
  3 | //
  4 | // Requirements for a function to be exposed to R via Rcpp modules are:
  5 | //   The function takes between 0 and 65 parameters.
  6 | //   The type of each input parameter must be manageable by Rcpp::as template.
  7 | //   The return type of the function must be either void or any type that can
  8 | //   be managed by the Rcpp::wrap template.
  9 | //   The function name itself has to be unique in the module. In other words,
 10 | //   no two functions with the same name but different signatures are allowed.
 11 | //   C++ allows overloading functions. This might be added in future versions
 12 | //   of modules.
 13 | 
 14 | #include "RcppEDMCommon.h"
 15 | 
 16 | //-------------------------------------------------------------------------
 17 | // Definitions of formal arguments and default params of the R functions
 18 | // that encapsulate the C++ functions in an Rcpp::List.
 19 | //-------------------------------------------------------------------------
 20 | auto ReadDataFrameArgs = r::List::create( r::_["path"] = "",
 21 |                                           r::_["file"] = "" );
 22 | // Formals passed with "RtoCpp_MakeBlock"; entries follow the parameter order of MakeBlock_rcpp().
 23 | auto MakeBlockArgs = r::List::create( 
 24 |     r::_["dataFrame"]     = r::DataFrame(),
 25 |     r::_["E"]             = 0,
 26 |     r::_["tau"]           = -1,
 27 |     r::_["columnNames"]   = std::vector<std::string>(),
 28 |     r::_["deletePartial"] = false );
 29 | // Formals passed with "RtoCpp_Embed"; entries follow the parameter order of Embed_rcpp().
 30 | auto EmbedArgs = r::List::create( 
 31 |     r::_["path"]      = std::string(""),
 32 |     r::_["dataFile"]  = std::string(""),
 33 |     r::_["dataFrame"] = r::DataFrame(),
 34 |     r::_["E"]         = 0,
 35 |     r::_["tau"]       = -1,
 36 |     r::_["columns"]   = std::string(""),
 37 |     r::_["verbose"]   = false );
 38 | // Formals passed with "RtoCpp_Simplex"; entries follow the parameter order of Simplex_rcpp().
 39 | auto SimplexArgs = r::List::create(
 40 |     r::_["pathIn"]          = std::string("./"),
 41 |     r::_["dataFile"]        = std::string(""),
 42 |     r::_["dataFrame"]       = r::DataFrame(),
 43 |     r::_["pathOut"]         = std::string("./"),
 44 |     r::_["predictFile"]     = std::string(""),
 45 |     r::_["lib"]             = std::string(""),
 46 |     r::_["pred"]            = std::string(""),
 47 |     r::_["E"]               = 0,
 48 |     r::_["Tp"]              = 1,
 49 |     r::_["knn"]             = 0,
 50 |     r::_["tau"]             = -1,
 51 |     r::_["exclusionRadius"] = 0,
 52 |     r::_["columns"]         = std::string(""),
 53 |     r::_["target"]          = std::string(""),
 54 |     r::_["embedded"]        = false,
 55 |     //r::_["const_predict"]   = false, // Rcpp 20 arg limit
 56 |     r::_["verbose"]         = false,
 57 |     r::_["validLib"]        = std::vector<bool>(),
 58 |     r::_["generateSteps"]   = 0,
 59 |     //r::_["generateLibrary"] = false, // Rcpp 20 arg limit
 60 |     r::_["parameterList"]   = false ); // 19 active entries; the commented-out ones are not part of the R formals
 61 | // Formals passed with "RtoCpp_SMap"; entries follow the parameter order of SMap_rcpp().
 62 | auto SMapArgs = r::List::create(
 63 |     r::_["pathIn"]          = std::string("./"),
 64 |     r::_["dataFile"]        = std::string(""),
 65 |     r::_["dataFrame"]       = r::DataFrame(),
 66 |     //r::_["pathOut"]       = std::string("./"), // Rcpp 20 arg limit
 67 |     //r::_["predictFile"]   = std::string(""),   // Rcpp 20 arg limit
 68 |     r::_["lib"]             = std::string(""),
 69 |     r::_["pred"]            = std::string(""),
 70 |     r::_["E"]               = 0,
 71 |     r::_["Tp"]              = 1,
 72 |     r::_["knn"]             = 0,
 73 |     r::_["tau"]             = -1,
 74 |     r::_["theta"]           = 0, // integer literal here; SMap_rcpp() declares theta as double
 75 |     r::_["exclusionRadius"] = 0,
 76 |     r::_["columns"]         = std::string(""),
 77 |     r::_["target"]          = std::string(""),
 78 |     //r::_["smapCoefFile"]  = std::string(""), // Rcpp 20 arg limit
 79 |     //r::_["smapSVFile"]    = std::string(""), // Rcpp 20 arg limit
 80 |     //r::_["solver"]        = std::string(""), // Not supported by glmnet
 81 |     r::_["embedded"]        = false,
 82 |     //r::_["const_predict"] = false,           // Rcpp 20 arg limit
 83 |     r::_["verbose"]         = false,
 84 |     r::_["validLib"]        = std::vector<bool>(),
 85 |     r::_["ignoreNan"]       = true,
 86 |     r::_["generateSteps"]   = 0,
 87 |     //r::_["generateLibrary"] = false, // Rcpp 20 arg limit
 88 |     r::_["parameterList"]   = false ); // 19 active entries; the commented-out ones are not part of the R formals
 89 | // Formals passed with "RtoCpp_Multiview"; entries follow the parameter order of Multiview_rcpp().
 90 | auto MultiviewArgs = r::List::create( 
 91 |     r::_["pathIn"]          = std::string("./"),
 92 |     r::_["dataFile"]        = std::string(""),
 93 |     r::_["dataFrame"]       = r::DataFrame(),
 94 |     //r::_["pathOut"]       = std::string("./"), // Rcpp 20 arg limit
 95 |     //r::_["predictFile"]   = std::string(""),   // Rcpp 20 arg limit
 96 |     r::_["lib"]             = std::string(""),
 97 |     r::_["pred"]            = std::string(""),
 98 |     r::_["D"]               = 0,
 99 |     r::_["E"]               = 1,
100 |     r::_["Tp"]              = 1,
101 |     r::_["knn"]             = 0,
102 |     r::_["tau"]             = -1,
103 |     r::_["columns"]         = std::string(""),
104 |     r::_["target"]          = std::string(""),
105 |     r::_["multiview"]       = 0,
106 |     r::_["exlcusionRadius"] = 0, // NOTE(review): misspelling of "exclusionRadius" (cf. SimplexArgs); this string is the R formal name, so check for callers passing it by name before renaming
107 |     r::_["trainLib"]        = true,
108 |     r::_["excludeTarget"]   = false,
109 |     r::_["parameterList"]   = false,
110 |     r::_["verbose"]         = false,
111 |     r::_["numThreads"]      = 4 );
112 | // Formals passed with "RtoCpp_CCM"; entries follow the parameter order of CCM_rcpp().
113 | auto CCMArgs = r::List::create( 
114 |     r::_["pathIn"]          = std::string("./"),
115 |     r::_["dataFile"]        = std::string(""),
116 |     r::_["dataFrame"]       = r::DataFrame(),
117 |     //r::_["pathOut"]       = std::string("./"), // Rcpp 20 arg limit
118 |     //r::_["predictFile"]   = std::string(""),   // Rcpp 20 arg limit
119 |     r::_["E"]               = 0,
120 |     r::_["Tp"]              = 0,
121 |     r::_["knn"]             = 0,
122 |     r::_["tau"]             = -1,
123 |     r::_["exlcusionRadius"] = 0, // NOTE(review): misspelling of "exclusionRadius" (cf. SimplexArgs); this string is the R formal name, so check for callers passing it by name before renaming
124 |     r::_["columns"]         = std::string(""),
125 |     r::_["target"]          = std::string(""),
126 |     r::_["libSizes"]        = std::string(""),
127 |     r::_["sample"]          = 0,
128 |     r::_["random"]          = true,
129 |     //r::_["replacement"]   = false,             // Rcpp 20 arg limit
130 |     r::_["seed"]            = 0, // CCM_rcpp() declares seed as unsigned
131 |     r::_["embedded"]        = false,
132 |     r::_["includeData"]     = false,
133 |     r::_["parameterList"]   = false,
134 |     r::_["verbose"]         = false );
135 | // Formals passed with "RtoCpp_EmbedDimension"; entries follow the parameter order of EmbedDimension_rcpp().
136 | auto EmbedDimensionArgs     = r::List::create( 
137 |     r::_["pathIn"]          = std::string("./"),
138 |     r::_["dataFile"]        = std::string(""),
139 |     r::_["dataFrame"]       = r::DataFrame(),
140 |     r::_["pathOut"]         = std::string("./"),
141 |     r::_["predictFile"]     = std::string(""),
142 |     r::_["lib"]             = std::string(""),
143 |     r::_["pred"]            = std::string(""),
144 |     r::_["maxE"]            = 10,
145 |     r::_["Tp"]              = 1,
146 |     r::_["tau"]             = -1,
147 |     r::_["exclusionRadius"] = 0,
148 |     r::_["columns"]         = std::string(""),
149 |     r::_["target"]          = std::string(""),
150 |     r::_["embedded"]        = false,
151 |     r::_["verbose"]         = false,
152 |     r::_["validLib"]        = std::vector<bool>(),
153 |     r::_["numThreads"]      = 4 );
154 | // Formals passed with "RtoCpp_PredictInterval"; entries follow the parameter order of PredictInterval_rcpp().
155 | auto PredictIntervalArgs = r::List::create( 
156 |     r::_["pathIn"]          = std::string("./"),
157 |     r::_["dataFile"]        = std::string(""),
158 |     r::_["dataFrame"]       = r::DataFrame(),
159 |     r::_["pathOut"]         = std::string("./"),
160 |     r::_["predictFile"]     = std::string(""),
161 |     r::_["lib"]             = std::string(""),
162 |     r::_["pred"]            = std::string(""),
163 |     r::_["maxTp"]           = 10,
164 |     r::_["E"]               = 0,
165 |     r::_["tau"]             = -1,
166 |     r::_["exclusionRadius"] = 0,
167 |     r::_["columns"]         = std::string(""),
168 |     r::_["target"]          = std::string(""),
169 |     r::_["embedded"]        = false,
170 |     r::_["verbose"]         = false,
171 |     r::_["validLib"]        = std::vector<bool>(),
172 |     r::_["numThreads"]      = 4 );
173 | // Formals passed with "RtoCpp_PredictNonlinear"; entries follow the parameter order of PredictNonlinear_rcpp().
174 | auto PredictNonlinearArgs = r::List::create( 
175 |     r::_["pathIn"]          = std::string("./"),
176 |     r::_["dataFile"]        = std::string(""),
177 |     r::_["dataFrame"]       = r::DataFrame(),
178 |     r::_["pathOut"]         = std::string("./"),
179 |     r::_["predictFile"]     = std::string(""),
180 |     r::_["lib"]             = std::string(""),
181 |     r::_["pred"]            = std::string(""),
182 |     r::_["theta"]           = std::string(""), // a string here, unlike the numeric theta in SMapArgs
183 |     r::_["E"]               = 0,
184 |     r::_["Tp"]              = 1,
185 |     r::_["knn"]             = 0,
186 |     r::_["tau"]             = -1,
187 |     r::_["exclusionRadius"] = 0,
188 |     r::_["columns"]         = std::string(""),
189 |     r::_["target"]          = std::string(""),
190 |     r::_["embedded"]        = false,
191 |     r::_["verbose"]         = false,
192 |     r::_["validLib"]        = std::vector<bool>(),
193 |     r::_["ignoreNan"]       = true,
194 |     r::_["numThreads"]      = 4 ); // 20 entries in total
195 | 
196 | //-------------------------------------------------------------------------
197 | // Export / map the functions
198 | //   First argument:  R function name, see ../R/EDM.R
199 | //   Second argument: pointer to Rcpp interface function
200 | //   Third argument:  Rcpp::List holding the formal argument names and
201 | //                    defaults of the R function that wraps the C++ function
202 | //-------------------------------------------------------------------------
203 | RCPP_MODULE(EDMInternal) { // each r::function() call below registers one C++ function under the quoted R name
204 |     r::function( "RtoCpp_ComputeError",  &ComputeError_rcpp                  ); // registered without a formals list
205 |     r::function( "RtoCpp_ReadDataFrame", &ReadDataFrame,   ReadDataFrameArgs );
206 |     r::function( "RtoCpp_MakeBlock",     &MakeBlock_rcpp,  MakeBlockArgs     );
207 |     r::function( "RtoCpp_Embed",         &Embed_rcpp,      EmbedArgs         );
208 |     r::function( "RtoCpp_Simplex",       &Simplex_rcpp,    SimplexArgs       );
209 |     r::function( "RtoCpp_SMap",          &SMap_rcpp,       SMapArgs          );
210 |     r::function( "RtoCpp_Multiview",     &Multiview_rcpp,  MultiviewArgs     );
211 |     r::function( "RtoCpp_CCM",           &CCM_rcpp,        CCMArgs           );
212 |     r::function( "RtoCpp_EmbedDimension",   &EmbedDimension_rcpp, 
213 |                                              EmbedDimensionArgs   );
214 |     r::function( "RtoCpp_PredictInterval",  &PredictInterval_rcpp, 
215 |                                              PredictIntervalArgs  );
216 |     r::function( "RtoCpp_PredictNonlinear", &PredictNonlinear_rcpp, 
217 |                                              PredictNonlinearArgs );
218 | }
219 | 


--------------------------------------------------------------------------------
/src/RcppEDMCommon.h:
--------------------------------------------------------------------------------
  1 | 
  2 | // R to C++ interface using Rcpp
  3 | // Functional flow: R func calls Rcpp func calls C++ func.
  4 | 
  5 | #ifndef RCPPEDMCOMMON
  6 | #define RCPPEDMCOMMON
  7 | 
  8 | #define RCPPTHREAD_OVERRIDE_COUT 1 // std::cout override
  9 | 
 10 | #include <Rcpp.h>
 11 | #include <R.h>
 12 | #include <RcppThread.h>
 13 | #include <iostream>
 14 | #include "API.h"
 15 | 
 16 | namespace r = Rcpp;
 17 | 
 18 | // Forward declarations of the conversion / IO helpers shared by the *_rcpp interface functions
 19 | DataFrame< double > DFToDataFrame ( Rcpp::DataFrame df ); // Rcpp::DataFrame -> DataFrame< double >
 20 | 
 21 | r::DataFrame DataFrameToDF ( DataFrame< double > dataFrame ); // DataFrame< double > -> Rcpp::DataFrame
 22 | 
 23 | r::DataFrame ReadDataFrame ( std::string path, std::string file ); // registered with R as "RtoCpp_ReadDataFrame"
 24 | 
 25 | r::List ParamMaptoList( std::map< std::string, std::string > m ); // string -> string map converted to an Rcpp::List
 26 | // Defined in Simplex.cpp; returns a list with "predictions", plus "parameters" when parameterList is true.
 27 | r::List Simplex_rcpp( std::string       pathIn,
 28 |                       std::string       dataFile,
 29 |                       r::DataFrame      dataList, // named dataFrame in the definition
 30 |                       std::string       pathOut,
 31 |                       std::string       predictFile,
 32 |                       std::string       lib,
 33 |                       std::string       pred,
 34 |                       int               E,
 35 |                       int               Tp,
 36 |                       int               knn,
 37 |                       int               tau,
 38 |                       int               exclusionRadius,
 39 |                       std::string       columns,
 40 |                       std::string       target,
 41 |                       bool              embedded,
 42 |                       // bool           const_predict, // Rcpp 20 arg limit
 43 |                       bool              verbose,
 44 |                       std::vector<bool> validLib,
 45 |                       int               generateSteps,
 46 |                       // bool           generateLibrary, // Rcpp 20 arg limit
 47 |                       bool              parameterList );
 48 | // Defined in SMap.cpp; returns a list with "predictions", "coefficients", "singularValues", plus "parameters" when parameterList is true.
 49 | r::List SMap_rcpp( std::string       pathIn,
 50 |                    std::string       dataFile,
 51 |                    r::DataFrame      dataList, // named dataFrame in the definition
 52 |                    //std::string     pathOut,     // Rcpp 20 arg limit
 53 |                    //std::string     predictFile, // Rcpp 20 arg limit
 54 |                    std::string       lib,
 55 |                    std::string       pred,
 56 |                    int               E,
 57 |                    int               Tp,
 58 |                    int               knn,
 59 |                    int               tau,
 60 |                    double            theta,
 61 |                    int               exclusionRadius,
 62 |                    std::string       columns,
 63 |                    std::string       target,
 64 |                    //std::string     smapCoefFile,  // Rcpp 20 arg limit
 65 |                    //std::string     smapSVFile,    // Rcpp 20 arg limit
 66 |                    //SVDValues       (*solver)      // Not supported by glmnet
 67 |                    bool              embedded,
 68 |                    //bool            const_predict, // Rcpp 20 arg limit
 69 |                    bool              verbose,
 70 |                    std::vector<bool> validLib,
 71 |                    bool              ignoreNan,
 72 |                    int               generateSteps,
 73 |                    //bool            generateLibrary, // Rcpp 20 arg limit
 74 |                    bool              parameterList );
 75 | // CCM interface function; registered with R as "RtoCpp_CCM" in RcppEDMCommon.cpp.
 76 | r::List CCM_rcpp( std::string  pathIn,
 77 |                   std::string  dataFile,
 78 |                   r::DataFrame dataList,
 79 |                   //std::string  pathOut,     // Rcpp 20 arg limit
 80 |                   //std::string  predictFile, // Rcpp 20 arg limit
 81 |                   int          E,
 82 |                   int          Tp,
 83 |                   int          knn,
 84 |                   int          tau,
 85 |                   int          exclusionRadius,
 86 |                   std::string  columns,
 87 |                   std::string  target,
 88 |                   std::string  libSizes,
 89 |                   int          sample,
 90 |                   bool         random,
 91 |                   // bool      replacement,  // Rcpp 20 arg limit
 92 |                   unsigned     seed,
 93 |                   bool         embedded,
 94 |                   bool         includeData,
 95 |                   bool         parameterList,
 96 |                   bool         verbose );
 97 | // Multiview interface function; registered with R as "RtoCpp_Multiview" in RcppEDMCommon.cpp.
 98 | r::List Multiview_rcpp ( std::string  pathIn,
 99 |                          std::string  dataFile,
100 |                          r::DataFrame dataList,
101 |                          //std::string  pathOut,      // Rcpp 20 arg limit
102 |                          //std::string  predictFile,  // Rcpp 20 arg limit
103 |                          std::string  lib,
104 |                          std::string  pred,
105 |                          int          D,
106 |                          int          E,
107 |                          int          Tp,
108 |                          int          knn,
109 |                          int          tau, 
110 |                          std::string  columns,
111 |                          std::string  target,
112 |                          int          multiview,
113 |                          int          exclusionRadius, // spelling aligned with the other *_rcpp declarations
114 |                          bool         trainLib,
115 |                          bool         excludeTarget,
116 |                          bool         parameterList,
117 |                          bool         verbose,
118 |                          unsigned int numThreads );
119 | // Registered with R as "RtoCpp_PredictNonlinear"; the result is returned as an Rcpp::DataFrame.
120 | r::DataFrame PredictNonlinear_rcpp( std::string  pathIn,
121 |                                     std::string  dataFile,
122 |                                     r::DataFrame dataList,
123 |                                     std::string  pathOut,
124 |                                     std::string  predictFile,
125 |                                     std::string  lib,
126 |                                     std::string  pred,
127 |                                     std::string  theta, // passed as a string, unlike the double theta of SMap_rcpp()
128 |                                     int          E,
129 |                                     int          Tp,
130 |                                     int          knn,
131 |                                     int          tau,
132 |                                     int          exclusionRadius,
133 |                                     std::string  columns,
134 |                                     std::string  target,
135 |                                     bool         embedded,
136 |                                     bool         verbose,
137 |                                     std::vector<bool> validLib,
138 |                                     bool         ignoreNan,
139 |                                     unsigned     numThreads );
140 | // Registered with R as "RtoCpp_PredictInterval"; parameter order matches PredictIntervalArgs.
141 | r::DataFrame PredictInterval_rcpp( std::string  pathIn,
142 |                                    std::string  dataFile,
143 |                                    r::DataFrame dataList,
144 |                                    std::string  pathOut,
145 |                                    std::string  predictFile,
146 |                                    std::string  lib,
147 |                                    std::string  pred,
148 |                                    int          maxTp,
149 |                                    int          E,
150 |                                    int          tau,
151 |                                    int          exclusionRadius,
152 |                                    std::string  columns,
153 |                                    std::string  target,
154 |                                    bool         embedded,
155 |                                    bool         verbose,
156 |                                    std::vector<bool> validLib,
157 |                                    unsigned     numThreads );
158 | // Registered with R as "RtoCpp_EmbedDimension"; parameter order matches EmbedDimensionArgs.
159 | r::DataFrame EmbedDimension_rcpp( std::string  pathIn,
160 |                                   std::string  dataFile,
161 |                                   r::DataFrame dataList,
162 |                                   std::string  pathOut,
163 |                                   std::string  predictFile,
164 |                                   std::string  lib,
165 |                                   std::string  pred,
166 |                                   int          maxE,
167 |                                   int          Tp,
168 |                                   int          tau,
169 |                                   int          exclusionRadius,
170 |                                   std::string  columns,
171 |                                   std::string  target,
172 |                                   bool         embedded,
173 |                                   bool         verbose,
174 |                                   std::vector<bool> validLib,
175 |                                   unsigned     numThreads );
176 | // Registered with R as "RtoCpp_Embed"; parameter order matches EmbedArgs.
177 | r::DataFrame Embed_rcpp( std::string  path,
178 |                          std::string  dataFile,
179 |                          r::DataFrame df,
180 |                          int          E,
181 |                          int          tau,
182 |                          std::string  columns,
183 |                          bool         verbose );
184 | // Registered with R as "RtoCpp_MakeBlock"; takes only an in-memory data frame (no file arguments).
185 | r::DataFrame MakeBlock_rcpp( r::DataFrame             dataList,
186 |                              int                      E,
187 |                              int                      tau,
188 |                              std::vector<std::string> columnNames,
189 |                              bool                     deletePartial );
190 | // Registered with R as "RtoCpp_ComputeError" (without a formals list); takes two numeric vectors.
191 | r::List ComputeError_rcpp ( std::vector<double> vec1, 
192 |                             std::vector<double> vec2 );
193 | #endif
194 | 


--------------------------------------------------------------------------------
/src/RcppExports.cpp:
--------------------------------------------------------------------------------
 1 | // Generated by using Rcpp::compileAttributes() -> do not edit by hand
 2 | // Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393
 3 | 
 4 | #include <Rcpp.h>
 5 | 
 6 | using namespace Rcpp;
 7 | 
 8 | 
 9 | RcppExport SEXP _rcpp_module_boot_EDMInternal(); // boot symbol for the module named EDMInternal
10 | 
11 | static const R_CallMethodDef CallEntries[] = { // routine table handed to R_registerRoutines() below
12 |     {"_rcpp_module_boot_EDMInternal", (DL_FUNC) &_rcpp_module_boot_EDMInternal, 0},
13 |     {NULL, NULL, 0} // all-NULL entry terminates the table
14 | };
15 | 
16 | RcppExport void R_init_rEDM(DllInfo *dll) { // NOTE(review): presumably called by R when the rEDM shared library is loaded
17 |     R_registerRoutines(dll, NULL, CallEntries, NULL, NULL); // only the CallEntries slot is populated
18 |     R_useDynamicSymbols(dll, FALSE);
19 | }
20 | 


--------------------------------------------------------------------------------
/src/SMap.cpp:
--------------------------------------------------------------------------------
  1 | #include "RcppEDMCommon.h"
  2 | 
  3 | //----------------------------------------------------------
  4 | // SMap_rcpp: Rcpp interface that forwards its arguments to SMap()
  5 | //----------------------------------------------------------
  6 | r::List SMap_rcpp( std::string       pathIn,
  7 |                    std::string       dataFile,
  8 |                    r::DataFrame      dataFrame,
  9 |                    //std::string     pathOut,       // Rcpp 20 arg limit
 10 |                    //std::string     predictFile,   // Rcpp 20 arg limit
 11 |                    std::string       lib,
 12 |                    std::string       pred,
 13 |                    int               E,
 14 |                    int               Tp,
 15 |                    int               knn,
 16 |                    int               tau,
 17 |                    double            theta,
 18 |                    int               exclusionRadius,
 19 |                    std::string       columns,
 20 |                    std::string       target,
 21 |                    //std::string     smapCoefFile,  // Rcpp 20 arg limit
 22 |                    //std::string     smapSVFile,    // Rcpp 20 arg limit
 23 |                    bool              embedded,
 24 |                    //bool            const_predict, // Rcpp 20 arg limit
 25 |                    bool              verbose,
 26 |                    std::vector<bool> validLib,
 27 |                    bool              ignoreNan,
 28 |                    int               generateSteps,
 29 |                    //bool            generateLibrary, // Rcpp 20 arg limit
 30 |                    bool              parameterList ) {
 31 |     // Returns a list of "predictions", "coefficients", "singularValues" and, when parameterList is true, "parameters".
 32 |     SMapValues SM; // stays default-constructed when neither input source is supplied
 33 |     // Arguments that are not part of the R interface are fixed here:
 34 |     std::string pathOut("./");    // Rcpp 20 arg limit
 35 |     std::string predictFile("");  // Rcpp 20 arg limit
 36 |     std::string smapCoefFile(""); // Rcpp 20 arg limit
 37 |     std::string smapSVFile("");   // Rcpp 20 arg limit
 38 |     bool generateLibrary = false; // Rcpp 20 arg limit
 39 |     bool const_predict   = false; // Rcpp 20 arg limit
 40 | 
 41 |     if ( dataFile.size() ) {
 42 |         // dataFile specified, dispatch overloaded SMap, ignore dataFrame
 43 | 
 44 |         SM = SMap( pathIn,
 45 |                    dataFile,
 46 |                    pathOut,
 47 |                    predictFile,
 48 |                    lib,
 49 |                    pred,
 50 |                    E,
 51 |                    Tp,
 52 |                    knn,
 53 |                    tau,
 54 |                    theta,
 55 |                    exclusionRadius,
 56 |                    columns,
 57 |                    target,
 58 |                    smapCoefFile,
 59 |                    smapSVFile,
 60 |                    embedded,
 61 |                    const_predict,
 62 |                    verbose,
 63 |                    validLib,
 64 |                    ignoreNan,
 65 |                    generateSteps,
 66 |                    generateLibrary,
 67 |                    parameterList );
 68 |     }
 69 |     else if ( dataFrame.size() ) {
 70 |         DataFrame< double > dataFrame_ = DFToDataFrame( dataFrame );
 71 |         // no dataFile: dispatch the SMap overload that takes the converted DataFrame< double >
 72 |         SM = SMap( dataFrame_,
 73 |                    pathOut,
 74 |                    predictFile,
 75 |                    lib,
 76 |                    pred,
 77 |                    E,
 78 |                    Tp,
 79 |                    knn,
 80 |                    tau,
 81 |                    theta,
 82 |                    exclusionRadius,
 83 |                    columns,
 84 |                    target,
 85 |                    smapCoefFile,
 86 |                    smapSVFile,
 87 |                    embedded,
 88 |                    const_predict,
 89 |                    verbose,
 90 |                    validLib,
 91 |                    ignoreNan,
 92 |                    generateSteps,
 93 |                    generateLibrary,
 94 |                    parameterList );
 95 |     }
 96 |     else {
 97 |         Rcpp::warning( "SMap_rcpp(): Invalid input.\n" ); // warning only: execution continues with the default SM
 98 |     }
 99 | 
100 |     r::DataFrame df_pred = DataFrameToDF( SM.predictions    );
101 |     r::DataFrame df_coef = DataFrameToDF( SM.coefficients   );
102 |     r::DataFrame df_SV   = DataFrameToDF( SM.singularValues );
103 |     r::List output = r::List::create( r::Named("predictions")  = df_pred,
104 |                                       r::Named("coefficients") = df_coef,
105 |                                       r::Named("singularValues") = df_SV );
106 | 
107 |     if ( parameterList ) { // the parameter map is converted and attached only on request
108 |         r::List paramList = ParamMaptoList( SM.parameterMap );
109 |         output["parameters"] = paramList;
110 |     }
111 | 
112 |     return output;
113 | }
114 | 


--------------------------------------------------------------------------------
/src/Simplex.cpp:
--------------------------------------------------------------------------------
 1 | 
 2 | #include "RcppEDMCommon.h"
 3 | 
 4 | //-------------------------------------------------------------
 5 | // Simplex_rcpp: Rcpp interface that dispatches to Simplex()
 6 | //-------------------------------------------------------------
 7 | r::List Simplex_rcpp( std::string       pathIn,
 8 |                       std::string       dataFile,
 9 |                       r::DataFrame      dataFrame,
10 |                       std::string       pathOut,
11 |                       std::string       predictFile,
12 |                       std::string       lib,
13 |                       std::string       pred,
14 |                       int               E,
15 |                       int               Tp,
16 |                       int               knn,
17 |                       int               tau,
18 |                       int               exclusionRadius,
19 |                       std::string       columns,
20 |                       std::string       target,
21 |                       bool              embedded,
22 |                       //bool            const_predict,   // Rcpp 20 arg limit
23 |                       bool              verbose,
24 |                       std::vector<bool> validLib,
25 |                       int               generateSteps,
26 |                       //bool            generateLibrary, // Rcpp 20 arg limit
27 |                       bool              parameterList ) {
28 | 
29 |     // Inputs
30 |     //   pathIn, dataFile : file input; used whenever dataFile is non-empty
31 |     //   dataFrame        : R data.frame; used only when dataFile is empty
32 |     //   parameterList    : forwarded, and also enables the "parameters"
33 |     //                      entry of the returned list
34 |     //   remaining args   : forwarded unchanged to Simplex()
35 |     // Returns
36 |     //   list holding "predictions"; "parameters" is appended only when
37 |     //   parameterList is true
38 |     //
39 |     // const_predict and generateLibrary are not part of the R interface
40 |     // (Rcpp 20 arg limit, see signature); both are passed on as false.
41 |     bool generateLibrary = false;
42 |     bool const_predict   = false;
43 | 
44 |     // Left default-constructed if no input source is supplied.
45 |     SimplexValues simplexResult;
46 | 
47 |     if ( !dataFile.empty() ) {
48 |         // File input: the dataFrame argument is not consulted.
49 |         simplexResult = Simplex( pathIn, dataFile,
50 |                                  pathOut, predictFile,
51 |                                  lib, pred,
52 |                                  E, Tp, knn, tau,
53 |                                  exclusionRadius,
54 |                                  columns, target,
55 |                                  embedded, const_predict,
56 |                                  verbose, validLib,
57 |                                  generateSteps, generateLibrary,
58 |                                  parameterList );
59 |     }
60 |     else if ( dataFrame.size() != 0 ) {
61 |         // Data frame input: convert to DataFrame< double > first, then
62 |         // call the Simplex() overload that takes it in place of a file.
63 |         DataFrame< double > edmData = DFToDataFrame( dataFrame );
64 | 
65 |         simplexResult = Simplex( edmData,
66 |                                  pathOut, predictFile,
67 |                                  lib, pred,
68 |                                  E, Tp, knn, tau,
69 |                                  exclusionRadius,
70 |                                  columns, target,
71 |                                  embedded, const_predict,
72 |                                  verbose, validLib,
73 |                                  generateSteps, generateLibrary,
74 |                                  parameterList );
75 |     }
76 |     else {
77 |         // Only a warning is raised; execution continues below with the
78 |         // default-constructed result.
79 |         Rcpp::warning( "Simplex_rcpp(): Invalid input.\n" );
80 |     }
81 | 
82 |     // Build the list handed back to R; the predictions data frame is
83 |     // always present.
84 |     r::List output = r::List::create(
85 |         r::Named("predictions") = DataFrameToDF( simplexResult.predictions ) );
86 | 
87 |     // The parameter map is converted only on request.
88 |     if ( parameterList ) {
89 |         output["parameters"] = ParamMaptoList( simplexResult.parameterMap );
90 |     }
91 | 
92 |     return output;
93 | }
94 | 


--------------------------------------------------------------------------------
/src/cppEDM/lib/.gitignore:
--------------------------------------------------------------------------------
1 | !*
2 | 


--------------------------------------------------------------------------------
/src/cppEDM/lib/ignore.h:
--------------------------------------------------------------------------------
1 | // R devtools deletes empty dirs
2 | 


--------------------------------------------------------------------------------
/src/cppEDM/src/CCM.h:
--------------------------------------------------------------------------------
 1 | 
 2 | #ifndef EDM_CCM_H
 3 | #define EDM_CCM_H
 4 | 
 5 | #include <cstdlib>
 6 | #include <random>
 7 | #include <unordered_set>
 8 | #include <chrono>
 9 | #include <queue>
10 | #include <thread>
11 | 
12 | #include "EDM.h"
13 | #include "Simplex.h"
14 | 
15 | //----------------------------------------------------------------
16 | // CCMClass publicly derives from SimplexClass and declares the
17 | // CCM-specific projection methods (method bodies are not in this header)
18 | //----------------------------------------------------------------
19 | class CCMClass : public SimplexClass {
20 | public:
21 |     // CCM implements two Simplex objects for cross mapping
22 |     SimplexClass colToTarget; // NOTE(review): presumably the columns -> target mapping; confirm in CCM.cc
23 |     SimplexClass targetToCol; // NOTE(review): presumably the target -> columns mapping; confirm in CCM.cc
24 | 
25 |     // Cross mapping results are stored here
26 |     DataFrame< double > allLibStats; // CCM unified libsize, rho, RMSE, MAE
27 |     CrossMapValues      colToTargetValues; // CCM CrossMap() thread results
28 |     CrossMapValues      targetToColValues; // CCM CrossMap() thread results
29 | 
30 |     // Constructor: data and parameters are both taken by non-const reference
31 |     CCMClass ( DataFrame< double > & data,
32 |                Parameters          & parameters );
33 | 
34 |     // Method declarations (no arguments, no return values)
35 |     void Project(); // NOTE(review): SimplexClass may declare methods with these names; check Simplex.h for hiding/overriding
36 |     void SetupParameters();
37 |     void CCM();
38 |     void FormatOutput();
39 |     void WriteOutput();
40 | };
41 | #endif
42 | 


--------------------------------------------------------------------------------
/src/cppEDM/src/Common.cc:
--------------------------------------------------------------------------------
  1 | 
  2 | #include <algorithm>
  3 | #include <cstring>
  4 | 
  5 | #include "Common.h"
  6 | 
  7 | //---------------------------------------------------------------
  8 | // Ordering predicate for FindNeighbors() & CCMNeighbors(): true when x.first is less than y.first
  9 | //---------------------------------------------------------------
 10 | bool DistanceCompare( const std::pair<double, size_t> & x,
 11 |                       const std::pair<double, size_t> & y ) {
 12 |     return y.first > x.first; // only the first member takes part; the size_t member is ignored
 13 | }
 14 | 
 15 | //----------------------------------------------------------------
 16 | // 
 17 | //----------------------------------------------------------------
 18 | std::string ToLower( std::string str ) {
 19 | 
 20 |     std::string lowerStr( str );
 21 |     std::transform( lowerStr.begin(), lowerStr.end(),
 22 |                     lowerStr.begin(), ::tolower );
 23 | 
 24 |     return lowerStr;
 25 | }
 26 | 
 27 | //----------------------------------------------------------------
 28 | // SplitString
 29 | //
 30 | // Purpose: like Python string.split()
 31 | //
 32 | // Arguments: inString    : string to be split
 33 | //            delimeters  : string of delimeters
 34 | //
 35 | // Note:  A typical delimeter string: delimeters = " \t,\n;"
 36 | //           
 37 | // Return: vector of tokens
 38 | //----------------------------------------------------------------
 39 | std::vector<std::string> SplitString( std::string inString,
 40 |                                       std::string delimeters,
 41 |                                       bool removeWhitespace ) {
 42 |   size_t pos       = 0;
 43 |   size_t eos       = 0;
 44 |   size_t wordStart = 0;
 45 |   size_t wordEnd   = 0;
 46 | 
 47 |   bool foundStart = false;
 48 |   bool foundEnd   = false;
 49 | 
 50 |   std::vector<std::string> splitString;
 51 | 
 52 |   std::string word;
 53 | 
 54 |   eos = inString.length();
 55 | 
 56 |   while ( pos <= eos ) {
 57 |     if ( not foundStart ) {
 58 |       if ( delimeters.find( inString[pos] ) == delimeters.npos ) {
 59 | 	// this char (inString[pos]) is not a delimeter
 60 | 	wordStart  = pos;
 61 | 	foundStart = true;
 62 | 	pos++;
 63 | 	continue;
 64 |       }
 65 |     }
 66 |     if ( foundStart and not foundEnd ) {
 67 |       if ( delimeters.find( inString[pos] ) != delimeters.npos 
 68 | 	   or pos == eos ) {
 69 | 	// this char (inString[pos]) is a delimeter or
 70 | 	// at the end of the string
 71 | 	wordEnd  = pos;
 72 | 	foundEnd = true;
 73 |       }
 74 |     }
 75 |     if ( foundStart and foundEnd ) {
 76 |       foundStart = false;
 77 |       foundEnd   = false;
 78 | 
 79 |       word = inString.substr( wordStart, wordEnd - wordStart );
 80 | 
 81 |       if ( removeWhitespace ) {
 82 |           word.erase( std::remove_if( word.begin(), word.end(), ::isspace ),
 83 |                       word.end() );
 84 |       }
 85 | 
 86 |       splitString.push_back( word );
 87 |     }
 88 |     if ( pos == eos ) {
 89 |       break;
 90 |     }
 91 |     pos++;
 92 |   }
 93 | 
 94 |   return splitString;
 95 | }
 96 | 
 97 | //----------------------------------------------------------------
 98 | // 
 99 | //----------------------------------------------------------------
100 | VectorError ComputeError( std::valarray< double > obsIn,
101 |                           std::valarray< double > predIn ) {
102 | 
103 |     if ( obsIn.size() != predIn.size() ) {
104 |         std::stringstream errMsg;
105 |         errMsg << "ComputeError(): Observation size "
106 |                << obsIn.size() << " is not equal to prediction size "
107 |                << predIn.size();
108 |         throw std::runtime_error( errMsg.str() );
109 |     }
110 | 
111 |     // JP does find work on nan?  Since nan != nan, probably not...
112 |     // Use a slice to extract the overlapping subset of obsIn, PredIn
113 |     // We need to find the appropriate slice parameters
114 | 
115 |     // To try and be efficient, we first scan for nans, if none: stats
116 |     // If there are nans, copy from the overlapping values
117 |     bool nanObs  = false;
118 |     bool nanPred = false;
119 | 
120 |     for ( auto o : obsIn  ) { if ( std::isnan( o ) ) { nanObs = true; break; } }
121 |     for ( auto p : predIn ) { if ( std::isnan( p ) ) { nanPred= true; break; } }
122 |     
123 |     // vectors to hold data with no nans: reassigned below
124 |     std::valarray< double > obs;
125 |     std::valarray< double > pred;
126 |     size_t                  Nin = obsIn.size();
127 | 
128 |     if ( not nanObs and not nanPred ) {
129 |         obs  = std::valarray< double >( obsIn  );
130 |         pred = std::valarray< double >( predIn );
131 |     }
132 |     else {
133 |         // Handle nans
134 |         // Build concurrent vector of bool pairs : isnan on obsIn, predIn
135 |         std::vector< std::pair< bool, bool > > nanIndexPairs( Nin );
136 |         for ( size_t i = 0; i < Nin; i++ ) {
137 |             nanIndexPairs[ i ] = std::make_pair( std::isnan( obsIn[i]  ),
138 |                                                  std::isnan( predIn[i] ) );
139 |         }
140 |         // Find overlapping subset indices or use set::intersection
141 |         // Condense pairs into one boolean value in nonNanOverlap
142 |         size_t Nout = 0;
143 |         std::vector< bool > nonNanOverlap( Nin );
144 |         for ( size_t i = 0; i < Nin; i++ ) {
145 |             if ( not nanIndexPairs[ i ].first and
146 |                  not nanIndexPairs[ i ].second ) {
147 |                 nonNanOverlap[ i ] = true; // Both are not nan, valid index
148 |                 Nout++;
149 |             }
150 |             else {
151 |                 nonNanOverlap[ i ] = false;
152 |             }
153 |         }
154 | 
155 |         if ( Nout < 6 ) {
156 |             std::stringstream msg;
157 |             msg << "WARNING: ComputeError(): nan found. Not enough data"
158 |                 << " to compute error." << std::endl;
159 |             std::cout << msg.str();
160 | 
161 |             obs  = std::valarray< double >( 0., 1 ); // vector [0.] N = 1
162 |             pred = std::valarray< double >( 0., 1 ); // vector [0.] N = 1
163 |         }
164 |         else {
165 |             // Allocate the output arrays and fill with slices
166 |             obs  = std::valarray< double >( Nout );
167 |             pred = std::valarray< double >( Nout );
168 | 
169 |             // Copy valid values into obs & pred
170 |             size_t n = 0;
171 |             for ( size_t i = 0; i < nonNanOverlap.size(); i++ ) {
172 |                 if ( nonNanOverlap[ i ] ) {
173 |                     obs [ n ] = obsIn [ i ];
174 |                     pred[ n ] = predIn[ i ];
175 |                     n++;
176 |                 }
177 |             }
178 |         }
179 |     }
180 | 
181 |     size_t N = std::max( 1, (int) pred.size() );
182 |     std::valarray< double > two( 2, N ); // Vector of 2's for squaring
183 | 
184 |     double sumPred    = pred.sum();
185 |     double sumObs     = obs.sum();
186 |     double meanPred   = sumPred / N;
187 |     double meanObs    = sumObs  / N;
188 |     double sumSqrPred = pow( pred, two ).sum();
189 |     double sumSqrObs  = pow( obs,  two ).sum();
190 |     double sumErr     = abs( obs - pred ).sum();
191 |     double sumSqrErr  = pow( obs - pred, two ).sum();
192 |     double sumProd    = ( obs * pred ).sum();
193 | 
194 |     double rho; // Pearson correlation coefficient
195 | 
196 |     double denom = ( std::sqrt( ( sumSqrObs  - N * pow( meanObs,  2 ) ) ) *
197 |                      std::sqrt( ( sumSqrPred - N * pow( meanPred, 2 ) ) ) );
198 | 
199 |     if ( denom == 0 or std::isnan( denom ) ) {
200 |         rho = 0;
201 |     }
202 |     else {
203 |         rho = ( sumProd - N * meanObs * meanPred ) / denom;
204 |     }
205 | 
206 |     VectorError vectorError = VectorError();
207 | 
208 |     vectorError.RMSE = sqrt( sumSqrErr / N );
209 |     vectorError.MAE  = sumErr / N;
210 |     vectorError.rho  = rho;
211 | 
212 |     return vectorError;
213 | }
214 | 


--------------------------------------------------------------------------------
/src/cppEDM/src/Common.h:
--------------------------------------------------------------------------------
 1 | #ifndef EDM_COMMON_H
 2 | #define EDM_COMMON_H
 3 | 
 4 | #include <iostream>
 5 | #include <sstream>
 6 | #include <vector>
 7 | #include <valarray>
 8 | #include <map>
 9 | #include <forward_list>
10 | #include <cctype>
11 | #include <cmath>
12 | #include <functional> // std::ref 
13 | 
14 | #ifdef _MSC_VER
15 | #include <ciso646> // macro constants for MSVC C++ operators not in ISO646
16 | #endif
17 | 
18 | // Enumerations
19 | enum class Method         { None, Embed, Simplex, SMap, CCM, Multiview }; // None is the Parameters constructor default (Parameter.h)
20 | enum class DistanceMetric { Euclidean, Manhattan }; // type of the metric argument of Distance() (EDM_Neighbors.h)
21 | 
22 | #include "DataFrame.h"
23 | 
24 | //---------------------------------------------------------
25 | // Data structs
26 | //---------------------------------------------------------
27 | struct VectorError { // returned by ComputeError() (Common.cc)
28 |     double rho;  // Pearson correlation coefficient of observations vs predictions
29 |     double RMSE; // root mean squared error
30 |     double MAE;  // mean absolute error
31 | };
32 | 
33 | struct SimplexValues { // NOTE(review): presumably the result bundle of the Simplex API call - confirm in API.h
34 |     DataFrame< double >                  predictions;
35 |     std::map< std::string, std::string > parameterMap; // same type as Parameters::Map (Parameter.h)
36 | };
37 | 
38 | struct SMapValues { // NOTE(review): presumably the result bundle of the SMap API call - confirm in API.h
39 |     DataFrame< double >                  predictions;
40 |     DataFrame< double >                  coefficients;   // cf. EDM::coefficients, "SMap Output" in EDM.h
41 |     DataFrame< double >                  singularValues; // cf. EDM::singularValues, "SMap Output" in EDM.h
42 |     std::map< std::string, std::string > parameterMap;   // same type as Parameters::Map (Parameter.h)
43 | };
44 | 
45 | struct SVDValues { // NOTE(review): presumably returned by the SVD solver used for SMap - confirm in SMap.cc
46 |     std::valarray< double > coefficients;
47 |     std::valarray< double > singularValues;
48 | };
49 | 
50 | // Return object for CrossMap() worker function
51 | struct CrossMapValues {
52 |     DataFrame< double > LibStats;     // mean libsize, rho, RMSE, MAE
53 |     DataFrame< double > PredictStats; // each predict libsize, rho, RMSE, MAE
54 |     std::forward_list< DataFrame< double > > Predictions; // NOTE(review): presumably filled only when Parameters::includeData is true - confirm in CCM.cc
55 | };
56 | 
57 | // Return object for CCM() with two CrossMapValues
58 | struct CCMValues {
59 |     DataFrame< double > AllLibStats;  // unified mean libsize, rho, RMSE, MAE
60 |     CrossMapValues CrossMap1; // NOTE(review): presumably CCMClass::colToTargetValues - confirm order in CCM.cc
61 |     CrossMapValues CrossMap2; // NOTE(review): presumably CCMClass::targetToColValues - confirm order in CCM.cc
62 |     std::map< std::string, std::string > parameterMap; // same type as Parameters::Map (Parameter.h)
63 | };
64 | 
65 | struct MultiviewValues { // type of MultiviewClass::MVvalues, the "output structure" in Multiview.h
66 |     DataFrame< double > ComboRho;            // col_i..., rho, MAE, RMSE
67 |     DataFrame< double > Predictions;
68 |     // Vectors of column names <- col_i
69 |     std::map< std::string, std::vector< std::string > > ColumnNames;
70 |     std::map< std::string, std::string > parameterMap; // same type as Parameters::Map (Parameter.h)
71 | };
72 | 
73 | //-------------------------------------------------------------
74 | // Prototypes
75 | //-------------------------------------------------------------
76 | std::string ToLower( std::string str ); // Common.cc : lower-case copy of str
77 | 
78 | std::vector<std::string> SplitString( std::string inString, // Common.cc : tokens of inString
79 |                                       std::string delimeters, // each char is a delimeter
80 |                                       bool        removeWhitespace ); // remove whitespace from each token
81 | 
82 | VectorError ComputeError( std::valarray< double > obs, // Common.cc : throws std::runtime_error on size mismatch
83 |                           std::valarray< double > pred ); // nan rows are excluded pairwise
84 | 
85 | std::string increment_datetime_str( std::string datetime1, // NOTE(review): no definition with this name is shown;
86 |                                     std::string datetime2, // DateTime.h/.cc define IncrementDatetime() with the same
87 |                                     int         tp ); // parameters - likely a stale prototype, confirm before use
88 | #endif
89 | 


--------------------------------------------------------------------------------
/src/cppEDM/src/DateTime.cc:
--------------------------------------------------------------------------------
  1 | 
  2 | #include "DateTime.h"
  3 | 
  4 | //---------------------------------------------------------------------
  5 | //  Provide some utility for parsing datetime std::strings
  6 | //  to add some tp increment to a datetime std::string past the
  7 | //  given range of the time column.
  8 | //
  9 | //  Fractional seconds are not supported by strptime/strftime or IncrementDatetime
 10 | //
 11 | //  TIME FORMATS supported:
 12 | //    YYYY-MM-DD
 13 | //    HH:MM:SS
 14 | //    YYYY-MM-DD HH:MM:SS  (2019-06-30 10:26:10)
 15 | //    YYYY-MM-DDTHH:MM:SS  (2019-06-30T10:26:10)
 16 | //    
 17 | //---------------------------------------------------------------------
 18 | // Time formats : strftime() format strings stored in DatetimeInfo::format
 19 | std::string YMD       ("%Y-%m-%d");          // YYYY-MM-DD
 20 | std::string HMS       ("%H:%M:%S");          // HH:MM:SS
 21 | std::string YMD_HMS   ("%Y-%m-%d %H:%M:%S"); // YYYY-MM-DD HH:MM:SS
 22 | std::string YMD_T_HMS ("%Y-%m-%dT%H:%M:%S"); // YYYY-MM-DDTHH:MM:SS
 23 | 
 24 | //----------------------------------------------------------------------
 25 | // Parse a date or time string into a tm obj
 26 | // tm             : tm object to populate
 27 | // datetime_str   : date or time string
 28 | // isDate         : true if this is a date object
 29 | //----------------------------------------------------------------------
 30 | void ParseDatetimeString( struct tm & tmStruct,
 31 |                           std::string datetime,
 32 |                           bool        isDate ) {
 33 |     // parsing delimeter is '-' for date, ':' for time
 34 |     char delimeter = isDate ? '-' : ':';
 35 | 
 36 |     // parse datetime into tokens
 37 |     std::stringstream ss( datetime );
 38 |     std::string token;
 39 |     std::vector<std::string> tokens;
 40 | 
 41 |     while( getline( ss, token, delimeter ) ) {
 42 |         tokens.push_back( token );
 43 |     }
 44 | 
 45 |     // populate the tmStruct
 46 |     if ( isDate ) {
 47 |         tmStruct.tm_mday = stod(tokens[2]);
 48 |         tmStruct.tm_mon  = stod(tokens[1]) - ISO_StartMonth;
 49 |         tmStruct.tm_year = stod(tokens[0]) - ISO_StartYear;
 50 |     }
 51 |     else {
 52 |         tmStruct.tm_sec  = stod(tokens[2]);
 53 |         tmStruct.tm_min  = stod(tokens[1]);
 54 |         tmStruct.tm_hour = stod(tokens[0]);
 55 |     }
 56 | 
 57 |     int err = mktime( &tmStruct );
 58 | 
 59 |     if ( err < 0 ) {
 60 |         std::stringstream errMsg;
 61 |         errMsg << "ParseDatetimeString() mktime failed on " << datetime
 62 |                << " err = " << err << std::endl;
 63 |         throw std::runtime_error( errMsg.str() );
 64 |     }
 65 | }
 66 | 
 67 | //----------------------------------------------------------------------
 68 | // Parse the datetime into a DatetimeInfo struct
 69 | // datetime :  datetime to parse
 70 | // return   :  DatetimeInfo struct
 71 | //----------------------------------------------------------------------
 72 | DatetimeInfo ParseDatetime( std::string datetime ) {
 73 | 
 74 |     DatetimeInfo output;
 75 | 
 76 |     // Detecting the format is based on delimeters to avoid regex:
 77 |     //    [ '-' and '-' ] YMD
 78 |     //    [ ':' and ':' ] HMS
 79 |     //    [ '-' and '-' and ':' and ':' ] YMD_HMS
 80 |     //    [ '-' and '-' and ':' and ':' and 'T' ] YMD_T_HMS
 81 | 
 82 |     size_t NHyphen = std::count( datetime.begin(), datetime.end(), '-' );
 83 |     size_t nColon  = std::count( datetime.begin(), datetime.end(), ':' );
 84 |     size_t nT      = std::count( datetime.begin(), datetime.end(), 'T' );
 85 |     
 86 |     if ( NHyphen == 2 and nColon == 0 ) {
 87 |         output.format = YMD;
 88 |         ParseDatetimeString( output.time, datetime, true );
 89 |     }
 90 |     else if ( NHyphen == 0 and nColon == 2 ) {
 91 |         output.format = HMS;
 92 |         ParseDatetimeString( output.time, datetime, false );
 93 |     }
 94 |     else if ( NHyphen == 2 and nColon == 2 and nT == 0 ) {
 95 |         output.format = YMD_HMS; 
 96 |         // split by " ", then split first by - second by :
 97 |         int delim_pos    = datetime.find(' ');
 98 |         std::string date = datetime.substr(0, delim_pos);
 99 |         std::string time = datetime.substr(delim_pos+1, datetime.size());
100 |         ParseDatetimeString( output.time, date, true );
101 |         ParseDatetimeString( output.time, time, false );
102 |     }
103 |     else if ( NHyphen == 2 and nColon == 2 and nT == 1 ) {
104 |         output.format = YMD_T_HMS; 
105 |         // split by T, then split first by - second by :
106 |         int delim_pos    = datetime.find('T');
107 |         std::string date = datetime.substr(0, delim_pos);
108 |         std::string time = datetime.substr(delim_pos+1, datetime.size());
109 |         ParseDatetimeString( output.time, date, true );           
110 |         ParseDatetimeString( output.time, time, false );        
111 |     }
112 |     else {
113 |         output.unrecognized = true;
114 |     }
115 |     return output; 
116 | }
117 | 
118 | //----------------------------------------------------------------------
119 | // Generate a new datetime + delta past the range of given
120 | //----------------------------------------------------------------------
121 | //
122 | // @params datetime1/2   :  the two last time std::strings
123 | //                          to compute the delta unit
124 | //                          we increment from datetime2
125 | // @param tp             :  the amount to increment the time diff by
126 | // @return               :  the new incremented timestd::string
127 | //----------------------------------------------------------------------
128 | std::string IncrementDatetime( std::string datetime1, 
129 |                                std::string datetime2, int tp ) {
130 |     // parse datetimes
131 |     DatetimeInfo dtinfo1 = ParseDatetime( datetime1 );
132 |     DatetimeInfo dtinfo2 = ParseDatetime( datetime2 );
133 | 
134 |     if ( dtinfo1.unrecognized or dtinfo2.unrecognized ) {
135 |         // return empty string
136 |         return std::string();
137 |     }
138 | 
139 |     // get the delta unit between two datetimes in the time col
140 |     size_t seconds_diff = difftime( mktime( &dtinfo2.time ),
141 |                                     mktime( &dtinfo1.time ) );
142 | 
143 |     if ( seconds_diff == 0 ) {
144 |         seconds_diff = 1; //if millisec, want some update
145 |     }
146 | 
147 |     // increment the time and format
148 |     dtinfo2.time.tm_sec += tp * seconds_diff;
149 | 
150 |     int err = mktime( &dtinfo2.time );
151 | 
152 |     if ( err < 0 ) {
153 |         std::stringstream errMsg;
154 |         errMsg << "increment_datetime_str() mktime failed on "
155 |                << datetime2;
156 |         throw( errMsg.str() );
157 |     }
158 | 
159 |     // format incremented time
160 |     char tmp_buffer [ BUFSIZ ];
161 |     
162 |     size_t n_char = strftime( tmp_buffer, BUFSIZ,
163 |                               dtinfo2.format.c_str(), &dtinfo2.time );
164 |     if ( n_char == 0 ) {
165 |         std::stringstream errMsg;
166 |         errMsg << "increment_datetime_str(): Failed on "
167 |                << datetime1 << ", " << datetime2 << " tp = " << tp;
168 |         throw( errMsg.str() );
169 |     }
170 | 
171 |     return std::string( tmp_buffer );
172 | }
173 | 


--------------------------------------------------------------------------------
/src/cppEDM/src/DateTime.h:
--------------------------------------------------------------------------------
 1 | #ifndef DATETIMEUTIL_H
 2 | #define DATETIMEUTIL_H
 3 | 
 4 | #include <sstream>
 5 | #include <vector>
 6 | #include <algorithm> // std::count
 7 | #include <time.h>    // mktime
 8 | 
 9 | const int ISO_StartYear  = 1900; // subtracted from the parsed year to give tm_year
10 | const int ISO_StartMonth = 1;    // subtracted from the parsed month to give tm_mon
11 | 
12 | struct DatetimeInfo { // result of ParseDatetime()
13 |     struct      tm time = {}; // zero-initialized, then set in the constructor
14 |     std::string format;       // one of the format strings defined in DateTime.cc
15 |     bool        unrecognized = false; // set true by ParseDatetime() if no format matches
16 | 
17 |     // Constructor : setup time struct
18 |     DatetimeInfo() {
19 |         time.tm_sec   = 0;
20 |         time.tm_min   = 0;
21 |         time.tm_hour  = 0;
22 |         time.tm_mday  = 1;
23 |         time.tm_mon   = 0;
24 |         time.tm_year  = 70; // Minimal valid Unix time 1900 + 70
25 |         time.tm_wday  = 0;
26 |         time.tm_yday  = 0;
27 |         time.tm_isdst = 0;
28 |     }
29 | };
30 | 
31 | // Prototypes : all defined in DateTime.cc
32 | void ParseDatetimeString( struct tm & tmStruct, // tm object to populate
33 |                           std::string datetime, // date or time string
34 |                           bool        isDate ); // true: split on '-', false: split on ':'
35 | 
36 | DatetimeInfo ParseDatetime( std::string datetime ); // sets unrecognized if no supported format matches
37 | 
38 | std::string IncrementDatetime( std::string datetime1, // datetime1/2 give the delta unit
39 |                                std::string datetime2, // the increment starts from datetime2
40 |                                int         tp ); // multiplies the delta; returns empty string if unrecognized
41 | #endif
42 | 


--------------------------------------------------------------------------------
/src/cppEDM/src/EDM.cc:
--------------------------------------------------------------------------------
 1 | 
 2 | #include "EDM.h"
 3 | 
 4 | // Declared in API.h
 5 | extern DataFrame< double > MakeBlock( DataFrame< double > &, int, int, // EmbedData() passes: data, parameters.E, parameters.tau
 6 |                                       std::vector< std::string >, bool ); // column names, deletePartial
 7 | 
 8 | //----------------------------------------------------------------
 9 | // Constructors
10 | //----------------------------------------------------------------
11 | EDM::EDM ( DataFrame< double > & data,
12 |            Parameters          & parameters ) :
13 |     data( data ), anyTies( false ), parameters( parameters ) {} // members data and parameters are copy-constructed from the arguments
14 | 
15 | //----------------------------------------------------------------
16 | // Project : Implemented in sub-class
17 | //----------------------------------------------------------------
18 | void EDM::Project () {} // intentionally empty; declared non-virtual in EDM.h, so there is no dynamic dispatch to a sub-class version
19 | 
20 | //----------------------------------------------------------------
21 | // Generate : Implemented in sub-class
22 | //----------------------------------------------------------------
23 | void EDM::Generate () {} // intentionally empty; declared non-virtual in EDM.h, so there is no dynamic dispatch to a sub-class version
24 | 
25 | //----------------------------------------------------------------
26 | // Set target (library) vector
27 | //----------------------------------------------------------------
28 | void EDM::GetTarget() {
29 |     if ( parameters.targetNames.size() ) {
30 |         target = data.VectorColumnName( parameters.targetNames.front() );
31 |     }
32 |     else {
33 |         // Default to first column
34 |         target = data.Column( 0 );
35 |     }
36 | }
37 | 
38 | //----------------------------------------------------------------
39 | // Implemented as a wrapper for API MakeBlock()
40 | // Note: dataFrame must have the columnNameToIndex map
41 | //
42 | // NOTE: The returned data block does NOT have the time column
43 | //----------------------------------------------------------------
44 | void EDM::EmbedData() {
45 | 
46 |     if ( data.ColumnNameToIndex().empty() ) {
47 |         throw std::runtime_error("EDM::Embed(): columnNameIndex empty.\n");
48 |     }
49 | 
50 |     // If columns provided, validate they are in dataFrameIn
51 |     for ( auto colName : parameters.columnNames ) {
52 |         auto ci = find( data.ColumnNames().begin(),
53 |                         data.ColumnNames().end(), colName );
54 | 
55 |         if ( ci == data.ColumnNames().end() ) {
56 |             std::stringstream errMsg;
57 |             errMsg << "EDM::Embed(): Failed to find column "
58 |                    << colName << " in dataFrame with columns: [ ";
59 |             for ( auto col : data.ColumnNames() ) {
60 |                 errMsg << col << " ";
61 |             } errMsg << " ]\n";
62 |             throw std::runtime_error( errMsg.str() );    
63 |         }
64 |     }
65 | 
66 |     // Get column names for MakeBlock
67 |     std::vector< std::string > colNames;
68 |     if ( parameters.columnNames.size() ) {
69 |         // column names are strings
70 |         colNames = parameters.columnNames;
71 |     }
72 |     else {
73 |         throw std::runtime_error( "EDM::Embed(): columnNames are empty.\n" );
74 |     }
75 | 
76 |     // Extract the specified columns (sub)DataFrame from dataFrameIn
77 |     DataFrame< double > dataFrame =
78 |         data.DataFrameFromColumnNames( parameters.columnNames );
79 | 
80 |     // deletePartial = false
81 |     embedding = MakeBlock( std::ref( dataFrame ), parameters.E,
82 |                            parameters.tau, colNames, false );
83 | }
84 | 


--------------------------------------------------------------------------------
/src/cppEDM/src/EDM.h:
--------------------------------------------------------------------------------
 1 | 
 2 | #ifndef EDM_H
 3 | #define EDM_H
 4 | 
 5 | #include <mutex>
 6 | #include "Common.h"
 7 | #include "Parameter.h"
 8 | 
 9 | //---------------------------------------------------------------------
10 | // EDM Class
11 | // Central data object and base class for EDM algorithms.
12 | // Specific algorithm projection methods defined in sub-classes.
13 | //
14 | // NOTE JP: Tony recommends to explicitly define special members:
15 | //          http://www.cplusplus.com/doc/tutorial/classes2/
16 | //---------------------------------------------------------------------
17 | class EDM {
18 | 
19 | public: // No need for private or protected
20 |     DataFrame< double > data;      // copy-constructed from the constructor's data argument
21 |     DataFrame< double > embedding; // assigned by EmbedData() from MakeBlock()
22 | 
23 |     DataFrame< size_t > knn_neighbors; // N pred rows, knn columns; sorted
24 |     DataFrame< double > knn_distances; // N pred rows, knn columns; sorted
25 | 
26 |     DataFrame< size_t > allLibRows;   // 1 row,       N lib columns
27 |     DataFrame< double > allDistances; // N pred rows  N lib columns
28 | 
29 |     DataFrame< double > projection;     // Simplex & SMap Output
30 |     DataFrame< double > coefficients;   // SMap Output
31 |     DataFrame< double > singularValues; // SMap Output
32 | 
33 |     // Project() vectors to populate projection DataFrame in FormatData()
34 |     // JP Can we do away with these and write directly to projection (+Tp)?
35 |     std::valarray< double > predictions;
36 |     std::valarray< double > const_predictions;
37 |     std::valarray< double > variance;
38 | 
39 |     // Simplex :: Prediction row accounting of library neighbor ties
40 |     bool                  anyTies;       // initialized to false by the constructor
41 |     std::vector< bool >   ties;          // true/false each prediction row
42 |     std::vector< size_t > tieFirstIndex; // index in knn of first tie
43 |     std::vector< std::vector< std::pair< double, size_t > > > tiePairs;
44 | 
45 |     // SMap :: Each prediction row can have variable knn
46 |     std::vector< size_t > knnSmap;
47 | 
48 |     std::valarray< double >    target;  // entire record; assigned by GetTarget()
49 |     std::vector< std::string > allTime; // entire record
50 | 
51 |     Parameters parameters; // copy-constructed from the constructor's parameters argument
52 | 
53 |     // Constructor declaration
54 |     EDM ( DataFrame< double > & data, Parameters & parameters );
55 | 
56 |     // Method declarations
57 |     // EDM.cc
58 |     void GetTarget(); // first of parameters.targetNames, else data column 0
59 |     void EmbedData(); // throws std::runtime_error on missing or unknown column names
60 |     void Project();  // Simplex.cc : SMap.cc : CCM.cc : Multiview.cc ; empty body in EDM.cc, not virtual
61 |     void Generate(); // Simplex.cc : SMap.cc ; empty body in EDM.cc, not virtual
62 | 
63 |     // EDM_Neighbors.cc
64 |     void PrepareEmbedding( bool checkDataRows = true );
65 |     void Distances();
66 |     void FindNeighbors();
67 | 
68 |     // EDM_Formatting.cc
69 |     void CheckDataRows( std::string call );
70 |     void CheckValidLib( std::string call );
71 |     void FormatOutput();
72 |     void FillTimes( std::vector< std::string > & timeOut );
73 | 
74 |     void PrintDataFrameIn(); // EDM_Neighbors.cc #ifdef DEBUG_ALL
75 |     void PrintNeighbors();   // EDM_Neighbors.cc #ifdef DEBUG_ALL
76 | };
77 | #endif
78 | 


--------------------------------------------------------------------------------
/src/cppEDM/src/EDM_Neighbors.h:
--------------------------------------------------------------------------------
 1 | #ifndef EDM_NEIGHBORS_H
 2 | #define EDM_NEIGHBORS_H
 3 | 
 4 | #include "EDM.h"
 5 | 
 6 | namespace EDM_Distance {
 7 |     // Define the initial maximum distance for neigbors
 8 |     // DBL_MAX is a Macro equivalent to: std::numeric_limits<double>::max()
 9 |     double DistanceMax = std::numeric_limits<double>::max();
10 | }
11 | 
12 | // Prototypes
13 | double Distance( const std::valarray<double> &v1, // NOTE(review): definition not shown here; presumably in EDM_Neighbors.cc
14 |                  const std::valarray<double> &v2,
15 |                  DistanceMetric metric ); // DistanceMetric::Euclidean or ::Manhattan (Common.h)
16 | 
17 | bool DistanceCompare( const std::pair<double, size_t> &x, // defined in Common.cc :
18 |                       const std::pair<double, size_t> &y ); // returns x.first < y.first
19 | #endif
20 | 


--------------------------------------------------------------------------------
/src/cppEDM/src/Multiview.h:
--------------------------------------------------------------------------------
 1 | 
 2 | #ifndef EDM_MULTIVIEW_H
 3 | #define EDM_MULTIVIEW_H
 4 | 
 5 | #include <thread>
 6 | #include <atomic>
 7 | #include <mutex>
 8 | #include <queue>
 9 | 
10 | #include "EDM.h"
11 | #include "Simplex.h"
12 | 
13 | //----------------------------------------------------------------
14 | // Multiview class inherits from Simplex class and defines
15 | // Multiview-specific projection methods
16 | //----------------------------------------------------------------
17 | class MultiviewClass : public SimplexClass {
18 | public:
19 |     std::string          predictOutputFileIn; // copy from parameters
20 |     std::vector<size_t>  predictionIn;        // copy from parameters
21 | 
22 |     struct MultiviewValues MVvalues; // output structure
23 | 
24 |     // Constructor
25 |     MultiviewClass ( DataFrame< double > & data,
26 |                      Parameters          & parameters );
27 | 
28 |     // Method declarations
29 |     void Project( unsigned maxThreads ); // different signature from EDM::Project() in EDM.h, which takes no arguments
30 |     void CheckParameters();
31 |     void SetupParameters();
32 |     void Multiview( unsigned maxThreads ); // NOTE(review): maxThreads presumably caps the worker thread count - confirm in Multiview.cc
33 | };
34 | #endif
35 | 


--------------------------------------------------------------------------------
/src/cppEDM/src/Parameter.h:
--------------------------------------------------------------------------------
  1 | #ifndef PARAMETER_H
  2 | #define PARAMETER_H
  3 | 
  4 | #include <algorithm>
  5 | #include <numeric>
  6 | 
  7 | #include "Common.h"
  8 | #include "Version.h"
  9 | 
 10 | class ParameterContainer; // forward declaration
 11 | 
 12 | //------------------------------------------------------------
 13 | //
 14 | //------------------------------------------------------------
 15 | class Parameters {
 16 | 
 17 | public: // No need for protected or private
 18 |     Method      method;             // Simplex or SMap enum class
 19 | 
 20 |     std::string pathIn;             // path for input dataFile
 21 |     std::string dataFile;           // input dataFile (assumed .csv)
 22 |     std::string pathOut;            // path for output files
 23 |     std::string predictOutputFile;  // path for output file
 24 | 
 25 |     std::string lib_str;            // multi argument parameters for library
 26 |     std::string pred_str;           // multi argument parameters for prediction
 27 | 
 28 |     std::vector<size_t> library;    // library row indices
 29 |     std::vector<size_t> prediction; // prediction row indices
 30 | 
 31 |     int         E;                  // dimension
 32 |     int         Tp;                 // prediction interval
 33 |     int         knn;                // k nearest neighbors
 34 |     int         tau;                // embedding delay
 35 |     double      theta;              // S-Map localization
 36 |     int         exclusionRadius;    // temporal rows to ignore in predict
 37 | 
 38 |     std::string                columns_str; // multi argument parameters
 39 |     std::string                target_str;  // argument parameter(s)
 40 |     std::vector< std::string > columnNames; // state-space column name(s)
 41 |     std::vector< std::string > targetNames; // target column name(s)
 42 | 
 43 |     bool        embedded;          // true if data is already embedded
 44 |     bool        const_predict;     // true to compute non "predictor" stats
 45 |     bool        verbose;
 46 | 
 47 |     std::vector<bool> validLib;    // maps row to valid library flag
 48 |     bool        ignoreNan;         // SMap create new library to ignore nan
 49 | 
 50 |     int         generateSteps;     // Number of timesteps to feedback generate
 51 |     bool        generateLibrary;   // Increment library with generated data
 52 | 
 53 |     bool        parameterList;     // Add parameter list to output
 54 | 
 55 |     std::string SmapCoefFile;      // path for output file
 56 |     std::string SmapSVFile;        // path for output file
 57 |     std::string blockOutputFile;   // Embed() output file
 58 | 
 59 |     int         multiviewEnsemble; // Number of ensembles in multiview
 60 |     int         multiviewD;        // Multiview state-space dimension
 61 |     bool        multiviewTrainLib; // Use prediction as training library
 62 |     bool        multiviewExcludeTarget; // Exclude target from eval combos
 63 | 
 64 |     std::string libSizes_str;
 65 |     std::vector< size_t > librarySizes;// CCM library sizes to evaluate
 66 |     int         subSamples;       // CCM number of samples to draw
 67 |     bool        randomLib;        // CCM randomly select subsets if true
 68 |     bool        replacement;      // CCM random select with replacement if true
 69 |     unsigned    seed;             // CCM random selection RNG seed
 70 |     bool        includeData;      // CCM include all simplex projection results
 71 | 
 72 |     bool        validated;
 73 | 
 74 |     Version version; // Version object, instantiated in constructor
 75 |     
 76 |     std::map< std::string, std::string > Map;
 77 | 
 78 |     friend std::ostream& operator<<( std::ostream & os, Parameters & params );
 79 | 
 80 |     // Constructor declaration and default arguments
 81 |     Parameters(
 82 |         Method      method            = Method::None,
 83 |         std::string pathIn            = "./",
 84 |         std::string dataFile          = "",
 85 |         std::string pathOut           = "./",
 86 |         std::string predictOutputFile = "",
 87 | 
 88 |         std::string lib_str           = "",
 89 |         std::string pred_str          = "",
 90 | 
 91 |         int         E                 = 0,
 92 |         int         Tp                = 0,
 93 |         int         knn               = 0,
 94 |         int         tau               = -1,
 95 |         double      theta             = 0,
 96 |         int         exclusionRadius   = 0,
 97 | 
 98 |         std::string columns_str       = "",
 99 |         std::string target_str        = "",
100 | 
101 |         bool        embedded          = false,
102 |         bool        const_predict     = false,
103 |         bool        verbose           = false,
104 | 
105 |         std::vector<bool> validLib    = std::vector<bool>(),
106 |         bool              ignoreNan   = true,
107 | 
108 |         int         generateSteps     = 0,
109 |         bool        generateLibrary   = false,
110 |         bool        parameterList     = false,
111 | 
112 |         std::string SmapCoefFile      = "",
113 |         std::string SmapSVFile        = "",
114 |         std::string blockOutputFile   = "",        
115 | 
116 |         int         multiviewEnsemble      = 0,
117 |         int         multiviewD             = 0,
118 |         bool        multiviewTrainLib      = true,
119 |         bool        multiviewExcludeTarget = false,
120 | 
121 |         std::string libSizes_str      = "",
122 |         int         subSamples        = 0,
123 |         bool        randomLib         = true,
124 |         bool        replacement       = false,
125 |         unsigned    seed              = 0,  // 0: Generate random seed in CCM
126 |         bool        includeData       = false
127 |     );
128 | 
129 |     ~Parameters();
130 | 
131 |     void Validate();      // Parameter validation and index offsets
132 |     void AdjustLibPred(); // Adjust for embedding
133 |     void FillMap();
134 |     void PrintIndices( std::vector< size_t > library,
135 |                        std::vector< size_t > prediction );
136 | };
137 | #endif
138 | 


--------------------------------------------------------------------------------
/src/cppEDM/src/SMap.h:
--------------------------------------------------------------------------------
 1 | 
 2 | #ifndef EDM_SMAP_H
 3 | #define EDM_SMAP_H
 4 | 
 5 | #include "EDM.h"
 6 | 
 7 | // Prototype & alias of solver function pointer
 8 | using Solver = SVDValues (*) ( DataFrame     < double >,
 9 |                                std::valarray < double > );
10 | 
11 | // Prototype declaration of general functions
12 | SVDValues SVD( DataFrame < double > A, std::valarray< double > B );
13 | 
14 | //>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
15 | // Do not use LAPACK on Windog: use scikit-learn LinearRegression
16 | //>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
17 | #if !defined _WIN32 || defined USING_R
18 | SVDValues Lapack_SVD( int     m, // number of rows in matrix
19 |                       int     n, // number of columns in matrix
20 |                       double *a, // pointer to top-left corner
21 |                       double *b,
22 |                       double  rcond );
23 | #endif
24 | 
25 | //----------------------------------------------------------------
26 | // SMap class inherits from EDM class and defines
27 | // SMap-specific projection & output methods
28 | //----------------------------------------------------------------
 29 | class SMapClass : public EDM {
 30 |     // Generate, Project and SMap each take a Solver function pointer (alias above).
 31 | public:
 32 |     // Constructor: data and parameters are both taken by non-const reference
 33 |     SMapClass ( DataFrame<double> & data,
 34 |                 Parameters        & parameters );
 35 | 
 36 |     // Method declarations (bodies are defined outside this header)
 37 |     void Generate( Solver ); // presumably feedback generation (cf. Parameters::generateSteps) - TODO confirm
 38 |     void Project ( Solver ); // presumably the top-level projection entry point - TODO confirm
 39 |     void SMap    ( Solver ); // presumably the S-Map computation itself - TODO confirm
 40 |     void RecordNan( size_t row, size_t N_SingularValues ); // presumably stores nan results for one row - TODO confirm
 41 |     void WriteOutput(); // presumably writes the configured output files - TODO confirm
 42 | };
43 | #endif
44 | 


--------------------------------------------------------------------------------
/src/cppEDM/src/Simplex.h:
--------------------------------------------------------------------------------
 1 | 
 2 | #ifndef EDM_SIMPLEX_H
 3 | #define EDM_SIMPLEX_H
 4 | 
 5 | #include "EDM.h"
 6 | 
 7 | //----------------------------------------------------------------
 8 | // Simplex class inherits from EDM class and defines
 9 | // Simplex-specific projection methods
10 | //----------------------------------------------------------------
 11 | class SimplexClass : public EDM {
 12 | public:
 13 |     // Constructor: data and parameters are both taken by non-const reference
 14 |     SimplexClass ( DataFrame<double> & data,
 15 |                    Parameters        & parameters );
 16 | 
 17 |     // Method declarations (bodies are defined outside this header)
 18 |     void Generate();    // presumably feedback generation (cf. Parameters::generateSteps) - TODO confirm
 19 |     void Project();     // presumably the top-level projection entry point - TODO confirm
 20 |     void Simplex();     // presumably the simplex computation itself - TODO confirm
 21 |     void WriteOutput(); // presumably writes the configured output file - TODO confirm
 22 | };
23 | #endif
24 | 


--------------------------------------------------------------------------------
/src/cppEDM/src/Version.h:
--------------------------------------------------------------------------------
 1 | #ifndef VERSION_H
 2 | #define VERSION_H
 3 | 
 4 | #include <string>
 5 | #include <iostream>
 6 | 
 7 | //------------------------------------------------------------
 8 | // Instantiated in Parameters() constructor
 9 | //------------------------------------------------------------
 10 | class Version {
 11 | public:
 12 |     int         Major; // first  number of the printed "Major.Minor.Micro" string
 13 |     int         Minor; // second number
 14 |     int         Micro; // third  number
 15 |     std::string Date;  // text printed after the version number
 16 |     // Store the four arguments unchanged; parameter names shadow the members.
 17 |     Version( int Major, int Minor, int Micro, std::string Date ) :
 18 |         Major( Major  ), Minor( Minor ), Micro( Micro ), Date ( Date  ) {};
 19 |     // Print "cppEDM Version <Major>.<Minor>.<Micro> <Date>" to std::cout, ending with std::endl.
 20 |     void ShowVersion() {
 21 |         std::cout << "cppEDM Version " << Major << "."
 22 |                   << Minor << "." << Micro << " " << Date << std::endl;
 23 |     }
 24 | };
25 | #endif
26 | 


--------------------------------------------------------------------------------
/src/cppEDM/src/makefile:
--------------------------------------------------------------------------------
 1 | 
 2 | .PHONY: all clean distclean depend 
 3 | # Builds the static library libEDM.a from the .cc sources listed in SRCS.
 4 | ## CXX defined in Makevars from R environment variables, don't define CC
 5 | ## CC = g++
 6 | 
 7 | ## CXXFLAGS are defined in Makevars from R environment variables
 8 | ## JP: Temporary (?) hack for R clang-UBSAN issue in EDM_Neighbors
 9 | ##     to not initialise size_t knnLibRows with nanl(), is to define
10 | ##     USING_R. Note: USING_R is an R-defined macro.
11 | CFLAGS = $(CXXFLAGS) -DCCM_THREADED -DUSING_R
12 | # HEADERS is informational only: no rule in this makefile references it.
13 | HEADERS = API.h CCM.h Common.h DataFrame.h DateTime.h EDM.h EDM_Neighbors.h\
14 |           Multiview.h Parameter.h Simplex.h SMap.h Version.h
15 | 
16 | SRCS = API.cc CCM.cc Common.cc DateTime.cc EDM.cc EDM_Formatting.cc\
17 |        EDM_Neighbors.cc Eval.cc Multiview.cc Parameter.cc Simplex.cc SMap.cc
18 | # One object file per source file (.cc -> .o).
19 | OBJ = $(SRCS:%.cc=%.o)
20 | 
21 | LIB = libEDM.a
22 | # Default target: build the archive, then copy it into ../lib/.
23 | all:	$(LIB)
24 | 	cp $(LIB) ../lib/
25 | # clean: remove the objects and the locally built archive.
26 | clean:
27 | 	rm -f $(OBJ) $(LIB)
28 | # distclean: as clean, plus the copied archive, *~, *.bak and *.csv files.
29 | distclean:
30 | 	rm -f $(OBJ) $(LIB) ../lib/$(LIB) *~ *.bak *.csv
31 | # Archive all objects into the static library.
32 | $(LIB): $(OBJ)
33 | 	$(AR) -rcs $(LIB) $(OBJ) # AR passed from Makevars
34 | # Pattern rule: compile each .cc with CXX and the CFLAGS assembled above.
35 | %.o : %.cc 
36 | 	$(CXX) $(CFLAGS) -c $<
37 | # depend: print the source list, then run makedepend over the sources.
38 | depend:
39 | 	@echo ${SRCS}
40 | 	makedepend -Y $(SRCS)
41 | # DO NOT DELETE
42 | 
43 | API.o: API.h Common.h DataFrame.h Parameter.h Version.h Simplex.h EDM.h
44 | API.o: SMap.h CCM.h Multiview.h
45 | CCM.o: CCM.h EDM.h Common.h DataFrame.h Parameter.h Version.h Simplex.h
46 | Common.o: Common.h DataFrame.h
47 | DateTime.o: DateTime.h
48 | EDM.o: EDM.h Common.h DataFrame.h Parameter.h Version.h
49 | EDM_Formatting.o: EDM.h Common.h DataFrame.h Parameter.h Version.h DateTime.h
50 | EDM_Neighbors.o: EDM_Neighbors.h EDM.h Common.h DataFrame.h Parameter.h
51 | EDM_Neighbors.o: Version.h
52 | Eval.o: API.h Common.h DataFrame.h Parameter.h Version.h Simplex.h EDM.h
53 | Eval.o: SMap.h CCM.h Multiview.h
54 | Multiview.o: Multiview.h EDM.h Common.h DataFrame.h Parameter.h Version.h
55 | Multiview.o: Simplex.h
56 | Parameter.o: Parameter.h Common.h DataFrame.h Version.h
57 | Simplex.o: Simplex.h EDM.h Common.h DataFrame.h Parameter.h Version.h
58 | SMap.o: SMap.h EDM.h Common.h DataFrame.h Parameter.h Version.h
59 | 


--------------------------------------------------------------------------------
/src/cppEDM/src/makefile.mingw:
--------------------------------------------------------------------------------
 1 | 
 2 | .PHONY: all clean distclean depend 
 3 | # Builds the static library libEDM.a from the .cc sources listed in SRCS.
 4 | HEADERS = API.h CCM.h Common.h DataFrame.h DateTime.h EDM.h EDM_Neighbors.h\
 5 |           Multiview.h Parameter.h Simplex.h SMap.h Version.h
 6 | 
 7 | SRCS = API.cc CCM.cc Common.cc DateTime.cc EDM.cc EDM_Formatting.cc\
 8 |        EDM_Neighbors.cc Eval.cc Multiview.cc Parameter.cc Simplex.cc SMap.cc
 9 | # One object file per source file (.cc -> .o).
10 | OBJ = $(SRCS:%.cc=%.o)
11 | 
12 | LIB = libEDM.a
13 | # Flags are appended (+=), so any CFLAGS already set by the caller is kept.
14 | CFLAGS += -std=c++11 -O3
15 | CFLAGS += -DCCM_THREADED
16 | CFLAGS += -fPIC
17 | # CFLAGS += -g # -DDEBUG_ALL
18 | # Default target: build the archive, then copy it into ../lib/.
19 | all:	$(LIB)
20 | 	cp $(LIB) ../lib/
21 | # clean: remove the objects and the locally built archive.
22 | clean:
23 | 	rm -f $(OBJ) $(LIB)
24 | # distclean: as clean, plus the copied archive, *~, *.bak and *.csv files.
25 | distclean:
26 | 	rm -f $(OBJ) $(LIB) ../lib/$(LIB) *~ *.bak *.csv
27 | # Archive all objects into the static library.
28 | $(LIB): $(OBJ)
29 | 	ar -rcs $(LIB) $(OBJ)
30 | # NOTE(review): C++ sources are compiled with $(CC), not $(CXX) - confirm CC is a C++-capable driver.
31 | %.o : %.cc 
32 | 	$(CC) $(CFLAGS) -c $<
33 | # depend: print the source list, then run makedepend over the sources.
34 | depend:
35 | 	@echo ${SRCS}
36 | 	makedepend -Y $(SRCS)
37 | # DO NOT DELETE
38 | 
39 | API.o: API.h Common.h DataFrame.h Parameter.h Version.h Simplex.h EDM.h
40 | API.o: SMap.h CCM.h Multiview.h
41 | CCM.o: CCM.h EDM.h Common.h DataFrame.h Parameter.h Version.h Simplex.h
42 | Common.o: Common.h DataFrame.h
43 | DateTime.o: DateTime.h
44 | EDM.o: EDM.h Common.h DataFrame.h Parameter.h Version.h
45 | EDM_Formatting.o: EDM.h Common.h DataFrame.h Parameter.h Version.h DateTime.h
46 | EDM_Neighbors.o: EDM_Neighbors.h EDM.h Common.h DataFrame.h Parameter.h
47 | EDM_Neighbors.o: Version.h
48 | Eval.o: API.h Common.h DataFrame.h Parameter.h Version.h Simplex.h EDM.h
49 | Eval.o: SMap.h CCM.h Multiview.h
50 | Multiview.o: Multiview.h EDM.h Common.h DataFrame.h Parameter.h Version.h
51 | Multiview.o: Simplex.h
52 | Parameter.o: Parameter.h Common.h DataFrame.h Version.h
53 | Simplex.o: Simplex.h EDM.h Common.h DataFrame.h Parameter.h Version.h
54 | SMap.o: SMap.h EDM.h Common.h DataFrame.h Parameter.h Version.h
55 | 


--------------------------------------------------------------------------------
/src/cppEDM/src/makefile.windows:
--------------------------------------------------------------------------------
 1 | 
 2 | CC  = cl
 3 | OBJ =  API.obj CCM.obj Common.obj DateTime.obj EDM.obj EDM_Formatting.obj\
 4 |        EDM_Neighbors.obj Eval.obj Multiview.obj Parameter.obj Simplex.obj\
 5 |        SMap.obj
 6 | # Name of the static library created by the lib command in the 'all' recipe.
 7 | LIB = EDM.lib
 8 | 
 9 | CFLAGS = -DCCM_THREADED /EHsc /MD # /MT -DDEBUG -DDEBUG_ALL
10 | # 'all' creates the library and copies it to ..\lib; the $(LIB) rule below has no recipe of its own.
11 | all:	$(LIB)
12 | 	lib /NODEFAULTLIB:LIBCMT /NODEFAULTLIB:library /OUT:$(LIB)  $(OBJ)
13 | 	cp $(LIB) ..\lib
14 | # NOTE(review): 'del -f' uses an rm-style flag - confirm the make tool/shell in use accepts it.
15 | clean:
16 | 	del -f $(OBJ) $(LIB)
17 | # NOTE(review): ../lib/$(LIB) uses forward slashes while 'all' uses ..\lib - confirm del handles this path.
18 | distclean:
19 | 	del -f $(OBJ) $(LIB) ../lib/$(LIB) *~ *.bak *.csv
20 | # The library depends on every object; objects are built by the explicit per-file rules that follow.
21 | $(LIB): $(OBJ)
22 | 
23 | API.obj: API.cc
24 | 	$(CC) /c API.cc $(CFLAGS)
25 | 
26 | CCM.obj: CCM.cc
27 | 	$(CC) /c CCM.cc $(CFLAGS)
28 | 
29 | Common.obj: Common.cc
30 | 	$(CC) /c Common.cc $(CFLAGS)
31 | 
32 | DateTime.obj: DateTime.cc
33 | 	$(CC) /c DateTime.cc $(CFLAGS)
34 | 
35 | EDM.obj: EDM.cc
36 | 	$(CC) /c EDM.cc $(CFLAGS)
37 | 
38 | EDM_Formatting.obj: EDM_Formatting.cc
39 | 	$(CC) /c EDM_Formatting.cc $(CFLAGS)
40 | 
41 | EDM_Neighbors.obj: EDM_Neighbors.cc
42 | 	$(CC) /c EDM_Neighbors.cc $(CFLAGS)
43 | 
44 | Eval.obj: Eval.cc
45 | 	$(CC) /c Eval.cc $(CFLAGS)
46 | 
47 | Multiview.obj: Multiview.cc
48 | 	$(CC) /c Multiview.cc $(CFLAGS)
49 | 
50 | Parameter.obj: Parameter.cc
51 | 	$(CC) /c Parameter.cc $(CFLAGS)
52 | 
53 | Simplex.obj: Simplex.cc
54 | 	$(CC) /c Simplex.cc $(CFLAGS)
55 | 
56 | SMap.obj: SMap.cc
57 | 	$(CC) /c SMap.cc $(CFLAGS)
58 | 
 59 | # Dependencies from makedepend on Linux
60 | API.obj: API.h Common.h DataFrame.h Parameter.h Version.h Simplex.h EDM.h
61 | API.obj: SMap.h CCM.h Multiview.h
62 | CCM.obj: CCM.h EDM.h Common.h DataFrame.h Parameter.h Version.h Simplex.h
63 | Common.obj: Common.h DataFrame.h
64 | DateTime.obj: DateTime.h
65 | EDM.obj: EDM.h Common.h DataFrame.h Parameter.h Version.h
66 | EDM_Formatting.obj: EDM.h Common.h DataFrame.h Parameter.h Version.h DateTime.h
67 | EDM_Neighbors.obj: EDM_Neighbors.h EDM.h Common.h DataFrame.h Parameter.h
68 | EDM_Neighbors.obj: Version.h
69 | Eval.obj: API.h Common.h DataFrame.h Parameter.h Version.h Simplex.h EDM.h
70 | Eval.obj: SMap.h CCM.h Multiview.h
71 | Multiview.obj: Multiview.h EDM.h Common.h DataFrame.h Parameter.h Version.h
72 | Multiview.obj: Simplex.h
73 | Parameter.obj: Parameter.h Common.h DataFrame.h Version.h
74 | Simplex.obj: Simplex.h EDM.h Common.h DataFrame.h Parameter.h Version.h
75 | SMap.obj: SMap.h EDM.h Common.h DataFrame.h Parameter.h Version.h
76 | 


--------------------------------------------------------------------------------
/tests/testthat.R:
--------------------------------------------------------------------------------
1 | library(testthat)
2 | library(rEDM)
3 | # Test runner entry point: hand the package name "rEDM" to testthat's test_check().
4 | test_check("rEDM")
5 | 


--------------------------------------------------------------------------------
/tests/testthat/test-1-Simplex.R:
--------------------------------------------------------------------------------
 1 | # NOTE: Numerical tests are performed in cppEDM unit tests
 2 | 
 3 | context("Simplex test")
 4 | 
 5 | data( block_3sp )
 6 | 
 7 | test_that("Simplex embedded works", {  # structural checks only; see the NOTE at the top of this file
 8 |     S.df <- Simplex( dataFrame = block_3sp,
 9 |                      lib = "1 99", pred = "100 195",
10 |                      E = 3, embedded = TRUE, showPlot = FALSE,  # data passed as already embedded
11 |                      columns = "x_t y_t z_t", target = "x_t" )
12 |     expect_s3_class(S.df, "data.frame")
13 |     expect_true("time"         %in% names(S.df))
14 |     expect_true("Observations" %in% names(S.df))
15 |     expect_true("Predictions"  %in% names(S.df))
16 |     expect_equal( dim(S.df), c(97,4) )  # exactly 97 rows and 4 columns
17 |     Err <- ComputeError( S.df $ Observations, S.df $ Predictions )  # only the names of the result are checked below
18 |     expect_true("MAE"  %in% names(Err))
19 |     expect_true("rho"  %in% names(Err))
20 |     expect_true("RMSE" %in% names(Err))
21 | })
22 | 
23 | test_that("Simplex embedding works", {  # same lib/pred as above, but one column and embedded = FALSE
24 |     S.df <- Simplex( dataFrame = block_3sp,
25 |                      lib = "1 99", pred = "100 195",
26 |                      E = 3, embedded = FALSE, showPlot = FALSE,  # Simplex is asked to embed x_t with E = 3
27 |                      columns = "x_t", target = "x_t" )
28 |     expect_s3_class(S.df, "data.frame")
29 |     expect_true("time"         %in% names(S.df))
30 |     expect_true("Observations" %in% names(S.df))
31 |     expect_true("Predictions"  %in% names(S.df))
32 |     expect_equal( dim(S.df), c(97,4) )  # same 97 x 4 shape as the embedded case
33 | })
34 | 
35 | test_that("Simplex errors", {  # each call below is expected to raise an error
36 |     expect_error( Simplex() )  # no arguments at all
37 |     expect_error( Simplex( dataFrame = block_3sp ) )  # data only, nothing else specified
38 |     expect_error( Simplex( dataFrame = block_3sp,
39 |                            lib = "1 99", pred = "100 195",
40 |                            E = 3, columns = "x_t y_t z_t", target = "None" ) )  # presumably "None" is not a column of block_3sp
41 |     expect_error( Simplex( dataFrame = block_3sp,
42 |                            lib = "1 99", pred = "100 195",
43 |                            E = 3, columns = "None", target = "x_t" ) )  # presumably "None" is not a column of block_3sp
44 |     expect_error( Simplex( dataFrame = block_3sp,
45 |                            lib = "1 99", pred = "100 200",  # presumably 200 is past the last data row - confirm
46 |                            E = 3, columns = "x_t y_t z_t", target = "x_t" ) )
47 | })
48 | 


--------------------------------------------------------------------------------
/tests/testthat/test-2-SMap.R:
--------------------------------------------------------------------------------
 1 | # NOTE: Numerical tests are performed in cppEDM unit tests
 2 | 
 3 | context("SMap test")
 4 | 
 5 | data( circle )
 6 | 
 7 | test_that("SMap works", {  # structural checks only; see the NOTE at the top of this file
 8 |     S.List = SMap( dataFrame = circle,
 9 |                    lib = "1 100", pred = "110 190", theta = 4, E = 2,
10 |                    embedded = TRUE, columns = "x y", target = "x" )  # data passed as already embedded
11 |     expect_type(S.List, "list")
12 |     expect_true("predictions"  %in% names(S.List))
13 |     expect_true("coefficients" %in% names(S.List))
14 |     expect_equal( dim(S.List $ predictions  ), c(82,4) )  # 82 rows, 4 columns
15 |     expect_equal( dim(S.List $ coefficients ), c(82,4) )  # same shape as predictions
16 | })
17 | 
18 | test_that("SMap errors", {  # each call below is expected to raise an error
19 |     expect_error( SMap() )  # no arguments at all
20 |     expect_error( SMap( dataFrame = circle,
21 |                         lib = "1 100", pred = "110 190", theta = 4, E = 2,
22 |                         embedded = TRUE, columns = "x y", target = "None" ) )  # presumably "None" is not a column of circle
23 |     expect_error( SMap( dataFrame = circle,
24 |                         lib = "1 100", pred = "110 190", theta = 4, E = 2,
25 |                         embedded = TRUE, columns = "None", target = "x" ) )  # presumably "None" is not a column of circle
26 |     expect_error( SMap( dataFrame = circle,
27 |                         lib = "1 100", pred = "110 201", theta = 4, E = 2,  # presumably 201 is past the last data row - confirm
28 |                         embedded = TRUE, columns = "x y", target = "x" ) )
29 | })
30 | 


--------------------------------------------------------------------------------
/tests/testthat/test-3-CCM.R:
--------------------------------------------------------------------------------
 1 | # NOTE: Numerical tests are performed in cppEDM unit tests
 2 | 
 3 | context("CCM test")
 4 | 
 5 | data( sardine_anchovy_sst )
 6 | 
 7 | test_that("CCM works", {  # structural checks only; see the NOTE at the top of this file
 8 |     C.df = CCM( dataFrame = sardine_anchovy_sst,
 9 |                 E = 3, Tp = 0, columns = "anchovy", target = "np_sst",
10 |                 libSizes = "10 70 10", sample = 100 )  # library sizes given as "start stop increment"
11 |     expect_s3_class(C.df, "data.frame")
12 |     expect_true("LibSize"         %in% names(C.df))
13 |     expect_true("anchovy:np_sst"  %in% names(C.df))  # cross-map columns are named "<a>:<b>" in both directions
14 |     expect_true("np_sst:anchovy"  %in% names(C.df))
15 |     expect_equal( dim(C.df), c(7,3) )  # 7 rows, 3 columns
16 | })
17 | 
18 | test_that("CCM errors", {  # each call below is expected to raise an error
19 |     expect_error( CCM() )  # no arguments at all
20 |     expect_error( CCM( dataFrame = sardine_anchovy_sst,
21 |                        E = 3, Tp = 0, columns = "", target = "np_sst",  # empty columns string
22 |                        libSizes = "10 70 10", sample = 100 ) )
23 |     expect_error( CCM( dataFrame = sardine_anchovy_sst,
24 |                        E = 3, Tp = 0, columns = "anchovy", target = "X",  # presumably "X" is not a column of the data
25 |                        libSizes = "10 70 10", sample = 100 ) )
26 |     expect_error( CCM( dataFrame = sardine_anchovy_sst,
27 |                        E = 3, Tp = 0, columns = "X", target = "np_sst",  # presumably "X" is not a column of the data
28 |                        libSizes = "10 70 10", sample = 100 ) )
29 |     expect_error( CCM( dataFrame = sardine_anchovy_sst,
30 |                        E = 3, Tp = 0, columns = "anchovy", target = "np_sst",
31 |                        libSizes = "10 70 80", sample = 100 ) )  # increment 80 is larger than the 10..70 span
32 | })
33 | 


--------------------------------------------------------------------------------
/tests/testthat/test-4-Multiview.R:
--------------------------------------------------------------------------------
 1 | # NOTE: Numerical tests are performed in cppEDM unit tests
 2 | 
 3 | context("Multiview test")
 4 | 
 5 | data( block_3sp )
 6 | 
 7 | test_that("Multiview works", {  # structural checks only; see the NOTE at the top of this file
 8 |   M.List = Multiview( dataFrame = block_3sp,
 9 |                       lib = "1 99", pred = "105 190",
10 |                       E = 3, columns = "x_t y_t z_t", target = "x_t" )
11 |     
12 |   expect_type(M.List, "list")
13 |   expect_true("View"        %in% names(M.List))
14 |   expect_true("Predictions" %in% names(M.List))
15 |   expect_equal( dim(M.List $ View), c(9,9) )  # 9 rows, 9 columns
16 |   expect_equal( dim(M.List $ Predictions), c(87,3) )  # 87 rows, 3 columns
17 | })
18 | 
19 | test_that("Multiview errors", {  # each call below is expected to raise an error
20 |   expect_error( Multiview() )  # no arguments at all
21 |   expect_error( Multiview( dataFrame = block_3sp,
22 |                            lib = "1 99", pred = "105 190",
23 |                            E = 3, columns = "x_t y_t z_t", target = "None" ) )  # invalid target only
24 |   expect_error( Multiview( dataFrame = block_3sp,
25 |                            lib = "1 99", pred = "105 190",
26 |                            E = 3, columns = "None", target = "x_t" ) )  # invalid columns only
27 |   expect_error( Multiview( dataFrame = block_3sp,
28 |                            lib = "1 99", pred = "105 201",  # out-of-range pred is the only invalid argument
29 |                            E = 3, columns = "x_t y_t z_t", target = "x_t" ) )
30 | })
31 | 


--------------------------------------------------------------------------------
/tests/testthat/test-5-EmbedDimension.R:
--------------------------------------------------------------------------------
 1 | # NOTE: Numerical tests are performed in cppEDM unit tests
 2 | 
 3 | context("Embed Dimension test")
 4 | 
 5 | data( TentMap )
 6 | 
 7 | test_that("EmbedDimension works", {  # structural checks only; see the NOTE at the top of this file
 8 |     df <- EmbedDimension( dataFrame = TentMap, lib = "1 100", pred = "201 500",
 9 |                           columns = "TentMap", target = "TentMap",
10 |                           showPlot = FALSE )
11 |     expect_s3_class(df, "data.frame")
12 |     expect_true("E"   %in% names(df))
13 |     expect_true("rho" %in% names(df))
14 |     expect_equal( dim(df), c(10,2) )  # 10 rows, 2 columns; presumably one row per E up to a default maxE - confirm
15 | })
16 | 
17 | test_that("EmbedDimension errors", {  # each call below is expected to raise an error
18 |     expect_error( EmbedDimension() )  # no arguments at all
19 |     expect_error( EmbedDimension( dataFrame = TentMap,
20 |                                   lib = "1 100", pred = "201 500",
21 |                                   columns = "TentMap", target = "None",  # presumably "None" is not a column of TentMap
22 |                                   showPlot = FALSE ) )
23 | })
24 | 


--------------------------------------------------------------------------------
/tests/testthat/test-6-PredictInterval.R:
--------------------------------------------------------------------------------
 1 | # NOTE: Numerical tests are performed in cppEDM unit tests
 2 | 
 3 | context("Predict Interval test")
 4 | 
 5 | data( TentMap )
 6 | 
 7 | test_that("PredictInterval works", {  # structural checks only; see the NOTE at the top of this file
 8 |     df <- PredictInterval( dataFrame = TentMap,
 9 |                            lib = "1 100", pred = "201 500", E = 2,
10 |                            columns = "TentMap", target = "TentMap",
11 |                            showPlot = FALSE )
12 |     expect_s3_class(df, "data.frame")
13 |     expect_true("Tp"  %in% names(df))
14 |     expect_true("rho" %in% names(df))
15 |     expect_equal( dim(df), c(10,2) )  # 10 rows, 2 columns; presumably one row per Tp up to a default maxTp - confirm
16 | })
17 | 
18 | test_that("PredictInterval errors", {  # each call below is expected to raise an error
19 |     expect_error( PredictInterval() )  # no arguments at all
20 |     expect_error( PredictInterval( dataFrame = TentMap,
21 |                                    lib = "1 100", pred = "201 500", E = 2,
22 |                                    columns = "", target = "TentMap",  # empty columns string
23 |                                    showPlot = FALSE ) )
24 | })
25 | 


--------------------------------------------------------------------------------
/tests/testthat/test-7-PredictNonlinear.R:
--------------------------------------------------------------------------------
 1 | # NOTE: Numerical tests are performed in cppEDM unit tests
 2 | 
 3 | context("Predict Nonlinear test")
 4 | 
 5 | data( TentMapNoise )
 6 | 
 7 | test_that("PredictNonlinear works", {  # structural checks only; see the NOTE at the top of this file
 8 |     df <- PredictNonlinear( dataFrame = TentMapNoise,
 9 |                             E = 2, lib = "1 100", pred = "201 500",
10 |                             columns = "TentMap", target = "TentMap",
11 |                             showPlot = FALSE )
12 |     expect_s3_class(df, "data.frame")
13 |     expect_true("Theta" %in% names(df))
14 |     expect_true("rho"   %in% names(df))
15 |     expect_equal( dim(df), c(15,2) )  # 15 rows, 2 columns; presumably one row per default theta value - confirm
16 | })
17 | 
18 | test_that("PredictNonlinear errors", {  # each call below is expected to raise an error
19 |     expect_error( PredictNonlinear() )  # no arguments at all
20 |     expect_error( PredictNonlinear( dataFrame = TentMapNoise,
21 |                                     E = 2, lib = "1 100", pred = "201 500",
22 |                                     columns = "", target = "TentMap",  # empty columns string
23 |                                     showPlot = FALSE ) )
24 | })
25 | 


--------------------------------------------------------------------------------
/vignettes/.gitignore:
--------------------------------------------------------------------------------
1 | rEDM-tutorial_cache
2 | rEDM-tutorial_files


--------------------------------------------------------------------------------
/vignettes/CrossMap.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SugiharaLab/rEDM/aa8204c60a5b9c95d7901b879e3c347c71cfdf7c/vignettes/CrossMap.png


--------------------------------------------------------------------------------
/vignettes/Lorenz_Projection.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SugiharaLab/rEDM/aa8204c60a5b9c95d7901b879e3c347c71cfdf7c/vignettes/Lorenz_Projection.png


--------------------------------------------------------------------------------
/vignettes/Lorenz_Reconstruct.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SugiharaLab/rEDM/aa8204c60a5b9c95d7901b879e3c347c71cfdf7c/vignettes/Lorenz_Reconstruct.png


--------------------------------------------------------------------------------
/vignettes/ParameterTable.csv:
--------------------------------------------------------------------------------
 1 | Parameter,Description
 2 | pathIn,Filesystem path to input ‘dataFile’. CSV format.
 3 | ,
 4 | dataFile,CSV format data file name. The first column must be a time index or time values.
 5 | ,The first row must be column names.
 6 | ,
 7 | dataFrame,Input data.frame. The first column must be a time index or time values.
 8 | ,The columns must be named.
 9 | ,
10 | pathOut,Filesystem path for ‘predictFile’ containing output predictions.
11 | ,
12 | predictFile,Observation and Prediction output file name. CSV format.
13 | ,
14 | smapCoefFile,Output file containing S-map coefficients.
15 | ,
16 | lib,String with start and stop indices of input data rows used to create the library of observations.
17 | ,A single contiguous range is supported.
18 | ,
19 | pred,String with start and stop indices of input data rows used for predictions.
20 | ,A single contiguous range is supported.
21 | ,
22 | D,Multiview dimension.
23 | ,
24 | E,Embedding dimension.
25 | ,
26 | Tp,Prediction horizon (number of time column rows).
27 | ,
28 | knn,Number of nearest neighbors. If knn=0; knn is set to E+1 for Simplex(); set to number of data rows for SMap().
29 | ,
30 | tau,Lag of time delay embedding specified as number of time column rows.
31 | ,
32 | theta,In Smap: S-Map neighbor localisation exponent. Single numeric.
33 | ,
34 | theta,In PredictNonlinear: A whitespace delimited string with values of S-map localisation parameters to be evaluated.
35 | ,
36 | exclusionRadius,Excludes vectors from the search space of nearest neighbors if their relative time index is within exclusionRadius.
37 | ,
38 | columns,String of whitespace separated column name(s) in the input data used to create the library.
39 | ,
40 | target,String of column name in the input data used for prediction.
41 | ,
42 | embedded,Logical specifying if the input data are embedded.
43 | ,
44 | validLib,Conditional embedding. Boolean vector identifying time series rows to use in state-space library.
45 | ,
46 | noTime,Default False. Set True to not require first column of data to be time.
47 | ,
48 | ignoreNan,SMap: default True. Redefine lib to ignore nan in data and embedding.
49 | ,
50 | generateSteps,Generative feedback predictions in Simplex or SMap.
51 | ,
52 | parameterList,Add parameter dictionary to return objects in Simplex; SMap; CCM; Multiview.
53 | ,
54 | libSizes,String of 3 whitespace separated integer values specifying the initial library size; the final library size; and the library size increment for CCM.
55 | ,
56 | sample,Integer specifying the number of random samples to draw at each library size evaluation for CCM.
57 | ,
58 | random,Logical to specify random (‘TRUE’) or sequential library sampling in CCM.
59 | ,
60 | includeData,Logical to return all CCM projection data frames.
61 | ,
62 | seed,Integer specifying the random sampler seed in CCM.  If ‘seed=0’ a random seed is generated.
63 | ,
64 | multiview,Number of multiview ensembles to average for the final prediction estimate in Multiview.
65 | ,
66 | trainLib,Use in-sample (lib=pred) prediction for multiview ranking.
67 | ,
68 | excludeTarget,Exclude target variable from multiviews.
69 | ,
70 | maxE,Maximum value of E to evaluate in EmbedDimension.
71 | ,
72 | maxTp,Maximum value of Tp to evaluate in PredictInterval.
73 | ,
74 | numThreads,Number of parallel threads for computation in EmbedDimension; PredictInterval and PredictNonlinear.
75 | ,
76 | verbose,Logical to produce additional console reporting.
77 | ,
78 | const_pred,Logical to add a _constant predictor_ column to the output. The constant predictor is X(t+1) = X(t).
79 | ,
80 | showPlot,Logical to plot results.
81 | 


--------------------------------------------------------------------------------
/vignettes/References.bib:
--------------------------------------------------------------------------------
  1 | @article{Casdagli_1991,
  2 | 	Author = {Casdagli and Eubank and Farmer and Gibson},
  3 | 	Journal = {Physica D: Nonlinear Phenomena},
  4 | 	Number = {1-3},
  5 | 	Pages = {52-98},
  6 | 	Rating = {0},
  7 | 	Title = {State space reconstruction in the presence of noise},
  8 | 	Volume = {51},
  9 | 	Year = {1991}}
 10 | 
 11 | @article{Davidson_1948,
 12 | 	Author = {Davidson and Andrewartha},
 13 | 	Journal = {Journal of Animal Ecology},
 14 | 	Pages = {193-199},
 15 | 	Title = {Annual trends in a natural population of \emph{{Thrips}
 16 |         imaginis} ({Thysanoptera})},
 17 | 	Volume = {17},
 18 | 	Year = {1948}}
 19 | 
 20 | @article{Davidson_1948a,
 21 | 	Author = {Davidson and Andrewartha},
 22 | 	Journal = {Journal of Animal Ecology},
 23 | 	Pages = {200-222},
 24 | 	Title = {The influence of rainfall, evaporation and atmospheric
 25 |         temperature on fluctuations in the size of a natural population
 26 |         of \emph{{Thrips} imaginis} ({Thysanoptera})},
 27 | 	Volume = {17},
 28 | 	Year = {1948}}
 29 | 
 30 | @article{Deyle_2013,
 31 | 	Author = {Deyle and Fogarty and Hsieh,
 32 |         Chih-Hao and Kaufman, Les and MacCall, Alec D and Munch,
 33 |         Stephan B and Perretti, Charles T and Ye, Hao and Sugihara, George},
 34 | 	Journal = {Proceedings of the National Academy of Sciences},
 35 | 	Journal-Full = {Proceedings of the National Academy of Sciences
 36 |         of the United States of America},
 37 | 	Number = {16},
 38 | 	Pages = {6430-6435},
 39 | 	Pmid = {23536299},
 40 | 	Pst = {ppublish},
 41 | 	Title = {Predicting climate effects on {Pacific} sardine},
 42 | 	Volume = {110},
 43 | 	Year = {2013}}
 44 | 
 45 | @article{Deyle_2011,
 46 | 	Author = {Deyle and Sugihara},
 47 | 	Journal = {PLoS ONE},
 48 | 	Pages = {e18295},
 49 | 	Title = {Generalized theorems for nonlinear state space reconstruction},
 50 | 	Volume = {6},
 51 | 	Year = {2011}}
 52 | 
 53 | @article{Deyle_2016,
 54 | 	Author = {Deyle and May and Munch and Sugihara},
 55 | 	Journal = {Proceedings of the Royal Society of London B},
 56 | 	Title = {Tracking and forecasting ecosystem interactions in real time},
 57 | 	Volume = {283},
 58 | 	Year = {2016}}
 59 |         
 60 | @article{Dixon_1999,
 61 | 	Author = {Dixon and Milicich and Sugihara},
 62 | 	Journal = {Science},
 63 | 	Pages = {1528-1530},
 64 | 	Rating = {0},
 65 | 	Title = {Episodic fluctuations in larval supply},
 66 | 	Volume = {283},
 67 | 	Year = {1999}}
 68 | 
 69 | @article{Fisher_1915,
 70 | 	Author = {Fisher},
 71 | 	Journal = {Biometrika},
 72 | 	Number = {4},
 73 | 	Pages = {507-521},
 74 | 	Title = {Frequency distribution of the values of the
 75 |         correlation coefficient in samples from an indefinitely
 76 |         large population},
 77 | 	Volume = {10},
 78 | 	Year = {1915}}
 79 | 
 80 | @article{Granger_1969,
 81 | 	Author = {Granger},
 82 | 	Journal = {Econometrica},
 83 | 	Number = {3},
 84 | 	Pages = {424-438},
 85 | 	Rating = {0},
 86 | 	Title = {Investigating causal relations by econometric
 87 |         models and cross-spectral methods},
 88 | 	Volume = {37},
 89 | 	Year = {1969}}
 90 | 
 91 | @article{Lorenz_1996,
 92 | 	Author = {Lorenz},
 93 | 	Journal = {ECMWF Seminar on Predictability},
 94 | 	Title = {Predictability -- A problem partly solved},
 95 | 	Volume = {I},
 96 | 	Year = {1996}}
 97 | 
 98 | @article{Lorenz_1963,
 99 | 	Author = {Lorenz},
100 | 	Journal = {Journal of the Atmospheric Sciences},
101 | 	Number = {2},
102 | 	Pages = {130-141},
103 | 	Title = {Deterministic nonperiodic flow},
104 | 	Volume = {20},
105 | 	Year = {1963}}
106 | 
107 | @article{Moran_1953,
108 | 	Author = {Moran},
109 | 	Journal = {Australian Journal of Zoology},
110 | 	Pages = {291-298},
111 | 	Title = {The statistical analysis of the Canadian Lynx cycle II.
112 |         synchronization and meteorology},
113 | 	Year = {1953}}
114 | 
115 | @misc{NERC-Centre-for-Population-Biology_2010,
116 | 	Author = {NERC Centre for Population Biology, Imperial College},
117 | 	Title = {The Global Population Dynamics Database Version 2},
118 | 	Year = {2010},
119 | 	Bdsk-Url-1 = {http://www.sw.ic.ac.uk/cpb/cpb/gpdd.html}}
120 | 
121 | @article{Sauer_1991,
122 | 	Author = {Sauer and Yorke and Casdagli},
123 | 	Journal = {Journal of Statistical Physics},
124 | 	Number = {3-4},
125 | 	Pages = {579-616},
126 | 	Title = {Embedology},
127 | 	Volume = {65},
128 | 	Year = {1991}}
129 | 
130 | @article{Sugihara_1990,
131 | 	Author = {Sugihara and May},
132 | 	Journal = {Nature},
133 | 	Pages = {734-741},
134 | 	Rating = {0},
135 | 	Title = {Nonlinear forecasting as a way of distinguishing
136 |         chaos from measurement error in time series},
137 | 	Volume = {344},
138 | 	Year = {1990}}
139 | 
140 | @article{Sugihara_1994,
141 | 	Author = {Sugihara},
142 | 	Journal={Philosophical Transactions: Physical Sciences and Engineering},
143 | 	Number = {1688},
144 | 	Pages = {477--495},
145 | 	Rating = {0},
146 | 	Title = {Nonlinear forecasting for the classification of
147 |         natural time series},
148 | 	Volume = {348},
149 | 	Year = {1994}}
150 | 
151 | @article{Sugihara_2012,
152 | 	Author = {Sugihara and May and Ye and
153 |         Hsieh and Deyle and Fogarty and Munch},
154 | 	Journal = {Science},
155 | 	Pages = {496-500},
156 | 	Title = {Detecting causality in complex ecosystems},
157 | 	Volume = {338},
158 | 	Year = {2012}}
159 | 
160 | @article{Takens_1981,
161 | 	Author = {Takens},
162 | 	Journal = {Dynamical Systems and Turbulence, Lecture Notes
163 |         in Mathematics},
164 | 	Pages = {366--381},
165 | 	Rating = {0},
166 | 	Read = {Yes},
167 | 	Title = {Detecting strange attractors in turbulence},
168 | 	Volume = {898},
169 | 	Year = {1981}}
170 | 
171 | @article{Ye_2015a,
172 | 	Author = {Ye, Hao and Deyle, Ethan R. and Gilarranz, Luis J.
173 |         and Sugihara, George},
174 | 	Journal = {Scientific Reports},
175 | 	Pages = {14750},
176 | 	Title = {Distinguishing time-delayed causal interactions
177 |         using convergent cross mapping},
178 | 	Volume = {5},
179 | 	Year = {2015}}
180 | 
181 | @article{Ye_2016,
182 | 	Author = {Ye and Sugihara},
183 | 	Journal = {Science},
184 | 	Number = {6302},
185 | 	Pages = {922-925},
186 | 	Title = {Information leverage in interconnected ecosystems:
187 |         Overcoming the curse of dimensionality},
188 | 	Volume = {353},
189 | 	Year = {2016}}
190 | 


--------------------------------------------------------------------------------
/vignettes/rEDM-algorithms.ltx:
--------------------------------------------------------------------------------
  1 | %\VignetteIndexEntry{Simplex and S-map Algorithms}
  2 | %\VignetteEngine{R.rsp::tex}
  3 | %\VignetteKeyword{R}
  4 | %\VignetteKeyword{package}
  5 | %\VignetteKeyword{vignette}
  6 | %\VignetteKeyword{LaTeX}
  7 | 
  8 | \documentclass{article}
  9 | 
 10 | \usepackage[T1]{fontenc} % Use 8-bit encoding that has 256 glyphs
 11 | \usepackage[english]{babel} % English language/hyphenation
 12 | \usepackage{amsmath, amsfonts, amsthm} % Math packages
 13 | \usepackage{cite}
 14 | %\usepackage[sort&compress,square,comma,authoryear]{natbib}
 15 | 
 16 | % makes color citations
 17 | %% \usepackage[
 18 | %%   %dvips,dvipdfm,
 19 | %%   colorlinks=true,urlcolor=blue,citecolor=red,linkcolor=red,bookmarks=true]{hyperref}
 20 | 
 21 | \usepackage{color}
 22 | \usepackage{pgfplots}
 23 | \usepackage{tikz}
 24 | %\pgfplotsset{compat=1.9} 
 25 | %\usepackage{hyperref}
 26 | 
 27 | \usepackage{algorithm}
 28 | \usepackage[noend]{algpseudocode}
 29 | 
 30 | \usepackage{graphicx}
 31 | %\usepackage{wrapfig}
 32 | \usepackage{paralist}
 33 | \usepackage{graphics} %% add this and next lines if pictures should be in eps format
 34 | \usepackage{epsfig} %For pictures: screened artwork should be set up with an 85 or 100 line screen
 35 | 
 36 | \usepackage{epstopdf} 
 37 | \usepackage[colorlinks=true]{hyperref}
 38 | \hypersetup{urlcolor=blue, citecolor=red}
 39 | %\usepackage{showkeys}
 40 | 
 41 | \newtheorem{theorem}{Theorem}[section]
 42 | \newtheorem{corollary}{Corollary}
 43 | \newtheorem*{main}{Main Theorem}
 44 | \newtheorem{lemma}[theorem]{Lemma}
 45 | \newtheorem{proposition}{Proposition}
 46 | \newtheorem{conjecture}{Conjecture}
 47 | \newtheorem*{problem}{Problem}
 48 | \theoremstyle{definition}
 49 | \newtheorem{definition}[theorem]{Definition}
 50 | \newtheorem{remark}{Remark}
 51 | \newtheorem*{notation}{Notation}
 52 | \newcommand{\ep}{\varepsilon}
 53 | \newcommand{\eps}[1]{{#1}_{\varepsilon}}
 54 | \newcommand{\bs}{\boldsymbol}
 55 | \allowdisplaybreaks[3]
 56 | 
 57 | 
 58 | % new commands %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 59 | \newcommand{\R}{\mathbb{R}}
 60 | \newcommand{\diag}{\text{diag}}
 61 | \DeclareMathOperator*{\argmin}{arg\,min}
 62 | 
 63 | \makeatletter
 64 | \def\BState{\State\hskip-\ALG@thistlm}
 65 | \makeatother
 66 | 
 67 | %\newcommand\numberthis{\addtocounter{equation}{1}\tag{\theequation}}
 68 | \begin{document}
 69 | \title{Simplex and S-map Algorithms}
 70 | 
 71 | \author{Yair Daon}
 72 | 
 73 | \maketitle
 74 | 
 75 | {\footnotesize 
 76 |  \centerline{Courant Institute, New York University}
 77 |    \centerline{New York, NY 10012, USA}
 78 | }
 79 | 
 80 | \bigskip
 81 |  
 82 | \begin{abstract}
 83 |   Pseudo-code for the simplex projection algorithm
 84 |   \cite{Sugihara_1990} and the S-map algorithm \cite{Sugihara_1994}.
 85 |   Algorithms are presented for the simple case of predicting one
 86 |   variable using its own time series.
 87 | \end{abstract}
 88 | 
 89 | \section{Notation}\label{section:notation}
 90 | \begin{itemize}
 91 | \item $E$ denotes the embedding dimension.
 92 | \item $k$ denotes the number of nearest neighbors we use. For
 93 |   the simplex method, the default is $k = E+1$ but for the S-map method it
 94 |   can be much larger.
 95 | \item $T_p$ denotes how many time-steps into the future we are trying
 96 |   to predict.
 97 | \item $X = (X_1, X_2, \dots)$, with each $X_t \in \R$, denotes a (potentially long) time series.
 98 | \item $y\in \R^{E}$ is a vector of lagged observations for which we
 99 |   want to make a prediction --- in the simplest case where all components
100 |   of the vector are single time step lags, $y_1$ represents the current value, 
101 |   $y_2$ is the value one time step prior and $y_{E}$ is the value $E-1$ time
102 |   steps prior.
103 | \item $\theta \geq 0$ is the tuning parameter in the S-map method.
104 | \item $X_t^E = (X_t, X_{t-1},\dots, X_{t-E+1} )' \in \R^E$ denotes the
105 |   lagged embedding vectors.
106 | \item $\| v \|$ is an unspecified norm of $v$. We do not specify
107 |   which norm to use and that choice is left to the user / reader.
108 | \item $\| v \|_2^2 = \sum_i v_i^2$ is the squared L2-norm (squared Euclidean distances). 
109 | \item Entries of matrices and vectors are indexed in the standard
110 |   linear algebraic fashion, starting at $1$ (like the R standard) and
111 |   not at $0$ (like the C/C++ and python standard).
112 | \end{itemize}
113 | 
114 | 
115 | \section{Helper Methods}
116 | 
117 | \subsection{Nearest neighbors}\label{subsec:NN}
118 | I will not write an implementation of the nearest neighbors method, just
119 | present its description. The method will be used with the signature
120 | presented in algorithm \ref{alg:NN}.
121 | 
122 | The input variables $X,y$ and $k$ are defined in section
123 | \ref{section:notation}. The method returns a list of indices $N =
124 | \{N_1,\dots,N_k\}$ such that
125 | \begin{equation*}
126 |   \| X_{N_i}^{E} - y\| \leq \| X_{N_j}^{E} - y\|
127 |   \text{ if } 1 \leq i \leq j \leq k.
128 | \end{equation*}
129 | %
130 | \begin{algorithm}
131 |   \caption{Find Nearest neighbors}\label{alg:NN}
132 |   \begin{algorithmic}[1]
133 |     \Procedure{Nearneighbor}{$y, X, k$}
134 |     \EndProcedure
135 |   \end{algorithmic}
136 | \end{algorithm}
137 | 
138 | \subsection{Least Squares}
139 | A least squares method finds $x$ that minimizes the error in the
140 | solution of an over-determined linear system (more equations than
141 | variables). Below, $A \in \R^{p \times q},p > q$ and $b\in \R^p$ and
142 | the least squares problem is to find
143 | \begin{equation*}
144 |   \hat{x} := \argmin_{x\in \R^q} \|Ax-b\|_2^2.
145 | \end{equation*}
146 | This problem can be solved using a Singular Value Decomposition (SVD),
147 | as outlined in algorithm \ref{alg:SVDLS}.
148 | \begin{algorithm}
149 |   \caption{Least Squares via SVD}\label{alg:SVDLS}
150 |   \begin{algorithmic}[1]
151 |     \Procedure{LeastSquares}{$A,b$}
152 |     \Comment{Assume $A\in \R^{p \times q}, p > q$.}
153 |     \State $U, S, V \gets \text{SVD}(A)$ \Comment{Thus, $A = U S V'$}
154 |     \State $S^{inv} \gets \textsc{zeros}( q,p )$
155 |     \Comment{The zero matrix in $\R^{q \times p}$}
156 |     \For{ $i =1,\dots,q$ }
157 |     \If{ $S_{ii} > 10^{-5}S_{11}$ } \Comment{Note that $10^{-5}$ is arbitrary}
158 |     \State $S^{inv}_{ii} \gets \frac{1}{S_{ii}}$
159 |     \EndIf
160 |     \EndFor
161 |     \State $x \gets V S^{inv} U'b$
162 |     \State \Return $x$
163 |     \EndProcedure
164 |   \end{algorithmic}
165 | \end{algorithm}
166 | 
167 | \section{Simplex Projection}\label{section:simplex}
168 | Ignoring ties in distances, minimal distances, minimal weights and
169 | other potential hazards, the following algorithm performs Simplex
170 | projection to predict $T_p$ time-steps ahead.
171 | \begin{algorithm}
172 |   \caption{Simplex Projection \cite{Sugihara_1990}}\label{alg:simplex}
173 |   \begin{algorithmic}[1]
174 |     \Procedure{SimplexPrediction}{$y, X, E, k, T_p$}
175 | 
176 |     \State $N \gets$ \textsc{Nearneighbor}($y, X, k$)
177 |     \Comment{Find $k$ nearest neighbors.}
178 |     \State $d \gets  \| X_{N_1}^{E} - y\|$ \Comment{Define the distance scale.}
179 | 
180 |     \For{$i=1,\dots,k$}  
181 |     \State $w_i \gets \exp (-\| X_{N_i}^{E} - y\| / d )$
182 |     \Comment{Compute weights.}
183 |     \EndFor
184 | 
185 |     \State $\hat{y} \gets \sum_{i = 1}^{k} \left(w_iX_{N_i+T_p}\right) /
186 |     \sum_{i = 1}^{k} w_i$ 
187 |     \Comment{prediction = weighted average of predictions.}
188 |     
189 |     \State \Return $\hat{y}$
190 |     \EndProcedure
191 |   \end{algorithmic}
192 | \end{algorithm}
193 | 
194 | \section{S-map}
195 | Ignoring ties in distances, minimal distances, minimal weights and
196 | other potential hazards, the following algorithm uses the S-map method
197 | to predict $T_p$ time-steps ahead.
198 | %
199 | \begin{algorithm}
200 |   \caption{S-map \cite{Sugihara_1994}}\label{alg:smap}
201 |   \begin{algorithmic}[1]
202 |     \Procedure{SmapPrediction}{$y, X, E, k, T_p, \theta$ }
203 |     \State $N \gets$ \textsc{Nearneighbor}($y, X, k$)
204 |     \Comment{Find NN to use for prediction.}
205 |     \State $d \gets \frac{1}{k} \sum_{i=1}^k \| X_{N_i}^{E} - y\|$
206 |     \Comment{Mean of distances.}
207 |     \For {$i=1,\dots,k$} 
208 |     \State $w_i \gets \exp (-\theta \| X_{N_i}^{E} - y\| / d )$
209 |     \Comment{Compute weights.}
210 |     \EndFor
211 |     \State $W \gets \diag(w_i)$ \Comment{Reweighting matrix.}
212 |     \State $A \gets
213 |     \begin{bmatrix}
214 |       1          & X_{N_1} & X_{N_1- 1} & \dots  & X_{N_1 - E + 1} \\
215 |       1          & X_{N_2} & X_{N_2- 1} & \dots  & X_{N_2 - E + 1} \\
216 |       \vdots     & \vdots & \vdots   & \ddots & \vdots       \\
217 |       1          & X_{N_k} & X_{N_k- 1} & \dots  & X_{N_k - E + 1} 
218 |     \end{bmatrix} $
219 |     \Comment{Design matrix.}
220 |     
221 |     \State $A \gets WA$ \Comment{Weighted design matrix.}
222 |     \State $b \gets 
223 |     \begin{bmatrix}
224 |       X_{N_1 + T_p} \\
225 |       X_{N_2 + T_p} \\
226 |       \vdots  \\
227 |       X_{N_k + T_p} 
228 |     \end{bmatrix} $
229 |     \Comment{Response vector.}
230 |     \State $b \gets Wb$ \Comment{Weighted response vector.}
231 |     \State $\hat{c} \gets \argmin_{c} \| Ac - b \|_2^2$
232 |     \Comment{Least squares, can be solved via algorithm \ref{alg:SVDLS}.}
233 |     \State $\hat{y} \gets \hat{c}_1 + \sum_{i=1}^E\hat{c}_{i+1}y_i$
234 |     \Comment{Using the local linear model $\hat{c}$ for prediction.}
235 |     \State \Return $\hat{y}$
236 |     \EndProcedure
237 |   \end{algorithmic}
238 | \end{algorithm}
239 | 
240 | Note that $k$, the number of nearest neighbors used for prediction,
241 | can be very large compared to the embedding dimension $E$. Since $A
242 | \in \R^{k \times (1+E)}$, this means that $A$ is ``tall and skinny''
243 | and the system $Ac = b$ is \emph{over-determined} (it has more
244 | equations than variables). This means (typically) that there does not
245 | exist any $c$ that solves said system exactly. This is why we seek a
246 | least-squares solution instead.
247 | 
248 | 
249 | \bibliographystyle{unsrt}
250 | \bibliography{refs}
251 | 
252 | \end{document}
253 | 


--------------------------------------------------------------------------------
/vignettes/rEDM-algorithms.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SugiharaLab/rEDM/aa8204c60a5b9c95d7901b879e3c347c71cfdf7c/vignettes/rEDM-algorithms.pdf


--------------------------------------------------------------------------------
/vignettes/rEDM-tutorial.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SugiharaLab/rEDM/aa8204c60a5b9c95d7901b879e3c347c71cfdf7c/vignettes/rEDM-tutorial.pdf


--------------------------------------------------------------------------------