├── .github
    ├── .gitignore
    └── workflows
    │   ├── pkgdown.yaml
    │   └── R-CMD-check.yaml
├── extra-data
    ├── hwl.rda
    ├── khs.rda
    └── yahoo.rda
├── tests
    ├── testthat.R
    └── testthat
    │   ├── test-zero_proportion.R
    │   ├── test-holt_parameters.R
    │   ├── test-pacf_features.R
    │   ├── test-acf_features.R
    │   ├── test-compengine.R
    │   └── test-stl_features.R
├── CRAN-SUBMISSION
├── .Rbuildignore
├── pkgdown
    └── extra.css
├── .gitignore
├── NEWS.md
├── man
    ├── crossing_points.Rd
    ├── zero_proportion.Rd
    ├── hurst.Rd
    ├── flat_spots.Rd
    ├── as.list.mts.Rd
    ├── arch_stat.Rd
    ├── holt_parameters.Rd
    ├── acf_features.Rd
    ├── pacf_features.Rd
    ├── stl_features.Rd
    ├── unitroot_kpss.Rd
    ├── lumpiness.Rd
    ├── fluctanal_prop_r1.Rd
    ├── binarize_mean.Rd
    ├── ac_9.Rd
    ├── motiftwo_entro3.Rd
    ├── trev_num.Rd
    ├── firstzero_ac.Rd
    ├── std1st_der.Rd
    ├── walker_propcross.Rd
    ├── scal_features.Rd
    ├── histogram_mode.Rd
    ├── heterogeneity.Rd
    ├── yahoo_data.Rd
    ├── localsimple_taures.Rd
    ├── firstmin_ac.Rd
    ├── dist_features.Rd
    ├── pred_features.Rd
    ├── nonlinearity.Rd
    ├── station_features.Rd
    ├── sampenc.Rd
    ├── compengine.Rd
    ├── embed2_incircle.Rd
    ├── max_level_shift.Rd
    ├── sampen_first.Rd
    ├── spreadrandomlocal_meantaul.Rd
    ├── autocorr_features.Rd
    ├── entropy.Rd
    ├── outlierinclude_mdrmd.Rd
    ├── tsfeatures-package.Rd
    └── tsfeatures.Rd
├── R
    ├── tsfeatures-package.R
    ├── as.list.mts.R
    ├── yahoo.R
    ├── entropy.R
    ├── multipleseasonal.R
    ├── yanfei.R
    ├── thiyanga.R
    ├── featurematrix.R
    ├── features.R
    └── compengine.R
├── cran-comments.md
├── README.Rmd
├── NAMESPACE
├── DESCRIPTION
├── README.md
├── _pkgdown.yml
└── vignettes
    └── tsfeatures.Rmd


/.github/.gitignore:
--------------------------------------------------------------------------------
1 | *.html
2 | 


--------------------------------------------------------------------------------
/extra-data/hwl.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/robjhyndman/tsfeatures/HEAD/extra-data/hwl.rda


--------------------------------------------------------------------------------
/extra-data/khs.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/robjhyndman/tsfeatures/HEAD/extra-data/khs.rda


--------------------------------------------------------------------------------
/extra-data/yahoo.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/robjhyndman/tsfeatures/HEAD/extra-data/yahoo.rda


--------------------------------------------------------------------------------
/tests/testthat.R:
--------------------------------------------------------------------------------
1 | library(testthat)
2 | library(tsfeatures)
3 | # Run the tsfeatures test suite via testthat's test_check().
4 | test_check("tsfeatures")
5 | 


--------------------------------------------------------------------------------
/CRAN-SUBMISSION:
--------------------------------------------------------------------------------
1 | Version: 1.1.1
2 | Date: 2023-08-28 13:24:34 UTC
3 | SHA: 892a4d46b629549abc50e88a13035b3aecbd26b6
4 | 


--------------------------------------------------------------------------------
/.Rbuildignore:
--------------------------------------------------------------------------------
 1 | ^cran-comments\.md$
 2 | ^\.travis\.yml$
 3 | ^Meta$
 4 | ^doc$
 5 | ^docs$
 6 | ^_pkgdown\.yml$
 7 | ^.*\.Rproj$
 8 | ^\.Rproj\.user$
 9 | ^README\.Rmd$
10 | ^README-.*\.png$
11 | ^README\_.*$
12 | ^READMEfigs
13 | ^extra-data
14 | ^revdep$
15 | ^\.github$
16 | ^CRAN-RELEASE$
17 | ^pkgdown$
18 | ^CRAN-SUBMISSION$
19 | 


--------------------------------------------------------------------------------
/tests/testthat/test-zero_proportion.R:
--------------------------------------------------------------------------------
 1 | # Unit tests for the zero_proportion() function
 2 | # The tests only run when testthat can be loaded (see the require() guard).
 3 | if (require(testthat)) {
 4 |   context("Tests on output")
 5 |   test_that("test for zero_proportion() ", {
 6 |     z <- zero_proportion(as.ts(c(0, 0, 3, 1, 2, 0)))  # 3 of the 6 values are zero
 7 |     expect_equal(length(z), 1L)
 8 |     expect_equal(z[1], 0.5)  # expected proportion: 3 / 6
 9 |   })
10 | }
11 | 


--------------------------------------------------------------------------------
/pkgdown/extra.css:
--------------------------------------------------------------------------------
 1 | h1, .h1 {
 2 |   font-size: 2rem;
 3 |   font-weight: 700;
 4 | }
 5 | 
 6 | h2, .h2 {
 7 |   font-size: 1.5rem;
 8 |   font-weight: 700;
 9 | }
10 | 
11 | .bg-primary .navbar-nav .show>.nav-link, .bg-primary .navbar-nav .nav-link.active, .bg-primary .navbar-nav .nav-link:hover, .bg-primary .navbar-nav .nav-link:focus {
12 |     color: #ffb81c !important;
13 | }
14 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | Makefile
 2 | Meta
 3 | doc
 4 | .Rproj.user
 5 | .Rhistory
 6 | .RData
 7 | docs
 8 | revdep
 9 | 
10 | # History files
11 | .Rapp.history
12 | # Example code in package build process
13 | *-Ex.R
14 | # RStudio files
15 | .Rproj.user/
16 | # produced vignettes
17 | vignettes/*.html
18 | vignettes/*.pdf
19 | .Rbuildignore
20 | *.Rproj
21 | README_cache
22 | inst/doc
23 | docs
24 | 


--------------------------------------------------------------------------------
/NEWS.md:
--------------------------------------------------------------------------------
 1 | # tsfeatures (development version)
 2 | 
 3 | # tsfeatures 1.1.1
 4 | 
 5 | * Bug fixes
 6 | * Improved docs
 7 | 
 8 | # tsfeatures 1.1
 9 | 
10 | * Added zero_proportion
11 | * Replaced deprecated multiprocess
12 | * Bug fixes and documentation improvements
13 | 
14 | # tsfeatures 1.0.2
15 | 
16 | * Better handling of perfect fits in `arch_stat()`
17 | 
18 | # tsfeatures 1.0.1
19 | 
20 | * Bug fixes
21 | * Documentation improvements
22 | 
23 | # tsfeatures 1.0.0
24 | 
25 | * First release
26 | 


--------------------------------------------------------------------------------
/man/crossing_points.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/features.R
 3 | \name{crossing_points}
 4 | \alias{crossing_points}
 5 | \title{Number of crossing points}
 6 | \usage{
 7 | crossing_points(x)
 8 | }
 9 | \arguments{
10 | \item{x}{a univariate time series}
11 | }
12 | \value{
13 | A numeric value.
14 | }
15 | \description{
16 | Computes the number of times a time series crosses the median.
17 | }
18 | \author{
19 | Earo Wang and Rob J Hyndman
20 | }
21 | 


--------------------------------------------------------------------------------
/R/tsfeatures-package.R:
--------------------------------------------------------------------------------
 1 | #' @importFrom stats as.ts bw.nrd0 coef dnorm embed fitted frequency lm spec.ar
 2 | #' @importFrom stats median na.contiguous na.pass residuals cor sd tsp "tsp<-" var
 3 | #' @importFrom stats quantile acf pacf stl pchisq ar Box.test poly start cmdscale
 4 | #' @importFrom purrr map map_dbl
 5 | #' @importFrom forecast mstl
 6 | 
 7 | #' @aliases tsfeatures-package
 8 | #' @keywords internal
 9 | "_PACKAGE"
10 | 
11 | ## usethis namespace: start
12 | ## usethis namespace: end
13 | NULL
14 | 


--------------------------------------------------------------------------------
/cran-comments.md:
--------------------------------------------------------------------------------
 1 | Fixed _PACKAGE issue as requested
 2 | 
 3 | ## Test environments
 4 | 
 5 | * ubuntu 22.04 (local): R 4.3.1
 6 | * macOS-latest (on GitHub Actions): release
 7 | * windows-latest (on GitHub Actions): release
 8 | * ubuntu-latest (on GitHub Actions): devel, release, oldrel
 9 | * win-builder: devel, release, oldrelease
10 | 
11 | ## R CMD check results
12 | 
13 | 0 errors | 0 warnings | 0 notes
14 | 
15 | ## revdep checks
16 | 
17 | All 6 reverse dependencies have been checked with no new errors detected.
18 | 


--------------------------------------------------------------------------------
/man/zero_proportion.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/thiyanga.R
 3 | \name{zero_proportion}
 4 | \alias{zero_proportion}
 5 | \title{Proportion of zeros}
 6 | \usage{
 7 | zero_proportion(x, tol = 1e-08)
 8 | }
 9 | \arguments{
10 | \item{x}{a univariate time series}
11 | 
12 | \item{tol}{tolerance level. Absolute values below this are considered zeros.}
13 | }
14 | \value{
15 | A numeric value.
16 | }
17 | \description{
18 | Computes proportion of zeros in a time series
19 | }
20 | \author{
21 | Thiyanga Talagala
22 | }
23 | 


--------------------------------------------------------------------------------
/man/hurst.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/features.R
 3 | \name{hurst}
 4 | \alias{hurst}
 5 | \title{Hurst coefficient}
 6 | \usage{
 7 | hurst(x)
 8 | }
 9 | \arguments{
10 | \item{x}{a univariate time series. If missing values are present, the largest
11 | contiguous portion of the time series is used.}
12 | }
13 | \value{
14 | A numeric value.
15 | }
16 | \description{
17 | Computes the Hurst coefficient indicating the level of fractional differencing
18 | of a time series.
19 | }
20 | \author{
21 | Rob J Hyndman
22 | }
23 | 


--------------------------------------------------------------------------------
/man/flat_spots.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/features.R
 3 | \name{flat_spots}
 4 | \alias{flat_spots}
 5 | \title{Longest flat spot}
 6 | \usage{
 7 | flat_spots(x)
 8 | }
 9 | \arguments{
10 | \item{x}{a univariate time series}
11 | }
12 | \value{
13 | A numeric value.
14 | }
15 | \description{
16 | "Flat spots" are computed by dividing the sample space of a time series into ten equal-sized intervals, and computing the maximum run length within any single interval.
17 | }
18 | \author{
19 | Earo Wang and Rob J Hyndman
20 | }
21 | 


--------------------------------------------------------------------------------
/tests/testthat/test-holt_parameters.R:
--------------------------------------------------------------------------------
 1 | # Unit tests for the holt_parameters() function
 2 | # NOTE(review): z[1], z[2] are presumably alpha and beta; only lower bounds are checked.
 3 | if (require(testthat)) {
 4 |   context("Tests on output")
 5 |   test_that("test for holt_parameters() results on non-seasonal ts data", {
 6 |     z <- holt_parameters(WWWusage)
 7 |     expect_equal(length(z), 2L)
 8 |     expect_gt(z[1], 0.99)
 9 |     expect_gt(z[2], 0.99)
10 |   })
11 |   test_that("test for holt_parameters() results on seasonal ts data", {
12 |     z <- holt_parameters(USAccDeaths)
13 |     expect_equal(length(z), 2L)  # still 2 values for seasonal input
14 |     expect_gt(z[1], 0.96)
15 |     expect_gt(z[2], 0.00)
16 |   })
17 | }
18 | 


--------------------------------------------------------------------------------
/man/as.list.mts.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/as.list.mts.R
 3 | \name{as.list.mts}
 4 | \alias{as.list.mts}
 5 | \title{Convert mts object to list of time series}
 6 | \usage{
 7 | \method{as.list}{mts}(x, ...)
 8 | }
 9 | \arguments{
10 | \item{x}{multivariate time series of class mts.}
11 | 
12 | \item{...}{other arguments are ignored.}
13 | }
14 | \value{
15 | A list of ts objects.
16 | }
17 | \description{
18 | An mts object contains a multivariate time series in a matrix, with time on rows.
19 | This is converted into a list of univariate time series.
20 | }
21 | \author{
22 | Rob J Hyndman
23 | }
24 | 


--------------------------------------------------------------------------------
/tests/testthat/test-pacf_features.R:
--------------------------------------------------------------------------------
 1 | # Unit tests for the pacf_features() function
 2 | # Non-seasonal input is expected to give 3 values, seasonal input 4; only lower bounds are checked.
 3 | if (require(testthat)) {
 4 |   context("Tests on output")
 5 |   test_that("test for pacf_features() results on non-seasonal ts data", {
 6 |     z <- pacf_features(WWWusage)
 7 |     expect_equal(length(z), 3L)
 8 |     expect_gt(z[1], 1.03)
 9 |     expect_gt(z[2], 0.80)
10 |     expect_gt(z[3], 0.22)
11 |   })
12 |   test_that("test for pacf_features() results on seasonal ts data", {
13 |     z <- pacf_features(USAccDeaths)
14 |     expect_equal(length(z), 4L)  # one more value than the non-seasonal case
15 |     expect_gt(z[1], 0.63)
16 |     expect_gt(z[2], 0.09)
17 |     expect_gt(z[3], 0.38)
18 |     expect_gt(z[4], 0.12)
19 |   })
20 | }
21 | 


--------------------------------------------------------------------------------
/R/as.list.mts.R:
--------------------------------------------------------------------------------
 1 | #' Convert mts object to list of time series
 2 | #' 
 3 | #' An mts object contains a multivariate time series in a matrix, with time on rows.
 4 | #' This is converted into a list of univariate time series.
 5 | #' 
 6 | #' @method as.list mts
 7 | #' @param x multivariate time series of class mts.
 8 | #' @param ... other arguments are ignored.
 9 | #' @author Rob J Hyndman
10 | #' @return A list of ts objects.
11 | #' @export
12 | as.list.mts <- function(x, ...) {
13 |   # Time attributes of the whole series, reapplied to every column below.
14 |   series_tsp <- tsp(x)
15 |   # Converts one column to a ts object that carries series_tsp.
16 |   restore_ts <- function(column) {
17 |     column <- as.ts(column)
18 |     tsp(column) <- series_tsp
19 |     column
20 |   }
21 |   # One list element per column of x.
22 |   columns <- as.list(as.data.frame(x))
23 |   purrr::map(columns, restore_ts)
24 | }
25 | 


--------------------------------------------------------------------------------
/man/arch_stat.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/yanfei.R
 3 | \name{arch_stat}
 4 | \alias{arch_stat}
 5 | \title{ARCH LM Statistic}
 6 | \usage{
 7 | arch_stat(x, lags = 12, demean = TRUE)
 8 | }
 9 | \arguments{
10 | \item{x}{a univariate time series}
11 | 
12 | \item{lags}{Number of lags to use in the test}
13 | 
14 | \item{demean}{Should data have mean removed before test applied?}
15 | }
16 | \value{
17 | A numeric value.
18 | }
19 | \description{
20 | Computes a statistic based on the Lagrange Multiplier (LM) test of Engle (1982) for
21 | autoregressive conditional heteroscedasticity (ARCH). The statistic returned is
22 | the \eqn{R^2}{R^2} value of an autoregressive model of order \code{lags} applied
23 | to \eqn{x^2}{x^2}.
24 | }
25 | \author{
26 | Yanfei Kang
27 | }
28 | 


--------------------------------------------------------------------------------
/man/holt_parameters.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/thiyanga.R
 3 | \name{holt_parameters}
 4 | \alias{holt_parameters}
 5 | \alias{hw_parameters}
 6 | \title{Parameter estimates of Holt's linear trend method}
 7 | \usage{
 8 | holt_parameters(x)
 9 | 
10 | hw_parameters(x)
11 | }
12 | \arguments{
13 | \item{x}{a univariate time series}
14 | }
15 | \value{
16 | \code{holt_parameters} produces a vector of 2 values: alpha, beta.
17 | 
18 | \code{hw_parameters} produces a vector of 3 values: alpha, beta and gamma.
19 | }
20 | \description{
21 | Estimates the smoothing parameter for the level (alpha) and
22 | the smoothing parameter for the trend (beta).
23 | \code{hw_parameters} considers an additive trend with additive seasonality: the ets(A,A,A) model.
24 | }
25 | \author{
26 | Thiyanga Talagala, Pablo Montero-Manso
27 | }
28 | 


--------------------------------------------------------------------------------
/man/acf_features.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/thiyanga.R
 3 | \name{acf_features}
 4 | \alias{acf_features}
 5 | \title{Autocorrelation-based features}
 6 | \usage{
 7 | acf_features(x)
 8 | }
 9 | \arguments{
10 | \item{x}{a univariate time series}
11 | }
12 | \value{
13 | A vector of 6 values: the first autocorrelation coefficient and the sum of squares of the
14 | first ten autocorrelation coefficients of the original series, first-differenced series,
15 | and twice-differenced series.
16 | For seasonal data, the autocorrelation coefficient at the first seasonal lag is
17 | also returned.
18 | }
19 | \description{
20 | Computes various measures based on autocorrelation coefficients of the
21 | original series, first-differenced series and second-differenced series
22 | }
23 | \author{
24 | Thiyanga Talagala
25 | }
26 | 


--------------------------------------------------------------------------------
/man/pacf_features.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/thiyanga.R
 3 | \name{pacf_features}
 4 | \alias{pacf_features}
 5 | \title{Partial autocorrelation-based features}
 6 | \usage{
 7 | pacf_features(x)
 8 | }
 9 | \arguments{
10 | \item{x}{a univariate time series}
11 | }
12 | \value{
13 | A vector of 3 values: the sum of squares of the first 5
14 | partial autocorrelation coefficients of the original series, first-differenced
15 | series and twice-differenced series.
16 | For seasonal data, the partial autocorrelation coefficient at the first seasonal
17 | lag is also returned.
18 | }
19 | \description{
20 | Computes various measures based on partial autocorrelation coefficients of the
21 | original series, first-differenced series and second-differenced series
22 | }
23 | \author{
24 | Thiyanga Talagala
25 | }
26 | 


--------------------------------------------------------------------------------
/tests/testthat/test-acf_features.R:
--------------------------------------------------------------------------------
 1 | # Unit tests for the acf_features() function
 2 | # Non-seasonal input is expected to give 6 values, seasonal input 7; only lower bounds are checked.
 3 | if (require(testthat)) {
 4 |   context("Tests on output")
 5 |   test_that("test for acf_features() results on non-seasonal ts data", {
 6 |     z <- acf_features(WWWusage)
 7 |     expect_equal(length(z), 6L)
 8 |     expect_gt(z[1], 0.96)
 9 |     expect_gt(z[2], 4.19)
10 |     expect_gt(z[3], 0.79)
11 |     expect_gt(z[4], 1.40)
12 |     expect_gt(z[5], 0.17)
13 |     expect_gt(z[6], 0.33)
14 |   })
15 |   test_that("test for acf_features() results on seasonal ts data", {
16 |     z <- acf_features(USAccDeaths)
17 |     expect_equal(length(z), 7L)  # one more value than the non-seasonal case
18 |     expect_gt(z[1], 0.70)
19 |     expect_gt(z[2], 1.20)
20 |     expect_gt(z[3], 0.02)
21 |     expect_gt(z[4], 0.27)
22 |     expect_gt(z[5], -0.49)
23 |     expect_gt(z[6], 0.74)
24 |     expect_gt(z[7], 0.62)
25 |   })
26 | }
27 | 


--------------------------------------------------------------------------------
/man/stl_features.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/multipleseasonal.R
 3 | \name{stl_features}
 4 | \alias{stl_features}
 5 | \title{Strength of trend and seasonality of a time series}
 6 | \usage{
 7 | stl_features(x, ...)
 8 | }
 9 | \arguments{
10 | \item{x}{a univariate time series.}
11 | 
12 | \item{...}{Other arguments are passed to \code{\link[forecast]{mstl}}.}
13 | }
14 | \value{
15 | A vector of numeric values.
16 | }
17 | \description{
18 | Computes various measures of trend and seasonality of a time series based on
19 | an STL decomposition. The number of seasonal periods, and the length of the
20 | seasonal periods are returned. Also, the strength of seasonality corresponding
21 | to each period is estimated. The \code{\link[forecast]{mstl}} function is used
22 | to do the decomposition.
23 | }
24 | \author{
25 | Rob J Hyndman
26 | }
27 | 


--------------------------------------------------------------------------------
/man/unitroot_kpss.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/features.R
 3 | \name{unitroot_kpss}
 4 | \alias{unitroot_kpss}
 5 | \alias{unitroot_pp}
 6 | \title{Unit Root Test Statistics}
 7 | \usage{
 8 | unitroot_kpss(x, ...)
 9 | 
10 | unitroot_pp(x, ...)
11 | }
12 | \arguments{
13 | \item{x}{a univariate time series.}
14 | 
15 | \item{...}{Other arguments are passed to the \code{\link[urca]{ur.kpss}} or
16 | \code{\link[urca]{ur.pp}} functions.}
17 | }
18 | \value{
19 | A numeric value
20 | }
21 | \description{
22 | \code{unitroot_kpss} computes the statistic for the Kwiatkowski et al. unit root test
23 | using the default settings for the \code{\link[urca]{ur.kpss}} function.
24 | \code{unitroot_pp} computes the statistic for the Phillips-Perron unit root test
25 | using the default settings for the \code{\link[urca]{ur.pp}} function.
26 | }
27 | \author{
28 | Pablo Montero-Manso
29 | }
30 | 


--------------------------------------------------------------------------------
/man/lumpiness.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/features.R
 3 | \name{lumpiness}
 4 | \alias{lumpiness}
 5 | \alias{stability}
 6 | \title{Time series features based on tiled windows}
 7 | \usage{
 8 | lumpiness(x, width = ifelse(frequency(x) > 1, frequency(x), 10))
 9 | 
10 | stability(x, width = ifelse(frequency(x) > 1, frequency(x), 10))
11 | }
12 | \arguments{
13 | \item{x}{a univariate time series}
14 | 
15 | \item{width}{size of each tiled (non-overlapping) window}
16 | }
17 | \value{
18 | A numeric vector of length 2 containing a measure of lumpiness and
19 | a measure of stability.
20 | }
21 | \description{
22 | Computes features of a time series based on tiled (non-overlapping) windows.
23 | Means or variances are produced for all tiled windows. Then stability is
24 | the variance of the means, while lumpiness is the variance of the variances.
25 | }
26 | \author{
27 | Earo Wang and Rob J Hyndman
28 | }
29 | 


--------------------------------------------------------------------------------
/man/fluctanal_prop_r1.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/compengine.R
 3 | \name{fluctanal_prop_r1}
 4 | \alias{fluctanal_prop_r1}
 5 | \title{Implements fluctuation analysis from software package \code{hctsa}}
 6 | \usage{
 7 | fluctanal_prop_r1(x)
 8 | }
 9 | \arguments{
10 | \item{x}{the input time series (or any vector)}
11 | }
12 | \description{
13 | Fits a polynomial of order 1 and then returns the
14 | range. The order of fluctuations is 2, corresponding to root mean
15 | square fluctuations.
16 | }
17 | \references{
18 | B.D. Fulcher and N.S. Jones. hctsa: A computational framework for automated time-series phenotyping using massive feature extraction. Cell Systems 5, 527 (2017).
19 | 
20 | B.D. Fulcher, M.A. Little, N.S. Jones Highly comparative time-series analysis: the empirical structure of time series and their methods. J. Roy. Soc. Interface 10, 83 (2013).
21 | }
22 | \author{
23 | Yangzhuoran Yang
24 | }
25 | 


--------------------------------------------------------------------------------
/man/binarize_mean.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/compengine.R
 3 | \name{binarize_mean}
 4 | \alias{binarize_mean}
 5 | \title{Converts an input vector into a binarized version from software package \code{hctsa}}
 6 | \usage{
 7 | binarize_mean(y)
 8 | }
 9 | \arguments{
10 | \item{y}{the input time series}
11 | }
12 | \value{
13 | Time-series values above its mean are given 1, and those below the mean are 0.
14 | }
15 | \description{
16 | Converts an input vector into a binarized version from software package \code{hctsa}
17 | }
18 | \references{
19 | B.D. Fulcher and N.S. Jones. hctsa: A computational framework for automated time-series phenotyping using massive feature extraction. Cell Systems 5, 527 (2017).
20 | 
21 | B.D. Fulcher, M.A. Little, N.S. Jones Highly comparative time-series analysis: the empirical structure of time series and their methods. J. Roy. Soc. Interface 10, 83 (2013).
22 | }
23 | \author{
24 | Yangzhuoran Yang
25 | }
26 | 


--------------------------------------------------------------------------------
/man/ac_9.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/compengine.R
 3 | \name{ac_9}
 4 | \alias{ac_9}
 5 | \title{Autocorrelation at lag 9. Included for completeness and consistency.}
 6 | \usage{
 7 | ac_9(y, acfv = stats::acf(y, 9, plot = FALSE, na.action = na.pass))
 8 | }
 9 | \arguments{
10 | \item{y}{the input time series}
11 | 
12 | \item{acfv}{vector of autocorrelations; if supplied, it is used to avoid repeated computation.}
13 | }
14 | \value{
15 | autocorrelation at lag 9
16 | }
17 | \description{
18 | Autocorrelation at lag 9. Included for completeness and consistency.
19 | }
20 | \references{
21 | B.D. Fulcher and N.S. Jones. hctsa: A computational framework for automated time-series phenotyping using massive feature extraction. Cell Systems 5, 527 (2017).
22 | 
23 | B.D. Fulcher, M.A. Little, N.S. Jones Highly comparative time-series analysis: the empirical structure of time series and their methods. J. Roy. Soc. Interface 10, 83 (2013).
24 | }
25 | \author{
26 | Yangzhuoran Yang
27 | }
28 | 


--------------------------------------------------------------------------------
/man/motiftwo_entro3.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/compengine.R
 3 | \name{motiftwo_entro3}
 4 | \alias{motiftwo_entro3}
 5 | \title{Local motifs in a binary symbolization of the time series from software package \code{hctsa}}
 6 | \usage{
 7 | motiftwo_entro3(y)
 8 | }
 9 | \arguments{
10 | \item{y}{the input time series}
11 | }
12 | \value{
13 | Entropy of words in the binary alphabet of length 3.
14 | }
15 | \description{
16 | Coarse-graining is performed. Time-series values above its mean are given 1,
17 | and those below the mean are 0.
18 | }
19 | \examples{
20 | motiftwo_entro3(WWWusage)
21 | }
22 | \references{
23 | B.D. Fulcher and N.S. Jones. hctsa: A computational framework for automated time-series phenotyping using massive feature extraction. Cell Systems 5, 527 (2017).
24 | 
25 | B.D. Fulcher, M.A. Little, N.S. Jones Highly comparative time-series analysis: the empirical structure of time series and their methods. J. Roy. Soc. Interface 10, 83 (2013).
26 | }
27 | \author{
28 | Yangzhuoran Yang
29 | }
30 | 


--------------------------------------------------------------------------------
/man/trev_num.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/compengine.R
 3 | \name{trev_num}
 4 | \alias{trev_num}
 5 | \title{Normalized nonlinear autocorrelation, the numerator of the trev function of a time series from software package \code{hctsa}}
 6 | \usage{
 7 | trev_num(y)
 8 | }
 9 | \arguments{
10 | \item{y}{the input time series}
11 | }
12 | \value{
13 | the numerator of the trev function of a time series
14 | }
15 | \description{
16 | Calculates the numerator of the trev function, a normalized nonlinear autocorrelation.
17 | The time lag is set to 1.
18 | }
19 | \examples{
20 | trev_num(WWWusage)
21 | }
22 | \references{
23 | B.D. Fulcher and N.S. Jones. hctsa: A computational framework for automated time-series phenotyping using massive feature extraction. Cell Systems 5, 527 (2017).
24 | 
25 | B.D. Fulcher, M.A. Little, N.S. Jones Highly comparative time-series analysis: the empirical structure of time series and their methods. J. Roy. Soc. Interface 10, 83 (2013).
26 | }
27 | \author{
28 | Yangzhuoran Yang
29 | }
30 | 


--------------------------------------------------------------------------------
/man/firstzero_ac.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/compengine.R
 3 | \name{firstzero_ac}
 4 | \alias{firstzero_ac}
 5 | \title{The first zero crossing of the autocorrelation function from software package \code{hctsa}}
 6 | \usage{
 7 | firstzero_ac(y, acfv = stats::acf(y, N - 1, plot = FALSE, na.action = na.pass))
 8 | }
 9 | \arguments{
10 | \item{y}{the input time series}
11 | 
12 | \item{acfv}{vector of autocorrelations; if supplied, it is used to avoid repeated computation.}
13 | }
14 | \value{
15 | The first zero crossing of the autocorrelation function
16 | }
17 | \description{
18 | Search up to a maximum of the length of the time series
19 | }
20 | \references{
21 | B.D. Fulcher and N.S. Jones. hctsa: A computational framework for automated time-series phenotyping using massive feature extraction. Cell Systems 5, 527 (2017).
22 | 
23 | B.D. Fulcher, M.A. Little, N.S. Jones Highly comparative time-series analysis: the empirical structure of time series and their methods. J. Roy. Soc. Interface 10, 83 (2013).
24 | }
25 | \author{
26 | Yangzhuoran Yang
27 | }
28 | 


--------------------------------------------------------------------------------
/man/std1st_der.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/compengine.R
 3 | \name{std1st_der}
 4 | \alias{std1st_der}
 5 | \title{Standard deviation of the first derivative of the time series from software package \code{hctsa}}
 6 | \usage{
 7 | std1st_der(y)
 8 | }
 9 | \arguments{
10 | \item{y}{the input time series. Missing values will be removed.}
11 | }
12 | \value{
13 | Standard deviation of the first derivative of the time series.
14 | }
15 | \description{
16 | Modified from \code{SY_StdNthDer} in \code{hctsa}. Based on an idea by Vladimir Vassilevsky.
17 | }
18 | \references{
19 | cf. http://www.mathworks.de/matlabcentral/newsreader/view_thread/136539
20 | 
21 | B.D. Fulcher and N.S. Jones. hctsa: A computational framework for automated time-series phenotyping using massive feature extraction. Cell Systems 5, 527 (2017).
22 | 
23 | B.D. Fulcher, M.A. Little, N.S. Jones Highly comparative time-series analysis: the empirical structure of time series and their methods. J. Roy. Soc. Interface 10, 83 (2013).
24 | }
25 | \author{
26 | Yangzhuoran Yang
27 | }
28 | 


--------------------------------------------------------------------------------
/man/walker_propcross.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/compengine.R
 3 | \name{walker_propcross}
 4 | \alias{walker_propcross}
 5 | \title{Simulates a hypothetical walker moving through the time domain from software package \code{hctsa}}
 6 | \usage{
 7 | walker_propcross(y)
 8 | }
 9 | \arguments{
10 | \item{y}{the input time series}
11 | }
12 | \value{
13 | fraction of time series length that walker crosses time series
14 | }
15 | \description{
16 | The hypothetical particle (or 'walker') moves in response to values of the
17 | time series at each point.
18 | The walker narrows the gap between its value and that
19 | of the time series by 10\%.
20 | }
21 | \references{
22 | B.D. Fulcher and N.S. Jones. hctsa: A computational framework for automated time-series phenotyping using massive feature extraction. Cell Systems 5, 527 (2017).
23 | 
24 | B.D. Fulcher, M.A. Little, N.S. Jones Highly comparative time-series analysis: the empirical structure of time series and their methods. J. Roy. Soc. Interface 10, 83 (2013).
25 | }
26 | \author{
27 | Yangzhuoran Yang
28 | }
29 | 


--------------------------------------------------------------------------------
/man/scal_features.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/compengine.R
 3 | \name{scal_features}
 4 | \alias{scal_features}
 5 | \title{The scaling feature set from software package \code{hctsa}}
 6 | \usage{
 7 | scal_features(x)
 8 | }
 9 | \arguments{
10 | \item{x}{the input time series}
11 | }
12 | \value{
13 | a vector with scaling features
14 | }
15 | \description{
16 | Calculates the features grouped as the scaling set,
17 | which are used in the CompEngine database, using the methods introduced in package \code{hctsa}.
18 | }
19 | \details{
20 | The only feature in this set is \code{fluctanal_prop_r1}.
21 | }
22 | \references{
23 | B.D. Fulcher and N.S. Jones. hctsa: A computational framework for automated time-series phenotyping using massive feature extraction. Cell Systems 5, 527 (2017).
24 | 
25 | B.D. Fulcher, M.A. Little, N.S. Jones Highly comparative time-series analysis: the empirical structure of time series and their methods. J. Roy. Soc. Interface 10, 83 (2013).
26 | }
27 | \seealso{
28 | \code{\link{fluctanal_prop_r1}}
29 | }
30 | \author{
31 | Yangzhuoran Yang
32 | }
33 | 


--------------------------------------------------------------------------------
/man/histogram_mode.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/compengine.R
 3 | \name{histogram_mode}
 4 | \alias{histogram_mode}
 5 | \title{Mode of a data vector from software package \code{hctsa}}
 6 | \usage{
 7 | histogram_mode(y, numBins = 10)
 8 | }
 9 | \arguments{
10 | \item{y}{the input data vector}
11 | 
12 | \item{numBins}{the number of bins to use in the histogram.}
13 | }
14 | \value{
15 | the mode
16 | }
17 | \description{
18 | Measures the mode of the data vector using a histogram, with the given number of bins treated as a suggestion.
19 | The value calculated is different from \code{hctsa} and \code{CompEngine} as the histogram edges are calculated differently.
20 | }
21 | \references{
22 | B.D. Fulcher and N.S. Jones. hctsa: A computational framework for automated time-series phenotyping using massive feature extraction. Cell Systems 5, 527 (2017).
23 | 
24 | B.D. Fulcher, M.A. Little, N.S. Jones Highly comparative time-series analysis: the empirical structure of time series and their methods. J. Roy. Soc. Interface 10, 83 (2013).
25 | }
26 | \author{
27 | Yangzhuoran Yang
28 | }
29 | 


--------------------------------------------------------------------------------
/man/heterogeneity.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/yanfei.R
 3 | \name{heterogeneity}
 4 | \alias{heterogeneity}
 5 | \title{Heterogeneity coefficients}
 6 | \usage{
 7 | heterogeneity(x)
 8 | }
 9 | \arguments{
10 | \item{x}{a univariate time series}
11 | }
12 | \value{
13 | A vector of numeric values.
14 | }
15 | \description{
16 | Computes various measures of heterogeneity of a time series. First the series
17 | is pre-whitened using an AR model to give a new series y. We fit a GARCH(1,1)
18 | model to y and obtain the residuals, e. Then the four measures of heterogeneity
19 | are:
20 | (1) the sum of squares of the first 12 autocorrelations of \eqn{y^2}{y^2};
21 | (2) the sum of squares of the first 12 autocorrelations of \eqn{e^2}{e^2};
22 | (3) the \eqn{R^2}{R^2} value of an AR model applied to \eqn{y^2}{y^2};
23 | (4) the \eqn{R^2}{R^2} value of an AR model applied to \eqn{e^2}{e^2}.
24 | The statistics obtained from \eqn{y^2}{y^2} are the ARCH effects, while those
25 | from \eqn{e^2}{e^2} are the GARCH effects.
26 | }
27 | \author{
28 | Yanfei Kang and Rob J Hyndman
29 | }
30 | 


--------------------------------------------------------------------------------
/man/yahoo_data.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/yahoo.R
 3 | \name{yahoo_data}
 4 | \alias{yahoo_data}
 5 | \title{Yahoo server metrics}
 6 | \usage{
 7 | yahoo_data(...)
 8 | }
 9 | \arguments{
10 | \item{...}{Additional arguments passed to \code{download.file}
11 | 
12 | Downloads and returns aggregated and anonymized datasets from Yahoo representing server metrics of Yahoo services.}
13 | }
14 | \value{
15 | A matrix of time series with 1437 rows of hourly data, and 1748 columns representing different servers.
16 | }
17 | \description{
18 | Yahoo server metrics
19 | }
20 | \examples{
21 | yahoo <- yahoo_data()
22 | plot(yahoo[,1:10])
23 | plot(yahoo[,1:44], plot.type='single', col=1:44)
24 | 
25 | }
26 | \references{
27 | Hyndman, R.J., Wang, E., Laptev, N. (2015) Large-scale unusual time series detection.
28 | In: \emph{Proceedings of the IEEE International Conference on Data Mining}. Atlantic City, NJ, USA. 14–17 November 2015.
29 | \url{https://robjhyndman.com/publications/icdm2015/}
30 | }
31 | \author{
32 | Rob Hyndman, Earo Wang, Nikolay Laptev, Mitchell O'Hara-Wild
33 | }
34 | 


--------------------------------------------------------------------------------
/man/localsimple_taures.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/compengine.R
 3 | \name{localsimple_taures}
 4 | \alias{localsimple_taures}
 5 | \title{The first zero crossing of the autocorrelation function of the residuals from Simple local time-series forecasting from software package \code{hctsa}}
 6 | \usage{
 7 | localsimple_taures(y, forecastMeth = c("mean", "lfit"), trainLength = NULL)
 8 | }
 9 | \arguments{
10 | \item{y}{the input time series}
11 | 
12 | \item{forecastMeth}{the forecasting method, defaults to \code{mean}.
13 | \code{mean}: local mean prediction using the past trainLength time-series values.
14 | \code{lfit}: local linear prediction using the past trainLength time-series values.}
15 | 
16 | \item{trainLength}{the number of time-series values to use to forecast the next value.
17 | Defaults to 1 when using method \code{mean} and 3 when using method \code{lfit}.}
18 | }
19 | \value{
20 | The first zero crossing of the autocorrelation function of the residuals
21 | }
22 | \description{
23 | Simple predictors using the past trainLength values of the time series to
24 | predict its next value.
25 | }
26 | 


--------------------------------------------------------------------------------
/R/yahoo.R:
--------------------------------------------------------------------------------
 1 | #' Yahoo server metrics
 2 | #'
 3 | #' Downloads and returns aggregated and anonymized datasets from Yahoo representing server metrics of Yahoo services.
 4 | #'
 5 | #' @param ... Additional arguments passed to `download.file`
 6 | #'
 7 | #' @return A matrix of time series with 1437 rows of hourly data, and 1748 columns representing different servers.
 8 | #' @author Rob Hyndman, Earo Wang, Nikolay Laptev, Mitchell O'Hara-Wild
 9 | #' @references
10 | #' Hyndman, R.J., Wang, E., Laptev, N. (2015) Large-scale unusual time series detection.
11 | #' In: \emph{Proceedings of the IEEE International Conference on Data Mining}. Atlantic City, NJ, USA. 14–17 November 2015.
12 | #' \url{https://robjhyndman.com/publications/icdm2015/}
13 | #' @examples
14 | #' yahoo <- yahoo_data()
15 | #' plot(yahoo[,1:10])
16 | #' plot(yahoo[,1:44], plot.type='single', col=1:44)
17 | #'
18 | #' @export
19 | yahoo_data <- function(...) {
20 |   # The downloaded archive is only needed until its contents are loaded, so
21 |   # delete the temporary file however the function exits (including when the
22 |   # download or the load fails).
23 |   tmp <- tempfile()
24 |   on.exit(unlink(tmp), add = TRUE)
25 |   utils::download.file("https://github.com/robjhyndman/tsfeatures/raw/master/extra-data/yahoo.rda", tmp, ...)
26 |   # Load into a private environment rather than the function frame so that
27 |   # nothing else stored in the file can mask local variables.
28 |   loaded <- new.env(parent = emptyenv())
29 |   load(tmp, envir = loaded)
30 |   # The archive is expected to hold an object named `yahoo`; get() fails
31 |   # loudly if it is missing instead of returning a placeholder value.
32 |   get("yahoo", envir = loaded, inherits = FALSE)
33 | }


--------------------------------------------------------------------------------
/tests/testthat/test-compengine.R:
--------------------------------------------------------------------------------
 1 | # Unit tests for the compengine() feature set.
 2 | # The expected values are regression checks on the WWWusage and USAccDeaths
 3 | # series.
 4 | if (require(testthat)) {
 5 |   context("Tests on input")
 6 |   test_that("tests for a non-vector object", {
 7 |     # compengine() is expected to fail on matrix input; warnings raised on the
 8 |     # way to the error are not what is being tested.
 9 |     expect_error(suppressWarnings(compengine(matrix(0, 2, 2))))
10 |   })
11 | 
12 |   context("Tests on output")
13 |   test_that("tests for compengine results on non-seasonal data", {
14 |     z <- compengine(WWWusage)
15 |     expect_equal(length(z), 16L)
16 |     expect_equal(z[4], c(firstmin_ac = 21))
17 |     expect_gt(z[5], 109.15)
18 |     expect_gt(z[3], 0.27)
19 |   })
20 |   test_that("tests for compengine results on seasonal data", {
21 |     z <- compengine(USAccDeaths)
22 |     expect_equal(length(z), 16L)
23 |     expect_equal(z[4], c(firstmin_ac = 6))
24 |     expect_gt(z[6], 1.83)
25 |     expect_lt(z[3], -0.0647)
26 |   })
27 |   test_that("tests for compengine results on data with missing values", {
28 |     y_WWWusage <- WWWusage
29 |     y_WWWusage[c(16:17, 78)] <- NA
30 |     z <- compengine(y_WWWusage)
31 |     # Missing observations in the input must not produce NA features.
32 |     expect_false(anyNA(z))
33 |     expect_gt(z[3], 0.2845)
34 |     expect_equal(z[4], c(firstmin_ac = 21))
35 |   })
36 | }
32 | 


--------------------------------------------------------------------------------
/man/firstmin_ac.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/compengine.R
 3 | \name{firstmin_ac}
 4 | \alias{firstmin_ac}
 5 | \title{Time of first minimum in the autocorrelation function from software package \code{hctsa}}
 6 | \usage{
 7 | firstmin_ac(
 8 |   x,
 9 |   acfv = stats::acf(x, lag.max = N - 1, plot = FALSE, na.action = na.pass)
10 | )
11 | }
12 | \arguments{
13 | \item{x}{the input time series}
14 | 
15 | \item{acfv}{vector of autocorrelations; if supplied, it is used to avoid repeated computation.}
16 | }
17 | \value{
18 | The lag of the first minimum
19 | }
20 | \description{
21 | Time of first minimum in the autocorrelation function from software package \code{hctsa}
22 | }
23 | \examples{
24 | firstmin_ac(WWWusage)
25 | }
26 | \references{
27 | B.D. Fulcher and N.S. Jones. hctsa: A computational framework for automated time-series phenotyping using massive feature extraction. Cell Systems 5, 527 (2017).
28 | 
29 | B.D. Fulcher, M.A. Little, N.S. Jones Highly comparative time-series analysis: the empirical structure of time series and their methods. J. Roy. Soc. Interface 10, 83 (2013).
30 | }
31 | \author{
32 | Yangzhuoran Yang
33 | }
34 | 


--------------------------------------------------------------------------------
/man/dist_features.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/compengine.R
 3 | \name{dist_features}
 4 | \alias{dist_features}
 5 | \title{The distribution feature set from software package \code{hctsa}}
 6 | \usage{
 7 | dist_features(x)
 8 | }
 9 | \arguments{
10 | \item{x}{the input time series}
11 | }
12 | \value{
13 | a vector with distribution features
14 | }
15 | \description{
16 | Calculates the features grouped as the distribution set,
17 | which are used in the CompEngine database, using the methods introduced in package \code{hctsa}.
18 | }
19 | \details{
20 | Features in this set are \code{histogram_mode_10}
21 | and \code{outlierinclude_mdrmd}.
22 | }
23 | \references{
24 | B.D. Fulcher and N.S. Jones. hctsa: A computational framework for automated time-series phenotyping using massive feature extraction. Cell Systems 5, 527 (2017).
25 | 
26 | B.D. Fulcher, M.A. Little, N.S. Jones Highly comparative time-series analysis: the empirical structure of time series and their methods. J. Roy. Soc. Interface 10, 83 (2013).
27 | }
28 | \seealso{
29 | \code{\link{histogram_mode}}
30 | 
31 | \code{\link{outlierinclude_mdrmd}}
32 | }
33 | \author{
34 | Yangzhuoran Yang
35 | }
36 | 


--------------------------------------------------------------------------------
/man/pred_features.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/compengine.R
 3 | \name{pred_features}
 4 | \alias{pred_features}
 5 | \title{The prediction feature set from software package \code{hctsa}}
 6 | \usage{
 7 | pred_features(x)
 8 | }
 9 | \arguments{
10 | \item{x}{the input time series}
11 | }
12 | \value{
13 | a vector with prediction features
14 | }
15 | \description{
16 | Calculates the features grouped as the prediction set,
17 | which are used in the CompEngine database, using the methods introduced in package \code{hctsa}.
18 | }
19 | \details{
20 | Features in this set are \code{localsimple_mean1},
21 | \code{localsimple_lfitac},
22 | and \code{sampen_first}.
23 | }
24 | \references{
25 | B.D. Fulcher and N.S. Jones. hctsa: A computational framework for automated time-series phenotyping using massive feature extraction. Cell Systems 5, 527 (2017).
26 | 
27 | B.D. Fulcher, M.A. Little, N.S. Jones Highly comparative time-series analysis: the empirical structure of time series and their methods. J. Roy. Soc. Interface 10, 83 (2013).
28 | }
29 | \seealso{
30 | \code{\link{localsimple_taures}}
31 | 
32 | \code{\link{sampen_first}}
33 | }
34 | \author{
35 | Yangzhuoran Yang
36 | }
37 | 


--------------------------------------------------------------------------------
/tests/testthat/test-stl_features.R:
--------------------------------------------------------------------------------
 1 | # Unit tests for stl_features().
 2 | # The expected values are regression checks on the WWWusage, USAccDeaths and
 3 | # forecast::taylor series.
 4 | if (require(testthat)) {
 5 |   context("Tests on input")
 6 |   test_that("tests for a non-vector object", {
 7 |     # stl_features() is expected to fail on matrix input.
 8 |     expect_error(stl_features(matrix(0, 2, 2)))
 9 |   })
10 | 
11 |   context("Tests on output")
12 |   test_that("tests for stl_feature results on non-seasonal data", {
13 |     z <- stl_features(WWWusage)
14 |     expect_equal(length(z), 8L)
15 |     expect_equal(z[1], c(nperiods = 0))
16 |     expect_equal(z[2], c(seasonal_period = 1))
17 |     expect_gt(z[3], 0.98)
18 |   })
19 |   test_that("tests for stl_feature results on seasonal ts data", {
20 |     z <- stl_features(USAccDeaths)
21 |     expect_equal(length(z), 11L)
22 |     expect_equal(z[1], c(nperiods = 1))
23 |     expect_equal(z[2], c(seasonal_period = 12))
24 |     expect_gt(z[3], 0.78)
25 |   })
26 |   test_that("tests for stl_feature results on seasonal msts data", {
27 |     z <- stl_features(forecast::taylor)
28 |     expect_equal(length(z), 15L)
29 |     expect_equal(z[1], c(nperiods = 2))
30 |     # With two seasonal periods the period names carry an index suffix.
31 |     expect_equal(z[2], c(seasonal_period1 = 48))
32 |     expect_equal(z[3], c(seasonal_period2 = 336))
33 |     expect_gt(z[4], 0.79)
34 |   })
35 | }
32 | 


--------------------------------------------------------------------------------
/man/nonlinearity.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/yanfei.R
 3 | \name{nonlinearity}
 4 | \alias{nonlinearity}
 5 | \title{Nonlinearity coefficient}
 6 | \usage{
 7 | nonlinearity(x)
 8 | }
 9 | \arguments{
10 | \item{x}{a univariate time series}
11 | }
12 | \value{
13 | A numeric value.
14 | }
15 | \description{
16 | Computes a nonlinearity statistic based on Lee, White & Granger's nonlinearity test of a time series.
17 | The statistic is \eqn{10X^2/T}{10X^2/T} where \eqn{X^2}{X^2} is the Chi-squared statistic from Lee, White and Granger,
18 | and T is the length of the time series. This takes large values
19 | when the series is nonlinear, and values around 0 when the series is linear.
20 | }
21 | \examples{
22 | nonlinearity(lynx)
23 | }
24 | \references{
25 | Lee, T. H., White, H., & Granger, C. W. (1993). Testing for neglected nonlinearity in time series models: A comparison of neural network methods and alternative tests. \emph{Journal of Econometrics}, 56(3), 269-290.
26 | 
27 | Teräsvirta, T., Lin, C.-F., & Granger, C. W. J. (1993). Power of the neural network linearity test. \emph{Journal of Time Series Analysis}, 14(2), 209–220.
28 | }
29 | \author{
30 | Yanfei Kang and Rob J Hyndman
31 | }
32 | 


--------------------------------------------------------------------------------
/man/station_features.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/compengine.R
 3 | \name{station_features}
 4 | \alias{station_features}
 5 | \title{The stationarity feature set from software package \code{hctsa}}
 6 | \usage{
 7 | station_features(x)
 8 | }
 9 | \arguments{
10 | \item{x}{the input time series}
11 | }
12 | \value{
13 | a vector with stationarity features
14 | }
15 | \description{
16 | Calculates the features grouped as the stationarity set,
17 | which are used in the CompEngine database, using the methods introduced in package \code{hctsa}.
18 | }
19 | \details{
20 | Features in this set are \code{std1st_der},
21 | \code{spreadrandomlocal_meantaul_50},
22 | and \code{spreadrandomlocal_meantaul_ac2}.
23 | }
24 | \references{
25 | B.D. Fulcher and N.S. Jones. hctsa: A computational framework for automated time-series phenotyping using massive feature extraction. Cell Systems 5, 527 (2017).
26 | 
27 | B.D. Fulcher, M.A. Little, N.S. Jones Highly comparative time-series analysis: the empirical structure of time series and their methods. J. Roy. Soc. Interface 10, 83 (2013).
28 | }
29 | \seealso{
30 | \code{\link{std1st_der}}
31 | 
32 | \code{\link{spreadrandomlocal_meantaul}}
33 | }
34 | \author{
35 | Yangzhuoran Yang
36 | }
37 | 


--------------------------------------------------------------------------------
/man/sampenc.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/compengine.R
 3 | \name{sampenc}
 4 | \alias{sampenc}
 5 | \title{Second Sample Entropy from software package \code{hctsa}}
 6 | \usage{
 7 | sampenc(y, M = 6, r = 0.3)
 8 | }
 9 | \arguments{
10 | \item{y}{the input time series}
11 | 
12 | \item{M}{embedding dimension}
13 | 
14 | \item{r}{threshold}
15 | }
16 | \description{
17 | Modified from the Ben Fulcher version of original code sampenc.m from
18 | http://physionet.org/physiotools/sampen/
19 | http://www.physionet.org/physiotools/sampen/matlab/1.1/sampenc.m
20 | Code by DK Lake (dlake@virginia.edu), JR Moorman and Cao Hanqing.
21 | }
22 | \references{
23 | cf. "Physiological time-series analysis using approximate entropy and sample
24 | entropy", J. S. Richman and J. R. Moorman, Am. J. Physiol. Heart Circ.
25 | Physiol., 278(6) H2039 (2000)
26 | 
27 | B.D. Fulcher and N.S. Jones. hctsa: A computational framework for automated time-series phenotyping using massive feature extraction. Cell Systems 5, 527 (2017).
28 | 
29 | B.D. Fulcher, M.A. Little, N.S. Jones Highly comparative time-series analysis: the empirical structure of time series and their methods. J. Roy. Soc. Interface 10, 83 (2013).
30 | }
31 | \author{
32 | Yangzhuoran Yang
33 | }
34 | 


--------------------------------------------------------------------------------
/man/compengine.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/compengine.R
 3 | \name{compengine}
 4 | \alias{compengine}
 5 | \title{CompEngine feature set}
 6 | \usage{
 7 | compengine(x)
 8 | }
 9 | \arguments{
10 | \item{x}{the input time series}
11 | }
12 | \value{
13 | a vector with CompEngine features
14 | }
15 | \description{
16 | Calculates the features that have been used in the CompEngine database, using the methods introduced in package
17 | \code{hctsa}.
18 | }
19 | \details{
20 | The features involved can be grouped as \code{autocorrelation},
21 | \code{prediction}, \code{stationarity}, \code{distribution}, and \code{scaling}.
22 | }
23 | \references{
24 | B.D. Fulcher and N.S. Jones. hctsa: A computational framework for automated time-series phenotyping using massive feature extraction. Cell Systems 5, 527 (2017).
25 | 
26 | B.D. Fulcher, M.A. Little, N.S. Jones Highly comparative time-series analysis: the empirical structure of time series and their methods. J. Roy. Soc. Interface 10, 83 (2013).
27 | }
28 | \seealso{
29 | \code{\link{autocorr_features}}
30 | 
31 | \code{\link{pred_features}}
32 | 
33 | \code{\link{station_features}}
34 | 
35 | \code{\link{dist_features}}
36 | 
37 | \code{\link{scal_features}}
38 | }
39 | \author{
40 | Yangzhuoran Yang
41 | }
42 | 


--------------------------------------------------------------------------------
/man/embed2_incircle.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/compengine.R
 3 | \name{embed2_incircle}
 4 | \alias{embed2_incircle}
 5 | \title{Points inside a given circular boundary in a 2-d embedding space from software package \code{hctsa}}
 6 | \usage{
 7 | embed2_incircle(
 8 |   y,
 9 |   boundary = NULL,
10 |   acfv = stats::acf(y, length(y) - 1, plot = FALSE, na.action = na.pass)
11 | )
12 | }
13 | \arguments{
14 | \item{y}{the input time series}
15 | 
16 | \item{boundary}{the given circular boundary, set to 1 or 2 in CompEngine. Defaults to 1.}
17 | 
18 | \item{acfv}{vector of autocorrelations; if supplied, it is used to avoid repeated computation.}
19 | }
20 | \value{
21 | the proportion of points inside a given circular boundary
22 | }
23 | \description{
24 | The time lag is set to the first zero crossing of the autocorrelation function.
25 | }
26 | \references{
27 | B.D. Fulcher and N.S. Jones. hctsa: A computational framework for automated time-series phenotyping using massive feature extraction. Cell Systems 5, 527 (2017).
28 | 
29 | B.D. Fulcher, M.A. Little, N.S. Jones Highly comparative time-series analysis: the empirical structure of time series and their methods. J. Roy. Soc. Interface 10, 83 (2013).
30 | }
31 | \author{
32 | Yangzhuoran Yang
33 | }
34 | 


--------------------------------------------------------------------------------
/man/max_level_shift.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/features.R
 3 | \name{max_level_shift}
 4 | \alias{max_level_shift}
 5 | \alias{max_var_shift}
 6 | \alias{max_kl_shift}
 7 | \title{Time series features based on sliding windows}
 8 | \usage{
 9 | max_level_shift(x, width = ifelse(frequency(x) > 1, frequency(x), 10))
10 | 
11 | max_var_shift(x, width = ifelse(frequency(x) > 1, frequency(x), 10))
12 | 
13 | max_kl_shift(x, width = ifelse(frequency(x) > 1, frequency(x), 10))
14 | }
15 | \arguments{
16 | \item{x}{a univariate time series}
17 | 
18 | \item{width}{size of sliding window}
19 | }
20 | \value{
21 | A vector of 2 values: the size of the shift, and the time index of the shift.
22 | }
23 | \description{
24 | Computes features of a time series based on sliding (overlapping) windows.
25 | \code{max_level_shift} finds the largest mean shift between two consecutive windows.
26 | \code{max_var_shift} finds the largest variance shift between two consecutive windows.
27 | \code{max_kl_shift} finds the largest shift in Kullback-Leibler divergence between
28 | two consecutive windows.
29 | }
30 | \details{
31 | Computes the largest level shift and largest variance shift in sliding mean calculations
32 | }
33 | \author{
34 | Earo Wang and Rob J Hyndman
35 | }
36 | 


--------------------------------------------------------------------------------
/man/sampen_first.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/compengine.R
 3 | \name{sampen_first}
 4 | \alias{sampen_first}
 5 | \title{Second Sample Entropy of a time series from software package \code{hctsa}}
 6 | \usage{
 7 | sampen_first(y)
 8 | }
 9 | \arguments{
10 | \item{y}{the input time series}
11 | }
12 | \description{
13 | Modified from Ben Fulcher's \code{EN_SampEn}, which uses code from PhysioNet.
14 | The publicly-available PhysioNet Matlab code, sampenc (renamed here to
15 | RN_sampenc) is available from:
16 | http://www.physionet.org/physiotools/sampen/matlab/1.1/sampenc.m
17 | }
18 | \details{
19 | Embedding dimension is set to 5.
20 | The threshold is set to 0.3.
21 | }
22 | \references{
23 | cf. "Physiological time-series analysis using approximate entropy and sample
24 | entropy", J. S. Richman and J. R. Moorman, Am. J. Physiol. Heart Circ.
25 | Physiol., 278(6) H2039 (2000)
26 | 
27 | B.D. Fulcher and N.S. Jones. hctsa: A computational framework for automated time-series phenotyping using massive feature extraction. Cell Systems 5, 527 (2017).
28 | 
29 | B.D. Fulcher, M.A. Little, N.S. Jones Highly comparative time-series analysis: the empirical structure of time series and their methods. J. Roy. Soc. Interface 10, 83 (2013).
30 | }
31 | \author{
32 | Yangzhuoran Yang
33 | }
34 | 


--------------------------------------------------------------------------------
/man/spreadrandomlocal_meantaul.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/compengine.R
 3 | \name{spreadrandomlocal_meantaul}
 4 | \alias{spreadrandomlocal_meantaul}
 5 | \title{Bootstrap-based stationarity measure from software package \code{hctsa}}
 6 | \usage{
 7 | spreadrandomlocal_meantaul(y, l = 50)
 8 | }
 9 | \arguments{
10 | \item{y}{the input time series}
11 | 
12 | \item{l}{the length of local time-series segments to analyse as a positive integer. Can also be a specified character string: "ac2": twice the first zero-crossing of the autocorrelation function}
13 | }
14 | \value{
15 | mean of the first zero-crossings of the autocorrelation function
16 | }
17 | \description{
18 | 100 time-series segments of length \code{l} are selected at random from the time series and
19 | the mean of the first zero-crossings of the autocorrelation function in each segment is calculated.
20 | }
21 | \references{
22 | B.D. Fulcher and N.S. Jones. hctsa: A computational framework for automated time-series phenotyping using massive feature extraction. Cell Systems 5, 527 (2017).
23 | 
24 | B.D. Fulcher, M.A. Little, N.S. Jones Highly comparative time-series analysis: the empirical structure of time series and their methods. J. Roy. Soc. Interface 10, 83 (2013).
25 | }
26 | \author{
27 | Yangzhuoran Yang
28 | }
29 | 


--------------------------------------------------------------------------------
/man/autocorr_features.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/compengine.R
 3 | \name{autocorr_features}
 4 | \alias{autocorr_features}
 5 | \title{The autocorrelation feature set from software package \code{hctsa}}
 6 | \usage{
 7 | autocorr_features(x)
 8 | }
 9 | \arguments{
10 | \item{x}{the input time series}
11 | }
12 | \value{
13 | a vector with autocorrelation features
14 | }
15 | \description{
16 | Calculates the features grouped as the autocorrelation set,
17 | which are used in the CompEngine database, using the methods introduced in package \code{hctsa}.
18 | }
19 | \details{
20 | Features in this set are \code{embed2_incircle_1},
21 | \code{embed2_incircle_2},
22 | \code{ac_9},
23 | \code{firstmin_ac},
24 | \code{trev_num},
25 | \code{motiftwo_entro3},
26 | and \code{walker_propcross}.
27 | }
28 | \references{
29 | B.D. Fulcher and N.S. Jones. hctsa: A computational framework for automated time-series phenotyping using massive feature extraction. Cell Systems 5, 527 (2017).
30 | 
31 | B.D. Fulcher, M.A. Little, N.S. Jones Highly comparative time-series analysis: the empirical structure of time series and their methods. J. Roy. Soc. Interface 10, 83 (2013).
32 | }
33 | \seealso{
34 | \code{\link{embed2_incircle}}
35 | 
36 | \code{\link{ac_9}}
37 | 
38 | \code{\link{firstmin_ac}}
39 | 
40 | \code{\link{trev_num}}
41 | 
42 | \code{\link{motiftwo_entro3}}
43 | 
44 | \code{\link{walker_propcross}}
45 | }
46 | \author{
47 | Yangzhuoran Yang
48 | }
49 | 


--------------------------------------------------------------------------------
/.github/workflows/pkgdown.yaml:
--------------------------------------------------------------------------------
 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples
 2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
 3 | on:
 4 |   push:
 5 |     branches: [main, master]
 6 |   pull_request:
 7 |     branches: [main, master]
 8 |   release:
 9 |     types: [published]
10 |   workflow_dispatch:
11 | 
12 | name: pkgdown
13 | 
14 | jobs:
15 |   pkgdown:
16 |     runs-on: ubuntu-latest
17 |     # Only restrict concurrency for non-PR jobs
18 |     concurrency:
19 |       group: pkgdown-${{ github.event_name != 'pull_request' || github.run_id }}
20 |     env:
21 |       GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
22 |     permissions:
23 |       contents: write
24 |     steps:
25 |       - uses: actions/checkout@v3
26 | 
27 |       - uses: r-lib/actions/setup-pandoc@v2
28 | 
29 |       - uses: r-lib/actions/setup-r@v2
30 |         with:
31 |           use-public-rspm: true
32 | 
33 |       - uses: r-lib/actions/setup-r-dependencies@v2
34 |         with:
35 |           extra-packages: any::pkgdown, local::.
36 |           needs: website
37 | 
38 |       - name: Build site
39 |         run: pkgdown::build_site_github_pages(new_process = FALSE, install = FALSE)
40 |         shell: Rscript {0}
41 | 
42 |       - name: Deploy to GitHub pages 🚀
43 |         if: github.event_name != 'pull_request'
44 |         uses: JamesIves/github-pages-deploy-action@v4.4.1
45 |         with:
46 |           clean: false
47 |           branch: gh-pages
48 |           folder: docs
49 | 


--------------------------------------------------------------------------------
/.github/workflows/R-CMD-check.yaml:
--------------------------------------------------------------------------------
 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples
 2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
 3 | on:
 4 |   push:
 5 |     branches: [main, master]
 6 |   pull_request:
 7 |     branches: [main, master]
 8 | 
 9 | name: R-CMD-check
10 | 
11 | jobs:
12 |   R-CMD-check:
13 |     runs-on: ${{ matrix.config.os }}
14 | 
15 |     name: ${{ matrix.config.os }} (${{ matrix.config.r }})
16 | 
17 |     strategy:
18 |       fail-fast: false
19 |       matrix:
20 |         config:
21 |           - {os: macos-latest,   r: 'release'}
22 |           - {os: windows-latest, r: 'release'}
23 |           - {os: ubuntu-latest,   r: 'devel', http-user-agent: 'release'}
24 |           - {os: ubuntu-latest,   r: 'release'}
25 |           - {os: ubuntu-latest,   r: 'oldrel-1'}
26 | 
27 |     env:
28 |       GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
29 |       R_KEEP_PKG_SOURCE: yes
30 | 
31 |     steps:
32 |       - uses: actions/checkout@v3
33 | 
34 |       - uses: r-lib/actions/setup-pandoc@v2
35 | 
36 |       - uses: r-lib/actions/setup-r@v2
37 |         with:
38 |           r-version: ${{ matrix.config.r }}
39 |           http-user-agent: ${{ matrix.config.http-user-agent }}
40 |           use-public-rspm: true
41 | 
42 |       - uses: r-lib/actions/setup-tinytex@v2
43 | 
44 |       - uses: r-lib/actions/setup-r-dependencies@v2
45 |         with:
46 |           extra-packages: any::rcmdcheck
47 |           needs: check
48 | 
49 |       - uses: r-lib/actions/check-r-package@v2
50 |         with:
51 |           args: 'c("--no-build-vignettes","--no-manual")'
52 | 


--------------------------------------------------------------------------------
/man/entropy.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/entropy.R
 3 | \name{entropy}
 4 | \alias{entropy}
 5 | \title{Spectral entropy of a time series}
 6 | \usage{
 7 | entropy(x)
 8 | }
 9 | \arguments{
10 | \item{x}{a univariate time series}
11 | }
12 | \value{
13 | A non-negative real value for the spectral entropy \eqn{H_s(x_t)}.
14 | }
15 | \description{
16 | Computes spectral entropy from a univariate normalized
17 | spectral density, estimated using an AR model.
18 | }
19 | \details{
20 | The \emph{spectral entropy} equals the Shannon entropy of the spectral density
21 | \eqn{f_x(\lambda)} of a stationary process \eqn{x_t}:
22 | \deqn{
23 | H_s(x_t) = - \int_{-\pi}^{\pi} f_x(\lambda) \log f_x(\lambda) d \lambda,
24 | }
25 | where the density is normalized such that
26 | \eqn{\int_{-\pi}^{\pi} f_x(\lambda) d \lambda = 1}.
27 | An estimate of \eqn{f(\lambda)} can be obtained using \code{\link[stats]{spec.ar}} with
28 | the \code{burg} method.
29 | }
30 | \examples{
31 | entropy(rnorm(1000))
32 | entropy(lynx)
33 | entropy(sin(1:20))
34 | }
35 | \references{
36 | Jerry D. Gibson and Jaewoo Jung (2006). \dQuote{The
37 | Interpretation of Spectral Entropy Based Upon Rate Distortion Functions}.
38 | IEEE International Symposium on Information Theory, pp. 277-281.
39 | 
40 | Goerg, G. M. (2013). \dQuote{Forecastable Component Analysis}.
41 | Proceedings of the 30th International Conference on Machine Learning (PMLR) 28 (2): 64-72, 2013.
42 | Available at \url{https://proceedings.mlr.press/v28/goerg13.html}.
43 | }
44 | \seealso{
45 | \code{\link[stats]{spec.ar}}
46 | }
47 | \author{
48 | Rob J Hyndman
49 | }
50 | 


--------------------------------------------------------------------------------
/man/outlierinclude_mdrmd.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/compengine.R
 3 | \name{outlierinclude_mdrmd}
 4 | \alias{outlierinclude_mdrmd}
 5 | \title{How median depend on distributional outliers from software package \code{hctsa}}
 6 | \usage{
 7 | outlierinclude_mdrmd(y, zscored = TRUE)
 8 | }
 9 | \arguments{
10 | \item{y}{the input time series (ideally z-scored)}
11 | 
12 | \item{zscored}{Should y be z-scored before computing the statistic. Default: TRUE}
13 | }
14 | \value{
15 | median  of the median of range indices
16 | }
17 | \description{
18 | Measures median as more and
19 | more outliers are included in the calculation according to a specified rule,
20 | of outliers being furthest from the mean.
21 | }
22 | \details{
23 | The threshold for including time-series data points in the analysis increases
24 | from zero to the maximum deviation, in increments of 0.01*sigma (by default),
25 | where sigma is the standard deviation of the time series.
26 | 
27 | At each threshold,  proportion of time series points
28 | included and median are calculated, and outputs from the
29 | algorithm measure how these statistical quantities change as more extreme
30 | points are included in the calculation.
31 | 
32 | Outliers are defined as furthest from the mean.
33 | }
34 | \references{
35 | B.D. Fulcher and N.S. Jones. hctsa: A computational framework for automated time-series phenotyping using massive feature extraction. Cell Systems 5, 527 (2017).
36 | 
37 | B.D. Fulcher, M.A. Little, N.S. Jones Highly comparative time-series analysis: the empirical structure of time series and their methods. J. Roy. Soc. Interface 10, 83 (2013).
38 | }
39 | \author{
40 | Yangzhuoran Yang
41 | }
42 | 


--------------------------------------------------------------------------------
/README.Rmd:
--------------------------------------------------------------------------------
 1 | ---
 2 | output: github_document
 3 | editor_options:
 4 |   chunk_output_type: console
 5 | ---
 6 | 
 7 | <!-- README.md is generated from README.Rmd. Please edit that file -->
 8 | 
 9 | ```{r, echo = FALSE}
10 | knitr::opts_chunk$set(
11 |   collapse = TRUE,
12 |   comment = "#>",
13 |   fig.path = "man/figures/README-",
14 |   cache = TRUE,
15 |   message = FALSE,
16 |   warning = FALSE
17 | )
18 | ```
19 | 
20 | # tsfeatures
21 | 
22 | <!-- badges: start -->
23 |   [![CRAN\_Status\_Badge](http://www.r-pkg.org/badges/version/tsfeatures)](https://cran.r-project.org/package=tsfeatures)
24 |   [![Downloads](http://cranlogs.r-pkg.org/badges/tsfeatures)](https://cran.r-project.org/package=tsfeatures)
25 |   [![Licence](https://img.shields.io/badge/licence-GPL--3-blue.svg)](https://www.gnu.org/licenses/gpl-3.0.en.html)
26 |   [![R build status](https://github.com/robjhyndman/tsfeatures/workflows/R-CMD-check/badge.svg)](https://github.com/robjhyndman/tsfeatures/actions)
27 | <!-- badges: end -->
28 | 
29 | The R package *tsfeatures* provides methods for extracting various features from time series data.
30 | 
31 | ## Installation
32 | 
33 | You can install the **stable** version on [R
34 | CRAN](https://cran.r-project.org/package=tsfeatures).
35 | 
36 | ``` r
37 | install.packages('tsfeatures', dependencies = TRUE)
38 | ```
39 | 
40 | You can install the **development** version from [Github](https://github.com/robjhyndman/tsfeatures) with:
41 | 
42 | ```{r gh-installation, eval = FALSE}
43 | # install.packages("devtools")
44 | devtools::install_github("robjhyndman/tsfeatures")
45 | ```
46 | 
47 | ## Usage
48 | 
49 | ```{r}
50 | library(tsfeatures)
51 | mylist <- list(sunspot.year, WWWusage, AirPassengers, USAccDeaths)
52 | myfeatures <- tsfeatures(mylist)
53 | myfeatures
54 | ```
55 | 
56 | 
57 | ## License
58 | 
59 | This package is free and open source software, licensed under GPL-3.
60 | 


--------------------------------------------------------------------------------
/man/tsfeatures-package.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/tsfeatures-package.R
 3 | \docType{package}
 4 | \name{tsfeatures-package}
 5 | \alias{tsfeatures-package}
 6 | \alias{_PACKAGE}
 7 | \title{tsfeatures: Time Series Feature Extraction}
 8 | \description{
 9 | Methods for extracting various features from time series data. The features provided are those from Hyndman, Wang and Laptev (2013) \doi{10.1109/ICDMW.2015.104}, Kang, Hyndman and Smith-Miles (2017) \doi{10.1016/j.ijforecast.2016.09.004} and from Fulcher, Little and Jones (2013) \doi{10.1098/rsif.2013.0048}. Features include spectral entropy, autocorrelations, measures of the strength of seasonality and trend, and so on. Users can also define their own feature functions.
10 | }
11 | \seealso{
12 | Useful links:
13 | \itemize{
14 |   \item \url{https://pkg.robjhyndman.com/tsfeatures/}
15 |   \item \url{https://github.com/robjhyndman/tsfeatures}
16 |   \item Report bugs at \url{https://github.com/robjhyndman/tsfeatures/issues}
17 | }
18 | 
19 | }
20 | \author{
21 | \strong{Maintainer}: Rob Hyndman \email{Rob.Hyndman@monash.edu} (\href{https://orcid.org/0000-0002-2140-5352}{ORCID})
22 | 
23 | Authors:
24 | \itemize{
25 |   \item Yanfei Kang (\href{https://orcid.org/0000-0001-8769-6650}{ORCID})
26 |   \item Pablo Montero-Manso \email{p.montero.manso@udc.es}
27 |   \item Mitchell O'Hara-Wild (\href{https://orcid.org/0000-0001-6729-7695}{ORCID})
28 |   \item Thiyanga Talagala (\href{https://orcid.org/0000-0002-0656-9789}{ORCID})
29 |   \item Earo Wang (\href{https://orcid.org/0000-0001-6448-5260}{ORCID})
30 |   \item Yangzhuoran Yang \email{Fin.Yang@monash.edu}
31 | }
32 | 
33 | Other contributors:
34 | \itemize{
35 |   \item Souhaib Ben Taieb [contributor]
36 |   \item Cao Hanqing [contributor]
37 |   \item D K Lake [contributor]
38 |   \item Nikolay Laptev [contributor]
39 |   \item J R Moorman [contributor]
40 |   \item Bohan Zhang [contributor]
41 | }
42 | 
43 | }
44 | \keyword{internal}
45 | 


--------------------------------------------------------------------------------
/R/entropy.R:
--------------------------------------------------------------------------------
 1 | #' Spectral entropy of a time series
 2 | #'
 3 | #' @description
 4 | #' Computes spectral entropy from a univariate normalized
 5 | #' spectral density, estimated using an AR model.
 6 | #'
 7 | #' @details
 8 | #' The \emph{spectral entropy} equals the Shannon entropy of the spectral density
 9 | #' \eqn{f_x(\lambda)} of a stationary process \eqn{x_t}:
10 | #'
11 | #' \deqn{H_s(x_t) = - \int_{-\pi}^{\pi} f_x(\lambda) \log f_x(\lambda) d \lambda,}
12 | #'
13 | #' where the density is normalized such that
14 | #' \eqn{\int_{-\pi}^{\pi} f_x(\lambda) d \lambda = 1}.
15 | #' An estimate of \eqn{f(\lambda)} can be obtained using \code{\link[stats]{spec.ar}} with
16 | #' the `burg` method.
17 | #'
18 | #' @param x a univariate time series
19 | #' @author Rob J Hyndman
20 | #' @return
21 | #' A non-negative real value for the spectral entropy \eqn{H_s(x_t)}.
22 | #' @seealso \code{\link[stats]{spec.ar}}
23 | #' @references
24 | #' Jerry D. Gibson and Jaewoo Jung (2006). \dQuote{The
25 | #' Interpretation of Spectral Entropy Based Upon Rate Distortion Functions}.
26 | #' IEEE International Symposium on Information Theory, pp. 277-281.
27 | #'
28 | #' Goerg, G. M. (2013). \dQuote{Forecastable Component Analysis}.
29 | #' Proceedings of the 30th International Conference on Machine Learning (PMLR) 28 (2): 64-72, 2013.
30 | #' Available at \url{https://proceedings.mlr.press/v28/goerg13.html}.
31 | #'
32 | #' @examples
33 | #' entropy(rnorm(1000))
34 | #' entropy(lynx)
35 | #' entropy(sin(1:20))
36 | #' @export
37 | 
entropy <- function(x) {
  # Spectral entropy of `x`, returned as a named numeric of length one.
  #
  # The spectral density is estimated by stats::spec.ar() (Burg method) on the
  # longest NA-free stretch of `x`. If that estimation fails for any reason
  # (for example, every value is missing), the result is NA_real_ rather than
  # an error, and nothing is printed to the console.
  #
  # NOTE(review): the spectrum is estimated on na.contiguous(x), but both
  # `n.freq` and the logarithm base use length(x). These differ when `x`
  # contains missing values -- confirm that this is intended.
  spec <- tryCatch(
    stats::spec.ar(
      na.contiguous(x),
      plot = FALSE, method = "burg",
      n.freq = ceiling(length(x) / 2 + 1)
    ),
    error = function(e) NULL
  )
  if (is.null(spec)) {
    return(c(entropy = NA_real_))
  }

  # Mirror the one-sided estimate into a symmetric two-sided spectrum and
  # normalise it to sum to one. (Any constant rescaling beforehand would be
  # cancelled by this normalisation.)
  fx <- c(rev(spec$spec[-1]), spec$spec)
  fx <- fx / sum(fx)

  # Shrink slightly towards the uniform distribution so that no ordinate is
  # exactly zero when the logarithm is taken.
  prior_weight <- 0.001
  prior_fx <- rep(1 / length(fx), length = length(fx))
  fx <- (1 - prior_weight) * fx + prior_weight * prior_fx

  # Shannon entropy with logarithms to base length(x), capped at 1.
  c(entropy = min(1, -sum(fx * log(fx, base = length(x)))))
}
54 | 


--------------------------------------------------------------------------------
/NAMESPACE:
--------------------------------------------------------------------------------
 1 | # Generated by roxygen2: do not edit by hand
 2 | 
 3 | S3method(as.list,mts)
 4 | export(ac_9)
 5 | export(acf_features)
 6 | export(arch_stat)
 7 | export(autocorr_features)
 8 | export(binarize_mean)
 9 | export(compengine)
10 | export(crossing_points)
11 | export(dist_features)
12 | export(embed2_incircle)
13 | export(entropy)
14 | export(firstmin_ac)
15 | export(firstzero_ac)
16 | export(flat_spots)
17 | export(fluctanal_prop_r1)
18 | export(heterogeneity)
19 | export(histogram_mode)
20 | export(holt_parameters)
21 | export(hurst)
22 | export(hw_parameters)
23 | export(localsimple_taures)
24 | export(lumpiness)
25 | export(max_kl_shift)
26 | export(max_level_shift)
27 | export(max_var_shift)
28 | export(motiftwo_entro3)
29 | export(nonlinearity)
30 | export(outlierinclude_mdrmd)
31 | export(pacf_features)
32 | export(pred_features)
33 | export(sampen_first)
34 | export(sampenc)
35 | export(scal_features)
36 | export(spreadrandomlocal_meantaul)
37 | export(stability)
38 | export(station_features)
39 | export(std1st_der)
40 | export(stl_features)
41 | export(trev_num)
42 | export(tsfeatures)
43 | export(unitroot_kpss)
44 | export(unitroot_pp)
45 | export(walker_propcross)
46 | export(yahoo_data)
47 | export(zero_proportion)
48 | importFrom(forecast,mstl)
49 | importFrom(graphics,hist)
50 | importFrom(purrr,map)
51 | importFrom(purrr,map_dbl)
52 | importFrom(stats,"tsp<-")
53 | importFrom(stats,Box.test)
54 | importFrom(stats,acf)
55 | importFrom(stats,ar)
56 | importFrom(stats,as.ts)
57 | importFrom(stats,bw.nrd0)
58 | importFrom(stats,cmdscale)
59 | importFrom(stats,coef)
60 | importFrom(stats,cor)
61 | importFrom(stats,dnorm)
62 | importFrom(stats,embed)
63 | importFrom(stats,fitted)
64 | importFrom(stats,frequency)
65 | importFrom(stats,lm)
66 | importFrom(stats,median)
67 | importFrom(stats,na.contiguous)
68 | importFrom(stats,na.pass)
69 | importFrom(stats,pacf)
70 | importFrom(stats,pchisq)
71 | importFrom(stats,poly)
72 | importFrom(stats,predict)
73 | importFrom(stats,quantile)
74 | importFrom(stats,residuals)
75 | importFrom(stats,sd)
76 | importFrom(stats,spec.ar)
77 | importFrom(stats,start)
78 | importFrom(stats,stl)
79 | importFrom(stats,ts)
80 | importFrom(stats,tsp)
81 | importFrom(stats,var)
82 | 


--------------------------------------------------------------------------------
/DESCRIPTION:
--------------------------------------------------------------------------------
 1 | Package: tsfeatures
 2 | Title: Time Series Feature Extraction
 3 | Version: 1.1.1.9000
 4 | Authors@R: c(
 5 |     person("Rob", "Hyndman", email = "Rob.Hyndman@monash.edu", role = c("aut","cre"), comment = c(ORCID = "0000-0002-2140-5352")),
 6 |     person("Yanfei", "Kang", role = "aut", comment = c(ORCID = "0000-0001-8769-6650")),
 7 |     person("Pablo", "Montero-Manso", email="p.montero.manso@udc.es", role="aut"),
 8 |     person("Mitchell", "O'Hara-Wild", role="aut", comment=c(ORCID = "0000-0001-6729-7695")),
 9 |     person("Thiyanga", "Talagala", role = "aut", comment=c(ORCID = "0000-0002-0656-9789")),
10 |     person("Earo", "Wang", role = "aut", comment=c(ORCID = "0000-0001-6448-5260")),
11 |     person("Yangzhuoran", "Yang", email = "Fin.Yang@monash.edu", role = "aut"),
12 |     person("Souhaib", "Ben Taieb", role = "ctb"),
13 |     person("Cao", "Hanqing", role="ctb"),
14 |     person("D K", "Lake", role="ctb"),
15 |     person("Nikolay", "Laptev", role="ctb"),
16 |     person("J R", "Moorman", role="ctb"),
17 |     person("Bohan", "Zhang", role = "ctb"))
18 | Description: Methods for extracting various features from time series data. The features provided are those from Hyndman, Wang and Laptev (2015) <doi:10.1109/ICDMW.2015.104>, Kang, Hyndman and Smith-Miles (2017) <doi:10.1016/j.ijforecast.2016.09.004> and from Fulcher, Little and Jones (2013) <doi:10.1098/rsif.2013.0048>. Features include spectral entropy, autocorrelations, measures of the strength of seasonality and trend, and so on. Users can also define their own feature functions.
19 | Depends:
20 |     R (>= 3.6.0)
21 | Imports:
22 |     fracdiff,
23 |     forecast (>= 8.3),
24 |     purrr,
25 |     RcppRoll (>= 0.2.2),
26 |     stats,
27 |     tibble,
28 |     tseries,
29 |     urca,
30 |     future,
31 |     furrr
32 | Suggests:
33 |     testthat,
34 |     knitr,
35 |     rmarkdown,
36 |     ggplot2,
37 |     tidyr,
38 |     dplyr,
39 |     Mcomp,
40 |     GGally
41 | License: GPL-3
42 | ByteCompile: true
43 | URL: https://pkg.robjhyndman.com/tsfeatures/, https://github.com/robjhyndman/tsfeatures
44 | BugReports: https://github.com/robjhyndman/tsfeatures/issues
45 | RoxygenNote: 7.2.3
46 | Roxygen: list(markdown = TRUE, roclets=c('rd', 'collate', 'namespace'))
47 | VignetteBuilder: knitr
48 | Encoding: UTF-8
49 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | 
 2 | <!-- README.md is generated from README.Rmd. Please edit that file -->
 3 | 
 4 | # tsfeatures
 5 | 
 6 | <!-- badges: start -->
 7 | 
 8 | [![CRAN_Status_Badge](http://www.r-pkg.org/badges/version/tsfeatures)](https://cran.r-project.org/package=tsfeatures)
 9 | [![Downloads](http://cranlogs.r-pkg.org/badges/tsfeatures)](https://cran.r-project.org/package=tsfeatures)
10 | [![Licence](https://img.shields.io/badge/licence-GPL--3-blue.svg)](https://www.gnu.org/licenses/gpl-3.0.en.html)
11 | [![R build
12 | status](https://github.com/robjhyndman/tsfeatures/workflows/R-CMD-check/badge.svg)](https://github.com/robjhyndman/tsfeatures/actions)
13 | <!-- badges: end -->
14 | 
15 | The R package *tsfeatures* provides methods for extracting various
16 | features from time series data.
17 | 
18 | ## Installation
19 | 
20 | You can install the **stable** version on [R
21 | CRAN](https://cran.r-project.org/package=tsfeatures).
22 | 
23 | ``` r
24 | install.packages('tsfeatures', dependencies = TRUE)
25 | ```
26 | 
27 | You can install the **development** version from
28 | [Github](https://github.com/robjhyndman/tsfeatures) with:
29 | 
30 | ``` r
31 | # install.packages("devtools")
32 | devtools::install_github("robjhyndman/tsfeatures")
33 | ```
34 | 
35 | ## Usage
36 | 
37 | ``` r
38 | library(tsfeatures)
39 | mylist <- list(sunspot.year, WWWusage, AirPassengers, USAccDeaths)
40 | myfeatures <- tsfeatures(mylist)
41 | myfeatures
42 | #> # A tibble: 4 × 20
43 | #>   frequency nperiods seasonal_period trend      spike linearity curvature e_acf1
44 | #>       <dbl>    <dbl>           <dbl> <dbl>      <dbl>     <dbl>     <dbl>  <dbl>
45 | #> 1         1        0               1 0.125    2.10e-5      3.58      1.11  0.793
46 | #> 2         1        0               1 0.985    3.01e-8      4.45      1.10  0.774
47 | #> 3        12        1              12 0.991    1.46e-8     11.0       1.09  0.509
48 | #> 4        12        1              12 0.802    9.15e-7     -2.12      2.85  0.258
49 | #> # ℹ 12 more variables: e_acf10 <dbl>, entropy <dbl>, x_acf1 <dbl>,
50 | #> #   x_acf10 <dbl>, diff1_acf1 <dbl>, diff1_acf10 <dbl>, diff2_acf1 <dbl>,
51 | #> #   diff2_acf10 <dbl>, seasonal_strength <dbl>, peak <dbl>, trough <dbl>,
52 | #> #   seas_acf1 <dbl>
53 | ```
54 | 
55 | ## License
56 | 
57 | This package is free and open source software, licensed under GPL-3.
58 | 


--------------------------------------------------------------------------------
/man/tsfeatures.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/featurematrix.R
 3 | \name{tsfeatures}
 4 | \alias{tsfeatures}
 5 | \title{Time series feature matrix}
 6 | \usage{
 7 | tsfeatures(
 8 |   tslist,
 9 |   features = c("frequency", "stl_features", "entropy", "acf_features"),
10 |   scale = TRUE,
11 |   trim = FALSE,
12 |   trim_amount = 0.1,
13 |   parallel = FALSE,
14 |   multiprocess = future::multisession,
15 |   na.action = na.pass,
16 |   ...
17 | )
18 | }
19 | \arguments{
20 | \item{tslist}{a list of univariate time series, each of class \code{ts} or a numeric vector.
21 | Alternatively, an object of class \code{mts} may be used.}
22 | 
23 | \item{features}{a vector of function names which return numeric vectors of features.
24 | All features returned by these functions must be named if they return more than one feature.
25 | Existing functions from installed packages may be used, but the package must be loaded first.
26 | Functions must return a result for all time series, even if it is just NA.}
27 | 
28 | \item{scale}{if \code{TRUE}, time series are scaled to mean 0 and sd 1 before features
29 | are computed.}
30 | 
31 | \item{trim}{if \code{TRUE}, time series are trimmed by \code{trim_amount} before features
32 | are computed. Values larger than \code{trim_amount} in absolute value are set to \code{NA}.}
33 | 
34 | \item{trim_amount}{Default level of trimming if \code{trim==TRUE}.}
35 | 
36 | \item{parallel}{If TRUE, multiple cores (or multiple sessions) will be used. This only speeds things up
37 | when there are a large number of time series.}
38 | 
39 | \item{multiprocess}{The function from the \code{future} package to use for parallel processing. Either
40 | \code{\link[future]{multisession}} or \code{\link[future]{multicore}}. The latter is preferred
41 | for Linux and MacOS.}
42 | 
43 | \item{na.action}{A function to handle missing values. Use \code{na.interp} to estimate missing values.}
44 | 
45 | \item{...}{Other arguments get passed to the feature functions.}
46 | }
47 | \value{
48 | A feature matrix (in the form of a tibble) with each row corresponding to
49 | one time series from tslist, and each column being a feature.
50 | }
51 | \description{
52 | \code{tsfeatures} computes a matrix of time series features from a list of time series
53 | }
54 | \examples{
55 | mylist <- list(sunspot.year, WWWusage, AirPassengers, USAccDeaths)
56 | tsfeatures(mylist)
57 | }
58 | \author{
59 | Rob J Hyndman
60 | }
61 | 


--------------------------------------------------------------------------------
/_pkgdown.yml:
--------------------------------------------------------------------------------
  1 | url: https://pkg.robjhyndman.com/tsfeatures
  2 | 
  3 | template:
  4 |   bootstrap: 5
  5 |   theme: tango
  6 |   bootswatch: flatly
  7 |   bslib:
  8 |     base_font: {google: "Fira Sans"}
  9 |     heading_font: {google: "Fira Sans"}
 10 |     code_font: "Hack, mono"
 11 |     primary: "#234460"
 12 |     link-color: "#234460"
 13 |   includes:
 14 |     in_header: <link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/hack-font@3/build/web/hack-subset.css">
 15 | 
 16 | authors:
 17 |   Rob Hyndman:
 18 |     href: https://robjhyndman.com
 19 |   Yanfei Kang:
 20 |     href: https://yanfei.site/
 21 |   Thiyanga Talagala:
 22 |     href: https://thiyanga.netlify.com/
 23 |   Earo Wang:
 24 |     href: https://earo.me
 25 |   Yangzhuoran Yang:
 26 |     href: https://yangzhuoranyang.com
 27 | 
 28 | navbar:
 29 |   type: light
 30 |   structure:
 31 |     left: [home, intro, reference, changelog]
 32 |     right: [search, github]
 33 |   components:
 34 |     home:
 35 |       icon: fa-home fa-lg
 36 |       href: index.html
 37 |     reference:
 38 |       text: Reference
 39 |       href: reference/index.html
 40 |     intro:
 41 |       text: Get started
 42 |       href: articles/tsfeatures.html
 43 |     changelog:
 44 |       text: Change log
 45 |       href: news/index.html
 46 |     github:
 47 |       icon: fa-github fa-lg
 48 |       href: https://github.com/robjhyndman/tsfeatures/
 49 | 
 50 | reference:
 51 | - title: "Package"
 52 |   desc: "tsfeatures package"
 53 |   contents:
 54 |   - tsfeatures-package
 55 | - title: tsfeatures
 56 |   desc: The main function to extract features from a list of time series.
 57 |   contents:
 58 |   - tsfeatures
 59 | - title: Feature functions
 60 |   desc: Functions which compute features from time series
 61 |   contents:
 62 |   - ac_9
 63 |   - acf_features
 64 |   - arch_stat
 65 |   - autocorr_features
 66 |   - binarize_mean
 67 |   - compengine
 68 |   - crossing_points
 69 |   - dist_features
 70 |   - embed2_incircle
 71 |   - entropy
 72 |   - firstmin_ac
 73 |   - firstzero_ac
 74 |   - flat_spots
 75 |   - fluctanal_prop_r1
 76 |   - heterogeneity
 77 |   - histogram_mode
 78 |   - holt_parameters
 79 |   - hurst
 80 |   - localsimple_taures
 81 |   - lumpiness
 82 |   - max_level_shift
 83 |   - motiftwo_entro3
 84 |   - nonlinearity
 85 |   - outlierinclude_mdrmd
 86 |   - pacf_features
 87 |   - pred_features
 88 |   - sampen_first
 89 |   - sampenc
 90 |   - scal_features
 91 |   - spreadrandomlocal_meantaul
 92 |   - station_features
 93 |   - std1st_der
 94 |   - stl_features
 95 |   - trev_num
 96 |   - unitroot_kpss
 97 |   - walker_propcross
 98 |   - zero_proportion
 99 | - title: Utility functions
100 |   contents:
101 |   - as.list.mts
102 | 
103 | - title: Data
104 |   desc: Download data providing Yahoo server metrics
105 |   contents:
106 |   - yahoo_data
107 | 


--------------------------------------------------------------------------------
/R/multipleseasonal.R:
--------------------------------------------------------------------------------
  1 | 
  2 | #' Strength of trend and seasonality of a time series
  3 | #'
  4 | #' Computes various measures of trend and seasonality of a time series based on
  5 | #' an STL decomposition. The number of seasonal periods, and the length of the
  6 | #' seasonal periods are returned. Also, the strength of seasonality corresponding
  7 | #' to each period is estimated. The \code{\link[forecast]{mstl}} function is used
  8 | #' to do the decomposition.
  9 | #' @param x a univariate time series.
 10 | #' @param ... Other arguments are passed to \code{\link[forecast]{mstl}}.
 11 | #' @return A vector of numeric values.
 12 | #' @author Rob J Hyndman
 13 | #' @export
 14 | 
 15 | stl_features <- function(x, ...) {
 16 |   if ("msts" %in% class(x)) {
 17 |     msts <- attributes(x)$msts
 18 |     nperiods <- length(msts)
 19 |   }
 20 |   else if ("ts" %in% class(x)) {
 21 |     msts <- frequency(x)
 22 |     nperiods <- msts > 1
 23 |     if(length(x) <= 2*msts) {
 24 |       warning("Insufficient data to compute STL decomposition")
 25 |       x <- c(x)
 26 |     }
 27 |     season <- 0
 28 |   }
 29 |   else {
 30 |     msts <- 1
 31 |     nperiods <- 0L
 32 |     season <- 0
 33 |   }
 34 |   if(NCOL(x) > 1){
 35 |     stop("x must be a univariate time series.")
 36 |   }
 37 |   trend <- linearity <- curvature <- season <- spike <- peak <- trough <- acfremainder <- NA
 38 | 
 39 |   # STL fits
 40 |   stlfit <- forecast::mstl(x, ...)
 41 |   trend0 <- stlfit[, "Trend"]
 42 |   remainder <- stlfit[, "Remainder"]
 43 |   seasonal <- stlfit[, grep("Season", colnames(stlfit)), drop = FALSE]
 44 | 
 45 |   # When the maximum frequency is dropped
 46 |   tsp(x) <- tsp(trend0)
 47 | 
 48 |   # De-trended and de-seasonalized data
 49 |   detrend <- x - trend0
 50 |   deseason <- forecast::seasadj(stlfit)
 51 |   fits <- x - remainder
 52 | 
 53 |   # Summary stats
 54 |   n <- length(x)
 55 |   varx <- var(x, na.rm = TRUE)
 56 |   vare <- var(remainder, na.rm = TRUE)
 57 |   vardetrend <- var(detrend, na.rm = TRUE)
 58 |   vardeseason <- var(deseason, na.rm = TRUE)
 59 |   nseas <- NCOL(seasonal)
 60 | 
 61 |   # Measure of trend strength
 62 |   if(varx < .Machine$double.eps)
 63 |     trend <- 0
 64 |   else if (vardeseason / varx < 1e-10) {
 65 |     trend <- 0
 66 |   } else {
 67 |     trend <- max(0, min(1, 1 - vare / vardeseason))
 68 |   }
 69 | 
 70 |   if (nseas > 0) {
 71 |     # Measure of seasonal strength
 72 |     season <- numeric(nseas)
 73 |     for (i in seq(nseas))
 74 |       season[i] <- max(0, min(1, 1 - vare / var(remainder + seasonal[, i], na.rm = TRUE)))
 75 | 
 76 |     # Find time of peak and trough for each component
 77 |     peak <- trough <- numeric(nseas)
 78 |     for (i in seq(nseas))
 79 |     {
 80 |       startx <- start(x)[2L] - 1L
 81 |       pk <- (startx + which.max(seasonal[, i])) %% msts[i]
 82 |       th <- (startx + which.min(seasonal[, i])) %% msts[i]
 83 |       peak[i] <- ifelse(pk == 0, msts[i], pk)
 84 |       trough[i] <- ifelse(th == 0, msts[i], th)
 85 |     }
 86 |   }
 87 | 
 88 |   # Compute measure of spikiness
 89 |   d <- (remainder - mean(remainder, na.rm = TRUE))^2
 90 |   varloo <- (vare * (n - 1) - d) / (n - 2)
 91 |   spike <- var(varloo, na.rm = TRUE)
 92 | 
 93 |   # Compute measures of linearity and curvature
 94 |   tren.coef <- coef(lm(trend0 ~ poly(seq(n), degree = min(n-1, 2L))))[2L:3L]
 95 |   linearity <- tren.coef[1L]
 96 |   curvature <- tren.coef[2L]
 97 | 
 98 |   # ACF of remainder
 99 |   acfremainder <- unname(acf_features(remainder))
100 | 
101 |   # Assemble results
102 |   output <- c(
103 |     nperiods = nperiods, seasonal_period = msts, trend = trend,
104 |     spike = spike, linearity = unname(linearity), curvature = unname(curvature),
105 |     e_acf1 = acfremainder[1L], e_acf10 = acfremainder[2L]
106 |   )
107 |   if (nseas > 0) {
108 |     output <- c(output, seasonal_strength = season, peak = peak, trough = trough)
109 |   }
110 | 
111 |   return(output)
112 | }
113 | 


--------------------------------------------------------------------------------
/R/yanfei.R:
--------------------------------------------------------------------------------
  1 | #' Heterogeneity coefficients
  2 | #'
  3 | #' Computes various measures of heterogeneity of a time series. First the series
  4 | #' is pre-whitened using an AR model to give a new series y. We fit a GARCH(1,1)
  5 | #' model to y and obtain the residuals, e. Then the four measures of heterogeneity
  6 | #' are:
  7 | #' (1) the sum of squares of the first 12 autocorrelations of \eqn{y^2}{y^2};
  8 | #' (2) the sum of squares of the first 12 autocorrelations of \eqn{e^2}{e^2};
  9 | #' (3) the \eqn{R^2}{R^2} value of an AR model applied to \eqn{y^2}{y^2};
 10 | #' (4) the \eqn{R^2}{R^2} value of an AR model applied to \eqn{e^2}{e^2}.
 11 | #' The statistics obtained from \eqn{y^2}{y^2} are the ARCH effects, while those
 12 | #' from \eqn{e^2}{e^2} are the GARCH effects.
 13 | #' @param x a univariate time series
 14 | #' @return A vector of numeric values.
 15 | #' @author Yanfei Kang and Rob J Hyndman
 16 | #' @export
 17 | 
 18 | heterogeneity <- function(x) {
 19 |   # One possible issue when applied to the ETS/ARIMA comparison is that it will
 20 |   # be high for any type of heteroskedasticity, whereas ETS heteroskedasticity
 21 |   # is of a particular type, namely that the variation increases with the level
 22 |   # of the series. But the GARCH type hetero could be high when the variation
 23 |   # changes independently of the level of the series.
 24 | 
 25 |   # pre-whiten a series before Garch modeling
 26 |   x.whitened <- na.contiguous(ar(x)$resid)
 27 | 
 28 |   # perform arch and box test
 29 |   x.archtest <- arch_stat(x.whitened)
 30 |   LBstat <- sum(acf(x.whitened^2, lag.max = 12L, plot = FALSE)$acf[-1L]^2)
 31 | 
 32 |   # fit garch model to capture the variance dynamics.
 33 |   garch.fit <- suppressWarnings(tseries::garch(x.whitened, trace = FALSE))
 34 | 
 35 |   # compare arch test before and after fitting garch
 36 |   garch.fit.std <- residuals(garch.fit)
 37 |   x.garch.archtest <- arch_stat(garch.fit.std)
 38 | 
 39 |   # compare Box test of squared residuals before and after fitting garch
 40 |   LBstat2 <- NA
 41 |   try(LBstat2 <- sum(acf(na.contiguous(garch.fit.std^2), lag.max = 12L, plot = FALSE)$acf[-1L]^2),
 42 |     silent = TRUE
 43 |   )
 44 |   output <- c(
 45 |     arch_acf = LBstat,
 46 |     garch_acf = LBstat2,
 47 |     arch_r2 = unname(x.archtest),
 48 |     garch_r2 = unname(x.garch.archtest)
 49 |   )
 50 |   # output[is.na(output)] <- 1
 51 |   return(output)
 52 | }
 53 | 
 54 | #' Nonlinearity coefficient
 55 | #'
 56 | #' Computes a nonlinearity statistic based on Lee, White & Granger's nonlinearity test of a time series.
 57 | #' The statistic is \eqn{10X^2/T}{10X^2/T} where \eqn{X^2}{X^2} is the Chi-squared statistic from Lee, White and Granger,
 58 | #' and T is the length of the time series. This takes large values
 59 | #' when the series is nonlinear, and values around 0 when the series is linear.
 60 | #' @param x a univariate time series
 61 | #' @return A numeric value.
 62 | #' @examples
 63 | #' nonlinearity(lynx)
 64 | #' @author Yanfei Kang and Rob J Hyndman
 65 | #' @references Lee, T. H., White, H., & Granger, C. W. (1993). Testing for neglected nonlinearity in time series models: A comparison of neural network methods and alternative tests. \emph{Journal of Econometrics}, 56(3), 269-290.
 66 | #' @references Teräsvirta, T., Lin, C.-F., & Granger, C. W. J. (1993). Power of the neural network linearity test. \emph{Journal of Time Series Analysis}, 14(2), 209–220.
 67 | #' @export
 68 | 
 69 | nonlinearity <- function(x) {
 70 |   run_test <- function() tseries::terasvirta.test(as.ts(x), type = "Chisq")
 71 |   X2 <- tryCatch(run_test()$statistic, error = function(e) NA)  # NA on failure
 72 |   c(nonlinearity = 10 * unname(X2) / length(x))
 73 | }
 74 | 
 75 | #' ARCH LM Statistic
 76 | #'
 77 | #' Computes a statistic based on the Lagrange Multiplier (LM) test of Engle (1982) for
 78 | #' autoregressive conditional heteroscedasticity (ARCH). The statistic returned is
 79 | #' the \eqn{R^2}{R^2} value of an autoregressive model of order \code{lags} applied
 80 | #' to \eqn{x^2}{x^2}.
 81 | #' @param x a univariate time series
 82 | #' @param lags Number of lags to use in the test
 83 | #' @param demean Should data have mean removed before test applied?
 84 | #' @return A numeric value.
 85 | #' @author Yanfei Kang
 86 | #' @export
 87 | 
 88 | arch_stat <- function(x, lags = 12, demean = TRUE) {
 89 |   no_stat <- c(ARCH.LM = NA_real_)
 90 |   # Not enough observations to regress x^2 on `lags` of its own lagged values
 91 |   if (length(x) <= lags + 1) {
 92 |     return(no_stat)
 93 |   }
 94 |   centred <- if (demean) x - mean(x, na.rm = TRUE) else x
 95 |   # Column 1 is the current squared value; the other columns are its lags
 96 |   sq_lags <- embed(centred^2, lags + 1)
 97 |   ar_fit <- try(lm(sq_lags[, 1] ~ sq_lags[, -1]), silent = TRUE)
 98 |   if (inherits(ar_fit, "try-error")) {
 99 |     return(no_stat)
100 |   }
101 |   r2 <- summary(ar_fit)$r.squared
102 |   # An undefined (NaN) R^2 is reported as 1
103 |   c(ARCH.LM = if (is.nan(r2)) 1 else r2)
104 | }
105 | 


--------------------------------------------------------------------------------
/R/thiyanga.R:
--------------------------------------------------------------------------------
  1 | #' Autocorrelation-based features
  2 | #'
  3 | #' Computes various measures based on autocorrelation coefficients of the
  4 | #' original series, first-differenced series and second-differenced series
  5 | #' @param x a univariate time series
  6 | #' @return A vector of 6 values: first autocorrelation coefficient and sum of squared of
  7 | #' first ten autocorrelation coefficients of original series, first-differenced series,
  8 | #' and twice-differenced series.
  9 | #' For seasonal data, the autocorrelation coefficient at the first seasonal lag is
 10 | #' also returned.
 11 | #' @author Thiyanga Talagala
 12 | #' @export
 13 | acf_features <- function(x) {
 14 |   m <- frequency(x)
 15 |   n_obs <- length(x)
 16 | 
 17 |   # Autocorrelations of `series` at lags 1 to `lag_max` (lag 0 is dropped).
 18 |   # A single NA is returned instead when the original series has no more than
 19 |   # `min_len` observations; `series` is then never evaluated.
 20 |   lagged_acf <- function(series, lag_max, min_len) {
 21 |     if (n_obs <= min_len) {
 22 |       return(NA)
 23 |     }
 24 |     rho <- acf(
 25 |       series,
 26 |       lag.max = lag_max, plot = FALSE, na.action = na.pass
 27 |     )
 28 |     rho$acf[-1L]
 29 |   }
 30 | 
 31 |   # Sum of squares of the first 10 autocorrelation coefficients. Asking for
 32 |   # coefficients that were not computed yields NA, which makes the sum NA.
 33 |   sum_sq_10 <- function(rho) {
 34 |     sum(rho[seq_len(10L)]^2)
 35 |   }
 36 | 
 37 |   # Original series: enough lags are kept to reach the first seasonal lag
 38 |   rho_x <- lagged_acf(x, max(10L, m), 1)
 39 | 
 40 |   # First-differenced series
 41 |   rho_d1 <- lagged_acf(diff(x, differences = 1), 10L, 10)
 42 | 
 43 |   # Twice-differenced series
 44 |   rho_d2 <- lagged_acf(diff(x, differences = 2), 10L, 11)
 45 | 
 46 |   # First coefficient and 10-lag sum of squares for each version of the series
 47 |   features <- c(
 48 |     x_acf1 = unname(rho_x[1L]),
 49 |     x_acf10 = unname(sum_sq_10(rho_x)),
 50 |     diff1_acf1 = unname(rho_d1[1L]),
 51 |     diff1_acf10 = unname(sum_sq_10(rho_d1)),
 52 |     diff2_acf1 = unname(rho_d2[1L]),
 53 |     diff2_acf10 = unname(sum_sq_10(rho_d2))
 54 |   )
 55 | 
 56 |   # Non-seasonal series have no seasonal lag to report
 57 |   if (m <= 1) {
 58 |     return(features)
 59 |   }
 60 | 
 61 |   # Seasonal series also get the autocorrelation at the first seasonal lag
 62 |   c(features, seas_acf1 = unname(rho_x[m]))
 63 | }
 64 | 
 65 | #' Partial autocorrelation-based features
 66 | #'
 67 | #' Computes various measures based on partial autocorrelation coefficients of the
 68 | #' original series, first-differenced series and second-differenced series
 69 | #' @param x a univariate time series
 70 | #' @return A vector of 3 values: Sum of squared of first 5
 71 | #' partial autocorrelation coefficients of the original series, first differenced
 72 | #' series and twice-differenced series.
 73 | #' For seasonal data, the partial autocorrelation coefficient at the first seasonal
 74 | #' lag is also returned.
 75 | #' @author Thiyanga Talagala
 76 | #' @export
 77 | pacf_features <- function(x) {
 78 |   m <- frequency(x)
 79 |   n_obs <- length(x)
 80 | 
 81 |   # `value` is only evaluated for series with more than `min_len`
 82 |   # observations; shorter series get NA instead
 83 |   or_na <- function(min_len, value) {
 84 |     if (n_obs > min_len) value else NA
 85 |   }
 86 | 
 87 |   # Sum of squared partial autocorrelations of `series` at lags 1 to 5
 88 |   pacf5_ss <- function(series) {
 89 |     sum(pacf(series, lag.max = 5L, plot = FALSE)$acf^2)
 90 |   }
 91 | 
 92 |   # Partial autocorrelations of the original series, with enough lags to
 93 |   # reach the first seasonal lag
 94 |   pacf_x <- or_na(1, pacf(x, lag.max = max(5L, m), plot = FALSE)$acf)
 95 | 
 96 |   # Sums of squares over the first 5 lags for the original, first-differenced
 97 |   # and twice-differenced series
 98 |   x_pacf5 <- or_na(5, sum(pacf_x[seq_len(5L)]^2))
 99 |   diff1x_pacf5 <- or_na(6, pacf5_ss(diff(x, differences = 1L)))
100 |   diff2x_pacf5 <- or_na(7, pacf5_ss(diff(x, differences = 2L)))
101 | 
102 |   output <- c(
103 |     x_pacf5 = unname(x_pacf5),
104 |     diff1x_pacf5 = unname(diff1x_pacf5),
105 |     diff2x_pacf5 = unname(diff2x_pacf5)
106 |   )
107 | 
108 |   # Non-seasonal series have no seasonal lag to report
109 |   if (m <= 1) {
110 |     return(output)
111 |   }
112 | 
113 |   # Seasonal series also get the partial autocorrelation at lag `m`
114 |   c(output, seas_pacf = pacf_x[m])
115 | }
116 | 
117 | #' Parameter estimates of Holt's linear trend method
118 | #'
119 | #' Estimate the smoothing parameter for the level-alpha and
120 | #' the smoothing parameter for the trend-beta.
121 | #' \code{hw_parameters} considers additive seasonal trend: ets(A,A,A) model.
122 | #' @param x a univariate time series
123 | #' @return \code{holt_parameters} produces a vector of 2 values: alpha, beta.
124 | #'
125 | #' \code{hw_parameters} produces a vector of 3 values: alpha, beta and gamma.
126 | #' @author Thiyanga Talagala, Pablo Montero-Manso
127 | #' @export
128 | 
129 | holt_parameters <- function(x) {
130 |   # Holt's linear trend method is fitted as an ETS(A,A,N) model; only the
131 |   # level (alpha) and trend (beta) smoothing parameters are kept
132 |   holt_fit <- forecast::ets(x, model = "AAN")
133 |   smoothing <- c("alpha", "beta")
134 |   stats::setNames(holt_fit$par[smoothing], smoothing)
135 | }
136 | 
137 | #' @rdname holt_parameters
138 | #' @export
139 | hw_parameters <- function(x) {
140 |   # Additive Holt-Winters, i.e. ETS(A,A,A); all-NA parameters if fitting fails
141 |   fallback <- list(par = c(alpha = NA, beta = NA, gamma = NA))
142 |   safe_ets <- purrr::possibly(forecast::ets, fallback)
143 |   safe_ets(x, model = "AAA")$par[c("alpha", "beta", "gamma")]
144 | }
145 | # #' Autocorrelation coefficient at lag 1 of the residual
146 | # #'
147 | # #' Computes the first order autocorrelation of the residual series of the deterministic trend model
148 | # #' @param x a univariate time series
149 | # #' @return A numeric value.
150 | # #' @author Thiyanga Talagala
151 | # #' @export
152 | # acfresid <- function(x){
153 | #   time <- 1:length(x)
154 | #   linear_mod <- lm(x~time)
155 | #   Res<-resid(linear_mod)
156 | #   return(stats::acf(Res,lag.max=1L,plot=FALSE)$acf[-1])
157 | # }
158 | 
159 | #' Proportion of zeros
160 | #'
161 | #' Computes proportion of zeros in a time series
162 | #' @param x a univariate time series
163 | #' @param tol tolerance level. Absolute values below this are considered zeros.
164 | #' @return A numeric value.
165 | #' @author Thiyanga Talagala
166 | #' @export
167 | zero_proportion <- function(x, tol = 1e-8) {
168 |   mean(abs(x) < tol, na.rm = TRUE) # share of non-missing values within tol of 0
169 | }
170 | 


--------------------------------------------------------------------------------
/R/featurematrix.R:
--------------------------------------------------------------------------------
  1 | #' Time series feature matrix
  2 | #'
  3 | #' \code{tsfeatures} computes a matrix of time series features from a list of time series
  4 | #' @param tslist a list of univariate time series, each of class \code{ts} or a numeric vector.
  5 | #' Alternatively, an object of class \code{mts} may be used.
  6 | #' @param features a vector of function names which return numeric vectors of features.
  7 | #' All features returned by these functions must be named if they return more than one feature.
  8 | #' Existing functions from installed packages may be used, but the package must be loaded first.
  9 | #' Functions must return a result for all time series, even if it is just NA.
 10 | #' @param scale if \code{TRUE}, time series are scaled to mean 0 and sd 1 before features
 11 | #' are computed.
 12 | #' @param trim if \code{TRUE}, time series are trimmed by \code{trim_amount} before features
 13 | #' are computed. Values below the \code{trim_amount} quantile or above the \code{1 - trim_amount} quantile are set to \code{NA}.
 14 | #' @param trim_amount Default level of trimming if \code{trim==TRUE}.
 15 | #' @param parallel If TRUE, multiple cores (or multiple sessions) will be used. This only speeds things up
 16 | #' when there are a large number of time series.
 17 | #' @param multiprocess The function from the \code{future} package to use for parallel processing. Either
 18 | #' \code{\link[future]{multisession}} or \code{\link[future]{multicore}}. The latter is preferred
 19 | #' for Linux and MacOS.
 20 | #' @param na.action A function to handle missing values. Use \code{na.interp} to estimate missing values.
 21 | #' @param ... Other arguments get passed to the feature functions.
 22 | #' @return A feature matrix (in the form of a tibble) with each row corresponding to
 23 | #' one time series from tslist, and each column being a feature.
 24 | #' @examples
 25 | #' mylist <- list(sunspot.year, WWWusage, AirPassengers, USAccDeaths)
 26 | #' tsfeatures(mylist)
 27 | #' @author Rob J Hyndman
 28 | #' @export
 29 | tsfeatures <- function(tslist,
 30 |                        features = c("frequency", "stl_features", "entropy", "acf_features"),
 31 |                        scale = TRUE, trim = FALSE, trim_amount = 0.1,
 32 |                        parallel = FALSE, multiprocess = future::multisession, na.action = na.pass, ...) {
 33 |   # Work on a list of ts objects, whatever form the input came in
 34 |   if (!is.list(tslist)) {
 35 |     tslist <- as.list(as.ts(tslist))
 36 |   } else {
 37 |     tslist <- map(tslist, as.ts)
 38 |   }
 39 | 
 40 |   # A constant series cannot be scaled. na.rm in any() keeps a series whose
 41 |   # variance is NA (e.g. a single observation) from turning the condition
 42 |   # into NA, which would make if() fail.
 43 |   if (scale && any(map_dbl(tslist, var, na.rm = TRUE) == 0, na.rm = TRUE)) {
 44 |     warning("Some series are constant and cannot be scaled, so scaling has been disabled (`scale = FALSE`).")
 45 |     scale <- FALSE
 46 |   }
 47 |   if (scale) {
 48 |     tslist <- map(tslist, scalets)
 49 |   }
 50 |   if (trim) {
 51 |     tslist <- map(tslist, trimts, trim = trim_amount)
 52 |   }
 53 | 
 54 |   # Apply the missing-value handler, then restore the original attributes
 55 |   tslist <- map(tslist, function(x) {
 56 |     y <- na.action(x)
 57 |     attributes(y) <- attributes(x)
 58 |     y
 59 |   })
 60 | 
 61 |   # Look up the feature functions first, so that an unknown name fails here
 62 |   func <- lapply(features, match.fun)
 63 |   if (parallel) {
 64 |     old_plan <- future::plan(multiprocess)
 65 |     on.exit(future::plan(old_plan), add = TRUE)
 66 |   }
 67 | 
 68 |   # flist[[i]][[j]] holds the output of feature function i for series j
 69 |   flist <- list()
 70 |   for (i in seq_along(features)) {
 71 |     if (parallel) {
 72 |       flist[[i]] <- furrr::future_map(tslist, func[[i]], ...)
 73 |     } else {
 74 |       flist[[i]] <- map(tslist, func[[i]], ...)
 75 |     }
 76 |     # A function returning one unnamed value has that value named after it;
 77 |     # longer unnamed results are rejected
 78 |     if (is.null(names(flist[[i]][[1]]))) {
 79 |       if (length(flist[[i]][[1]]) != 1L) {
 80 |         stop(paste("Function",features[i],"not returning named feature vector"))
 81 |       }
 82 |       flist[[i]] <- map(flist[[i]], function(x) {
 83 |         names(x) <- features[i]
 84 |         x
 85 |       })
 86 |     }
 87 |   }
 88 | 
 89 |   # Rename duplicate feature names to avoid conflicts
 90 |   flist <- rename_duplicate_features(features, flist)
 91 | 
 92 |   # One named vector per series, gathering the output of every function
 93 |   featurelist <- lapply(seq_along(tslist), function(j) {
 94 |     unlist(map(flist, function(u) u[[j]]))
 95 |   })
 96 |   featurenames <- map(featurelist, names)
 97 |   fnames <- unique(unlist(featurenames))
 98 |   # Test the names themselves: comparing the list of name vectors with ""
 99 |   # does not detect an empty name inside a vector of several names
100 |   if ("" %in% fnames) {
101 |     stop("Some unnamed features")
102 |   }
103 | 
104 |   # Feature matrix: one row per series; features a series lacks stay NA
105 |   fmat <- matrix(NA_real_, nrow = length(tslist), ncol = length(fnames))
106 |   colnames(fmat) <- fnames
107 |   rownames(fmat) <- names(tslist)
108 |   for (j in seq_along(tslist)) {
109 |     fmat[j, featurenames[[j]]] <- featurelist[[j]][featurenames[[j]]]
110 |   }
111 |   tibble::as_tibble(fmat)
112 | }
113 | 
114 | # Scale time series
115 | scalets <- function(x) {
116 |   # A constant series has zero variance and is returned untouched
117 |   if (forecast::is.constant(x)) {
118 |     return(x)
119 |   }
120 |   # Centre and scale to unit standard deviation, dropping all attributes
121 |   z <- as.numeric(scale(x, center = TRUE, scale = TRUE))
122 |   # Rebuild the time series, keeping the seasonal periods of an msts object
123 |   scaled <- if (inherits(x, "msts")) {
124 |     forecast::msts(z, seasonal.periods = attributes(x)$msts)
125 |   } else {
126 |     as.ts(z)
127 |   }
128 |   tsp(scaled) <- tsp(x)
129 |   scaled
130 | }
131 | 
132 | # Trim time series
133 | trimts <- function(x, trim = 0.1) {
134 |   bounds <- quantile(x, probs = c(trim, 1 - trim), na.rm = TRUE)
135 |   # Values in either tail become NA; values already missing stay missing
136 |   replace(x, x < bounds[1L] | x > bounds[2L], NA)
137 | }
138 | 
139 | # check for duplicate feature names in the feature list and rename by prepending
140 | # the name of the function that generates them to avoid conflicts: "functionName_featureName"
141 | # both functions' features are renamed
142 | # processed in order of appearance in the list
143 | # a warning is generated when conflicts are found
144 | rename_duplicate_features <- function(fun_names, feat_list) {
145 |   if (length(feat_list) < 2) {
146 |     return(feat_list)
147 |   }
148 |   # All feature names one function produced, over every series. Series can
149 |   # return different feature sets, so the first series alone is not enough.
150 |   all_names <- function(k) unique(unlist(lapply(feat_list[[k]], names)))
151 |   for (i in seq_len(length(feat_list) - 1)) {
152 |     for (j in (i + 1):length(feat_list)) {
153 |       # Names are read afresh on each pass since earlier passes rename them
154 |       if (any(all_names(i) %in% all_names(j))) {
155 |         warning(paste("Conflicting feature names in functions: ", fun_names[[i]], " and ", fun_names[[j]]))
156 |         # Prefix each vector's own names so that vectors of any length stay valid
157 |         for (k in c(i, j)) {
158 |           for (idx in seq_along(feat_list[[k]])) {
159 |             own_names <- names(feat_list[[k]][[idx]])
160 |             names(feat_list[[k]][[idx]]) <- paste0(fun_names[[k]], "_", own_names)
161 |           }
162 |         }
163 |       }
164 |     }
165 |   }
166 |   feat_list
167 | }
168 | 


--------------------------------------------------------------------------------
/R/features.R:
--------------------------------------------------------------------------------
  1 | 
  2 | #' Time series features based on tiled windows
  3 | #'
  4 | #' Computes feature of a time series based on tiled (non-overlapping) windows.
  5 | #' Means or variances are produced for all tiled windows. Then stability is
  6 | #' the variance of the means, while lumpiness is the variance of the variances.
  7 | #' @param x a univariate time series
  8 | #' @param width size of sliding window
  9 | #' @return A numeric vector of length 2 containing a measure of lumpiness and
 10 | #' a measure of stability.
 11 | #' @author Earo Wang and Rob J Hyndman
 12 | #' @export
 13 | 
 14 | lumpiness <- function(x, width = ifelse(frequency(x) > 1,
 15 |                         frequency(x), 10
 16 |                       )) {
 17 |   x <- scalets(x)
 18 |   n_obs <- length(x)
 19 |   # Start and end positions of the consecutive windows of `width` points
 20 |   starts <- seq(1, n_obs, by = width)
 21 |   ends <- seq(width, n_obs + width, by = width)
 22 |   # Variance within each complete window; a partial last window is ignored
 23 |   window_vars <- map_dbl(
 24 |     seq_len(n_obs / width),
 25 |     function(k) var(x[starts[k]:ends[k]], na.rm = TRUE)
 26 |   )
 27 |   # With fewer than two complete windows the lumpiness is defined as 0
 28 |   value <- if (n_obs < 2 * width) 0 else var(window_vars, na.rm = TRUE)
 29 |   c(lumpiness = value)
 30 | }
 31 | 
 32 | #' @rdname lumpiness
 33 | #' @export
 34 | 
 35 | stability <- function(x, width = ifelse(frequency(x) > 1,
 36 |                         frequency(x), 10
 37 |                       )) {
 38 |   x <- scalets(x)
 39 |   n_obs <- length(x)
 40 |   # First and last index of each consecutive block of `width` observations
 41 |   first <- seq(1, n_obs, by = width)
 42 |   last <- seq(width, n_obs + width, by = width)
 43 |   # Mean of each complete block; a trailing partial block is left out
 44 |   block_means <- map_dbl(
 45 |     seq_len(n_obs / width),
 46 |     function(k) mean(x[first[k]:last[k]], na.rm = TRUE)
 47 |   )
 48 |   # Series shorter than two blocks get a stability of 0
 49 |   value <- if (n_obs < 2 * width) 0 else var(block_means, na.rm = TRUE)
 50 |   c(stability = value)
 51 | }
 52 | 
 53 | #' Time series features based on sliding windows
 54 | #'
 55 | #' Computes feature of a time series based on sliding (overlapping) windows.
 56 | #' \code{max_level_shift} finds the largest mean shift between two consecutive windows.
 57 | #' \code{max_var_shift} finds the largest var shift between two consecutive windows.
 58 | #' \code{max_kl_shift} finds the largest shift in Kullback-Leibler divergence between
 59 | #' two consecutive windows.
 60 | #'
 61 | #' Computes the largest level shift and largest variance shift in sliding mean calculations
 62 | #' @param x a univariate time series
 63 | #' @param width size of sliding window
 64 | #' @return A vector of 2 values: the size of the shift, and the time index of the shift.
 65 | #' @author Earo Wang and Rob J Hyndman
 66 | #' @export
 67 | 
 68 | max_level_shift <- function(x, width = ifelse(frequency(x) > 1,
 69 |                               frequency(x), 10
 70 |                             )) {
 71 |   # Rolling mean with window `width`; warnings are muted, errors are caught
 72 |   roll <- suppressWarnings(
 73 |     try(RcppRoll::roll_mean(x, width, na.rm = TRUE), silent = TRUE)
 74 |   )
 75 |   # Both values stay NA if the rolling mean failed or every shift is NA
 76 |   size <- NA_real_
 77 |   when <- NA_real_
 78 |   if (!inherits(roll, "try-error")) {
 79 |     # Absolute change between rolling means that are `width` steps apart
 80 |     shifts <- abs(diff(roll, width))
 81 |     if (length(shifts) == 0L) {
 82 |       # No two rolling means are `width` steps apart
 83 |       size <- 0
 84 |     } else if (!all(is.na(shifts))) {
 85 |       # Largest shift and the position at which it is found
 86 |       size <- max(shifts, na.rm = TRUE)
 87 |       when <- which.max(shifts) + width - 1L
 88 |     }
 89 |   }
 90 |   c(max_level_shift = size, time_level_shift = when)
 91 | }
 92 | 
 93 | #' @rdname max_level_shift
 94 | #' @export
 95 | 
 96 | max_var_shift <- function(x, width = ifelse(frequency(x) > 1,
 97 |                             frequency(x), 10
 98 |                           )) {
 99 |   # Named result vector: size of the shift and its time index
100 |   result <- function(size, when) {
101 |     c(max_var_shift = size, time_var_shift = when)
102 |   }
103 |   # Rolling variance with window `width`
104 |   roll <- suppressWarnings(
105 |     try(RcppRoll::roll_var(x, width, na.rm = TRUE), silent = TRUE)
106 |   )
107 |   if (inherits(roll, "try-error")) {
108 |     return(result(NA_real_, NA_real_))
109 |   }
110 |   # Absolute change between rolling variances that are `width` steps apart
111 |   shifts <- abs(diff(roll, width))
112 |   if (length(shifts) == 0L) {
113 |     return(result(0, NA_real_))
114 |   }
115 |   if (all(is.na(shifts))) {
116 |     return(result(NA_real_, NA_real_))
117 |   }
118 |   # Largest shift and the position at which it is found
119 |   result(max(shifts, na.rm = TRUE), which.max(shifts) + width - 1L)
120 | }
121 | 
122 | #' @rdname max_level_shift
123 | #' @export
124 | 
125 | max_kl_shift <- function(x, width = ifelse(frequency(x) > 1,
126 |                            frequency(x), 10
127 |                          )) {
128 |   lenx <- length(x)
129 |   # Too short to compare two windows. This is checked before anything else
130 |   # because bw.nrd0() fails on fewer than two observations.
131 |   if (lenx <= (2 * width)) {
132 |     return(c(max_kl_shift = NA_real_, time_kl_shift = NA_real_))
133 |   }
134 |   gw <- 100 # grid width
135 |   xgrid <- seq(min(x, na.rm = TRUE), max(x, na.rm = TRUE), length.out = gw)
136 |   grid <- xgrid[2L] - xgrid[1L]
137 |   bw <- bw.nrd0(x[!is.na(x)]) # bandwidth from the non-missing values
138 | 
139 |   # Row i is a Gaussian kernel centred on x[i], evaluated over the grid
140 |   dens.mat <- matrix(NA_real_, nrow = lenx, ncol = gw)
141 |   for (i in seq_len(lenx)) {
142 |     dens.mat[i, ] <- dnorm(xgrid, mean = x[i], sd = bw)
143 |   }
144 |   # Floor the densities at a tiny positive value so the logs stay finite
145 |   dens.mat <- pmax(dens.mat, dnorm(38))
146 |   # Density estimate of each window: column-wise rolling mean of the kernels
147 |   rmean <- RcppRoll::roll_mean(dens.mat,
148 |     n = width, na.rm = TRUE, fill = NA,
149 |     align = "right"
150 |   )
151 |   # Pair each position with the one `width` steps later
152 |   lo <- seq(1, lenx - width + 1)
153 |   hi <- seq(width + 1, lenx)
154 |   seqidx <- min(length(lo), length(hi))
155 |   # Kullback-Leibler divergence between the paired density estimates
156 |   kl <- vapply(seq_len(seqidx), function(i) {
157 |     sum(
158 |       rmean[lo[i], ] * (log(rmean[lo[i], ]) - log(rmean[hi[i], ])) * grid,
159 |       na.rm = TRUE
160 |     )
161 |   }, numeric(1))
162 |   # Changes in divergence between successive positions
163 |   diffkl <- diff(kl)
164 |   if (length(diffkl) == 0L) {
165 |     diffkl <- 0
166 |     maxidx <- NA_real_
167 |   } else {
168 |     maxidx <- which.max(diffkl) + width - 1L
169 |   }
170 |   c(max_kl_shift = max(diffkl, na.rm = TRUE), time_kl_shift = maxidx)
171 | }
172 | 
173 | #' Number of crossing points
174 | #'
175 | #' Computes the number of times a time series crosses the median.
176 | #' @param x a univariate time series
177 | #' @return A numeric value.
178 | #' @author Earo Wang and Rob J Hyndman
179 | #' @export
180 | crossing_points <- function(x) {
181 |   midline <- median(x, na.rm = TRUE)
182 |   # Which side of the median each observation is on (NA where x is missing)
183 |   below <- as.vector(x <= midline)
184 |   # Compare each observation with the next one. Negative indices, unlike
185 |   # 1:(n - 1), also work for series of length 0 or 1.
186 |   switched <- below[-1L] != below[-length(below)]
187 |   c(crossing_points = sum(switched, na.rm = TRUE))
188 | }
189 | 
190 | #' Longest flat spot
191 | #'
192 | #' "Flat spots" are computed by dividing the sample space of a time series into ten equal-sized intervals, and computing the maximum run length within any single interval.
193 | #' @param x a univariate time series
194 | #' @return A numeric value.
195 | #' @author Earo Wang and Rob J Hyndman
196 | #' @export
197 | 
198 | flat_spots <- function(x) {
199 |   # Label each observation with the one of ten equal-width intervals it is in
200 |   cutx <- try(cut(x, breaks = 10, include.lowest = TRUE, labels = FALSE),
201 |     silent = TRUE
202 |   )
203 |   if (inherits(cutx, "try-error")) {
204 |     # Return the NA under the feature name, so the result carries the same
205 |     # name whether or not the series could be cut into intervals
206 |     return(c(flat_spots = NA_integer_))
207 |   }
208 |   # Longest run of consecutive observations within one interval. Low flat
209 |   # spots only would be max(rlex$lengths[rlex$values == 1]).
210 |   rlex <- rle(cutx)
211 |   c(flat_spots = max(rlex$lengths))
212 | }
213 | 
214 | # shapes <- function(x, width, scale = TRUE, FUN = mean, ...){
215 | #   nr <- length(x)
216 | #   if (nr %% width != 0) {
217 | #       stop("width must be a divisor of the length of the series.")
218 | #     }
219 | #     shapes <- matrix(x, ncol = width, byrow= TRUE)
220 | #     if(scale){
221 | #       dtotal <- apply(shapes, 1, sum)
222 | #       idremove <- which(dtotal == 0)
223 | #       if(length(idremove) > 0){
224 | #         shapes <- shapes[-idremove, ]
225 | #         dtotal <- dtotal[-idremove]
226 | #       }
227 | #       shapes <- t(t(shapes) / dtotal)
228 | #     }
229 | #     xprofile <- apply(shapes, 2, FUN, ...)
230 | #     return(c(shapes=xprofile))
231 | # }
232 | 
233 | #' Hurst coefficient
234 | #'
235 | #' Computes the Hurst coefficient indicating the level of fractional differencing
236 | #' of a time series.
237 | #' @param x a univariate time series. If missing values are present, the largest
238 | #' contiguous portion of the time series is used.
239 | #' @return A numeric value.
240 | #' @author Rob J Hyndman
241 | #' @export
242 | 
243 | hurst <- function(x) {
244 |   d <- suppressWarnings(fracdiff::fracdiff(na.contiguous(x), 0, 0)[["d"]])
245 |   c(hurst = d + 0.5) # Hurst coefficient = fractional difference d + 0.5
246 | }
247 | 
248 | #' Unit Root Test Statistics
249 | #'
250 | #' \code{unitroot_kpss} computes the statistic for the Kwiatkowski et al. unit root test
251 | #' using the default settings for the \code{\link[urca]{ur.kpss}} function.
252 | #' \code{unitroot_pp} computes the statistic for the Phillips-Perron unit root test
253 | #' using the default settings for the \code{\link[urca]{ur.pp}} function.
254 | #' @param x a univariate time series.
255 | #' @param ... Other arguments are passed to the \code{\link[urca]{ur.kpss}} or
256 | #' \code{\link[urca]{ur.pp}} functions.
257 | #' @return A numeric value
258 | #' @author Pablo Montero-Manso
259 | #' @export
260 | unitroot_kpss <- function(x, ...) {
261 |   stat <- try(urca::ur.kpss(x, ...)@teststat, silent = TRUE)
262 |   if (!inherits(stat, "try-error")) {
263 |     return(stat)
264 |   }
265 |   warning("Error in unitroot_kpss") # a failed test is reported as NA
266 |   NA
267 | }
268 | 
269 | #' @rdname unitroot_kpss
270 | #' @export
271 | unitroot_pp <- function(x, ...) {
272 |   stat <- try(urca::ur.pp(x, ...)@teststat, silent = TRUE)
273 |   failed <- inherits(stat, "try-error")
274 |   if (failed) {
275 |     warning("Error in unitroot_pp") # the error itself is not propagated
276 |   }
277 |   if (failed) NA else stat
278 | }
279 | 


--------------------------------------------------------------------------------
/vignettes/tsfeatures.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: "Introduction to the tsfeatures package"
  3 | author: "Yangzhuoran Yang and Rob J Hyndman"
  4 | date: "`r Sys.Date()`"
  5 | output:
  6 |   rmarkdown::html_vignette:
  7 |     toc: true
  8 |     toc_depth: 3
  9 | vignette: >
 10 |   %\VignetteIndexEntry{Introduction to the tsfeatures package}
 11 |   %\VignetteEngine{knitr::rmarkdown}
 12 |   %\VignetteEncoding{UTF-8}
 13 | ---
 14 | 
 15 | ```{r setup, include = FALSE}
 16 | knitr::opts_chunk$set(
 17 |   collapse = TRUE,
 18 |   comment = "#>",
 19 |   warning = FALSE,
 20 |   fig.align = "center"
 21 | )
 22 | library(tsfeatures)
 23 | ```
 24 | 
 25 | # tsfeatures
 26 | 
 27 | The R package *tsfeatures* provides methods for extracting various features from time series data.
 28 | 
 29 | ## Installation
 30 | 
 31 | The **stable** version is on CRAN and can be installed in the usual way:
 32 | 
 33 | ```{r cran-installation, eval = FALSE}
 34 | install.packages("tsfeatures")
 35 | ```
 36 | 
 37 | You can install the **development** version from [Github](https://github.com/robjhyndman/tsfeatures) with:
 38 | 
 39 | ```{r gh-installation, eval = FALSE}
 40 | # install.packages("devtools")
 41 | devtools::install_github("robjhyndman/tsfeatures")
 42 | ```
 43 | 
 44 | ## Usage
 45 | 
 46 | The function `tsfeatures()` computes a tibble of time series features from a list of time series.
 47 | 
 48 | ```{r}
 49 | mylist <- list(sunspot.year, WWWusage, AirPassengers, USAccDeaths)
 50 | tsfeatures(mylist)
 51 | ```
 52 | 
 53 | The default functions that `tsfeatures` uses to compute features are `frequency`, `stl_features`, `entropy` and `acf_features`. Each of them can produce one or more  features. Detailed information of features included in the *tsfeatures* package are described below. Functions from other packages, or user-defined functions, may also be used.
 54 | 
 55 | ```{r}
 56 | # Function from outside of tsfeatures package being used
 57 | is.monthly <- function(x){
 58 |   frequency(x) == 12
 59 | }
 60 | tsfeatures(mylist, features = "is.monthly")
 61 | ```
 62 | 
 63 | ## List of features
 64 | 
 65 | ### acf_features {#acf_features}
 66 | 
 67 | We compute the autocorrelation function of the series, the differenced series, and the twice-differenced series. `acf_features` produces a vector comprising the first autocorrelation coefficient in each case, and the sum of squares of the first 10 autocorrelation coefficients in each case.
 68 | 
 69 | ```{r}
 70 | acf_features(AirPassengers)
 71 | ```
 72 | 
 73 | ### arch_stat {#arch_stat}
 74 | 
 75 | `arch_stat` computes a statistic based on the Lagrange Multiplier (LM) test of Engle ([1982](#ref)) for autoregressive conditional heteroscedasticity (ARCH). The statistic returned is the $R^2$ value of an autoregressive model of order specified as lags applied to $x^2$.
 76 | 
 77 | ```{r}
 78 | arch_stat(AirPassengers)
 79 | ```
 80 | 
 81 | ### autocorr_features
 82 | 
 83 | The autocorrelation feature set from software package hctsa
 84 | 
 85 | ```{r}
 86 | autocorr_features(AirPassengers)
 87 | ```
 88 | 
 89 | * `ac_9` is the autocorrelation at lag 9.
 90 | * `embed2_incircle` gives proportion of points inside a given circular boundary in a 2-d embedding space.
 91 | * `firstmin_ac` returns the time of first minimum in the autocorrelation function.
 92 | * `trev_num` returns the numerator of the trev function of a time series, a normalized nonlinear autocorrelation. The time lag is set to 1.
 93 | * `motiftwo_entro3` finds local motifs in a binary symbolization of the time series. Coarse-graining is performed. Time-series values above its mean are given 1, and those below the mean are 0. `motiftwo_entro3` returns the entropy of words in the binary alphabet of length 3.
 94 | * `walker_propcross` simulates a hypothetical walker moving through the time domain. The hypothetical particle (or 'walker') moves in response to values of the time series at each point. The walker narrows the gap between its value and that of the time series by 10%. `walker_propcross` returns the fraction of time series length that walker crosses time series.
 95 | 
 96 | ### binarize_mean {#binarize_mean}
 97 | 
 98 | `binarize_mean` converts an input vector into a binarized version. Time-series values above its mean are given 1, and those below the mean are 0.
 99 | 
100 | ```{r}
101 | str(binarize_mean(AirPassengers))
102 | ```
103 | 
104 | ### compengine feature set {#compengine}
105 | 
106 | `compengine` calculates the features that have been used in the [CompEngine](https://www.comp-engine.org/) database, using a method introduced in package `hctsa`.
107 | 
108 | The features involved can be grouped as autocorrelation, prediction, stationarity, distribution, and scaling, which can be computed using `autocorr_features`, `pred_features`, `station_features`, `dist_features`, and `scal_features`.
109 | 
110 | ```{r}
111 | comp <- compengine(AirPassengers)
112 | knitr::kable(comp)
113 | ```
114 | 
115 | ### crossing_points {#crossing_points}
116 | 
117 | `crossing points` are defined as the number of times a time series crosses the median line.
118 | 
119 | ```{r}
120 | crossing_points(AirPassengers)
121 | ```
122 | 
123 | ### dist_features
124 | 
125 | The distribution feature set from the hctsa package.
126 | 
127 | 
128 | The scaling feature set from `hctsa`.
129 | 
130 | ```{r}
131 | dist_features(AirPassengers)
132 | ```
133 | 
134 | * `histogram_mode` measures the mode of the data vector using a histogram with a given number of bins (10 by default).
135 | * `outlierinclude_mdrmd` measures the median as more and more outliers are included in the calculation according to a specified rule, of outliers being furthest from the mean. The threshold for including time-series data points in the analysis increases from zero to the maximum deviation, in increments of 0.01*sigma (by default), where sigma is the standard deviation of the time series. At each threshold, proportion of time series points included and median are calculated, and outputs from the algorithm measure how these statistical quantities change as more extreme points are included in the calculation. `outlierinclude_mdrmd` essentially returns the median of the median of range indices.
136 | 
137 | ### entropy {#entropy}
138 | 
139 | The spectral `entropy` is the Shannon entropy
140 | $$
141 |   -\int^\pi_{-\pi}\hat{f}(\lambda)\log\hat{f}(\lambda) d\lambda,
142 | $$
143 | where $\hat{f}(\lambda)$ is an estimate of the spectral density of the data. This measures the “forecastability” of a time series, where low values indicate a high signal-to-noise ratio, and large values occur when a series is difficult to forecast.
144 | 
145 | ```{r}
146 | entropy(AirPassengers)
147 | ```
148 | 
149 | ### firstzero_ac {#firstzero_ac}
150 | 
151 | `firstzero_ac` returns the first zero crossing of the autocorrelation function.
152 | 
153 | ```{r}
154 | firstzero_ac(AirPassengers)
155 | ```
156 | 
157 | ### flat_spots {#flat_spots}
158 | 
159 | `flat_spots` are computed by dividing the sample space of a time series into ten equal-sized intervals, and computing the maximum run length within any single interval.
160 | 
161 | ```{r}
162 | flat_spots(AirPassengers)
163 | ```
164 | 
165 | ### heterogeneity {#heterogeneity}
166 | 
167 | The `heterogeneity` features measure the heterogeneity of the time series.
168 | First, we pre-whiten the time series to remove the mean, trend, and autoregressive (AR) information (Barbour & Parker [2014](#ref)). Then we fit a $GARCH(1,1)$ model to the pre-whitened time series, $x_t$, to measure for autoregressive conditional heteroskedasticity (ARCH) effects. The residuals from this model, $z_t$, are also measured for ARCH effects using a second $GARCH(1,1)$ model.
169 | 
170 | * `arch_acf` is the sum of squares of the first 12 autocorrelations of $\{x^2_t\}$.
171 | * `garch_acf` is the sum of squares of the first 12 autocorrelations of $\{z^2_t\}$.
172 | * `arch_r2` is the $R^2$ value of an AR model applied to $\{x^2_t\}$.
173 | * `garch_r2` is the $R^2$ value of an AR model applied to $\{z^2_t\}$.
174 | 
175 | The statistics obtained from $\{x^2_t\}$ are the ARCH effects, while those from $\{z^2_t\}$ are the GARCH effects. Note that the two $R^2$ values are used in the Lagrange-multiplier test of Engle ([1982](#ref)), and the sum of squared autocorrelations are used in the Ljung-Box test proposed by Ljung & Box ([1978](#ref)).
176 | 
177 | ```{r}
178 | heterogeneity(AirPassengers)
179 | ```
180 | 
181 | ### holt_parameters and hw_parameters {#holt_hw}
182 | 
183 | `holt_parameters` estimates the smoothing parameter for the level (alpha) and the smoothing parameter for the trend (beta) of Holt's linear trend method. `hw_parameters` considers an additive seasonal trend model, ETS(A,A,A), and returns a vector of 3 values: alpha, beta and gamma.
184 | 
185 | ```{r}
186 | holt_parameters(AirPassengers)
187 | hw_parameters(AirPassengers)
188 | ```
189 | 
190 | ### hurst {#hurst}
191 | 
192 | We use a measure of the long-term memory of a time series (`hurst`), computed as 0.5 plus the maximum likelihood estimate of the fractional differencing order $d$ given by Haslett & Raftery ([1989](#ref)). We add 0.5 to make it consistent with the Hurst coefficient. Note that the fractal dimension can be estimated as $D = 2 - \text{hurst}$.
193 | 
194 | ```{r}
195 | hurst(AirPassengers)
196 | ```
197 | 
198 | ### lumpiness and stability {#lumpiness_stability}
199 | 
200 | `Stability` and `lumpiness` are two time series features based on tiled (non-overlapping) windows. Means or variances are produced for all tiled windows. Then `stability` is the variance of the means, while `lumpiness` is the variance of the variances.
201 | 
202 | ```{r}
203 | stability(AirPassengers)
204 | lumpiness(AirPassengers)
205 | ```
206 | 
207 | ### max_level_shift, max_var_shift and max_kl_shift {#max_shift}
208 | 
209 | These three features compute features of a time series based on sliding (overlapping) windows.
210 | `max_level_shift` finds the largest mean shift between two consecutive windows.
211 | `max_var_shift` finds the largest variance shift between two consecutive windows.
212 | `max_kl_shift` finds the largest shift in Kullback-Leibler divergence between two consecutive windows.
213 | Each feature returns a vector of 2 values: the size of the shift, and the time index of the shift.
214 | 
215 | ```{r}
216 | max_level_shift(AirPassengers)
217 | max_var_shift(AirPassengers)
218 | max_kl_shift(AirPassengers)
219 | ```
220 | 
221 | ### nonlinearity {#nonlinearity}
222 | 
223 | The `nonlinearity` coefficient is computed using a modification of the statistic used in Teräsvirta’s nonlinearity test. Teräsvirta’s test uses a statistic $X^2=T\log(\text{SSE}1/\text{SSE}0)$ where SSE1 and SSE0 are the sum of squared residuals from a nonlinear and linear autoregression respectively. This is non-ergodic, so instead, we define it as $10X^2/T$ which will converge to a value indicating the extent of nonlinearity as $T\rightarrow\infty$. This takes large values when the series is nonlinear, and values around 0 when the series is linear.
224 | 
225 | ```{r}
226 | nonlinearity(AirPassengers)
227 | ```
228 | 
229 | ### pacf_features {#pacf_features}
230 | 
231 | We compute the partial autocorrelation function of the series, the differenced series, and the second-order differenced series. Then `pacf_features` produces a vector comprising the sum of squares of the first 5 partial autocorrelation coefficients in each case.
232 | 
233 | ```{r}
234 | pacf_features(AirPassengers)
235 | ```
236 | 
237 | ### pred_features
238 | 
239 | The prediction feature set from the `hctsa` package. The first two elements are obtained from `localsimple_taures` with different forecast methods (the mean, and an LS fit). The third is from `sampen_first`.
240 | 
241 | ```{r}
242 | pred_features(AirPassengers)
243 | ```
244 | 
245 | * `localsimple_taures` uses simple predictors based on the past trainLength values of the time series to predict its next value, and returns the first zero crossing of the autocorrelation function of the residuals from this simple local time-series forecasting.
246 | * `sampen_first` returns the first Sample Entropy of a time series where the embedding dimension is set to 5 and the threshold is set to 0.3. `sampenc` is the underlying function to calculate the first sample entropy with optional dimension and threshold settings.
247 | 
248 | ```{r}
249 | sampenc(AirPassengers, M = 5, r = 0.3)
250 | ```
251 | 
252 | ### scal_features
253 | 
254 | The scaling feature set from `hctsa`.
255 | 
256 | ```{r}
257 | scal_features(AirPassengers)
258 | ```
259 | 
260 | * `fluctanal_prop_r1` implements fluctuation analysis. It fits a polynomial of order 1 and then returns the range. The order of fluctuations is 2, corresponding to root mean square fluctuations.
261 | 
262 | ### station_features
263 | 
264 | The stationarity feature set from `hctsa`.
265 | 
266 | ```{r}
267 | station_features(AirPassengers)
268 | ```
269 | 
270 | * `std1st_der` returns the standard deviation of the first derivative of the time series.
271 | * 100 time-series segments of length l are selected at random from the time series and the mean of the first zero-crossings of the autocorrelation function in each segment is calculated using `spreadrandomlocal_meantaul`.
272 | 
273 | ### stl_features {#stl_features}
274 | 
275 | `stl_features` computes various measures of trend and seasonality of a time series based on an STL decomposition. The `mstl` function is used to do the decomposition.
276 | 
277 | `nperiods` is the number of seasonal periods in the data (determined by the frequency of observation, not the observations themselves) and set to 1 for non-seasonal data. `seasonal_period` is a vector of seasonal periods and set to 1 for non-seasonal data.
278 | 
279 | The size and location of the peaks and troughs in the seasonal component are used to compute strength of peaks (`peak`) and strength of trough (`trough`).
280 | 
281 | The rest of the features are modifications of features used in Kang, Hyndman & Smith-Miles ([2017](#ref)). We extend the STL decomposition approach (Cleveland et al.[1990](#ref)) to handle multiple seasonalities. Thus, the decomposition contains a trend, up to $M$ seasonal components and a remainder component:
282 | $$
283 |   x_t=f_t+s_{1,t}+\cdots+s_{M,t}+e_t,
284 | $$
285 | where $f_t$ is the smoothed trend component, $s_{i,t}$ is the $i$th seasonal component and $e_t$ is a remainder component. The components are estimated iteratively. Let $s^{(k)}_{i,t}$ be the estimate of $s_{i,t}$ at the $k$th iteration, with initial values given as $s^{(0)}_{i,t}=0$. Then we apply an STL decomposition to $x_t-\sum_{j=1,\,j\neq i}^{M}s^{(k-1)}_{j,t}$ to obtain updated estimates $s^{(k)}_{i,t}$ for $k=1,2,\ldots$. In practice, this converges quickly and only two iterations are required. To allow the procedure to be applied automatically, we set the seasonal window span for STL to be 21 in all cases. For a non-seasonal time series, we simply estimate $x_t=f_t+e_t$ where $f_t$ is computed using Friedman’s “super smoother” (Friedman [1984](#ref)).
286 | 
287 | Strength of trend (`trend`) and strength of seasonality (`seasonal.strength`) are defined as
288 | $$
289 |   \text{trend} = 1-\frac{\text{Var}(e_t)}{\text{Var}(f_t+e_t)}\quad \text{and}\quad \text{seasonal.strength}=1-\frac{\text{Var}(e_t)}{\text{Var}(s_{i,t}+e_t)}.
290 | $$
291 | If their values are less than 0, they are set to 0, while values greater than 1 are set to 1. For non-seasonal time series `seasonal.strength` is 0.  For seasonal time series, `seasonal.strength` is an M-vector, where M is the number of periods. This is analogous to the way the strength of trend and seasonality were defined in Wang, Smith & Hyndman ([2006](#ref)), Hyndman, Wang & Laptev ([2015](#ref)) and Kang, Hyndman & Smith-Miles ([2017](#ref)).
292 | 
293 | `spike`  measures the “spikiness” of a time series, and is computed as the variance of the leave-one-out variances of the remainder component $e_t$.
294 | 
295 | `linearity` and `curvature` measure the linearity and curvature of a time series, calculated from the coefficients of an orthogonal quadratic regression.
296 | 
297 | We compute the autocorrelation function of $e_t$, and `e_acf1` and `e_acf10` contain the first autocorrelation coefficient and the sum of the first ten squared autocorrelation coefficients.
298 | 
299 | ```{r}
300 | stl_features(AirPassengers)
301 | ```
302 | 
303 | ### unitroot_kpss and unitroot_pp {#unitroot}
304 | 
305 | `unitroot_kpss` is a vector comprising the statistic for the KPSS unit root test with linear trend and lag one, and `unitroot_pp` is the statistic for the “Z-alpha” version of PP unit root test with constant trend and lag one.
306 | 
307 | ```{r}
308 | unitroot_kpss(AirPassengers)
309 | unitroot_pp(AirPassengers)
310 | ```
311 | 
312 | ### zero_proportion
313 | 
314 | Computes the proportion of zeros in a time series.
315 | 
316 | ```{r}
317 | zero_proportion(AirPassengers)
318 | ```
319 | 
320 | ## Reproducing papers
321 | 
322 | ### Hyndman, Wang and Laptev (ICDM 2015)
323 | 
324 | Here we replicate the analysis in Hyndman, Wang & Laptev (ICDM 2015). However, note that crossing_points, peak and trough are defined differently in the *tsfeatures* package than in the Hyndman et al (2015) paper. Other features are the same.
325 | 
326 | ```{r yahoo, message=FALSE}
327 | library(tsfeatures)
328 | library(dplyr)
329 | 
330 | yahoo <- yahoo_data()
331 | ```
332 | 
333 | ```{r hwl, eval=FALSE}
334 | hwl <- bind_cols(
335 |          tsfeatures(yahoo,
336 |            c("acf_features","entropy","lumpiness",
337 |              "flat_spots","crossing_points")),
338 |          tsfeatures(yahoo,"stl_features", s.window='periodic', robust=TRUE),
339 |          tsfeatures(yahoo, "max_kl_shift", width=48),
340 |          tsfeatures(yahoo,
341 |            c("mean","var"), scale=FALSE, na.rm=TRUE),
342 |          tsfeatures(yahoo,
343 |            c("max_level_shift","max_var_shift"), trim=TRUE)) %>%
344 |   select(mean, var, x_acf1, trend, linearity, curvature,
345 |          seasonal_strength, peak, trough,
346 |          entropy, lumpiness, spike, max_level_shift, max_var_shift, flat_spots,
347 |          crossing_points, max_kl_shift, time_kl_shift)
348 | ```
349 | 
350 | ```{r hwlsave, eval=FALSE, echo=FALSE}
351 | # Now store the computed results for later use
352 | save(hwl, file="../extra-data/hwl.rda")
353 | ```
354 | 
355 | ```{r hwlquick, include=FALSE}
356 | # This replicates the above but uses pre-stored data to speed things up
357 | tmp <- tempfile()
358 | utils::download.file("https://github.com/robjhyndman/tsfeatures/raw/master/extra-data/hwl.rda", tmp)
359 | load(tmp)
360 | ```
361 | 
362 | ```{r yahoographics}
363 | # 2-d Feature space
364 | library(ggplot2)
365 | hwl_pca <- hwl %>%
366 |   na.omit() %>%
367 |   prcomp(scale=TRUE)
368 | hwl_pca$x %>%
369 |   as_tibble() %>%
370 |   ggplot(aes(x=PC1, y=PC2)) +
371 |     geom_point()
372 | ```
373 | 
374 | ### Kang, Hyndman & Smith-Miles (IJF 2017)
375 | 
376 | Compute the features used in Kang, Hyndman & Smith-Miles (IJF 2017).
377 | Note that the trend and ACF1 are computed differently for non-seasonal data in the *tsfeatures* package than in Kang et al (2017). `tsfeatures` uses `mstl` which uses `supsmu` for the trend calculation with non-seasonal data, whereas Kang et al used a penalized regression spline computed using `mgcv` instead.  Other features are the same.
378 | 
379 | ```{r ijf2017, message=FALSE}
380 | library(tsfeatures)
381 | library(dplyr)
382 | library(tidyr)
383 | library(forecast)
384 | 
385 | M3data <- purrr::map(Mcomp::M3,
386 |   function(x) {
387 |       tspx <- tsp(x$x)
388 |       ts(c(x$x,x$xx), start=tspx[1], frequency=tspx[3])
389 |   })
390 | khs_stl <- function(x,...) {
391 |   lambda <- BoxCox.lambda(x, lower=0, upper=1, method='loglik')
392 |   y <- BoxCox(x, lambda)
393 |   c(stl_features(y, s.window='periodic', robust=TRUE, ...), lambda=lambda)
394 | }
395 | ```
396 | 
397 | ```{r khs, eval=FALSE}
398 | khs <- bind_cols(
399 |   tsfeatures(M3data, c("frequency", "entropy")),
400 |   tsfeatures(M3data, "khs_stl", scale=FALSE)) %>%
401 |   select(frequency, entropy, trend, seasonal_strength, e_acf1, lambda) %>%
402 |   replace_na(list(seasonal_strength=0)) %>%
403 |   rename(
404 |     Frequency = frequency,
405 |     Entropy = entropy,
406 |     Trend = trend,
407 |     Season = seasonal_strength,
408 |     ACF1 = e_acf1,
409 |     Lambda = lambda) %>%
410 |   mutate(Period = as.factor(Frequency))
411 | ```
412 | 
413 | ```{r khssave, eval=FALSE, echo=FALSE}
414 | # Now store the computed results for later use
415 | save(khs, file="../extra-data/khs.rda")
416 | ```
417 | 
418 | ```{r khsquick, include=FALSE}
419 | # This replicates the above but uses pre-stored data to speed things up
420 | tmp <- tempfile()
421 | utils::download.file("https://github.com/robjhyndman/tsfeatures/raw/master/extra-data/khs.rda", tmp)
422 | load(tmp)
423 | ```
424 | 
425 | ```{r ijf2017graphs, message=FALSE}
426 | # Fig 1 of paper
427 | khs %>%
428 |   select(Period, Entropy, Trend, Season, ACF1, Lambda) %>%
429 |   GGally::ggpairs()
430 | 
431 | # 2-d Feature space (Top of Fig 2)
432 | khs_pca <- khs %>%
433 |   select(-Period) %>%
434 |   prcomp(scale=TRUE)
435 | khs_pca$x %>%
436 |   as_tibble() %>%
437 |   bind_cols(Period=khs$Period) %>%
438 |   ggplot(aes(x=PC1, y=PC2)) +
439 |     geom_point(aes(col=Period))
440 | ```
441 | 
442 | ## Resources  {#ref}
443 | 
444 | [Barbour, A. J., & Parker, R. L. (2014). psd: Adaptive, sine multitaper power spectral density estimation for R. Computers & Geosciences, 63, 1-8.](https://doi.org/10.1016/j.cageo.2013.09.015)
445 | 
446 | [Cleveland, R. B., Cleveland, W. S., McRae, J. E., & Terpenning, I. (1990). STL: A Seasonal-Trend Decomposition. Journal of Official Statistics, 6(1), 3-73.](https://www.proquest.com/docview/1266805989)
447 | 
448 | [Engle, R. F. (1982). Autoregressive conditional heteroscedasticity with estimates of the variance of United Kingdom inflation. Econometrica: Journal of the Econometric Society, 987-1007.](https://doi.org/10.2307/1912773)
449 | 
450 | [Friedman, JH (1984). _A variable span scatterplot smoother_. Technical Report 5. Laboratory for Computational Statistics, Stanford University.](https://www.slac.stanford.edu/pubs/slacpubs/3250/slac-pub-3477.pdf)
451 | 
452 | [Haslett, J., & Raftery, A. E. (1989). Space-time modelling with long-memory dependence: Assessing Ireland's wind power resource. Applied Statistics, 1-50.](https://doi.org/10.2307/2347679 )
453 | 
454 | [Hyndman, R. J., Wang, E., & Laptev, N. (2015, November). Large-scale unusual time series detection. In Data Mining Workshop (ICDMW), 2015 IEEE International Conference on (pp. 1616-1619). IEEE.](https://doi.org/10.1109/ICDMW.2015.104)
455 | 
456 | [Kang, Y., Hyndman, R. J., & Li, F. (2018). GRATIS: GeneRAting TIme Series with diverse and controllable characteristics.](https://robjhyndman.com/publications/gratis/)
457 | 
458 | [Kang, Y., Hyndman, R. J., & Smith-Miles, K. (2017). Visualising forecasting algorithm performance using time series instance spaces. International Journal of Forecasting, 33(2), 345-358.](https://doi.org/10.1016/j.ijforecast.2016.09.004)
459 | 
460 | [Ljung, G. M., & Box, G. E. (1978). On a measure of lack of fit in time series models. Biometrika, 65(2), 297-303.](https://doi.org/10.1093/biomet/65.2.297 )
461 | 
462 | [Wang, X, KA Smith & RJ Hyndman (2006). Characteristic-based clustering for time series data. Data Mining and Knowledge Discovery 13(3), 335–364.](https://doi.org/10.1007/s10618-005-0039-x)
463 | 
464 | ## License
465 | 
466 | This package is free and open source software, licensed under GPL-3.
467 | 


--------------------------------------------------------------------------------
/R/compengine.R:
--------------------------------------------------------------------------------
  1 | #' CompEngine feature set
  2 | #'
  3 | #' Calculate the features that have been used in CompEngine database, using method introduced in package
  4 | #' \code{hctsa}.
  5 | #'
  6 | #' The features involved can be grouped as \code{autocorrelation},
  7 | #' \code{prediction}, \code{stationarity}, \code{distribution}, and \code{scaling}.
  8 | #'
  9 | #' @param x the input time series
 10 | #' @return a vector with CompEngine features
 11 | #' @seealso \code{\link{autocorr_features}}
 12 | #' @seealso \code{\link{pred_features}}
 13 | #' @seealso \code{\link{station_features}}
 14 | #' @seealso \code{\link{dist_features}}
 15 | #' @seealso \code{\link{scal_features}}
 16 | #' @references B.D. Fulcher and N.S. Jones. hctsa: A computational framework for automated time-series phenotyping using massive feature extraction. Cell Systems 5, 527 (2017).
 17 | #' @references B.D. Fulcher, M.A. Little, N.S. Jones Highly comparative time-series analysis: the empirical structure of time series and their methods. J. Roy. Soc. Interface 10, 83 (2013).
 18 | #' @author Yangzhuoran Yang
 19 | #' @export
 20 | compengine <- function(x) {
 21 |   c(autocorr_features(x), pred_features(x), station_features(x), dist_features(x), scal_features(x))
 22 | }
 23 | 
 24 | #' The autocorrelation feature set from software package \code{hctsa}
 25 | #'
 26 | #' Calculate the features that grouped as autocorrelation set,
 27 | #' which have been used in CompEngine database, using method introduced in package \code{hctsa}.
 28 | #'
 29 | #' Features in this set are \code{embed2_incircle_1},
 30 | #' \code{embed2_incircle_2},
 31 | #' \code{ac_9},
 32 | #' \code{firstmin_ac},
 33 | #' \code{trev_num},
 34 | #' \code{motiftwo_entro3},
 35 | #' and \code{walker_propcross}.
 36 | #'
 37 | #' @param x the input time series
 38 | #' @return a vector with autocorrelation features
 39 | #' @seealso \code{\link{embed2_incircle}}
 40 | #' @seealso \code{\link{ac_9}}
 41 | #' @seealso \code{\link{firstmin_ac}}
 42 | #' @seealso \code{\link{trev_num}}
 43 | #' @seealso \code{\link{motiftwo_entro3}}
 44 | #' @seealso \code{\link{walker_propcross}}
 45 | #' @references B.D. Fulcher and N.S. Jones. hctsa: A computational framework for automated time-series phenotyping using massive feature extraction. Cell Systems 5, 527 (2017).
 46 | #' @references B.D. Fulcher, M.A. Little, N.S. Jones Highly comparative time-series analysis: the empirical structure of time series and their methods. J. Roy. Soc. Interface 10, 83 (2013).
 47 | #' @author Yangzhuoran Yang
 48 | #' @export
 49 | autocorr_features <- function(x) {
 50 |   acfv <- stats::acf(x, length(x) - 1, plot = FALSE, na.action = na.pass)
 51 |   output <- c(
 52 |     embed2_incircle_1 = embed2_incircle(x, 1, acfv = acfv),
 53 |     embed2_incircle_2 = embed2_incircle(x, 2, acfv = acfv),
 54 |     ac_9 = ac_9(x, acfv),
 55 |     firstmin_ac = firstmin_ac(x, acfv),
 56 |     trev_num = trev_num(x),
 57 |     motiftwo_entro3 = motiftwo_entro3(x),
 58 |     walker_propcross = walker_propcross(x)
 59 |   )
 60 |   return(output)
 61 | }
 62 | 
 63 | #' The prediction feature set from software package \code{hctsa}
 64 | #'
 65 | #' Calculate the features that grouped as prediction set,
 66 | #' which have been used in CompEngine database, using method introduced in package \code{hctsa}.
 67 | #'
 68 | #' Features in this set are \code{localsimple_mean1},
 69 | #' \code{localsimple_lfitac},
 70 | #' and \code{sampen_first}.
 71 | #'
 72 | #' @param x the input time series
 73 | #' @return a vector with prediction features
 74 | #' @seealso \code{\link{localsimple_taures}}
 75 | #' @seealso \code{\link{sampen_first}}
 76 | #' @references B.D. Fulcher and N.S. Jones. hctsa: A computational framework for automated time-series phenotyping using massive feature extraction. Cell Systems 5, 527 (2017).
 77 | #' @references B.D. Fulcher, M.A. Little, N.S. Jones Highly comparative time-series analysis: the empirical structure of time series and their methods. J. Roy. Soc. Interface 10, 83 (2013).
 78 | #' @author Yangzhuoran Yang
 79 | #' @export
 80 | pred_features <- function(x) {
 81 |   output <- c(
 82 |     localsimple_mean1 = localsimple_taures(x, "mean"),
 83 |     localsimple_lfitac = localsimple_taures(x, "lfit"),
 84 |     sampen_first = sampen_first(x)
 85 |   )
 86 |   return(output)
 87 | }
 88 | 
 89 | #' The stationarity feature set from software package \code{hctsa}
 90 | #'
 91 | #' Calculate the features that grouped as stationarity set,
 92 | #' which have been used in CompEngine database, using method introduced in package \code{hctsa}.
 93 | #'
 94 | #' Features in this set are \code{std1st_der},
 95 | #' \code{spreadrandomlocal_meantaul_50},
 96 | #' and \code{spreadrandomlocal_meantaul_ac2}.
 97 | #'
 98 | #' @param x the input time series
 99 | #' @return a vector with stationarity features
100 | #' @seealso \code{\link{std1st_der}}
101 | #' @seealso \code{\link{spreadrandomlocal_meantaul}}
102 | #' @references B.D. Fulcher and N.S. Jones. hctsa: A computational framework for automated time-series phenotyping using massive feature extraction. Cell Systems 5, 527 (2017).
103 | #' @references B.D. Fulcher, M.A. Little, N.S. Jones Highly comparative time-series analysis: the empirical structure of time series and their methods. J. Roy. Soc. Interface 10, 83 (2013).
104 | #' @author Yangzhuoran Yang
105 | #' @export
station_features <- function(x) {
  # The two spreadrandomlocal_meantaul features differ only in the second
  # argument: the fixed value 50 versus the "ac2" setting.
  c(
    std1st_der = std1st_der(x),
    spreadrandomlocal_meantaul_50 = spreadrandomlocal_meantaul(x, 50),
    spreadrandomlocal_meantaul_ac2 = spreadrandomlocal_meantaul(x, "ac2")
  )
}
114 | 
115 | #' The distribution feature set from software package \code{hctsa}
116 | #'
117 | #' Calculate the features that grouped as distribution set,
118 | #' which have been used in CompEngine database, using method introduced in package \code{hctsa}.
119 | #'
120 | #' Features in this set are \code{histogram_mode_10}
121 | #' and \code{outlierinclude_mdrmd}.
122 | #'
123 | #' @param x the input time series
124 | #' @return a vector with distribution features
125 | #' @seealso \code{\link{histogram_mode}}
126 | #' @seealso \code{\link{outlierinclude_mdrmd}}
127 | #' @references B.D. Fulcher and N.S. Jones. hctsa: A computational framework for automated time-series phenotyping using massive feature extraction. Cell Systems 5, 527 (2017).
128 | #' @references B.D. Fulcher, M.A. Little, N.S. Jones Highly comparative time-series analysis: the empirical structure of time series and their methods. J. Roy. Soc. Interface 10, 83 (2013).
129 | #' @author Yangzhuoran Yang
130 | #' @export
dist_features <- function(x) {
  # Both features are computed with their default settings.
  c(
    histogram_mode_10 = histogram_mode(x),
    outlierinclude_mdrmd = outlierinclude_mdrmd(x)
  )
}
138 | 
139 | #' The scaling feature set from software package \code{hctsa}
140 | #'
141 | #' Calculate the features that grouped as scaling set,
142 | #' which have been used in CompEngine database, using method introduced in package \code{hctsa}.
143 | #'
144 | #' Feature in this set is \code{fluctanal_prop_r1}.
145 | #'
146 | #' @param x the input time series
147 | #' @return a vector with scaling features
148 | #' @seealso \code{\link{fluctanal_prop_r1}}
149 | #' @references B.D. Fulcher and N.S. Jones. hctsa: A computational framework for automated time-series phenotyping using massive feature extraction. Cell Systems 5, 527 (2017).
150 | #' @references B.D. Fulcher, M.A. Little, N.S. Jones Highly comparative time-series analysis: the empirical structure of time series and their methods. J. Roy. Soc. Interface 10, 83 (2013).
151 | #' @author Yangzhuoran Yang
152 | #' @export
scal_features <- function(x) {
  # The scaling set currently holds a single named feature.
  c(fluctanal_prop_r1 = fluctanal_prop_r1(x))
}
157 | 
158 | # autocorr ----------------------------------------------------------------
159 | 
160 | # CO_Embed2_Basic_tau_incircle_1
161 | # CO_Embed2_Basic_tau_incircle_2
#' Points inside a given circular boundary in a 2-d embedding space from software package \code{hctsa}
#'
#' Each observation is paired with the observation \code{tau} steps later, where
#' the time lag \code{tau} is the first zero crossing of the autocorrelation
#' function as returned by \code{\link{firstzero_ac}}.
#'
#' @param y the input time series
#' @param boundary the given circular boundary, set to 1 or 2 in CompEngine.
#' If \code{NULL} (the default), 1 is used and a warning is issued.
#' @param acfv autocorrelation object as returned by \code{stats::acf}; if
#' supplied, it is used to avoid repeated computation.
#' @return the proportion of points inside a given circular boundary
#' @references B.D. Fulcher and N.S. Jones. hctsa: A computational framework for automated time-series phenotyping using massive feature extraction. Cell Systems 5, 527 (2017).
#' @references B.D. Fulcher, M.A. Little, N.S. Jones Highly comparative time-series analysis: the empirical structure of time series and their methods. J. Roy. Soc. Interface 10, 83 (2013).
#' @author Yangzhuoran Yang
#' @export
embed2_incircle <- function(y, boundary = NULL, acfv = stats::acf(y, length(y) - 1, plot = FALSE, na.action = na.pass)) {
  if (is.null(boundary)) {
    warning("`embed2_incircle()` using `boundary = 1`. Set value with `boundary`.")
    boundary <- 1
  }
  # Embedding delay
  lag <- firstzero_ac(y, acfv)
  # Number of (y[t], y[t + lag]) pairs available
  n_pairs <- length(y) - lag
  lead_idx <- 1:n_pairs

  # Sum of squared coordinates of every embedded point; `boundary` is compared
  # with this quantity directly.
  sq_dist <- y[lead_idx + lag]^2 + y[lead_idx]^2
  sum(sq_dist < boundary, na.rm = TRUE) / n_pairs
}
187 | 
188 | # CO_firstzero_ac
#' The first zero crossing of the autocorrelation function from software package \code{hctsa}
#'
#' Searches the supplied autocorrelations (by default every lag up to the
#' length of the time series minus one) for the first lag at which the
#' autocorrelation is negative.
#'
#' @param y the input time series
#' @param acfv autocorrelation object as returned by \code{stats::acf}; if
#' supplied, it is used to avoid repeated computation.
#' @return The lag of the first negative autocorrelation, or 0 when no
#' autocorrelation is negative (which includes the case where they are all
#' missing).
#' @references B.D. Fulcher and N.S. Jones. hctsa: A computational framework for automated time-series phenotyping using massive feature extraction. Cell Systems 5, 527 (2017).
#' @references B.D. Fulcher, M.A. Little, N.S. Jones Highly comparative time-series analysis: the empirical structure of time series and their methods. J. Roy. Soc. Interface 10, 83 (2013).
#' @author Yangzhuoran Yang
#' @export
firstzero_ac <- function(y, acfv = stats::acf(y, N - 1, plot = FALSE, na.action = na.pass)) {
  # N has to be assigned before `acfv` is first used: the default value of
  # `acfv` is evaluated lazily inside this function and refers to N.
  N <- length(y)
  # Drop lag 0. which() discards NA comparisons and returns only positive
  # indices, so an empty result is the only "nothing found" case to handle.
  negative_lags <- which(acfv$acf[-1] < 0)
  if (length(negative_lags) == 0L) {
    return(0)
  }
  # The series starts at acf = 1, so the first negative value marks the crossing.
  negative_lags[1]
}
212 | 
213 | # ac_9
214 | #' Autocorrelation at lag 9. Included for completion and consistency.
215 | #'
216 | #' @param y the input time series
217 | #' @param acfv vector of autocorrelation, if exist, used to avoid repeated computation.
218 | #' @return autocorrelation at lag 9
219 | #' @references B.D. Fulcher and N.S. Jones. hctsa: A computational framework for automated time-series phenotyping using massive feature extraction. Cell Systems 5, 527 (2017).
220 | #' @references B.D. Fulcher, M.A. Little, N.S. Jones Highly comparative time-series analysis: the empirical structure of time series and their methods. J. Roy. Soc. Interface 10, 83 (2013).
221 | #' @author Yangzhuoran Yang
222 | #' @export
ac_9 <- function(y, acfv = stats::acf(y, 9, plot = FALSE, na.action = na.pass)) {
  # The first stored autocorrelation is lag 0, so lag 9 sits at position 10.
  target_lag <- 9L
  autocorrelations <- acfv$acf
  autocorrelations[target_lag + 1L]
}
226 | 
227 | # CO_firstmin_ac
#' Time of first minimum in the autocorrelation function from software package \code{hctsa}
#'
#' Scans the autocorrelations at lags 1, 2, ... and stops at the first lag
#' whose autocorrelation is smaller than that of both neighbouring lags
#' (for lag 1, smaller than that of lag 2).
#'
#' @param x the input time series
#' @param acfv autocorrelation object as returned by \code{stats::acf}; if
#' supplied, it is used to avoid repeated computation. Only the lags it
#' contains are searched.
#' @return The lag of the first minimum. If no minimum is found the length of
#' the series minus one is returned; if a missing autocorrelation is met
#' first, \code{NA} is returned with a warning.
#' @references B.D. Fulcher and N.S. Jones. hctsa: A computational framework for automated time-series phenotyping using massive feature extraction. Cell Systems 5, 527 (2017).
#' @references B.D. Fulcher, M.A. Little, N.S. Jones Highly comparative time-series analysis: the empirical structure of time series and their methods. J. Roy. Soc. Interface 10, 83 (2013).
#' @author Yangzhuoran Yang
#' @examples
#' firstmin_ac(WWWusage)
#' @export
firstmin_ac <- function(x, acfv = stats::acf(x, lag.max = N - 1, plot = FALSE, na.action = na.pass)) {
  # hctsa uses autocorr in MatLab to calculate autocorrelation
  # N has to be assigned before `acfv` is first used: the default value of
  # `acfv` is evaluated lazily inside this function and refers to N.
  N <- length(x)
  # Autocorrelations at lags 1, 2, ... (lag 0 dropped). Taking the values
  # directly avoids recycling when fewer than N - 1 lags are supplied and
  # avoids a zero-length assignment error for a series of length one.
  autoCorr <- as.numeric(acfv$acf[-1])
  for (i in seq_along(autoCorr)) {
    if (is.na(autoCorr[i])) {
      warning("No minimum was found.")
      return(NA)
    }
    if (i == 2 && autoCorr[2] > autoCorr[1]) {
      # Lag 1 is already lower than lag 2
      return(1)
    } else if (i > 2 && autoCorr[i - 2] > autoCorr[i - 1] && autoCorr[i - 1] < autoCorr[i]) {
      # The previous lag is lower than both of its neighbours
      return(i - 1)
    }
  }
  # No local minimum among the lags searched
  N - 1
}
260 | 
# CO_trev_1_num
#' Normalized nonlinear autocorrelation, the numerator of the trev function of a time series from software package \code{hctsa}
#'
#' Computes the numerator of the trev function: the mean of the cubed
#' differences between consecutive observations (time lag 1).
#' Missing differences are ignored.
#'
#' @param y the input time series
#' @return the numerator of the trev function of a time series
#' @references B.D. Fulcher and N.S. Jones. hctsa: A computational framework for automated time-series phenotyping using massive feature extraction. Cell Systems 5, 527 (2017).
#' @references B.D. Fulcher, M.A. Little, N.S. Jones Highly comparative time-series analysis: the empirical structure of time series and their methods. J. Roy. Soc. Interface 10, 83 (2013).
#' @author Yangzhuoran Yang
#' @examples
#' trev_num(WWWusage)
#' @export
trev_num <- function(y) {
  n_obs <- length(y)
  idx <- 1:(n_obs - 1)
  # value at time t and at time t + 1
  current <- y[idx]
  following <- y[idx + 1]
  step <- following - current
  mean(step^3, na.rm = TRUE)
}
281 | 
# SB_MotifTwo_mean_hhh
#' Local motifs in a binary symbolization of the time series from software package \code{hctsa}
#'
#' The series is first coarse-grained with \code{binarize_mean}: values above
#' the mean become 1, all others 0. The relative frequency of each of the
#' eight possible words of three consecutive symbols is then computed and the
#' entropy of these frequencies is returned.
#'
#' A warning is issued when the series has fewer than 5 observations.
#'
#' @param y the input time series
#' @return Entropy of words in the binary alphabet of length 3.
#' @references B.D. Fulcher and N.S. Jones. hctsa: A computational framework for automated time-series phenotyping using massive feature extraction. Cell Systems 5, 527 (2017).
#' @references B.D. Fulcher, M.A. Little, N.S. Jones Highly comparative time-series analysis: the empirical structure of time series and their methods. J. Roy. Soc. Interface 10, 83 (2013).
#' @author Yangzhuoran Yang
#' @examples
#' motiftwo_entro3(WWWusage)
#' @export
motiftwo_entro3 <- function(y) {
  yBin <- binarize_mean(y)
  N <- length(yBin)
  if (N < 5) warning("Time series too short")

  # Shorten an indicator by one so it lines up with the next symbol
  drop_last <- function(v) v[1:(length(v) - 1)]

  # One-symbol words: u = above the mean (1), d = not above the mean (0)
  is_u <- drop_last(yBin == 1)
  is_d <- drop_last(yBin == 0)

  # Two-symbol words: extend by the symbol one step ahead
  second <- yBin[2:N]
  dd <- drop_last(is_d & second == 0)
  du <- drop_last(is_d & second == 1)
  ud <- drop_last(is_u & second == 0)
  uu <- drop_last(is_u & second == 1)

  # Three-symbol words: extend by the symbol two steps ahead and
  # turn each indicator into a relative frequency
  third <- yBin[3:N]
  word_freq <- c(
    mean(dd & third == 0), mean(dd & third == 1),
    mean(du & third == 0), mean(du & third == 1),
    mean(ud & third == 0), mean(ud & third == 1),
    mean(uu & third == 0), mean(uu & third == 1)
  )
  f_entropy(word_freq)
}
340 | 
# BF_BF_binarize_mean
#' Converts an input vector into a binarized version from software package \code{hctsa}
#'
#' @param y the input time series
#' @return A numeric vector of the same length as \code{y}: 1 where the value is
#' strictly above the mean of \code{y}, 0 everywhere else.
#' @references B.D. Fulcher and N.S. Jones. hctsa: A computational framework for automated time-series phenotyping using massive feature extraction. Cell Systems 5, 527 (2017).
#' @references B.D. Fulcher, M.A. Little, N.S. Jones Highly comparative time-series analysis: the empirical structure of time series and their methods. J. Roy. Soc. Interface 10, 83 (2013).
#' @author Yangzhuoran Yang
#' @export

binarize_mean <- function(y) {
  centred <- y - mean(y)
  # which() skips comparisons that evaluate to NA, so those positions stay 0
  above <- which(centred > 0)
  replace(numeric(length(y)), above, 1)
}
357 | 
# Entropy -sum(p * log(p)) of a vector of proportions.
# Entries that are zero or negative are left out, i.e. 0 * log(0) counts as 0.
f_entropy <- function(x) {
  positive <- x[x > 0]
  -sum(positive * log(positive))
}
362 | 
# PH_Walker_prop_01_sw_propcross
#' Simulates a hypothetical walker moving through the time domain from software package \code{hctsa}
#'
#' The hypothetical particle (or 'walker') moves in response to values of the
#' time series at each point.
#' It starts at zero and, at every step, narrows the gap between its own value
#' and that of the time series at the previous time point by 10%.
#'
#' @param y the input time series
#' @return fraction of time series length that walker crosses time series,
#' i.e. the number of sign changes of (walker - series) between consecutive
#' time points divided by \code{length(y) - 1}
#' @references B.D. Fulcher and N.S. Jones. hctsa: A computational framework for automated time-series phenotyping using massive feature extraction. Cell Systems 5, 527 (2017).
#' @references B.D. Fulcher, M.A. Little, N.S. Jones Highly comparative time-series analysis: the empirical structure of time series and their methods. J. Roy. Soc. Interface 10, 83 (2013).
#' @author Yangzhuoran Yang
#' @export
walker_propcross <- function(y) {
  n_obs <- length(y)
  step_frac <- 0.1

  # Walker path: starts at zero, then closes 10% of the gap to the previous
  # observation at each step
  walker <- numeric(n_obs)
  for (t in 2:n_obs) {
    previous <- walker[t - 1]
    walker[t] <- previous + step_frac * (y[t - 1] - previous)
  }

  # A crossing happens when the gap changes sign between neighbouring points
  earlier <- 1:(n_obs - 1)
  later <- 2:n_obs
  gap_earlier <- walker[earlier] - y[earlier]
  gap_later <- walker[later] - y[later]
  n_cross <- sum(gap_earlier * gap_later < 0, na.rm = TRUE)
  n_cross / (n_obs - 1)
}
393 | 
394 | # pred --------------------------------------------------------------------
395 | 
# FC_localsimple_mean1_taures
# FC_localsimple_lfit_taures
#' The first zero crossing of the autocorrelation function of the residuals from Simple local time-series forecasting from software package \code{hctsa}
#'
#' Simple predictors using the past trainLength values of the time series to
#' predict its next value.
#'
#' @param y the input time series
#' @param forecastMeth the forecasting method, default to \code{mean}.
#' \code{mean}: local mean prediction using the past trainLength time-series values.
#' \code{lfit}: local linear prediction using the past trainLength time-series values.
#' @param trainLength the number of time-series values to use to forecast the next value,
#' a single positive whole number smaller than the length of \code{y}.
#' When \code{NULL} (the default) it is set to 1 for method \code{mean} and to
#' \code{firstzero_ac(y)} for method \code{lfit}.
#' @return The first zero crossing of the autocorrelation function of the residuals
#' (prediction minus observed value), as computed by \code{firstzero_ac}.
#' @export
localsimple_taures <- function(y, forecastMeth = c("mean", "lfit"), trainLength = NULL) {
  forecastMeth <- match.arg(forecastMeth)
  if (is.null(trainLength)) {
    lp <- switch(forecastMeth, mean = 1, lfit = firstzero_ac(y))
  } else {
    # A user-supplied training length was previously never assigned to `lp`,
    # so every call with `trainLength` failed with "object 'lp' not found".
    if (!is.numeric(trainLength) || length(trainLength) != 1L ||
      is.na(trainLength) || trainLength < 1 || trainLength != round(trainLength)) {
      stop("`trainLength` must be a single positive whole number")
    }
    lp <- trainLength
  }

  N <- length(y)
  # Check before building the evaluation range: (lp + 1):N would count
  # downwards when the series is too short.
  if (lp >= N) {
    stop("Time series too short for forecasting in `localsimple_taures`")
  }
  evalr <- (lp + 1):N # time points that get a forecast

  res <- numeric(length(evalr))
  if (forecastMeth == "mean") {
    for (i in seq_along(evalr)) {
      past <- (evalr[i] - lp):(evalr[i] - 1)
      res[i] <- mean(y[past]) - y[evalr[i]] # prediction - value
    }
  } else {
    # "lfit": straight line through the last lp values, extrapolated one step
    a <- 1:lp
    next_step <- data.frame(a = lp + 1)
    for (i in seq_along(evalr)) {
      b <- y[(evalr[i] - lp):(evalr[i] - 1)]
      lm.ab <- lm(b ~ a, data = data.frame(a, b))
      res[i] <- predict(lm.ab, newdata = next_step) - y[evalr[i]] # prediction - value
    }
  }
  firstzero_ac(res)
}
442 | 
# EN_SampEn_5_03_sampen1
#' Second Sample Entropy of a time series from software package \code{hctsa}
#'
#' Modified from the Ben Fulcher's \code{EN_SampEn} which uses code from PhysioNet.
#' The publicly-available PhysioNet Matlab code, sampenc (renamed here to
#' RN_sampenc) is available from:
#' http://www.physionet.org/physiotools/sampen/matlab/1.1/sampenc.m
#'
#' Embedding dimension is set to 5.
#' The threshold is set to 0.3.
#' The value is obtained from \code{sampenc(y, 6, 0.3)}.
#'
#' @param y the input time series
#' @return The value returned by \code{sampenc} for these settings.
#' @references cf. "Physiological time-series analysis using approximate entropy and sample
#' entropy", J. S. Richman and J. R. Moorman, Am. J. Physiol. Heart Circ.
#' Physiol., 278(6) H2039 (2000)
#' @references B.D. Fulcher and N.S. Jones. hctsa: A computational framework for automated time-series phenotyping using massive feature extraction. Cell Systems 5, 527 (2017).
#' @references B.D. Fulcher, M.A. Little, N.S. Jones Highly comparative time-series analysis: the empirical structure of time series and their methods. J. Roy. Soc. Interface 10, 83 (2013).
#' @author Yangzhuoran Yang
#' @export
sampen_first <- function(y) {
  embed_dim <- 5
  tolerance <- 0.3
  # sampenc is called with one more than the embedding dimension
  sampenc(y, embed_dim + 1, tolerance)
}
469 | 
# PN_sampenc
#' Second Sample Entropy from software package \code{hctsa}
#'
#' Modified from the Ben Fulcher version of original code sampenc.m from
#' http://physionet.org/physiotools/sampen/
#' http://www.physionet.org/physiotools/sampen/matlab/1.1/sampenc.m
#' Code by DK Lake (dlake@virginia.edu), JR Moorman and Cao Hanqing.
#'
#' Two observations match when their absolute difference is below \code{r}.
#' For every lag the length of the current run of consecutive matches is
#' tracked; \code{A[k]} counts runs of length at least \code{k}, and \code{B[k]}
#' counts the same runs except those ending at the last observation.
#' Comparisons involving missing values count as non-matches.
#'
#' @param y the input time series
#' @param M embedding dimension
#' @param r threshold
#' @return \code{-log(A[2] / B[1])}. This is \code{NaN} when no matches are
#' counted (for example for series with fewer than two observations) and
#' \code{NA} when \code{M} is smaller than 2.
#'
#' @references cf. "Physiological time-series analysis using approximate entropy and sample
#' entropy", J. S. Richman and J. R. Moorman, Am. J. Physiol. Heart Circ.
#' Physiol., 278(6) H2039 (2000)
#' @references B.D. Fulcher and N.S. Jones. hctsa: A computational framework for automated time-series phenotyping using massive feature extraction. Cell Systems 5, 527 (2017).
#' @references B.D. Fulcher, M.A. Little, N.S. Jones Highly comparative time-series analysis: the empirical structure of time series and their methods. J. Roy. Soc. Interface 10, 83 (2013).
#' @author Yangzhuoran Yang
#' @export
sampenc <- function(y, M = 6, r = 0.3) {
  N <- length(y)
  lastrun <- numeric(N) # run lengths per lag carried over from the previous i
  A <- numeric(M)
  B <- numeric(M)
  depths <- seq_len(M)

  # seq_len() (unlike 1:(N - 1)) yields no iterations for N <= 1
  for (i in seq_len(max(N - 1, 0))) {
    n_lags <- N - i
    lags <- seq_len(n_lags)

    # Which future points j = i + lag match the value at i?
    hit <- abs(y[i + lags] - y[i]) < r
    hit[is.na(hit)] <- FALSE

    # A match extends the run for that lag by one, a mismatch resets it to 0
    run <- (lastrun[lags] + 1) * hit

    # A run of length L contributes to every template length k <= min(M, L)
    A <- A + vapply(depths, function(k) as.numeric(sum(run >= k)), numeric(1))
    # B leaves out the comparison with the final observation (j == N),
    # which is the last lag of this iteration
    B <- B + vapply(depths, function(k) as.numeric(sum(run[-n_lags] >= k)), numeric(1))

    lastrun[lags] <- run
  }

  # Calculate for m <- 2
  p <- A[2] / B[1]
  -log(p)
}
523 | 
524 | # stationarity ------------------------------------------------------------
525 | 
# SY_StdNthDer_1
#' Standard deviation of the first derivative of the time series from software package \code{hctsa}
#'
#' Modified from \code{SY_StdNthDer} in \code{hctsa}. Based on an idea by Vladimir Vassilevsky.
#' The first derivative is taken to be the series of first differences.
#'
#' @param y the input time series. Missing values will be removed.
#' @return Standard deviation of the first derivative of the time series.
#' An error is raised when \code{y} has fewer than two observations.
#' @references cf. http://www.mathworks.de/matlabcentral/newsreader/view_thread/136539
#' @references B.D. Fulcher and N.S. Jones. hctsa: A computational framework for automated time-series phenotyping using massive feature extraction. Cell Systems 5, 527 (2017).
#' @references B.D. Fulcher, M.A. Little, N.S. Jones Highly comparative time-series analysis: the empirical structure of time series and their methods. J. Roy. Soc. Interface 10, 83 (2013).
#' @author Yangzhuoran Yang
#' @export
std1st_der <- function(y) {
  n_obs <- length(y)
  if (n_obs < 2) {
    stop("Time series is too short to compute differences")
  }
  first_diff <- diff(y)
  sd(first_diff, na.rm = TRUE)
}
543 | 
# SY_SpreadRandomLocal_50_100_meantaul
# SY_SpreadRandomLocal_ac2_100_meantaul
#' Bootstrap-based stationarity measure from software package \code{hctsa}
#'
#' 100 time-series segments of length \code{l} are selected at random from the time series and
#' the mean of the first zero-crossings of the autocorrelation function in each segment is calculated.
#' Segments may overlap. Because the segments are drawn at random, the result
#' varies between calls unless the random seed is fixed.
#'
#' @param y the input time series
#' @param l the length of local time-series segments to analyse as a positive integer. Can also be a specified character string: "ac2": twice the first zero-crossing of the autocorrelation function
#' @return mean of the first zero-crossings of the autocorrelation function.
#' \code{NA} (with a warning) when the series is too short for segments of length \code{l}.
#' @references B.D. Fulcher and N.S. Jones. hctsa: A computational framework for automated time-series phenotyping using massive feature extraction. Cell Systems 5, 527 (2017).
#' @references B.D. Fulcher, M.A. Little, N.S. Jones Highly comparative time-series analysis: the empirical structure of time series and their methods. J. Roy. Soc. Interface 10, 83 (2013).
#' @author Yangzhuoran Yang
#' @export
spreadrandomlocal_meantaul <- function(y, l = 50) {
  if (is.character(l) && "ac2" %in% l) l <- 2 * firstzero_ac(y)
  if (!is.numeric(l)) stop("Unknown specifier `l`")
  numSegs <- 100
  N <- length(y)
  # Number of admissible start points. When this is 0, sample(0, 1) would
  # return 0 and the segment would silently come out one point short, so
  # that case is treated as "too short" as well.
  n_starts <- N - 1 - l
  if (l > 0.9 * N || n_starts < 1) {
    warning("This time series is too short. Specify proper segment length in `l`")
    return(NA_real_)
  }

  qs <- numeric(numSegs)

  for (j in seq_len(numSegs)) {
    # pick a range
    # in this implementation, ranges CAN overlap
    ist <- sample.int(n_starts, 1) # random start point (not exceeding the endpoint)
    ifh <- ist + l - 1 # finish index
    ysub <- y[ist:ifh] # subsection of the time series
    qs[j] <- firstzero_ac(ysub)
  }
  mean(qs, na.rm = TRUE)
}
583 | 
584 | # distribution ------------------------------------------------------------
585 | 
# DN_histogram_mode_10
#' Mode of a data vector from software package \code{hctsa}
#'
#' Measures the mode of the data vector using histograms with a given number of bins as suggestion.
#' The value calculated is different from \code{hctsa} and \code{CompEngine} as the histogram edges are calculated differently.
#' When several bins share the highest count, the mean of their centres is returned.
#'
#' @param y the input data vector
#' @param numBins the number of bins to use in the histogram. Passed to
#' \code{hist} as \code{breaks}; must be numeric.
#' @return the mode
#' @references B.D. Fulcher and N.S. Jones. hctsa: A computational framework for automated time-series phenotyping using massive feature extraction. Cell Systems 5, 527 (2017).
#' @references B.D. Fulcher, M.A. Little, N.S. Jones Highly comparative time-series analysis: the empirical structure of time series and their methods. J. Roy. Soc. Interface 10, 83 (2013).
#' @author Yangzhuoran Yang
#' @export
#' @importFrom graphics hist
#' @importFrom stats predict

histogram_mode <- function(y, numBins = 10) {
  if (!is.numeric(numBins)) {
    stop("Unknown format for numBins")
  }
  # Compute the histogram from the data:
  histdata <- hist(y, plot = FALSE, breaks = numBins)
  binCenters <- histdata$mids
  counts <- histdata$counts
  # Mean position of maximums (if multiple): which.max() would only keep the
  # first of several tied bins, so select every bin that reaches the maximum.
  mean(binCenters[counts == max(counts)])
}
617 | 
# DN_OutlierInclude_abs_001_mdrmd
#' How median depend on distributional outliers from software package \code{hctsa}
#'
#' Measures the median as more and more outliers are included in the
#' calculation, where outliers are the points with the largest absolute value.
#'
#' The threshold for including time-series data points in the analysis increases
#' from zero to the maximum absolute value, in increments of 0.01*sigma,
#' where sigma is the standard deviation of the time series (1 after z-scoring).
#'
#' At each threshold the points whose absolute value is at least the threshold
#' are selected. Two quantities are recorded: the median of their time indices,
#' rescaled as \code{median / (N / 2) - 1}, and the number of intervals between
#' successive selected points as a percentage of the series length.
#' Thresholds beyond the last one at which this percentage exceeds 2 are discarded.
#'
#' @param y the input time series (ideally z-scored)
#' @param zscored Should y be z-scored before computing the statistic. Default: TRUE
#' @return median over the retained thresholds of the rescaled median index.
#' An error is raised for a constant series, or when the percentage never exceeds 2.
#' @references B.D. Fulcher and N.S. Jones. hctsa: A computational framework for automated time-series phenotyping using massive feature extraction. Cell Systems 5, 527 (2017).
#' @references B.D. Fulcher, M.A. Little, N.S. Jones Highly comparative time-series analysis: the empirical structure of time series and their methods. J. Roy. Soc. Interface 10, 83 (2013).
#' @author Yangzhuoran Yang
#' @export
#' @importFrom stats ts tsp sd

outlierinclude_mdrmd <- function(y, zscored = TRUE) {
  if (length(unique(y)) == 1L) {
    stop("The time series is a constant!")
  }
  if (zscored) {
    # standardise, then carry over the time-series attributes of the input
    standardised <- ts(c(scale(y)))
    tsp(standardised) <- tsp(y)
    y <- standardised
    isd <- 1
  } else {
    isd <- sd(y, na.rm = TRUE) # Modified to fit the 0.01*sigma increment in description
  }
  N <- length(y)
  magnitude <- abs(y)
  thr <- seq(from = 0, to = max(magnitude, na.rm = TRUE), by = 0.01 * isd)
  if (length(thr) == 0) stop("peculiar time series")

  # For every threshold: time indices of the points at or beyond it
  exceeding <- lapply(thr, function(th) which(magnitude >= th))
  # rescaled median index of those points
  msDt <- vapply(exceeding, function(r) median(r) / (N / 2) - 1, numeric(1))
  # number of inter-event intervals as a percentage of the series length
  msDtp <- vapply(exceeding, function(r) length(diff(r)) / N * 100, numeric(1))

  # Trim off where the statistic power is lacking: less than 2% of data
  # included
  trimthr <- 2 # percent
  powered <- which(msDtp > trimthr)
  mj <- powered[length(powered)]
  if (length(mj) == 0) {
    stop("the statistic power is lacking: less than 2% of data included")
  }

  median(msDt[1:mj])
}
696 | 
697 | # scaling ----------------------------------------------------------------
698 | 
# SC_FluctAnal_2_rsrangefit_50_1_logi_prop_r1
#' Implements fluctuation analysis from software package \code{hctsa}
#'
#' The cumulative sum of the series (missing values counted as zero) is cut
#' into non-overlapping windows at up to 50 logarithmically spaced scales
#' between 5 and half the series length. In each window a polynomial of order 1
#' is fitted and removed, and the range of the residuals is taken. The order of
#' fluctuations is 2, corresponding to root mean square fluctuations.
#' Two straight lines are then fitted to the log-log fluctuation curve, before
#' and after a candidate break point, and the break point with the smallest
#' combined residual norm is selected.
#'
#' @param x the input time series (or any vector)
#' @return The position of the selected break point divided by the number of
#' scales. An error is raised when fewer than 8 distinct scales are available.
#' @references B.D. Fulcher and N.S. Jones. hctsa: A computational framework for automated time-series phenotyping using massive feature extraction. Cell Systems 5, 527 (2017).
#' @references B.D. Fulcher, M.A. Little, N.S. Jones Highly comparative time-series analysis: the empirical structure of time series and their methods. J. Roy. Soc. Interface 10, 83 (2013).
#' @author Yangzhuoran Yang
#' @export
fluctanal_prop_r1 <- function(x) {
  q <- 2
  tauStep <- 50

  N <- length(x)
  # profile of the series; missing values add nothing to the running sum
  y <- cumsum(ifelse(!is.na(x), x, 0))

  # distinct window lengths, log-spaced between 5 and N / 2
  taur <- unique(round(exp(seq(from = log(5), to = log(floor(N / 2)), length.out = tauStep))))
  ntau <- length(taur)
  if (ntau < 8) { # fewer than 8 points
    stop("This time series is too short to analyse using this fluctuation analysis")
  }

  Fl <- numeric(ntau)
  for (i in 1:ntau) {
    tau <- taur[i] # the scale on which to compute fluctuations
    tt <- (1:tau) # faux time range

    # cut the profile into consecutive windows of length tau
    windows <- split(y, ceiling(seq_along(y) / tau))
    if (length(windows) > floor(N / tau)) { # incomplete trailing window, drop it
      windows <- windows[-length(windows)]
    }

    # range of the linearly detrended profile within each window
    spread <- vapply(windows, function(w) {
      detrended <- residuals(lm(lmy ~ tt, data = data.frame(tt, lmy = w)))
      diff(range(detrended))
    }, numeric(1))

    # Compute fluctuation function:
    Fl[i] <- (mean(spread^q))^(1 / q)
  }

  logtt <- log(taur)
  logFF <- log(Fl)

  # 2-norm of the residuals of a straight-line fit to the selected points
  line_misfit <- function(idx) {
    fit <- lm(y ~ x, data = data.frame(x = logtt[idx], y = logFF[idx]))
    norm(-residuals(fit), type = "2")
  }

  ## Try assuming two components (2 distinct scaling regimes)
  # Fit one line up to each candidate point and another from it onwards,
  # and record the combined misfit.
  sserr <- rep(NA, ntau) # don't choose the end points
  minPoints <- 6
  for (i in minPoints:(ntau - minPoints)) {
    sserr[i] <- line_misfit(1:i) + line_misfit(i:ntau)
  }

  # breakPt is the point where it's best to fit a line before and another line after
  breakPt <- which.min(sserr)
  length(1:breakPt) / ntau
}
787 | 


--------------------------------------------------------------------------------