├── .github ├── .gitignore └── workflows │ ├── pkgdown.yaml │ └── R-CMD-check.yaml ├── extra-data ├── hwl.rda ├── khs.rda └── yahoo.rda ├── tests ├── testthat.R └── testthat │ ├── test-zero_proportion.R │ ├── test-holt_parameters.R │ ├── test-pacf_features.R │ ├── test-acf_features.R │ ├── test-compengine.R │ └── test-stl_features.R ├── CRAN-SUBMISSION ├── .Rbuildignore ├── pkgdown └── extra.css ├── .gitignore ├── NEWS.md ├── man ├── crossing_points.Rd ├── zero_proportion.Rd ├── hurst.Rd ├── flat_spots.Rd ├── as.list.mts.Rd ├── arch_stat.Rd ├── holt_parameters.Rd ├── acf_features.Rd ├── pacf_features.Rd ├── stl_features.Rd ├── unitroot_kpss.Rd ├── lumpiness.Rd ├── fluctanal_prop_r1.Rd ├── binarize_mean.Rd ├── ac_9.Rd ├── motiftwo_entro3.Rd ├── trev_num.Rd ├── firstzero_ac.Rd ├── std1st_der.Rd ├── walker_propcross.Rd ├── scal_features.Rd ├── histogram_mode.Rd ├── heterogeneity.Rd ├── yahoo_data.Rd ├── localsimple_taures.Rd ├── firstmin_ac.Rd ├── dist_features.Rd ├── pred_features.Rd ├── nonlinearity.Rd ├── station_features.Rd ├── sampenc.Rd ├── compengine.Rd ├── embed2_incircle.Rd ├── max_level_shift.Rd ├── sampen_first.Rd ├── spreadrandomlocal_meantaul.Rd ├── autocorr_features.Rd ├── entropy.Rd ├── outlierinclude_mdrmd.Rd ├── tsfeatures-package.Rd └── tsfeatures.Rd ├── R ├── tsfeatures-package.R ├── as.list.mts.R ├── yahoo.R ├── entropy.R ├── multipleseasonal.R ├── yanfei.R ├── thiyanga.R ├── featurematrix.R ├── features.R └── compengine.R ├── cran-comments.md ├── README.Rmd ├── NAMESPACE ├── DESCRIPTION ├── README.md ├── _pkgdown.yml └── vignettes └── tsfeatures.Rmd /.github/.gitignore: -------------------------------------------------------------------------------- 1 | *.html 2 | -------------------------------------------------------------------------------- /extra-data/hwl.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/robjhyndman/tsfeatures/HEAD/extra-data/hwl.rda 
-------------------------------------------------------------------------------- /extra-data/khs.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/robjhyndman/tsfeatures/HEAD/extra-data/khs.rda -------------------------------------------------------------------------------- /extra-data/yahoo.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/robjhyndman/tsfeatures/HEAD/extra-data/yahoo.rda -------------------------------------------------------------------------------- /tests/testthat.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(tsfeatures) 3 | 4 | test_check("tsfeatures") 5 | -------------------------------------------------------------------------------- /CRAN-SUBMISSION: -------------------------------------------------------------------------------- 1 | Version: 1.1.1 2 | Date: 2023-08-28 13:24:34 UTC 3 | SHA: 892a4d46b629549abc50e88a13035b3aecbd26b6 4 | -------------------------------------------------------------------------------- /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^cran-comments\.md$ 2 | ^\.travis\.yml$ 3 | ^Meta$ 4 | ^doc$ 5 | ^docs$ 6 | ^_pkgdown\.yml$ 7 | ^.*\.Rproj$ 8 | ^\.Rproj\.user$ 9 | ^README\.Rmd$ 10 | ^README-.*\.png$ 11 | ^README\_.*$ 12 | ^READMEfigs 13 | ^extra-data 14 | ^revdep$ 15 | ^\.github$ 16 | ^CRAN-RELEASE$ 17 | ^pkgdown$ 18 | ^CRAN-SUBMISSION$ 19 | -------------------------------------------------------------------------------- /tests/testthat/test-zero_proportion.R: -------------------------------------------------------------------------------- 1 | # A unit tests for zero_proportion() function 2 | 3 | if (require(testthat)) { 4 | context("Tests on output") 5 | test_that("test for zero_proportion() ", { 6 | z <- zero_proportion(as.ts(c(0, 0, 3, 1, 2, 0))) 7 | 
expect_equal(length(z), 1L) 8 | expect_equal(z[1], 0.5) 9 | }) 10 | } 11 | -------------------------------------------------------------------------------- /pkgdown/extra.css: -------------------------------------------------------------------------------- 1 | h1, .h1 { 2 | font-size: 2rem; 3 | font-weight: 700; 4 | } 5 | 6 | h2, .h2 { 7 | font-size: 1.5rem; 8 | font-weight: 700; 9 | } 10 | 11 | .bg-primary .navbar-nav .show>.nav-link, .bg-primary .navbar-nav .nav-link.active, .bg-primary .navbar-nav .nav-link:hover, .bg-primary .navbar-nav .nav-link:focus { 12 | color: #ffb81c !important; 13 | } 14 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | Makefile 2 | Meta 3 | doc 4 | .Rproj.user 5 | .Rhistory 6 | .RData 7 | docs 8 | revdep 9 | 10 | # History files 11 | .Rapp.history 12 | # Example code in package build process 13 | *-Ex.R 14 | # RStudio files 15 | .Rproj.user/ 16 | # produced vignettes 17 | vignettes/*.html 18 | vignettes/*.pdf 19 | .Rbuildignore 20 | *.Rproj 21 | README_cache 22 | inst/doc 23 | docs 24 | -------------------------------------------------------------------------------- /NEWS.md: -------------------------------------------------------------------------------- 1 | # tsfeatures (development version) 2 | 3 | # tsfeatures 1.1.1 4 | 5 | * Bug fixes 6 | * Improved docs 7 | 8 | # tsfeatures 1.1 9 | 10 | * Added zero_proportion 11 | * Replaced deprecated multiprocess 12 | * Bug fixes and documentation improvements 13 | 14 | # tsfeatures 1.0.2 15 | 16 | * Better handling of perfect fits in `arch_stat()` 17 | 18 | # tsfeatures 1.0.1 19 | 20 | * Bug fixes 21 | * Documentation improvements 22 | 23 | # tsfeatures 1.0.0 24 | 25 | * First release 26 | -------------------------------------------------------------------------------- /man/crossing_points.Rd: 
-------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/features.R 3 | \name{crossing_points} 4 | \alias{crossing_points} 5 | \title{Number of crossing points} 6 | \usage{ 7 | crossing_points(x) 8 | } 9 | \arguments{ 10 | \item{x}{a univariate time series} 11 | } 12 | \value{ 13 | A numeric value. 14 | } 15 | \description{ 16 | Computes the number of times a time series crosses the median. 17 | } 18 | \author{ 19 | Earo Wang and Rob J Hyndman 20 | } 21 | -------------------------------------------------------------------------------- /R/tsfeatures-package.R: -------------------------------------------------------------------------------- 1 | #' @importFrom stats as.ts bw.nrd0 coef dnorm embed fitted frequency lm spec.ar 2 | #' @importFrom stats median na.contiguous na.pass residuals cor sd tsp "tsp<-" var 3 | #' @importFrom stats quantile acf pacf stl pchisq ar Box.test poly start cmdscale 4 | #' @importFrom purrr map map_dbl 5 | #' @importFrom forecast mstl 6 | 7 | #' @aliases tsfeatures-package 8 | #' @keywords internal 9 | "_PACKAGE" 10 | 11 | ## usethis namespace: start 12 | ## usethis namespace: end 13 | NULL 14 | -------------------------------------------------------------------------------- /cran-comments.md: -------------------------------------------------------------------------------- 1 | Fixed _PACKAGE issue as requested 2 | 3 | ## Test environments 4 | 5 | * ubuntu 22.04 (local): R 4.3.1 6 | * macOS-latest (on GitHub Actions): release 7 | * windows-latest (on GitHub Actions): release 8 | * ubuntu-latest (on GitHub Actions): devel, release, oldrel 9 | * win-builder: devel, release, oldrelease 10 | 11 | ## R CMD check results 12 | 13 | 0 errors | 0 warnings | 0 notes 14 | 15 | ## revdep checks 16 | 17 | All 6 reverse dependencies have been checked with no new errors detected. 
18 | -------------------------------------------------------------------------------- /man/zero_proportion.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/thiyanga.R 3 | \name{zero_proportion} 4 | \alias{zero_proportion} 5 | \title{Proportion of zeros} 6 | \usage{ 7 | zero_proportion(x, tol = 1e-08) 8 | } 9 | \arguments{ 10 | \item{x}{a univariate time series} 11 | 12 | \item{tol}{tolerance level. Absolute values below this are considered zeros.} 13 | } 14 | \value{ 15 | A numeric value. 16 | } 17 | \description{ 18 | Computes proportion of zeros in a time series 19 | } 20 | \author{ 21 | Thiyanga Talagala 22 | } 23 | -------------------------------------------------------------------------------- /man/hurst.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/features.R 3 | \name{hurst} 4 | \alias{hurst} 5 | \title{Hurst coefficient} 6 | \usage{ 7 | hurst(x) 8 | } 9 | \arguments{ 10 | \item{x}{a univariate time series. If missing values are present, the largest 11 | contiguous portion of the time series is used.} 12 | } 13 | \value{ 14 | A numeric value. 15 | } 16 | \description{ 17 | Computes the Hurst coefficient indicating the level of fractional differencing 18 | of a time series. 19 | } 20 | \author{ 21 | Rob J Hyndman 22 | } 23 | -------------------------------------------------------------------------------- /man/flat_spots.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/features.R 3 | \name{flat_spots} 4 | \alias{flat_spots} 5 | \title{Longest flat spot} 6 | \usage{ 7 | flat_spots(x) 8 | } 9 | \arguments{ 10 | \item{x}{a univariate time series} 11 | } 12 | \value{ 13 | A numeric value. 
14 | } 15 | \description{ 16 | "Flat spots" are computed by dividing the sample space of a time series into ten equal-sized intervals, and computing the maximum run length within any single interval. 17 | } 18 | \author{ 19 | Earo Wang and Rob J Hyndman 20 | } 21 | -------------------------------------------------------------------------------- /tests/testthat/test-holt_parameters.R: -------------------------------------------------------------------------------- 1 | # A unit tests for holt_parameters() function 2 | 3 | if (require(testthat)) { 4 | context("Tests on output") 5 | test_that("test for holt_parameters() results on non-seasonal ts data", { 6 | z <- holt_parameters(WWWusage) 7 | expect_equal(length(z), 2L) 8 | expect_gt(z[1], 0.99) 9 | expect_gt(z[2], 0.99) 10 | }) 11 | test_that("test for holt_parameters() results on seasonal ts data", { 12 | z <- holt_parameters(USAccDeaths) 13 | expect_equal(length(z), 2L) 14 | expect_gt(z[1], 0.96) 15 | expect_gt(z[2], 0.00) 16 | }) 17 | } 18 | -------------------------------------------------------------------------------- /man/as.list.mts.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/as.list.mts.R 3 | \name{as.list.mts} 4 | \alias{as.list.mts} 5 | \title{Convert mts object to list of time series} 6 | \usage{ 7 | \method{as.list}{mts}(x, ...) 8 | } 9 | \arguments{ 10 | \item{x}{multivariate time series of class mts.} 11 | 12 | \item{...}{other arguments are ignored.} 13 | } 14 | \value{ 15 | A list of ts objects. 16 | } 17 | \description{ 18 | An mts object contains a multivariate time series in a matrix, with time on rows. 19 | This is converted into a list of univariate time series. 
20 | } 21 | \author{ 22 | Rob J Hyndman 23 | } 24 | -------------------------------------------------------------------------------- /tests/testthat/test-pacf_features.R: -------------------------------------------------------------------------------- 1 | # A unit tests for pacf_features() function 2 | 3 | if (require(testthat)) { 4 | context("Tests on output") 5 | test_that("test for pacf_features() results on non-seasonal ts data", { 6 | z <- pacf_features(WWWusage) 7 | expect_equal(length(z), 3L) 8 | expect_gt(z[1], 1.03) 9 | expect_gt(z[2], 0.80) 10 | expect_gt(z[3], 0.22) 11 | }) 12 | test_that("test for pacf_features() results on seasonal ts data", { 13 | z <- pacf_features(USAccDeaths) 14 | expect_equal(length(z), 4L) 15 | expect_gt(z[1], 0.63) 16 | expect_gt(z[2], 0.09) 17 | expect_gt(z[3], 0.38) 18 | expect_gt(z[4], 0.12) 19 | }) 20 | } 21 | -------------------------------------------------------------------------------- /R/as.list.mts.R: -------------------------------------------------------------------------------- 1 | #' Convert mts object to list of time series 2 | #' 3 | #' An mts object contains a multivariate time series in a matrix, with time on rows. 4 | #' This is converted into a list of univariate time series. 5 | #' 6 | #' @method as.list mts 7 | #' @param x multivariate time series of class mts. 8 | #' @param ... other arguments are ignored. 9 | #' @author Rob J Hyndman 10 | #' @return A list of ts objects. 11 | #' @export 12 | as.list.mts <- function(x, ...) 
{ 13 | tspx <- tsp(x) 14 | listx <- as.list(as.data.frame(x)) 15 | listx <- purrr::map( 16 | listx, 17 | function(u) { 18 | u <- as.ts(u) 19 | tsp(u) <- tspx 20 | return(u) 21 | } 22 | ) 23 | return(listx) 24 | } 25 | -------------------------------------------------------------------------------- /man/arch_stat.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/yanfei.R 3 | \name{arch_stat} 4 | \alias{arch_stat} 5 | \title{ARCH LM Statistic} 6 | \usage{ 7 | arch_stat(x, lags = 12, demean = TRUE) 8 | } 9 | \arguments{ 10 | \item{x}{a univariate time series} 11 | 12 | \item{lags}{Number of lags to use in the test} 13 | 14 | \item{demean}{Should data have mean removed before test applied?} 15 | } 16 | \value{ 17 | A numeric value. 18 | } 19 | \description{ 20 | Computes a statistic based on the Lagrange Multiplier (LM) test of Engle (1982) for 21 | autoregressive conditional heteroscedasticity (ARCH). The statistic returned is 22 | the \eqn{R^2}{R^2} value of an autoregressive model of order \code{lags} applied 23 | to \eqn{x^2}{x^2}. 24 | } 25 | \author{ 26 | Yanfei Kang 27 | } 28 | -------------------------------------------------------------------------------- /man/holt_parameters.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/thiyanga.R 3 | \name{holt_parameters} 4 | \alias{holt_parameters} 5 | \alias{hw_parameters} 6 | \title{Parameter estimates of Holt's linear trend method} 7 | \usage{ 8 | holt_parameters(x) 9 | 10 | hw_parameters(x) 11 | } 12 | \arguments{ 13 | \item{x}{a univariate time series} 14 | } 15 | \value{ 16 | \code{holt_parameters} produces a vector of 2 values: alpha, beta. 17 | 18 | \code{hw_parameters} produces a vector of 3 values: alpha, beta and gamma. 
19 | } 20 | \description{ 21 | Estimates the smoothing parameter for the level (alpha) and 22 | the smoothing parameter for the trend (beta). 23 | \code{hw_parameters} considers additive seasonal trend: ets(A,A,A) model. 24 | } 25 | \author{ 26 | Thiyanga Talagala, Pablo Montero-Manso 27 | } 28 | -------------------------------------------------------------------------------- /man/acf_features.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/thiyanga.R 3 | \name{acf_features} 4 | \alias{acf_features} 5 | \title{Autocorrelation-based features} 6 | \usage{ 7 | acf_features(x) 8 | } 9 | \arguments{ 10 | \item{x}{a univariate time series} 11 | } 12 | \value{ 13 | A vector of 6 values: first autocorrelation coefficient and sum of squares of the 14 | first ten autocorrelation coefficients of the original series, first-differenced series, 15 | and twice-differenced series. 16 | For seasonal data, the autocorrelation coefficient at the first seasonal lag is 17 | also returned. 
18 | } 19 | \description{ 20 | Computes various measures based on autocorrelation coefficients of the 21 | original series, first-differenced series and second-differenced series. 22 | } 23 | \author{ 24 | Thiyanga Talagala 25 | } 26 | -------------------------------------------------------------------------------- /man/pacf_features.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/thiyanga.R 3 | \name{pacf_features} 4 | \alias{pacf_features} 5 | \title{Partial autocorrelation-based features} 6 | \usage{ 7 | pacf_features(x) 8 | } 9 | \arguments{ 10 | \item{x}{a univariate time series} 11 | } 12 | \value{ 13 | A vector of 3 values: sum of squares of the first 5 14 | partial autocorrelation coefficients of the original series, first-differenced 15 | series and twice-differenced series. 16 | For seasonal data, the partial autocorrelation coefficient at the first seasonal 17 | lag is also returned. 
18 | } 19 | \description{ 20 | Computes various measures based on partial autocorrelation coefficients of the 21 | original series, first-differenced series and second-differenced series 22 | } 23 | \author{ 24 | Thiyanga Talagala 25 | } 26 | -------------------------------------------------------------------------------- /tests/testthat/test-acf_features.R: -------------------------------------------------------------------------------- 1 | # A unit tests for acf_features() function 2 | 3 | if (require(testthat)) { 4 | context("Tests on output") 5 | test_that("test for acf_features() results on non-seasonal ts data", { 6 | z <- acf_features(WWWusage) 7 | expect_equal(length(z), 6L) 8 | expect_gt(z[1], 0.96) 9 | expect_gt(z[2], 4.19) 10 | expect_gt(z[3], 0.79) 11 | expect_gt(z[4], 1.40) 12 | expect_gt(z[5], 0.17) 13 | expect_gt(z[6], 0.33) 14 | }) 15 | test_that("test for acf_features() results on seasonal ts data", { 16 | z <- acf_features(USAccDeaths) 17 | expect_equal(length(z), 7L) 18 | expect_gt(z[1], 0.70) 19 | expect_gt(z[2], 1.20) 20 | expect_gt(z[3], 0.02) 21 | expect_gt(z[4], 0.27) 22 | expect_gt(z[5], -0.49) 23 | expect_gt(z[6], 0.74) 24 | expect_gt(z[7], 0.62) 25 | }) 26 | } 27 | -------------------------------------------------------------------------------- /man/stl_features.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/multipleseasonal.R 3 | \name{stl_features} 4 | \alias{stl_features} 5 | \title{Strength of trend and seasonality of a time series} 6 | \usage{ 7 | stl_features(x, ...) 8 | } 9 | \arguments{ 10 | \item{x}{a univariate time series.} 11 | 12 | \item{...}{Other arguments are passed to \code{\link[forecast]{mstl}}.} 13 | } 14 | \value{ 15 | A vector of numeric values. 16 | } 17 | \description{ 18 | Computes various measures of trend and seasonality of a time series based on 19 | an STL decomposition. 
The number of seasonal periods, and the length of the 20 | seasonal periods are returned. Also, the strength of seasonality corresponding 21 | to each period is estimated. The \code{\link[forecast]{mstl}} function is used 22 | to do the decomposition. 23 | } 24 | \author{ 25 | Rob J Hyndman 26 | } 27 | -------------------------------------------------------------------------------- /man/unitroot_kpss.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/features.R 3 | \name{unitroot_kpss} 4 | \alias{unitroot_kpss} 5 | \alias{unitroot_pp} 6 | \title{Unit Root Test Statistics} 7 | \usage{ 8 | unitroot_kpss(x, ...) 9 | 10 | unitroot_pp(x, ...) 11 | } 12 | \arguments{ 13 | \item{x}{a univariate time series.} 14 | 15 | \item{...}{Other arguments are passed to the \code{\link[urca]{ur.kpss}} or 16 | \code{\link[urca]{ur.pp}} functions.} 17 | } 18 | \value{ 19 | A numeric value 20 | } 21 | \description{ 22 | \code{unitroot_kpss} computes the statistic for the Kwiatkowski et al. unit root test 23 | using the default settings for the \code{\link[urca]{ur.kpss}} function. 24 | \code{unitroot_pp} computes the statistic for the Phillips-Perron unit root test 25 | using the default settings for the \code{\link[urca]{ur.pp}} function. 
26 | } 27 | \author{ 28 | Pablo Montero-Manso 29 | } 30 | -------------------------------------------------------------------------------- /man/lumpiness.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/features.R 3 | \name{lumpiness} 4 | \alias{lumpiness} 5 | \alias{stability} 6 | \title{Time series features based on tiled windows} 7 | \usage{ 8 | lumpiness(x, width = ifelse(frequency(x) > 1, frequency(x), 10)) 9 | 10 | stability(x, width = ifelse(frequency(x) > 1, frequency(x), 10)) 11 | } 12 | \arguments{ 13 | \item{x}{a univariate time series} 14 | 15 | \item{width}{size of sliding window} 16 | } 17 | \value{ 18 | A numeric vector of length 2 containing a measure of lumpiness and 19 | a measure of stability. 20 | } 21 | \description{ 22 | Computes feature of a time series based on tiled (non-overlapping) windows. 23 | Means or variances are produced for all tiled windows. Then stability is 24 | the variance of the means, while lumpiness is the variance of the variances. 25 | } 26 | \author{ 27 | Earo Wang and Rob J Hyndman 28 | } 29 | -------------------------------------------------------------------------------- /man/fluctanal_prop_r1.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/compengine.R 3 | \name{fluctanal_prop_r1} 4 | \alias{fluctanal_prop_r1} 5 | \title{Implements fluctuation analysis from software package \code{hctsa}} 6 | \usage{ 7 | fluctanal_prop_r1(x) 8 | } 9 | \arguments{ 10 | \item{x}{the input time series (or any vector)} 11 | } 12 | \description{ 13 | Fits a polynomial of order 1 and then returns the 14 | range. The order of fluctuations is 2, corresponding to root mean 15 | square fluctuations. 16 | } 17 | \references{ 18 | B.D. Fulcher and N.S. Jones. 
hctsa: A computational framework for automated time-series phenotyping using massive feature extraction. Cell Systems 5, 527 (2017). 19 | 20 | B.D. Fulcher, M.A. Little, N.S. Jones Highly comparative time-series analysis: the empirical structure of time series and their methods. J. Roy. Soc. Interface 10, 83 (2013). 21 | } 22 | \author{ 23 | Yangzhuoran Yang 24 | } 25 | -------------------------------------------------------------------------------- /man/binarize_mean.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/compengine.R 3 | \name{binarize_mean} 4 | \alias{binarize_mean} 5 | \title{Converts an input vector into a binarized version from software package \code{hctsa}} 6 | \usage{ 7 | binarize_mean(y) 8 | } 9 | \arguments{ 10 | \item{y}{the input time series} 11 | } 12 | \value{ 13 | Time-series values above its mean are given 1, and those below the mean are 0. 14 | } 15 | \description{ 16 | Converts an input vector into a binarized version from software package \code{hctsa} 17 | } 18 | \references{ 19 | B.D. Fulcher and N.S. Jones. hctsa: A computational framework for automated time-series phenotyping using massive feature extraction. Cell Systems 5, 527 (2017). 20 | 21 | B.D. Fulcher, M.A. Little, N.S. Jones Highly comparative time-series analysis: the empirical structure of time series and their methods. J. Roy. Soc. Interface 10, 83 (2013). 22 | } 23 | \author{ 24 | Yangzhuoran Yang 25 | } 26 | -------------------------------------------------------------------------------- /man/ac_9.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/compengine.R 3 | \name{ac_9} 4 | \alias{ac_9} 5 | \title{Autocorrelation at lag 9. 
Included for completion and consistency.} 6 | \usage{ 7 | ac_9(y, acfv = stats::acf(y, 9, plot = FALSE, na.action = na.pass)) 8 | } 9 | \arguments{ 10 | \item{y}{the input time series} 11 | 12 | \item{acfv}{vector of autocorrelation, if exist, used to avoid repeated computation.} 13 | } 14 | \value{ 15 | autocorrelation at lag 9 16 | } 17 | \description{ 18 | Autocorrelation at lag 9. Included for completion and consistency. 19 | } 20 | \references{ 21 | B.D. Fulcher and N.S. Jones. hctsa: A computational framework for automated time-series phenotyping using massive feature extraction. Cell Systems 5, 527 (2017). 22 | 23 | B.D. Fulcher, M.A. Little, N.S. Jones Highly comparative time-series analysis: the empirical structure of time series and their methods. J. Roy. Soc. Interface 10, 83 (2013). 24 | } 25 | \author{ 26 | Yangzhuoran Yang 27 | } 28 | -------------------------------------------------------------------------------- /man/motiftwo_entro3.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/compengine.R 3 | \name{motiftwo_entro3} 4 | \alias{motiftwo_entro3} 5 | \title{Local motifs in a binary symbolization of the time series from software package \code{hctsa}} 6 | \usage{ 7 | motiftwo_entro3(y) 8 | } 9 | \arguments{ 10 | \item{y}{the input time series} 11 | } 12 | \value{ 13 | Entropy of words in the binary alphabet of length 3. 14 | } 15 | \description{ 16 | Coarse-graining is performed. Time-series values above its mean are given 1, 17 | and those below the mean are 0. 18 | } 19 | \examples{ 20 | motiftwo_entro3(WWWusage) 21 | } 22 | \references{ 23 | B.D. Fulcher and N.S. Jones. hctsa: A computational framework for automated time-series phenotyping using massive feature extraction. Cell Systems 5, 527 (2017). 24 | 25 | B.D. Fulcher, M.A. Little, N.S. 
Jones Highly comparative time-series analysis: the empirical structure of time series and their methods. J. Roy. Soc. Interface 10, 83 (2013). 26 | } 27 | \author{ 28 | Yangzhuoran Yang 29 | } 30 | -------------------------------------------------------------------------------- /man/trev_num.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/compengine.R 3 | \name{trev_num} 4 | \alias{trev_num} 5 | \title{Normalized nonlinear autocorrelation, the numerator of the trev function of a time series from software package \code{hctsa}} 6 | \usage{ 7 | trev_num(y) 8 | } 9 | \arguments{ 10 | \item{y}{the input time series} 11 | } 12 | \value{ 13 | the numerator of the trev function of a time series 14 | } 15 | \description{ 16 | Calculates the numerator of the trev function, a normalized nonlinear autocorrelation. 17 | The time lag is set to 1. 18 | } 19 | \examples{ 20 | trev_num(WWWusage) 21 | } 22 | \references{ 23 | B.D. Fulcher and N.S. Jones. hctsa: A computational framework for automated time-series phenotyping using massive feature extraction. Cell Systems 5, 527 (2017). 24 | 25 | B.D. Fulcher, M.A. Little, N.S. Jones Highly comparative time-series analysis: the empirical structure of time series and their methods. J. Roy. Soc. Interface 10, 83 (2013). 
26 | } 27 | \author{ 28 | Yangzhuoran Yang 29 | } 30 | -------------------------------------------------------------------------------- /man/firstzero_ac.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/compengine.R 3 | \name{firstzero_ac} 4 | \alias{firstzero_ac} 5 | \title{The first zero crossing of the autocorrelation function from software package \code{hctsa}} 6 | \usage{ 7 | firstzero_ac(y, acfv = stats::acf(y, N - 1, plot = FALSE, na.action = na.pass)) 8 | } 9 | \arguments{ 10 | \item{y}{the input time series} 11 | 12 | \item{acfv}{vector of autocorrelation, if exist, used to avoid repeated computation.} 13 | } 14 | \value{ 15 | The first zero crossing of the autocorrelation function 16 | } 17 | \description{ 18 | Search up to a maximum of the length of the time series 19 | } 20 | \references{ 21 | B.D. Fulcher and N.S. Jones. hctsa: A computational framework for automated time-series phenotyping using massive feature extraction. Cell Systems 5, 527 (2017). 22 | 23 | B.D. Fulcher, M.A. Little, N.S. Jones Highly comparative time-series analysis: the empirical structure of time series and their methods. J. Roy. Soc. Interface 10, 83 (2013). 24 | } 25 | \author{ 26 | Yangzhuoran Yang 27 | } 28 | -------------------------------------------------------------------------------- /man/std1st_der.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/compengine.R 3 | \name{std1st_der} 4 | \alias{std1st_der} 5 | \title{Standard deviation of the first derivative of the time series from software package \code{hctsa}} 6 | \usage{ 7 | std1st_der(y) 8 | } 9 | \arguments{ 10 | \item{y}{the input time series. Missing values will be removed.} 11 | } 12 | \value{ 13 | Standard deviation of the first derivative of the time series. 
14 | } 15 | \description{ 16 | Modified from \code{SY_StdNthDer} in \code{hctsa}. Based on an idea by Vladimir Vassilevsky. 17 | } 18 | \references{ 19 | cf. http://www.mathworks.de/matlabcentral/newsreader/view_thread/136539 20 | 21 | B.D. Fulcher and N.S. Jones. hctsa: A computational framework for automated time-series phenotyping using massive feature extraction. Cell Systems 5, 527 (2017). 22 | 23 | B.D. Fulcher, M.A. Little, N.S. Jones Highly comparative time-series analysis: the empirical structure of time series and their methods. J. Roy. Soc. Interface 10, 83 (2013). 24 | } 25 | \author{ 26 | Yangzhuoran Yang 27 | } 28 | -------------------------------------------------------------------------------- /man/walker_propcross.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/compengine.R 3 | \name{walker_propcross} 4 | \alias{walker_propcross} 5 | \title{Simulates a hypothetical walker moving through the time domain from software package \code{hctsa}} 6 | \usage{ 7 | walker_propcross(y) 8 | } 9 | \arguments{ 10 | \item{y}{the input time series} 11 | } 12 | \value{ 13 | fraction of time series length that walker crosses time series 14 | } 15 | \description{ 16 | The hypothetical particle (or 'walker') moves in response to values of the 17 | time series at each point. 18 | The walker narrows the gap between its value and that 19 | of the time series by 10\%. 20 | } 21 | \references{ 22 | B.D. Fulcher and N.S. Jones. hctsa: A computational framework for automated time-series phenotyping using massive feature extraction. Cell Systems 5, 527 (2017). 23 | 24 | B.D. Fulcher, M.A. Little, N.S. Jones Highly comparative time-series analysis: the empirical structure of time series and their methods. J. Roy. Soc. Interface 10, 83 (2013). 
25 | } 26 | \author{ 27 | Yangzhuoran Yang 28 | } 29 | -------------------------------------------------------------------------------- /man/scal_features.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/compengine.R 3 | \name{scal_features} 4 | \alias{scal_features} 5 | \title{The scaling feature set from software package \code{hctsa}} 6 | \usage{ 7 | scal_features(x) 8 | } 9 | \arguments{ 10 | \item{x}{the input time series} 11 | } 12 | \value{ 13 | a vector with scaling features 14 | } 15 | \description{ 16 | Calculate the features that grouped as scaling set, 17 | which have been used in CompEngine database, using method introduced in package \code{hctsa}. 18 | } 19 | \details{ 20 | Feature in this set is \code{fluctanal_prop_r1}. 21 | } 22 | \references{ 23 | B.D. Fulcher and N.S. Jones. hctsa: A computational framework for automated time-series phenotyping using massive feature extraction. Cell Systems 5, 527 (2017). 24 | 25 | B.D. Fulcher, M.A. Little, N.S. Jones Highly comparative time-series analysis: the empirical structure of time series and their methods. J. Roy. Soc. Interface 10, 83 (2013). 
26 | } 27 | \seealso{ 28 | \code{\link{fluctanal_prop_r1}} 29 | } 30 | \author{ 31 | Yangzhuoran Yang 32 | } 33 | -------------------------------------------------------------------------------- /man/histogram_mode.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/compengine.R 3 | \name{histogram_mode} 4 | \alias{histogram_mode} 5 | \title{Mode of a data vector from software package \code{hctsa}} 6 | \usage{ 7 | histogram_mode(y, numBins = 10) 8 | } 9 | \arguments{ 10 | \item{y}{the input data vector} 11 | 12 | \item{numBins}{the number of bins to use in the histogram.} 13 | } 14 | \value{ 15 | the mode 16 | } 17 | \description{ 18 | Measures the mode of the data vector using histograms with a given number of bins as suggestion. 19 | The value calculated is different from \code{hctsa} and \code{CompEngine} as the histogram edges are calculated differently. 20 | } 21 | \references{ 22 | B.D. Fulcher and N.S. Jones. hctsa: A computational framework for automated time-series phenotyping using massive feature extraction. Cell Systems 5, 527 (2017). 23 | 24 | B.D. Fulcher, M.A. Little, N.S. Jones Highly comparative time-series analysis: the empirical structure of time series and their methods. J. Roy. Soc. Interface 10, 83 (2013). 25 | } 26 | \author{ 27 | Yangzhuoran Yang 28 | } 29 | -------------------------------------------------------------------------------- /man/heterogeneity.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/yanfei.R 3 | \name{heterogeneity} 4 | \alias{heterogeneity} 5 | \title{Heterogeneity coefficients} 6 | \usage{ 7 | heterogeneity(x) 8 | } 9 | \arguments{ 10 | \item{x}{a univariate time series} 11 | } 12 | \value{ 13 | A vector of numeric values. 
14 | } 15 | \description{ 16 | Computes various measures of heterogeneity of a time series. First the series 17 | is pre-whitened using an AR model to give a new series y. We fit a GARCH(1,1) 18 | model to y and obtain the residuals, e. Then the four measures of heterogeneity 19 | are: 20 | (1) the sum of squares of the first 12 autocorrelations of \eqn{y^2}{y^2}; 21 | (2) the sum of squares of the first 12 autocorrelations of \eqn{e^2}{e^2}; 22 | (3) the \eqn{R^2}{R^2} value of an AR model applied to \eqn{y^2}{y^2}; 23 | (4) the \eqn{R^2}{R^2} value of an AR model applied to \eqn{e^2}{e^2}. 24 | The statistics obtained from \eqn{y^2}{y^2} are the ARCH effects, while those 25 | from \eqn{e^2}{e^2} are the GARCH effects. 26 | } 27 | \author{ 28 | Yanfei Kang and Rob J Hyndman 29 | } 30 | -------------------------------------------------------------------------------- /man/yahoo_data.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/yahoo.R 3 | \name{yahoo_data} 4 | \alias{yahoo_data} 5 | \title{Yahoo server metrics} 6 | \usage{ 7 | yahoo_data(...) 8 | } 9 | \arguments{ 10 | \item{...}{Additional arguments passed to \code{download.file} 11 | 12 | Downloads and returns aggregated and anonymized datasets from Yahoo representing server metrics of Yahoo services.} 13 | } 14 | \value{ 15 | A matrix of time series with 1437 rows of hourly data, and 1748 columns representing different servers. 16 | } 17 | \description{ 18 | Yahoo server metrics 19 | } 20 | \examples{ 21 | yahoo <- yahoo_data() 22 | plot(yahoo[,1:10]) 23 | plot(yahoo[,1:44], plot.type='single', col=1:44) 24 | 25 | } 26 | \references{ 27 | Hyndman, R.J., Wang, E., Laptev, N. (2015) Large-scale unusual time series detection. 28 | In: \emph{Proceedings of the IEEE International Conference on Data Mining}. Atlantic City, NJ, USA. 14–17 November 2015. 
29 | \url{https://robjhyndman.com/publications/icdm2015/} 30 | } 31 | \author{ 32 | Rob Hyndman, Earo Wang, Nikolay Laptev, Mitchell O'Hara-Wild 33 | } 34 | -------------------------------------------------------------------------------- /man/localsimple_taures.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/compengine.R 3 | \name{localsimple_taures} 4 | \alias{localsimple_taures} 5 | \title{The first zero crossing of the autocorrelation function of the residuals from Simple local time-series forecasting from software package \code{hctsa}} 6 | \usage{ 7 | localsimple_taures(y, forecastMeth = c("mean", "lfit"), trainLength = NULL) 8 | } 9 | \arguments{ 10 | \item{y}{the input time series} 11 | 12 | \item{forecastMeth}{the forecasting method, default to \code{mean}. 13 | \code{mean}: local mean prediction using the past trainLength time-series values. 14 | \code{lfit}: local linear prediction using the past trainLength time-series values.} 15 | 16 | \item{trainLength}{the number of time-series values to use to forecast the next value. 17 | Default to 1 when using method \code{mean} and 3 when using method \code{lfit}.} 18 | } 19 | \value{ 20 | The first zero crossing of the autocorrelation function of the residuals 21 | } 22 | \description{ 23 | Simple predictors using the past trainLength values of the time series to 24 | predict its next value. 25 | } 26 | -------------------------------------------------------------------------------- /R/yahoo.R: -------------------------------------------------------------------------------- 1 | #' Yahoo server metrics 2 | #' 3 | #' Downloads and returns aggregated and anonymized datasets from Yahoo representing server metrics of Yahoo services. 4 | #' 5 | #' @param ... Additional arguments passed to `download.file` 
6 | #' 7 | #' @return A matrix of time series with 1437 rows of hourly data, and 1748 columns representing different servers. 8 | #' @author Rob Hyndman, Earo Wang, Nikolay Laptev, Mitchell O'Hara-Wild 9 | #' @references 10 | #' Hyndman, R.J., Wang, E., Laptev, N. (2015) Large-scale unusual time series detection. 11 | #' In: \emph{Proceedings of the IEEE International Conference on Data Mining}. Atlantic City, NJ, USA. 14–17 November 2015. 12 | #' \url{https://robjhyndman.com/publications/icdm2015/} 13 | #' @examples 14 | #' yahoo <- yahoo_data() 15 | #' plot(yahoo[,1:10]) 16 | #' plot(yahoo[,1:44], plot.type='single', col=1:44) 17 | #' 18 | #' @export 19 | yahoo_data <- function(...) { 20 | tmp <- tempfile() 21 | # Delete the downloaded file on exit, even if the download or the load fails. 22 | on.exit(unlink(tmp), add = TRUE) 23 | utils::download.file("https://github.com/robjhyndman/tsfeatures/raw/master/extra-data/yahoo.rda", tmp, ...) 24 | # Load into a private environment so objects stored in the file cannot overwrite locals of this function. 25 | env <- new.env(parent = emptyenv()) 26 | load(tmp, envir = env) 27 | # Signals an error if the file does not contain an object named `yahoo`. 28 | get("yahoo", envir = env, inherits = FALSE) 29 | } -------------------------------------------------------------------------------- /tests/testthat/test-compengine.R: -------------------------------------------------------------------------------- 1 | # A unit test for compengine() function 2 | if (require(testthat)) { 3 | context("Tests on input") 4 | test_that("tests for a non-vector object", { 5 | expect_that(suppressWarnings(compengine(matrix(0, 2, 2))), throws_error()) 6 | }) 7 | 8 | context("Tests on output") 9 | test_that("tests for compengine results on non-seasonal data", { 10 | z <- compengine(WWWusage) 11 | expect_equal(length(z), 16L) 12 | expect_equal(z[4], c(firstmin_ac = 21)) 13 | expect_gt(z[5], 109.15) 14 | expect_gt(z[3], 0.27) 15 | }) 16 | test_that("tests for compengine results on seasonal data", { 17 | z <- compengine(USAccDeaths) 18 | expect_that(length(z), equals(16L)) 19 | expect_equal(z[4], c(firstmin_ac = 6)) 20 | expect_gt(z[6], 1.83) 21 | expect_lt(z[3], -0.0647) 22 | }) 23 | test_that("tests for compengine results on data with missing values", { 24 | y_WWWusage <- 
WWWusage 25 | y_WWWusage[c(16:17, 78)] <- NA 26 | z <- compengine(y_WWWusage) 27 | expect_equal(length(which(is.na(z))), 0) 28 | expect_gt(z[3], 0.2845) 29 | expect_equal(z[4], c(firstmin_ac = 21)) 30 | }) 31 | } 32 | -------------------------------------------------------------------------------- /man/firstmin_ac.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/compengine.R 3 | \name{firstmin_ac} 4 | \alias{firstmin_ac} 5 | \title{Time of first minimum in the autocorrelation function from software package \code{hctsa}} 6 | \usage{ 7 | firstmin_ac( 8 | x, 9 | acfv = stats::acf(x, lag.max = N - 1, plot = FALSE, na.action = na.pass) 10 | ) 11 | } 12 | \arguments{ 13 | \item{x}{the input time series} 14 | 15 | \item{acfv}{vector of autocorrelation, if exist, used to avoid repeated computation.} 16 | } 17 | \value{ 18 | The lag of the first minimum 19 | } 20 | \description{ 21 | Time of first minimum in the autocorrelation function from software package \code{hctsa} 22 | } 23 | \examples{ 24 | firstmin_ac(WWWusage) 25 | } 26 | \references{ 27 | B.D. Fulcher and N.S. Jones. hctsa: A computational framework for automated time-series phenotyping using massive feature extraction. Cell Systems 5, 527 (2017). 28 | 29 | B.D. Fulcher, M.A. Little, N.S. Jones Highly comparative time-series analysis: the empirical structure of time series and their methods. J. Roy. Soc. Interface 10, 83 (2013). 
30 | } 31 | \author{ 32 | Yangzhuoran Yang 33 | } 34 | -------------------------------------------------------------------------------- /man/dist_features.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/compengine.R 3 | \name{dist_features} 4 | \alias{dist_features} 5 | \title{The distribution feature set from software package \code{hctsa}} 6 | \usage{ 7 | dist_features(x) 8 | } 9 | \arguments{ 10 | \item{x}{the input time series} 11 | } 12 | \value{ 13 | a vector with distribution features 14 | } 15 | \description{ 16 | Calculate the features that grouped as distribution set, 17 | which have been used in CompEngine database, using method introduced in package \code{hctsa}. 18 | } 19 | \details{ 20 | Features in this set are \code{histogram_mode_10} 21 | and \code{outlierinclude_mdrmd}. 22 | } 23 | \references{ 24 | B.D. Fulcher and N.S. Jones. hctsa: A computational framework for automated time-series phenotyping using massive feature extraction. Cell Systems 5, 527 (2017). 25 | 26 | B.D. Fulcher, M.A. Little, N.S. Jones Highly comparative time-series analysis: the empirical structure of time series and their methods. J. Roy. Soc. Interface 10, 83 (2013). 
27 | } 28 | \seealso{ 29 | \code{\link{histogram_mode}} 30 | 31 | \code{\link{outlierinclude_mdrmd}} 32 | } 33 | \author{ 34 | Yangzhuoran Yang 35 | } 36 | -------------------------------------------------------------------------------- /man/pred_features.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/compengine.R 3 | \name{pred_features} 4 | \alias{pred_features} 5 | \title{The prediction feature set from software package \code{hctsa}} 6 | \usage{ 7 | pred_features(x) 8 | } 9 | \arguments{ 10 | \item{x}{the input time series} 11 | } 12 | \value{ 13 | a vector with prediction features 14 | } 15 | \description{ 16 | Calculate the features that grouped as prediction set, 17 | which have been used in CompEngine database, using method introduced in package \code{hctsa}. 18 | } 19 | \details{ 20 | Features in this set are \code{localsimple_mean1}, 21 | \code{localsimple_lfitac}, 22 | and \code{sampen_first}. 23 | } 24 | \references{ 25 | B.D. Fulcher and N.S. Jones. hctsa: A computational framework for automated time-series phenotyping using massive feature extraction. Cell Systems 5, 527 (2017). 26 | 27 | B.D. Fulcher, M.A. Little, N.S. Jones Highly comparative time-series analysis: the empirical structure of time series and their methods. J. Roy. Soc. Interface 10, 83 (2013). 
28 | } 29 | \seealso{ 30 | \code{\link{localsimple_taures}} 31 | 32 | \code{\link{sampen_first}} 33 | } 34 | \author{ 35 | Yangzhuoran Yang 36 | } 37 | -------------------------------------------------------------------------------- /tests/testthat/test-stl_features.R: -------------------------------------------------------------------------------- 1 | # Unit tests for stl_features(). 2 | # testthat is attached by tests/testthat.R, so no require() guard is used here; 3 | # such a guard would silently skip every test if testthat were missing. 4 | 5 | # Input check: a matrix argument must raise an error. 6 | test_that("tests for a non-vector object", { 7 | expect_error(stl_features(matrix(0, 2, 2))) 8 | }) 9 | 10 | # Output checks: result length, period fields and a lower bound on the next feature. 11 | test_that("tests for stl_feature results on non-seasonal data", { 12 | z <- stl_features(WWWusage) 13 | expect_length(z, 8L) 14 | expect_equal(z[1], c(nperiods = 0)) 15 | expect_equal(z[2], c(seasonal_period = 1)) 16 | expect_gt(z[3], 0.98) 17 | }) 18 | test_that("tests for stl_feature results on seasonal ts data", { 19 | z <- stl_features(USAccDeaths) 20 | expect_length(z, 11L) 21 | expect_equal(z[1], c(nperiods = 1)) 22 | expect_equal(z[2], c(seasonal_period = 12)) 23 | expect_gt(z[3], 0.78) 24 | }) 25 | test_that("tests for stl_feature results on seasonal msts data", { 26 | z <- stl_features(forecast::taylor) 27 | expect_length(z, 15L) 28 | expect_equal(z[1], c(nperiods = 2)) 29 | expect_equal(z[2], c(seasonal_period1 = 48)) 30 | expect_equal(z[3], c(seasonal_period2 = 336)) 31 | expect_gt(z[4], 0.79) 32 | }) 33 | -------------------------------------------------------------------------------- /man/nonlinearity.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/yanfei.R 3 | \name{nonlinearity} 4 | \alias{nonlinearity} 5 | \title{Nonlinearity coefficient} 6 | \usage{ 7 | nonlinearity(x) 8 | } 9 | \arguments{ 10 | \item{x}{a univariate time series} 11 | } 12 | \value{ 13 | A numeric value. 
14 | } 15 | \description{ 16 | Computes a nonlinearity statistic based on Lee, White & Granger's nonlinearity test of a time series. 17 | The statistic is \eqn{10X^2/T}{10X^2/T} where \eqn{X^2}{X^2} is the Chi-squared statistic from Lee, White and Granger, 18 | and T is the length of the time series. This takes large values 19 | when the series is nonlinear, and values around 0 when the series is linear. 20 | } 21 | \examples{ 22 | nonlinearity(lynx) 23 | } 24 | \references{ 25 | Lee, T. H., White, H., & Granger, C. W. (1993). Testing for neglected nonlinearity in time series models: A comparison of neural network methods and alternative tests. \emph{Journal of Econometrics}, 56(3), 269-290. 26 | 27 | Teräsvirta, T., Lin, C.-F., & Granger, C. W. J. (1993). Power of the neural network linearity test. \emph{Journal of Time Series Analysis}, 14(2), 209–220. 28 | } 29 | \author{ 30 | Yanfei Kang and Rob J Hyndman 31 | } 32 | -------------------------------------------------------------------------------- /man/station_features.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/compengine.R 3 | \name{station_features} 4 | \alias{station_features} 5 | \title{The stationarity feature set from software package \code{hctsa}} 6 | \usage{ 7 | station_features(x) 8 | } 9 | \arguments{ 10 | \item{x}{the input time series} 11 | } 12 | \value{ 13 | a vector with stationarity features 14 | } 15 | \description{ 16 | Calculate the features that grouped as stationarity set, 17 | which have been used in CompEngine database, using method introduced in package \code{hctsa}. 18 | } 19 | \details{ 20 | Features in this set are \code{std1st_der}, 21 | \code{spreadrandomlocal_meantaul_50}, 22 | and \code{spreadrandomlocal_meantaul_ac2}. 23 | } 24 | \references{ 25 | B.D. Fulcher and N.S. Jones. 
hctsa: A computational framework for automated time-series phenotyping using massive feature extraction. Cell Systems 5, 527 (2017). 26 | 27 | B.D. Fulcher, M.A. Little, N.S. Jones Highly comparative time-series analysis: the empirical structure of time series and their methods. J. Roy. Soc. Interface 10, 83 (2013). 28 | } 29 | \seealso{ 30 | \code{\link{std1st_der}} 31 | 32 | \code{\link{spreadrandomlocal_meantaul}} 33 | } 34 | \author{ 35 | Yangzhuoran Yang 36 | } 37 | -------------------------------------------------------------------------------- /man/sampenc.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/compengine.R 3 | \name{sampenc} 4 | \alias{sampenc} 5 | \title{Second Sample Entropy from software package \code{hctsa}} 6 | \usage{ 7 | sampenc(y, M = 6, r = 0.3) 8 | } 9 | \arguments{ 10 | \item{y}{the input time series} 11 | 12 | \item{M}{embedding dimension} 13 | 14 | \item{r}{threshold} 15 | } 16 | \description{ 17 | Modified from the Ben Fulcher version of original code sampenc.m from 18 | http://physionet.org/physiotools/sampen/ 19 | http://www.physionet.org/physiotools/sampen/matlab/1.1/sampenc.m 20 | Code by DK Lake (dlake@virginia.edu), JR Moorman and Cao Hanqing. 21 | } 22 | \references{ 23 | cf. "Physiological time-series analysis using approximate entropy and sample 24 | entropy", J. S. Richman and J. R. Moorman, Am. J. Physiol. Heart Circ. 25 | Physiol., 278(6) H2039 (2000) 26 | 27 | B.D. Fulcher and N.S. Jones. hctsa: A computational framework for automated time-series phenotyping using massive feature extraction. Cell Systems 5, 527 (2017). 28 | 29 | B.D. Fulcher, M.A. Little, N.S. Jones Highly comparative time-series analysis: the empirical structure of time series and their methods. J. Roy. Soc. Interface 10, 83 (2013). 
30 | } 31 | \author{ 32 | Yangzhuoran Yang 33 | } 34 | -------------------------------------------------------------------------------- /man/compengine.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/compengine.R 3 | \name{compengine} 4 | \alias{compengine} 5 | \title{CompEngine feature set} 6 | \usage{ 7 | compengine(x) 8 | } 9 | \arguments{ 10 | \item{x}{the input time series} 11 | } 12 | \value{ 13 | a vector with CompEngine features 14 | } 15 | \description{ 16 | Calculate the features that have been used in CompEngine database, using method introduced in package 17 | \code{hctsa}. 18 | } 19 | \details{ 20 | The features involved can be grouped as \code{autocorrelation}, 21 | \code{prediction}, \code{stationarity}, \code{distribution}, and \code{scaling}. 22 | } 23 | \references{ 24 | B.D. Fulcher and N.S. Jones. hctsa: A computational framework for automated time-series phenotyping using massive feature extraction. Cell Systems 5, 527 (2017). 25 | 26 | B.D. Fulcher, M.A. Little, N.S. Jones Highly comparative time-series analysis: the empirical structure of time series and their methods. J. Roy. Soc. Interface 10, 83 (2013). 
27 | } 28 | \seealso{ 29 | \code{\link{autocorr_features}} 30 | 31 | \code{\link{pred_features}} 32 | 33 | \code{\link{station_features}} 34 | 35 | \code{\link{dist_features}} 36 | 37 | \code{\link{scal_features}} 38 | } 39 | \author{ 40 | Yangzhuoran Yang 41 | } 42 | -------------------------------------------------------------------------------- /man/embed2_incircle.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/compengine.R 3 | \name{embed2_incircle} 4 | \alias{embed2_incircle} 5 | \title{Points inside a given circular boundary in a 2-d embedding space from software package \code{hctsa}} 6 | \usage{ 7 | embed2_incircle( 8 | y, 9 | boundary = NULL, 10 | acfv = stats::acf(y, length(y) - 1, plot = FALSE, na.action = na.pass) 11 | ) 12 | } 13 | \arguments{ 14 | \item{y}{the input time series} 15 | 16 | \item{boundary}{the given circular boundary, setting to 1 or 2 in CompEngine. Default to 1.} 17 | 18 | \item{acfv}{vector of autocorrelation, if exist, used to avoid repeated computation.} 19 | } 20 | \value{ 21 | the proportion of points inside a given circular boundary 22 | } 23 | \description{ 24 | The time lag is set to the first zero crossing of the autocorrelation function. 25 | } 26 | \references{ 27 | B.D. Fulcher and N.S. Jones. hctsa: A computational framework for automated time-series phenotyping using massive feature extraction. Cell Systems 5, 527 (2017). 28 | 29 | B.D. Fulcher, M.A. Little, N.S. Jones Highly comparative time-series analysis: the empirical structure of time series and their methods. J. Roy. Soc. Interface 10, 83 (2013). 
30 | } 31 | \author{ 32 | Yangzhuoran Yang 33 | } 34 | -------------------------------------------------------------------------------- /man/max_level_shift.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/features.R 3 | \name{max_level_shift} 4 | \alias{max_level_shift} 5 | \alias{max_var_shift} 6 | \alias{max_kl_shift} 7 | \title{Time series features based on sliding windows} 8 | \usage{ 9 | max_level_shift(x, width = ifelse(frequency(x) > 1, frequency(x), 10)) 10 | 11 | max_var_shift(x, width = ifelse(frequency(x) > 1, frequency(x), 10)) 12 | 13 | max_kl_shift(x, width = ifelse(frequency(x) > 1, frequency(x), 10)) 14 | } 15 | \arguments{ 16 | \item{x}{a univariate time series} 17 | 18 | \item{width}{size of sliding window} 19 | } 20 | \value{ 21 | A vector of 2 values: the size of the shift, and the time index of the shift. 22 | } 23 | \description{ 24 | Computes features of a time series based on sliding (overlapping) windows. 25 | \code{max_level_shift} finds the largest mean shift between two consecutive windows. 26 | \code{max_var_shift} finds the largest variance shift between two consecutive windows. 27 | \code{max_kl_shift} finds the largest shift in Kullback-Leibler divergence between 28 | two consecutive windows. 
29 | } 30 | \details{ 31 | Computes the largest level shift and largest variance shift in sliding mean calculations. 32 | } 33 | \author{ 34 | Earo Wang and Rob J Hyndman 35 | } 36 | -------------------------------------------------------------------------------- /man/sampen_first.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/compengine.R 3 | \name{sampen_first} 4 | \alias{sampen_first} 5 | \title{Second Sample Entropy of a time series from software package \code{hctsa}} 6 | \usage{ 7 | sampen_first(y) 8 | } 9 | \arguments{ 10 | \item{y}{the input time series} 11 | } 12 | \description{ 13 | Modified from Ben Fulcher's \code{EN_SampEn}, which uses code from PhysioNet. 14 | The publicly-available PhysioNet Matlab code, sampenc (renamed here to 15 | RN_sampenc) is available from: 16 | http://www.physionet.org/physiotools/sampen/matlab/1.1/sampenc.m 17 | } 18 | \details{ 19 | Embedding dimension is set to 5. 20 | The threshold is set to 0.3. 21 | } 22 | \references{ 23 | cf. "Physiological time-series analysis using approximate entropy and sample 24 | entropy", J. S. Richman and J. R. Moorman, Am. J. Physiol. Heart Circ. 25 | Physiol., 278(6) H2039 (2000) 26 | 27 | B.D. Fulcher and N.S. Jones. hctsa: A computational framework for automated time-series phenotyping using massive feature extraction. Cell Systems 5, 527 (2017). 28 | 29 | B.D. Fulcher, M.A. Little, N.S. Jones Highly comparative time-series analysis: the empirical structure of time series and their methods. J. Roy. Soc. Interface 10, 83 (2013). 
30 | } 31 | \author{ 32 | Yangzhuoran Yang 33 | } 34 | -------------------------------------------------------------------------------- /man/spreadrandomlocal_meantaul.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/compengine.R 3 | \name{spreadrandomlocal_meantaul} 4 | \alias{spreadrandomlocal_meantaul} 5 | \title{Bootstrap-based stationarity measure from software package \code{hctsa}} 6 | \usage{ 7 | spreadrandomlocal_meantaul(y, l = 50) 8 | } 9 | \arguments{ 10 | \item{y}{the input time series} 11 | 12 | \item{l}{the length of local time-series segments to analyse as a positive integer. Can also be a specified character string: "ac2": twice the first zero-crossing of the autocorrelation function} 13 | } 14 | \value{ 15 | mean of the first zero-crossings of the autocorrelation function 16 | } 17 | \description{ 18 | 100 time-series segments of length \code{l} are selected at random from the time series and 19 | the mean of the first zero-crossings of the autocorrelation function in each segment is calculated. 20 | } 21 | \references{ 22 | B.D. Fulcher and N.S. Jones. hctsa: A computational framework for automated time-series phenotyping using massive feature extraction. Cell Systems 5, 527 (2017). 23 | 24 | B.D. Fulcher, M.A. Little, N.S. Jones Highly comparative time-series analysis: the empirical structure of time series and their methods. J. Roy. Soc. Interface 10, 83 (2013). 
25 | } 26 | \author{ 27 | Yangzhuoran Yang 28 | } 29 | -------------------------------------------------------------------------------- /man/autocorr_features.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/compengine.R 3 | \name{autocorr_features} 4 | \alias{autocorr_features} 5 | \title{The autocorrelation feature set from software package \code{hctsa}} 6 | \usage{ 7 | autocorr_features(x) 8 | } 9 | \arguments{ 10 | \item{x}{the input time series} 11 | } 12 | \value{ 13 | a vector with autocorrelation features 14 | } 15 | \description{ 16 | Calculate the features that grouped as autocorrelation set, 17 | which have been used in CompEngine database, using method introduced in package \code{hctsa}. 18 | } 19 | \details{ 20 | Features in this set are \code{embed2_incircle_1}, 21 | \code{embed2_incircle_2}, 22 | \code{ac_9}, 23 | \code{firstmin_ac}, 24 | \code{trev_num}, 25 | \code{motiftwo_entro3}, 26 | and \code{walker_propcross}. 27 | } 28 | \references{ 29 | B.D. Fulcher and N.S. Jones. hctsa: A computational framework for automated time-series phenotyping using massive feature extraction. Cell Systems 5, 527 (2017). 30 | 31 | B.D. Fulcher, M.A. Little, N.S. Jones Highly comparative time-series analysis: the empirical structure of time series and their methods. J. Roy. Soc. Interface 10, 83 (2013). 
32 | } 33 | \seealso{ 34 | \code{\link{embed2_incircle}} 35 | 36 | \code{\link{ac_9}} 37 | 38 | \code{\link{firstmin_ac}} 39 | 40 | \code{\link{trev_num}} 41 | 42 | \code{\link{motiftwo_entro3}} 43 | 44 | \code{\link{walker_propcross}} 45 | } 46 | \author{ 47 | Yangzhuoran Yang 48 | } 49 | -------------------------------------------------------------------------------- /.github/workflows/pkgdown.yaml: -------------------------------------------------------------------------------- 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples 2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help 3 | on: 4 | push: 5 | branches: [main, master] 6 | pull_request: 7 | branches: [main, master] 8 | release: 9 | types: [published] 10 | workflow_dispatch: 11 | 12 | name: pkgdown 13 | 14 | jobs: 15 | pkgdown: 16 | runs-on: ubuntu-latest 17 | # Only restrict concurrency for non-PR jobs 18 | concurrency: 19 | group: pkgdown-${{ github.event_name != 'pull_request' || github.run_id }} 20 | env: 21 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 22 | permissions: 23 | contents: write 24 | steps: 25 | - uses: actions/checkout@v3 26 | 27 | - uses: r-lib/actions/setup-pandoc@v2 28 | 29 | - uses: r-lib/actions/setup-r@v2 30 | with: 31 | use-public-rspm: true 32 | 33 | - uses: r-lib/actions/setup-r-dependencies@v2 34 | with: 35 | extra-packages: any::pkgdown, local::. 
36 | needs: website 37 | 38 | - name: Build site 39 | run: pkgdown::build_site_github_pages(new_process = FALSE, install = FALSE) 40 | shell: Rscript {0} 41 | 42 | - name: Deploy to GitHub pages 🚀 43 | if: github.event_name != 'pull_request' 44 | uses: JamesIves/github-pages-deploy-action@v4.4.1 45 | with: 46 | clean: false 47 | branch: gh-pages 48 | folder: docs 49 | -------------------------------------------------------------------------------- /.github/workflows/R-CMD-check.yaml: -------------------------------------------------------------------------------- 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples 2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help 3 | on: 4 | push: 5 | branches: [main, master] 6 | pull_request: 7 | branches: [main, master] 8 | 9 | name: R-CMD-check 10 | 11 | jobs: 12 | R-CMD-check: 13 | runs-on: ${{ matrix.config.os }} 14 | 15 | name: ${{ matrix.config.os }} (${{ matrix.config.r }}) 16 | 17 | strategy: 18 | fail-fast: false 19 | matrix: 20 | config: 21 | - {os: macos-latest, r: 'release'} 22 | - {os: windows-latest, r: 'release'} 23 | - {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'} 24 | - {os: ubuntu-latest, r: 'release'} 25 | - {os: ubuntu-latest, r: 'oldrel-1'} 26 | 27 | env: 28 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 29 | R_KEEP_PKG_SOURCE: yes 30 | 31 | steps: 32 | - uses: actions/checkout@v3 33 | 34 | - uses: r-lib/actions/setup-pandoc@v2 35 | 36 | - uses: r-lib/actions/setup-r@v2 37 | with: 38 | r-version: ${{ matrix.config.r }} 39 | http-user-agent: ${{ matrix.config.http-user-agent }} 40 | use-public-rspm: true 41 | 42 | - uses: r-lib/actions/setup-tinytex@v2 43 | 44 | - uses: r-lib/actions/setup-r-dependencies@v2 45 | with: 46 | extra-packages: any::rcmdcheck 47 | needs: check 48 | 49 | - uses: r-lib/actions/check-r-package@v2 50 | with: 51 | args: 'c("--no-build-vignettes","--no-manual")' 52 | 
-------------------------------------------------------------------------------- /man/entropy.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/entropy.R 3 | \name{entropy} 4 | \alias{entropy} 5 | \title{Spectral entropy of a time series} 6 | \usage{ 7 | entropy(x) 8 | } 9 | \arguments{ 10 | \item{x}{a univariate time series} 11 | } 12 | \value{ 13 | A non-negative real value for the spectral entropy \eqn{H_s(x_t)}. 14 | } 15 | \description{ 16 | Computes spectral entropy from a univariate normalized 17 | spectral density, estimated using an AR model. 18 | } 19 | \details{ 20 | The \emph{spectral entropy} equals the Shannon entropy of the spectral density 21 | \eqn{f_x(\lambda)} of a stationary process \eqn{x_t}: 22 | \deqn{ 23 | H_s(x_t) = - \int_{-\pi}^{\pi} f_x(\lambda) \log f_x(\lambda) d \lambda, 24 | } 25 | where the density is normalized such that 26 | \eqn{\int_{-\pi}^{\pi} f_x(\lambda) d \lambda = 1}. 27 | An estimate of \eqn{f(\lambda)} can be obtained using \code{\link[stats]{spec.ar}} with 28 | the \code{burg} method. 29 | } 30 | \examples{ 31 | entropy(rnorm(1000)) 32 | entropy(lynx) 33 | entropy(sin(1:20)) 34 | } 35 | \references{ 36 | Jerry D. Gibson and Jaewoo Jung (2006). \dQuote{The 37 | Interpretation of Spectral Entropy Based Upon Rate Distortion Functions}. 38 | IEEE International Symposium on Information Theory, pp. 277-281. 39 | 40 | Goerg, G. M. (2013). \dQuote{Forecastable Component Analysis}. 41 | Proceedings of the 30th International Conference on Machine Learning (PMLR) 28 (2): 64-72, 2013. 42 | Available at \url{https://proceedings.mlr.press/v28/goerg13.html}. 
43 | } 44 | \seealso{ 45 | \code{\link[stats]{spec.ar}} 46 | } 47 | \author{ 48 | Rob J Hyndman 49 | } 50 | -------------------------------------------------------------------------------- /man/outlierinclude_mdrmd.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/compengine.R 3 | \name{outlierinclude_mdrmd} 4 | \alias{outlierinclude_mdrmd} 5 | \title{How the median depends on distributional outliers from software package \code{hctsa}} 6 | \usage{ 7 | outlierinclude_mdrmd(y, zscored = TRUE) 8 | } 9 | \arguments{ 10 | \item{y}{the input time series (ideally z-scored)} 11 | 12 | \item{zscored}{Should y be z-scored before computing the statistic. Default: TRUE} 13 | } 14 | \value{ 15 | median of the median of range indices 16 | } 17 | \description{ 18 | Measures the median as more and 19 | more outliers are included in the calculation according to a specified rule, 20 | of outliers being furthest from the mean. 21 | } 22 | \details{ 23 | The threshold for including time-series data points in the analysis increases 24 | from zero to the maximum deviation, in increments of 0.01*sigma (by default), 25 | where sigma is the standard deviation of the time series. 26 | 27 | At each threshold, the proportion of time series points 28 | included and the median are calculated, and outputs from the 29 | algorithm measure how these statistical quantities change as more extreme 30 | points are included in the calculation. 31 | 32 | Outliers are defined as furthest from the mean. 33 | } 34 | \references{ 35 | B.D. Fulcher and N.S. Jones. hctsa: A computational framework for automated time-series phenotyping using massive feature extraction. Cell Systems 5, 527 (2017). 36 | 37 | B.D. Fulcher, M.A. Little, N.S. Jones Highly comparative time-series analysis: the empirical structure of time series and their methods. J. Roy. Soc. Interface 10, 83 (2013). 
38 | } 39 | \author{ 40 | Yangzhuoran Yang 41 | } 42 | -------------------------------------------------------------------------------- /README.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | output: github_document 3 | editor_options: 4 | chunk_output_type: console 5 | --- 6 | 7 | 8 | 9 | ```{r, echo = FALSE} 10 | knitr::opts_chunk$set( 11 | collapse = TRUE, 12 | comment = "#>", 13 | fig.path = "man/figures/README-", 14 | cache = TRUE, 15 | message = FALSE, 16 | warning = FALSE 17 | ) 18 | ``` 19 | 20 | # tsfeatures 21 | 22 | 23 | [![CRAN\_Status\_Badge](http://www.r-pkg.org/badges/version/tsfeatures)](https://cran.r-project.org/package=tsfeatures) 24 | [![Downloads](http://cranlogs.r-pkg.org/badges/tsfeatures)](https://cran.r-project.org/package=tsfeatures) 25 | [![Licence](https://img.shields.io/badge/licence-GPL--3-blue.svg)](https://www.gnu.org/licenses/gpl-3.0.en.html) 26 | [![R build status](https://github.com/robjhyndman/tsfeatures/workflows/R-CMD-check/badge.svg)](https://github.com/robjhyndman/tsfeatures/actions) 27 | 28 | 29 | The R package *tsfeatures* provides methods for extracting various features from time series data. 30 | 31 | ## Installation 32 | 33 | You can install the **stable** version on [R 34 | CRAN](https://cran.r-project.org/package=tsfeatures). 35 | 36 | ``` r 37 | install.packages('tsfeatures', dependencies = TRUE) 38 | ``` 39 | 40 | You can install the **development** version from [Github](https://github.com/robjhyndman/tsfeatures) with: 41 | 42 | ```{r gh-installation, eval = FALSE} 43 | # install.packages("devtools") 44 | devtools::install_github("robjhyndman/tsfeatures") 45 | ``` 46 | 47 | ## Usage 48 | 49 | ```{r} 50 | library(tsfeatures) 51 | mylist <- list(sunspot.year, WWWusage, AirPassengers, USAccDeaths) 52 | myfeatures <- tsfeatures(mylist) 53 | myfeatures 54 | ``` 55 | 56 | 57 | ## License 58 | 59 | This package is free and open source software, licensed under GPL-3. 
60 | -------------------------------------------------------------------------------- /man/tsfeatures-package.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/tsfeatures-package.R 3 | \docType{package} 4 | \name{tsfeatures-package} 5 | \alias{tsfeatures-package} 6 | \alias{_PACKAGE} 7 | \title{tsfeatures: Time Series Feature Extraction} 8 | \description{ 9 | Methods for extracting various features from time series data. The features provided are those from Hyndman, Wang and Laptev (2013) \doi{10.1109/ICDMW.2015.104}, Kang, Hyndman and Smith-Miles (2017) \doi{10.1016/j.ijforecast.2016.09.004} and from Fulcher, Little and Jones (2013) \doi{10.1098/rsif.2013.0048}. Features include spectral entropy, autocorrelations, measures of the strength of seasonality and trend, and so on. Users can also define their own feature functions. 10 | } 11 | \seealso{ 12 | Useful links: 13 | \itemize{ 14 | \item \url{https://pkg.robjhyndman.com/tsfeatures/} 15 | \item \url{https://github.com/robjhyndman/tsfeatures} 16 | \item Report bugs at \url{https://github.com/robjhyndman/tsfeatures/issues} 17 | } 18 | 19 | } 20 | \author{ 21 | \strong{Maintainer}: Rob Hyndman \email{Rob.Hyndman@monash.edu} (\href{https://orcid.org/0000-0002-2140-5352}{ORCID}) 22 | 23 | Authors: 24 | \itemize{ 25 | \item Yanfei Kang (\href{https://orcid.org/0000-0001-8769-6650}{ORCID}) 26 | \item Pablo Montero-Manso \email{p.montero.manso@udc.es} 27 | \item Mitchell O'Hara-Wild (\href{https://orcid.org/0000-0001-6729-7695}{ORCID}) 28 | \item Thiyanga Talagala (\href{https://orcid.org/0000-0002-0656-9789}{ORCID}) 29 | \item Earo Wang (\href{https://orcid.org/0000-0001-6448-5260}{ORCID}) 30 | \item Yangzhuoran Yang \email{Fin.Yang@monash.edu} 31 | } 32 | 33 | Other contributors: 34 | \itemize{ 35 | \item Souhaib Ben Taieb [contributor] 36 | \item Cao Hanqing [contributor] 37 | \item D K Lake 
[contributor] 38 | \item Nikolay Laptev [contributor] 39 | \item J R Moorman [contributor] 40 | \item Bohan Zhang [contributor] 41 | } 42 | 43 | } 44 | \keyword{internal} 45 | -------------------------------------------------------------------------------- /R/entropy.R: --------------------------------------------------------------------------------
#' Spectral entropy of a time series
#'
#' @description
#' Computes spectral entropy from a univariate normalized
#' spectral density, estimated using an AR model.
#'
#' @details
#' The \emph{spectral entropy} equals the Shannon entropy of the spectral density
#' \eqn{f_x(\lambda)} of a stationary process \eqn{x_t}:
#'
#' \deqn{H_s(x_t) = - \int_{-\pi}^{\pi} f_x(\lambda) \log f_x(\lambda) d \lambda,}
#'
#' where the density is normalized such that
#' \eqn{\int_{-\pi}^{\pi} f_x(\lambda) d \lambda = 1}.
#' An estimate of \eqn{f(\lambda)} can be obtained using \code{\link[stats]{spec.ar}} with
#' the `burg` method.
#'
#' In this implementation the logarithm is taken to base `length(x)` and the
#' result is capped at 1. The spectrum is estimated on the longest stretch of
#' `x` without missing values (see \code{\link[stats]{na.contiguous}}).
#'
#' @param x a univariate time series
#' @author Rob J Hyndman
#' @return
#' A named numeric vector of length one (name `entropy`) holding the spectral
#' entropy \eqn{H_s(x_t)}. The value is `NA` when the spectral density cannot
#' be estimated (for example, when every observation is missing).
#' @seealso \code{\link[stats]{spec.ar}}
#' @references
#' Jerry D. Gibson and Jaewoo Jung (2006). \dQuote{The
#' Interpretation of Spectral Entropy Based Upon Rate Distortion Functions}.
#' IEEE International Symposium on Information Theory, pp. 277-281.
#'
#' Goerg, G. M. (2013). \dQuote{Forecastable Component Analysis}.
#' Proceedings of the 30th International Conference on Machine Learning (PMLR) 28 (2): 64-72, 2013.
#' Available at \url{https://proceedings.mlr.press/v28/goerg13.html}.
#'
#' @examples
#' entropy(rnorm(1000))
#' entropy(lynx)
#' entropy(sin(1:20))
#' @export

entropy <- function(x) {
  n <- length(x)

  # Earlier alternative, kept for reference:
  # spec <- spectrum(x, plot = FALSE, n.freq = ceiling(length(x)/2 + 1), ...)

  # Burg AR spectral estimate. A failure is an expected outcome that maps to
  # NA, so it is caught with tryCatch() instead of a non-silent try(), which
  # would also print the error message to the console.
  spec <- tryCatch(
    stats::spec.ar(
      na.contiguous(x),
      plot = FALSE,
      method = "burg",
      n.freq = ceiling(n / 2 + 1)
    ),
    error = function(e) NULL
  )
  if (is.null(spec)) {
    return(c(entropy = NA_real_))
  }

  # Reflect the estimated spectrum (dropping the duplicated first ordinate)
  # and normalize it so that it sums to one.
  fx <- c(rev(spec$spec[-1]), spec$spec) / n
  fx <- fx / sum(fx)

  # Blend in a small uniform component so that no ordinate is exactly zero,
  # which keeps log(fx) finite.
  prior_weight <- 0.001
  prior_fx <- rep(1 / length(fx), length.out = length(fx))
  fx <- (1 - prior_weight) * fx + prior_weight * prior_fx

  # Shannon entropy with logarithms to base n, capped at 1.
  c(entropy = min(1, -sum(fx * log(fx, base = n))))
}
-------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | S3method(as.list,mts) 4 | export(ac_9) 5 | export(acf_features) 6 | export(arch_stat) 7 | export(autocorr_features) 8 | export(binarize_mean) 9 | export(compengine) 10 | export(crossing_points) 11 | export(dist_features) 12 | export(embed2_incircle) 13 | export(entropy) 14 | export(firstmin_ac) 15 | export(firstzero_ac) 16 | export(flat_spots) 17 | export(fluctanal_prop_r1) 18 | export(heterogeneity) 19 | export(histogram_mode) 20 | export(holt_parameters) 21 | export(hurst) 22 | export(hw_parameters) 23 | export(localsimple_taures) 24 | export(lumpiness) 25 | export(max_kl_shift) 26 | export(max_level_shift) 27 | export(max_var_shift) 28 | export(motiftwo_entro3) 29 | export(nonlinearity) 30 | export(outlierinclude_mdrmd) 31 | export(pacf_features) 32 | export(pred_features) 33 | export(sampen_first) 34 | export(sampenc) 35 | export(scal_features) 36 | export(spreadrandomlocal_meantaul) 37 | export(stability) 38 | export(station_features) 39 | export(std1st_der) 40 | export(stl_features) 41 | export(trev_num) 42 | export(tsfeatures) 43 | export(unitroot_kpss) 44 | export(unitroot_pp) 45 | export(walker_propcross) 46 | export(yahoo_data) 47 | export(zero_proportion) 48 | importFrom(forecast,mstl) 49 |
importFrom(graphics,hist) 50 | importFrom(purrr,map) 51 | importFrom(purrr,map_dbl) 52 | importFrom(stats,"tsp<-") 53 | importFrom(stats,Box.test) 54 | importFrom(stats,acf) 55 | importFrom(stats,ar) 56 | importFrom(stats,as.ts) 57 | importFrom(stats,bw.nrd0) 58 | importFrom(stats,cmdscale) 59 | importFrom(stats,coef) 60 | importFrom(stats,cor) 61 | importFrom(stats,dnorm) 62 | importFrom(stats,embed) 63 | importFrom(stats,fitted) 64 | importFrom(stats,frequency) 65 | importFrom(stats,lm) 66 | importFrom(stats,median) 67 | importFrom(stats,na.contiguous) 68 | importFrom(stats,na.pass) 69 | importFrom(stats,pacf) 70 | importFrom(stats,pchisq) 71 | importFrom(stats,poly) 72 | importFrom(stats,predict) 73 | importFrom(stats,quantile) 74 | importFrom(stats,residuals) 75 | importFrom(stats,sd) 76 | importFrom(stats,spec.ar) 77 | importFrom(stats,start) 78 | importFrom(stats,stl) 79 | importFrom(stats,ts) 80 | importFrom(stats,tsp) 81 | importFrom(stats,var) 82 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: tsfeatures 2 | Title: Time Series Feature Extraction 3 | Version: 1.1.1.9000 4 | Authors@R: c( 5 | person("Rob", "Hyndman", email = "Rob.Hyndman@monash.edu", role = c("aut","cre"), comment = c(ORCID = "0000-0002-2140-5352")), 6 | person("Yanfei", "Kang", role = "aut", comment = c(ORCID = "0000-0001-8769-6650")), 7 | person("Pablo", "Montero-Manso", email="p.montero.manso@udc.es", role="aut"), 8 | person("Mitchell", "O'Hara-Wild", role="aut", comment=c(ORCID = "0000-0001-6729-7695")), 9 | person("Thiyanga", "Talagala", role = "aut", comment=c(ORCID = "0000-0002-0656-9789")), 10 | person("Earo", "Wang", role = "aut", comment=c(ORCID = "0000-0001-6448-5260")), 11 | person("Yangzhuoran", "Yang", email = "Fin.Yang@monash.edu", role = "aut"), 12 | person("Souhaib", "Ben Taieb", role = "ctb"), 13 | person("Cao", "Hanqing", 
role="ctb"), 14 | person("D K", "Lake", role="ctb"), 15 | person("Nikolay", "Laptev", role="ctb"), 16 | person("J R", "Moorman", role="ctb"), 17 | person("Bohan", "Zhang", role = "ctb")) 18 | Description: Methods for extracting various features from time series data. The features provided are those from Hyndman, Wang and Laptev (2013) <doi:10.1109/ICDMW.2015.104>, Kang, Hyndman and Smith-Miles (2017) <doi:10.1016/j.ijforecast.2016.09.004> and from Fulcher, Little and Jones (2013) <doi:10.1098/rsif.2013.0048>. Features include spectral entropy, autocorrelations, measures of the strength of seasonality and trend, and so on. Users can also define their own feature functions. 19 | Depends: 20 | R (>= 3.6.0) 21 | Imports: 22 | fracdiff, 23 | forecast (>= 8.3), 24 | purrr, 25 | RcppRoll (>= 0.2.2), 26 | stats, 27 | tibble, 28 | tseries, 29 | urca, 30 | future, 31 | furrr 32 | Suggests: 33 | testthat, 34 | knitr, 35 | rmarkdown, 36 | ggplot2, 37 | tidyr, 38 | dplyr, 39 | Mcomp, 40 | GGally 41 | License: GPL-3 42 | ByteCompile: true 43 | URL: https://pkg.robjhyndman.com/tsfeatures/, https://github.com/robjhyndman/tsfeatures 44 | BugReports: https://github.com/robjhyndman/tsfeatures/issues 45 | RoxygenNote: 7.2.3 46 | Roxygen: list(markdown = TRUE, roclets=c('rd', 'collate', 'namespace')) 47 | VignetteBuilder: knitr 48 | Encoding: UTF-8 49 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | # tsfeatures 5 | 6 | 7 | 8 | [![CRAN_Status_Badge](http://www.r-pkg.org/badges/version/tsfeatures)](https://cran.r-project.org/package=tsfeatures) 9 | [![Downloads](http://cranlogs.r-pkg.org/badges/tsfeatures)](https://cran.r-project.org/package=tsfeatures) 10 | [![Licence](https://img.shields.io/badge/licence-GPL--3-blue.svg)](https://www.gnu.org/licenses/gpl-3.0.en.html) 11 | [![R build 12 | status](https://github.com/robjhyndman/tsfeatures/workflows/R-CMD-check/badge.svg)](https://github.com/robjhyndman/tsfeatures/actions) 13 | 14 | 15 |
The R package *tsfeatures* provides methods for extracting various 16 | features from time series data. 17 | 18 | ## Installation 19 | 20 | You can install the **stable** version on [R 21 | CRAN](https://cran.r-project.org/package=tsfeatures). 22 | 23 | ``` r 24 | install.packages('tsfeatures', dependencies = TRUE) 25 | ``` 26 | 27 | You can install the **development** version from 28 | [Github](https://github.com/robjhyndman/tsfeatures) with: 29 | 30 | ``` r 31 | # install.packages("devtools") 32 | devtools::install_github("robjhyndman/tsfeatures") 33 | ``` 34 | 35 | ## Usage 36 | 37 | ``` r 38 | library(tsfeatures) 39 | mylist <- list(sunspot.year, WWWusage, AirPassengers, USAccDeaths) 40 | myfeatures <- tsfeatures(mylist) 41 | myfeatures 42 | #> # A tibble: 4 × 20 43 | #> frequency nperiods seasonal_period trend spike linearity curvature e_acf1 44 | #> 45 | #> 1 1 0 1 0.125 2.10e-5 3.58 1.11 0.793 46 | #> 2 1 0 1 0.985 3.01e-8 4.45 1.10 0.774 47 | #> 3 12 1 12 0.991 1.46e-8 11.0 1.09 0.509 48 | #> 4 12 1 12 0.802 9.15e-7 -2.12 2.85 0.258 49 | #> # ℹ 12 more variables: e_acf10 , entropy , x_acf1 , 50 | #> # x_acf10 , diff1_acf1 , diff1_acf10 , diff2_acf1 , 51 | #> # diff2_acf10 , seasonal_strength , peak , trough , 52 | #> # seas_acf1 53 | ``` 54 | 55 | ## License 56 | 57 | This package is free and open source software, licensed under GPL-3. 
58 | -------------------------------------------------------------------------------- /man/tsfeatures.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/featurematrix.R 3 | \name{tsfeatures} 4 | \alias{tsfeatures} 5 | \title{Time series feature matrix} 6 | \usage{ 7 | tsfeatures( 8 | tslist, 9 | features = c("frequency", "stl_features", "entropy", "acf_features"), 10 | scale = TRUE, 11 | trim = FALSE, 12 | trim_amount = 0.1, 13 | parallel = FALSE, 14 | multiprocess = future::multisession, 15 | na.action = na.pass, 16 | ... 17 | ) 18 | } 19 | \arguments{ 20 | \item{tslist}{a list of univariate time series, each of class \code{ts} or a numeric vector. 21 | Alternatively, an object of class \code{mts} may be used.} 22 | 23 | \item{features}{a vector of function names which return numeric vectors of features. 24 | All features returned by these functions must be named if they return more than one feature. 25 | Existing functions from installed packages may be used, but the package must be loaded first. 26 | Functions must return a result for all time series, even if it is just NA.} 27 | 28 | \item{scale}{if \code{TRUE}, time series are scaled to mean 0 and sd 1 before features 29 | are computed.} 30 | 31 | \item{trim}{if \code{TRUE}, time series are trimmed by \code{trim_amount} before features 32 | are computed. Values larger than \code{trim_amount} in absolute value are set to \code{NA}.} 33 | 34 | \item{trim_amount}{Default level of trimming if \code{trim==TRUE}.} 35 | 36 | \item{parallel}{If TRUE, multiple cores (or multiple sessions) will be used. This only speeds things up 37 | when there are a large number of time series.} 38 | 39 | \item{multiprocess}{The function from the \code{future} package to use for parallel processing. Either 40 | \code{\link[future]{multisession}} or \code{\link[future]{multicore}}. 
The latter is preferred 41 | for Linux and MacOS.} 42 | 43 | \item{na.action}{A function to handle missing values. Use \code{na.interp} to estimate missing values.} 44 | 45 | \item{...}{Other arguments get passed to the feature functions.} 46 | } 47 | \value{ 48 | A feature matrix (in the form of a tibble) with each row corresponding to 49 | one time series from tslist, and each column being a feature. 50 | } 51 | \description{ 52 | \code{tsfeatures} computes a matrix of time series features from a list of time series 53 | } 54 | \examples{ 55 | mylist <- list(sunspot.year, WWWusage, AirPassengers, USAccDeaths) 56 | tsfeatures(mylist) 57 | } 58 | \author{ 59 | Rob J Hyndman 60 | } 61 | -------------------------------------------------------------------------------- /_pkgdown.yml: -------------------------------------------------------------------------------- 1 | url: http://pkg.robjhyndman.com/tsfeatures 2 | 3 | template: 4 | bootstrap: 5 5 | theme: tango 6 | bootswatch: flatly 7 | bslib: 8 | base_font: {google: "Fira Sans"} 9 | heading_font: {google: "Fira Sans"} 10 | code_font: "Hack, mono" 11 | primary: "#234460" 12 | link-color: "#234460" 13 | includes: 14 | in_header: 15 | 16 | authors: 17 | Rob Hyndman: 18 | href: https://robjhyndman.com 19 | Yanfei Kang: 20 | href: https://yanfei.site/ 21 | Thiyanga Talagala: 22 | href: https://thiyanga.netlify.com/ 23 | Earo Wang: 24 | href: https://earo.me 25 | Yangzhuoran Yang: 26 | href: https://yangzhuoranyang.com 27 | 28 | navbar: 29 | type: light 30 | structure: 31 | left: [home, intro, reference, changelog] 32 | right: [search, github] 33 | components: 34 | home: 35 | icon: fa-home fa-lg 36 | href: index.html 37 | reference: 38 | text: Reference 39 | href: reference/index.html 40 | intro: 41 | text: Get started 42 | href: articles/tsfeatures.html 43 | changelog: 44 | text: Change log 45 | href: news/index.html 46 | github: 47 | icon: fa-github fa-lg 48 | href: https://github.com/robjhyndman/tsfeatures/ 49 | 50 | 
reference: 51 | - title: "Package" 52 | desc: "tsfeatures package" 53 | contents: 54 | - tsfeatures-package 55 | - title: tsfeatures 56 | desc: The main function to extract features from a list of time series. 57 | contents: 58 | - tsfeatures 59 | - title: Feature functions 60 | desc: Functions which compute features from time series 61 | contents: 62 | - ac_9 63 | - acf_features 64 | - arch_stat 65 | - autocorr_features 66 | - binarize_mean 67 | - compengine 68 | - crossing_points 69 | - dist_features 70 | - embed2_incircle 71 | - entropy 72 | - firstmin_ac 73 | - firstzero_ac 74 | - flat_spots 75 | - fluctanal_prop_r1 76 | - heterogeneity 77 | - histogram_mode 78 | - holt_parameters 79 | - hurst 80 | - localsimple_taures 81 | - lumpiness 82 | - max_level_shift 83 | - motiftwo_entro3 84 | - nonlinearity 85 | - outlierinclude_mdrmd 86 | - pacf_features 87 | - pred_features 88 | - sampen_first 89 | - sampenc 90 | - scal_features 91 | - spreadrandomlocal_meantaul 92 | - station_features 93 | - std1st_der 94 | - stl_features 95 | - trev_num 96 | - unitroot_kpss 97 | - walker_propcross 98 | - zero_proportion 99 | - title: Utility functions 100 | contents: 101 | - as.list.mts 102 | 103 | - title: Data 104 | desc: Download data providing Yahoo server metrics 105 | contents: 106 | - yahoo_data 107 | -------------------------------------------------------------------------------- /R/multipleseasonal.R: -------------------------------------------------------------------------------- 1 | 2 | #' Strength of trend and seasonality of a time series 3 | #' 4 | #' Computes various measures of trend and seasonality of a time series based on 5 | #' an STL decomposition. The number of seasonal periods, and the length of the 6 | #' seasonal periods are returned. Also, the strength of seasonality corresponding 7 | #' to each period is estimated. The \code{\link[forecast]{mstl}} function is used 8 | #' to do the decomposition. 9 | #' @param x a univariate time series. 10 | #' @param ... 
#' Other arguments are passed to \code{\link[forecast]{mstl}}.
#' @return A named numeric vector containing \code{nperiods},
#' \code{seasonal_period}, \code{trend}, \code{spike}, \code{linearity},
#' \code{curvature}, \code{e_acf1} and \code{e_acf10}. When the decomposition
#' contains at least one seasonal component, \code{seasonal_strength},
#' \code{peak} and \code{trough} are appended.
#' @author Rob J Hyndman
#' @export

stl_features <- function(x, ...) {
  # Work out the seasonal period(s): the `msts` attribute for multi-seasonal
  # objects, the frequency for ordinary `ts` objects, and 1 for anything else.
  if ("msts" %in% class(x)) {
    msts <- attributes(x)$msts
    nperiods <- length(msts)
  }
  else if ("ts" %in% class(x)) {
    msts <- frequency(x)
    # Logical flag: TRUE when the frequency exceeds 1.
    nperiods <- msts > 1
    if(length(x) <= 2*msts) {
      warning("Insufficient data to compute STL decomposition")
      # c() strips the ts attributes from x.
      # NOTE(review): presumably so that mstl() fits no seasonal component;
      # confirm. `msts` and `nperiods` keep the values set above.
      x <- c(x)
    }
    # Placeholder only; `season` is reset to NA a few lines below.
    season <- 0
  }
  else {
    msts <- 1
    nperiods <- 0L
    # Placeholder only; `season` is reset to NA a few lines below.
    season <- 0
  }
  if(NCOL(x) > 1){
    stop("x must be a univariate time series.")
  }
  # Initialise every feature to NA.
  trend <- linearity <- curvature <- season <- spike <- peak <- trough <- acfremainder <- NA

  # STL fits
  stlfit <- forecast::mstl(x, ...)
  trend0 <- stlfit[, "Trend"]
  remainder <- stlfit[, "Remainder"]
  # Every column whose name contains "Season"; drop = FALSE keeps a matrix
  # even when there is one such column or none.
  seasonal <- stlfit[, grep("Season", colnames(stlfit)), drop = FALSE]

  # When the maximum frequency is dropped
  # (x is given the time-series attributes of the fitted trend)
  tsp(x) <- tsp(trend0)

  # De-trended and de-seasonalized data
  # NOTE(review): `fits` and `vardetrend` are not used again in this function.
  detrend <- x - trend0
  deseason <- forecast::seasadj(stlfit)
  fits <- x - remainder

  # Summary stats
  n <- length(x)
  varx <- var(x, na.rm = TRUE)
  vare <- var(remainder, na.rm = TRUE)
  vardetrend <- var(detrend, na.rm = TRUE)
  vardeseason <- var(deseason, na.rm = TRUE)
  nseas <- NCOL(seasonal)

  # Measure of trend strength
  # Set to 0 when the series variance is essentially zero, or when the
  # deseasonalized variance is negligible relative to it. Otherwise it is
  # 1 - var(remainder) / var(deseasonalized), clipped to [0, 1].
  if(varx < .Machine$double.eps)
    trend <- 0
  else if (vardeseason / varx < 1e-10) {
    trend <- 0
  } else {
    trend <- max(0, min(1, 1 - vare / vardeseason))
  }

  if (nseas > 0) {
    # Measure of seasonal strength
    # One value per seasonal component:
    # 1 - var(remainder) / var(remainder + seasonal), clipped to [0, 1].
    season <- numeric(nseas)
    for (i in seq(nseas))
      season[i] <- max(0, min(1, 1 - vare / var(remainder + seasonal[, i], na.rm = TRUE)))

    # Find time of peak and trough for each component
    peak <- trough <- numeric(nseas)
    for (i in seq(nseas))
    {
      # Offset by the starting position of x, then reduce modulo the period.
      # A remainder of 0 is reported as the period itself.
      startx <- start(x)[2L] - 1L
      pk <- (startx + which.max(seasonal[, i])) %% msts[i]
      th <- (startx + which.min(seasonal[, i])) %% msts[i]
      peak[i] <- ifelse(pk == 0, msts[i], pk)
      trough[i] <- ifelse(th == 0, msts[i], th)
    }
  }

  # Compute measure of spikiness
  # Variance of the leave-one-out variances of the remainder.
  d <- (remainder - mean(remainder, na.rm = TRUE))^2
  varloo <- (vare * (n - 1) - d) / (n - 2)
  spike <- var(varloo, na.rm = TRUE)

  # Compute measures of linearity and curvature
  # Coefficients 2 and 3 of a polynomial regression of the trend on time.
  # The degree is min(n - 1, 2), so the third coefficient does not exist
  # when the degree is 1.
  tren.coef <- coef(lm(trend0 ~ poly(seq(n), degree = min(n-1, 2L))))[2L:3L]
  linearity <- tren.coef[1L]
  curvature <- tren.coef[2L]

  # ACF of remainder
  # Only the first two elements of the result are used below.
  acfremainder <- unname(acf_features(remainder))

  # Assemble results
  output <- c(
    nperiods = nperiods, seasonal_period = msts, trend = trend,
    spike = spike, linearity = unname(linearity), curvature = unname(curvature),
    e_acf1 = acfremainder[1L], e_acf10 = acfremainder[2L]
  )
  # Seasonal features are appended only when a seasonal component was fitted.
  if (nseas > 0) {
    output <- c(output, seasonal_strength = season, peak = peak, trough = trough)
  }

  return(output)
}
-------------------------------------------------------------------------------- /R/yanfei.R: -------------------------------------------------------------------------------- 1 | #' Heterogeneity coefficients 2 | #' 3 | #' Computes various measures of heterogeneity of a time series. First the series 4 | #' is pre-whitened using an AR model to give a new series y. We fit a GARCH(1,1) 5 | #' model to y and obtain the residuals, e. Then the four measures of heterogeneity 6 | #' are: 7 | #' (1) the sum of squares of the first 12 autocorrelations of \eqn{y^2}{y^2}; 8 | #' (2) the sum of squares of the first 12 autocorrelations of \eqn{e^2}{e^2}; 9 | #' (3) the \eqn{R^2}{R^2} value of an AR model applied to \eqn{y^2}{y^2}; 10 | #' (4) the \eqn{R^2}{R^2} value of an AR model applied to \eqn{e^2}{e^2}.
#' The statistics obtained from \eqn{y^2}{y^2} are the ARCH effects, while those
#' from \eqn{e^2}{e^2} are the GARCH effects.
#' @param x a univariate time series
#' @return A named numeric vector with elements \code{arch_acf},
#' \code{garch_acf}, \code{arch_r2} and \code{garch_r2}.
#' @author Yanfei Kang and Rob J Hyndman
#' @export

heterogeneity <- function(x) {
  # Design note: in an ETS/ARIMA comparison this measure is high for any kind
  # of heteroskedasticity. ETS heteroskedasticity is of one particular kind
  # (variation growing with the level of the series), whereas GARCH-type
  # heteroskedasticity can be high even when the variation changes
  # independently of the level.

  # Sum of squared autocorrelations at lags 1 to 12.
  acf12_sumsq <- function(series) {
    sum(acf(series, lag.max = 12L, plot = FALSE)$acf[-1L]^2)
  }

  # Pre-whiten with an AR model before any GARCH modelling.
  prewhitened <- na.contiguous(ar(x)$resid)

  # ARCH effects of the pre-whitened series.
  arch_r2 <- arch_stat(prewhitened)
  arch_acf <- acf12_sumsq(prewhitened^2)

  # Fit a GARCH model to capture the variance dynamics.
  garch_model <- suppressWarnings(tseries::garch(prewhitened, trace = FALSE))
  garch_resid <- residuals(garch_model)

  # The same two statistics on the GARCH residuals. The ACF-based one falls
  # back to NA when it cannot be computed.
  garch_r2 <- arch_stat(garch_resid)
  garch_acf <- tryCatch(
    acf12_sumsq(na.contiguous(garch_resid^2)),
    error = function(e) NA
  )

  c(
    arch_acf = arch_acf,
    garch_acf = garch_acf,
    arch_r2 = unname(arch_r2),
    garch_r2 = unname(garch_r2)
  )
}

#' Nonlinearity coefficient
#'
#' Computes a nonlinearity statistic based on Lee, White & Granger's nonlinearity test of a time series.
#' The statistic is \eqn{10X^2/T}{10X^2/T} where \eqn{X^2}{X^2} is the Chi-squared statistic from Lee, White and Granger
#' (computed here with \code{tseries::terasvirta.test}),
#' and T is the length of the time series. This takes large values
#' when the series is nonlinear, and values around 0 when the series is linear.
#' @param x a univariate time series
#' @return A named numeric value (\code{nonlinearity}); \code{NA} if the test fails.
#' @examples
#' nonlinearity(lynx)
#' @author Yanfei Kang and Rob J Hyndman
#' @references Lee, T. H., White, H., & Granger, C. W. (1993). Testing for neglected nonlinearity in time series models: A comparison of neural network methods and alternative tests. \emph{Journal of Econometrics}, 56(3), 269-290.
#' @references Teräsvirta, T., Lin, C.-F., & Granger, C. W. J. (1993). Power of the neural network linearity test. \emph{Journal of Time Series Analysis}, 14(2), 209–220.
#' @export

nonlinearity <- function(x) {
  # Chi-squared test statistic, or NA when the test raises an error.
  chisq_stat <- tryCatch(
    {
      test_result <- tseries::terasvirta.test(as.ts(x), type = "Chisq")
      test_result$statistic
    },
    error = function(e) NA
  )
  # Scale by 10 / T, where T is the series length.
  scaled_stat <- 10 * unname(chisq_stat) / length(x)
  c(nonlinearity = scaled_stat)
}

#' ARCH LM Statistic
#'
#' Computes a statistic based on the Lagrange Multiplier (LM) test of Engle (1982) for
#' autoregressive conditional heteroscedasticity (ARCH). The statistic returned is
#' the \eqn{R^2}{R^2} value of an autoregressive model of order \code{lags} applied
#' to \eqn{x^2}{x^2}.
#' @param x a univariate time series
#' @param lags Number of lags to use in the test
#' @param demean Should data have mean removed before test applied?
#' @return A numeric value.
#' @author Yanfei Kang
#' @export

arch_stat <- function(x, lags = 12, demean = TRUE) {
  not_available <- c(ARCH.LM = NA_real_)

  # Too few observations to regress on `lags` lagged values.
  if (length(x) <= lags + 1) {
    return(not_available)
  }

  centred <- if (demean) x - mean(x, na.rm = TRUE) else x

  # Column 1 is the current squared value; the remaining columns are its lags.
  lagged_sq <- embed(centred^2, lags + 1)
  fit <- tryCatch(
    lm(lagged_sq[, 1] ~ lagged_sq[, -1]),
    error = function(e) NULL
  )
  if (is.null(fit)) {
    return(not_available)
  }

  # R^2 of the autoregression. An undefined (NaN) value is reported as 1.
  r_squared <- summary(fit)$r.squared
  c(ARCH.LM = if (is.nan(r_squared)) 1 else r_squared)
}
-------------------------------------------------------------------------------- /R/thiyanga.R: -------------------------------------------------------------------------------- 1 | #' Autocorrelation-based features 2 | #' 3 | #' Computes various measures based on autocorrelation coefficients of the 4 | #' original series, first-differenced series and second-differenced series 5 | #' @param x a univariate time series 6 | #' @return A vector of 6 values: first autocorrelation coefficient and sum of squared of 7 | #' first ten autocorrelation coefficients of original series, first-differenced series, 8 | #' and twice-differenced series. 9 | #' For seasonal data, the autocorrelation coefficient at the first seasonal lag is 10 | #' also returned.
#' @author Thiyanga Talagala
#' @export
acf_features <- function(x) {
  period <- frequency(x)
  n_obs <- length(x)

  # Autocorrelations at lags 1, 2, ..., max_lag (lag 0 is dropped).
  lagged_acf <- function(series, max_lag) {
    acf(series, lag.max = max_lag, plot = FALSE, na.action = na.pass)$acf[-1L]
  }
  # Sum of squares of the first ten autocorrelations. This is NA whenever
  # fewer than ten are available.
  sumsq_first10 <- function(rho) {
    sum(rho[seq_len(10L)]^2)
  }

  # Each set of autocorrelations is computed only when the series is long
  # enough; otherwise it is a single NA.
  rho_x <- if (n_obs > 1) lagged_acf(x, max(10L, period)) else NA
  rho_diff1 <- if (n_obs > 10) lagged_acf(diff(x, differences = 1), 10L) else NA
  rho_diff2 <- if (n_obs > 11) lagged_acf(diff(x, differences = 2), 10L) else NA

  features <- c(
    x_acf1 = unname(rho_x[1L]),
    x_acf10 = unname(sumsq_first10(rho_x)),
    diff1_acf1 = unname(rho_diff1[1L]),
    diff1_acf10 = unname(sumsq_first10(rho_diff1)),
    diff2_acf1 = unname(rho_diff2[1L]),
    diff2_acf10 = unname(sumsq_first10(rho_diff2))
  )

  # Seasonal data: add the autocorrelation at the first seasonal lag.
  if (period > 1) {
    features <- c(features, seas_acf1 = unname(rho_x[period]))
  }

  features
}

#' Partial autocorrelation-based features
#'
#' Computes various measures based on partial autocorrelation coefficients of the
#' original series, first-differenced series and second-differenced series
#' @param x a univariate time series
#' @return A vector of 3 values: Sum of squared of first 5
#' partial
#' autocorrelation coefficients of the original series, first differenced
#' series and twice-differenced series.
#' For seasonal data, the partial autocorrelation coefficient at the first seasonal
#' lag is also returned.
#' @author Thiyanga Talagala
#' @export
pacf_features <- function(x) {
  period <- frequency(x)
  n_obs <- length(x)

  # Sum of squared partial autocorrelations at lags 1 to 5.
  pacf5_sumsq <- function(series) {
    sum(pacf(series, lag.max = 5L, plot = FALSE)$acf^2)
  }

  # Partial autocorrelations of the original series, far enough out to reach
  # the first seasonal lag.
  pacf_x <- if (n_obs > 1) {
    pacf(x, lag.max = max(5L, period), plot = FALSE)$acf
  } else {
    NA
  }

  # Each feature needs a minimum number of observations; otherwise it is NA.
  features <- c(
    x_pacf5 = unname(if (n_obs > 5) sum(pacf_x[seq_len(5L)]^2) else NA),
    diff1x_pacf5 = unname(
      if (n_obs > 6) pacf5_sumsq(diff(x, differences = 1L)) else NA
    ),
    diff2x_pacf5 = unname(
      if (n_obs > 7) pacf5_sumsq(diff(x, differences = 2L)) else NA
    )
  )

  # Seasonal data: add the partial autocorrelation at the first seasonal lag.
  if (period > 1) {
    features <- c(features, seas_pacf = pacf_x[period])
  }
  features
}

#' Parameter estimates of Holt's linear trend method
#'
#' Estimate the smoothing parameter for the level-alpha and
#' the smoothing parameter for the trend-beta.
#' \code{hw_parameters} considers additive seasonal trend: ets(A,A,A) model.
#' @param x a univariate time series
#' @return \code{holt_parameters} produces a vector of 2 values: alpha, beta.
#'
#' \code{hw_parameters} produces a vector of 3 values: alpha, beta and gamma.
#' @author Thiyanga Talagala, Pablo Montero-Manso
#' @export

holt_parameters <- function(x) {
  # Fit Holt's linear trend model, ets(A,A,N), and keep the level and trend
  # smoothing parameters.
  holt_fit <- forecast::ets(x, model = "AAN")
  estimates <- holt_fit$par[c("alpha", "beta")]
  names(estimates) <- c("alpha", "beta")
  estimates
}

#' @rdname holt_parameters
#' @export
hw_parameters <- function(x) {
  # Fit the additive Holt-Winters model, ets(A,A,A). If the fit raises an
  # error, fall back to a stand-in whose parameters are all NA.
  na_fit <- list(par = c(alpha = NA, beta = NA, gamma = NA))
  safe_ets <- purrr::possibly(forecast::ets, na_fit)
  hw_fit <- safe_ets(x, model = "AAA")
  hw_fit$par[c("alpha", "beta", "gamma")]
}
# #' Autocorrelation coefficient at lag 1 of the residual
# #'
# #' Computes the first order autocorrelation of the residual series of the deterministic trend model
# #' @param x a univariate time series
# #' @return A numeric value.
# #' @author Thiyanga Talagala
# #' @export
# acfresid <- function(x){
# time <- 1:length(x)
# linear_mod <- lm(x~time)
# Res<-resid(linear_mod)
# return(stats::acf(Res,lag.max=1L,plot=FALSE)$acf[-1])
# }

#' Proportion of zeros
#'
#' Computes proportion of zeros in a time series
#' @param x a univariate time series
#' @param tol tolerance level. Absolute values below this are considered zeros.
#' @return A numeric value.
#' @author Thiyanga Talagala
#' @export
zero_proportion <- function(x, tol = 1e-8) {
  # Flag the observations whose magnitude is below the tolerance. Missing
  # values stay NA in the flags and are left out of the proportion.
  near_zero <- abs(x) < tol
  mean(near_zero, na.rm = TRUE)
}

# --------------------------------------------------------------------------------
# /R/featurematrix.R:
# --------------------------------------------------------------------------------
#' Time series feature matrix
#'
#' \code{tsfeatures} computes a matrix of time series features from a list of time series
#' @param tslist a list of univariate time series, each of class \code{ts} or a numeric vector.
#' Alternatively, an object of class \code{mts} may be used.
#' @param features a vector of function names which return numeric vectors of features.
#' All features returned by these functions must be named if they return more than one feature.
#' Existing functions from installed packages may be used, but the package must be loaded first.
#' Functions must return a result for all time series, even if it is just NA.
#' @param scale if \code{TRUE}, time series are scaled to mean 0 and sd 1 before features
#' are computed.
#' @param trim if \code{TRUE}, time series are trimmed by \code{trim_amount} before features
#' are computed. Values below the \code{trim_amount} quantile or above the
#' \code{1 - trim_amount} quantile of each series are set to \code{NA}.
#' @param trim_amount Default level of trimming if \code{trim==TRUE}.
#' @param parallel If TRUE, multiple cores (or multiple sessions) will be used. This only speeds things up
#' when there are a large number of time series.
#' @param multiprocess The function from the \code{future} package to use for parallel processing. Either
#' \code{\link[future]{multisession}} or \code{\link[future]{multicore}}. The latter is preferred
#' for Linux and MacOS.
#' @param na.action A function to handle missing values. Use \code{na.interp} to estimate missing values.
#' @param ... Other arguments get passed to the feature functions.
#' @return A feature matrix (in the form of a tibble) with each row corresponding to
#' one time series from tslist, and each column being a feature.
#' @examples
#' mylist <- list(sunspot.year, WWWusage, AirPassengers, USAccDeaths)
#' tsfeatures(mylist)
#' @author Rob J Hyndman
#' @export
tsfeatures <- function(tslist,
                       features = c("frequency", "stl_features", "entropy", "acf_features"),
                       scale = TRUE, trim = FALSE, trim_amount = 0.1,
                       parallel = FALSE, multiprocess = future::multisession, na.action = na.pass, ...) {
  # Coerce the input to a list of ts objects
  if (!is.list(tslist)) {
    tslist <- as.list(as.ts(tslist))
  } else {
    tslist <- map(tslist, as.ts)
  }
  # A zero-variance series cannot be standardised. na.rm = TRUE in any() keeps
  # the condition a proper TRUE/FALSE when some variance is NA (for example a
  # series with a single observation), instead of failing inside `if`.
  if (scale && any(map_dbl(tslist, var, na.rm = TRUE) == 0, na.rm = TRUE)) {
    warning("Some series are constant and cannot be scaled, so scaling has been disabled (`scale = FALSE`).")
    scale <- FALSE
  }
  if (scale) {
    tslist <- map(tslist, scalets)
  }
  if (trim) {
    tslist <- map(tslist, trimts, trim = trim_amount)
  }
  # Interpolate for missing values, then restore the attributes of the
  # original series on the result
  tslist <- map(tslist, function(x) {
    y <- na.action(x)
    attributes(y) <- attributes(x)
    y
  })

  # Resolve every feature function up front so that an unknown name fails
  # before any features are computed
  func <- lapply(features, match.fun)
  if (parallel) {
    old_plan <- future::plan(multiprocess)
    on.exit(future::plan(old_plan), add = TRUE)
  }

  # Compute all features: flist[[i]][[k]] holds feature set i of series k
  flist <- vector("list", length(features))
  for (i in seq_along(features)) {
    if (parallel) {
      flist[[i]] <- furrr::future_map(tslist, func[[i]], ...)
    } else {
      flist[[i]] <- map(tslist, func[[i]], ...)
    }

    # Check names: an unnamed result is only accepted when it is a single
    # value, in which case it is named after the function that produced it
    if (is.null(names(flist[[i]][[1]]))) {
      if (length(flist[[i]][[1]]) != 1L) {
        stop(paste("Function",features[i],"not returning named feature vector"))
      }
      flist[[i]] <- map(
        flist[[i]],
        function(x) {
          names(x) <- features[i]
          x
        }
      )
    }
  }

  # Rename duplicate feature names to avoid conflicts
  flist <- rename_duplicate_features(features, flist)

  # Unpack features into a list of numeric vectors, one per series
  featurelist <- lapply(
    seq_along(tslist),
    function(i) unlist(map(flist, function(u) u[[i]]))
  )

  # Find feature names
  featurenames <- map(featurelist, names)
  fnames <- unique(unlist(featurenames))
  # Test the individual names: comparing the list of name vectors itself with
  # "" does not look inside each vector.
  if (any(!nzchar(fnames))) {
    stop("Some unnamed features")
  }

  # Create feature matrix; features missing for a series stay NA
  fmat <- matrix(NA_real_, nrow = length(tslist), ncol = length(fnames))
  colnames(fmat) <- fnames
  rownames(fmat) <- names(tslist)

  for (i in seq_along(tslist)) {
    fmat[i, featurenames[[i]]] <- featurelist[[i]][featurenames[[i]]]
  }

  tibble::as_tibble(fmat)
}

# Scale time series to mean 0 and sd 1, keeping its time series attributes.
# Constant series are returned unchanged.
scalets <- function(x) {
  if (forecast::is.constant(x)) {
    return(x)
  }
  scaledx <- as.numeric(scale(x, center = TRUE, scale = TRUE))
  if (inherits(x, "msts")) {
    # Rebuild a multiple-seasonal series with the original seasonal periods
    msts <- attributes(x)$msts
    y <- forecast::msts(scaledx, seasonal.periods = msts)
  } else {
    y <- as.ts(scaledx)
  }
  tsp(y) <- tsp(x)
  y
}

# Trim time series: values below the `trim` quantile or above the
# `1 - trim` quantile are replaced by NA
trimts <- function(x, trim = 0.1) {
  qtl <- quantile(x, c(trim, 1 - trim), na.rm = TRUE)
  x[x < qtl[1L] | x > qtl[2L]] <- NA
  x
}

# check for duplicate feature names in the feature list and rename by prepending
# the name of the function that generates them to avoid conflicts: "functionName_featureName"
# both functions' features are renamed
# processed in order of appearance in the list
# a warning is generated when conflicts are found
rename_duplicate_features <- function(fun_names, feat_list) {
  n_funs <- length(feat_list)
  if (n_funs < 2) {
    return(feat_list)
  }
  for (i in seq_len(n_funs - 1)) {
    for (j in seq(i + 1, n_funs)) {
      # Names are taken from the first series of each function and re-read on
      # every pass, so earlier renames are taken into account
      names_first_fun <- names(feat_list[[i]][[1]])
      names_sec_fun <- names(feat_list[[j]][[1]])
      # look for at least one match in the names of the features
      if (any(names_first_fun %in% names_sec_fun)) {
        warning(paste("Conflicting feature names in functions: ", fun_names[[i]], " and ", fun_names[[j]]))
        names_first_fun <- paste0(fun_names[[i]], "_", names_first_fun)
        for (idx in seq_along(feat_list[[i]])) {
          names(feat_list[[i]][[idx]]) <- names_first_fun
        }
        names_sec_fun <- paste0(fun_names[[j]], "_", names_sec_fun)
        for (idx in seq_along(feat_list[[j]])) {
          names(feat_list[[j]][[idx]]) <- names_sec_fun
        }
      }
    }
  }
  feat_list
}

# --------------------------------------------------------------------------------
# /R/features.R:
# --------------------------------------------------------------------------------

#' Time series features based on tiled windows
#'
#' Computes feature of a time series based on tiled (non-overlapping) windows.
#' Means or variances are produced for all tiled windows. Then stability is
#' the variance of the means, while lumpiness is the variance of the variances.
#' @param x a univariate time series
#' @param width size of sliding window
#' @return A named numeric vector of length 1: \code{lumpiness} returns the
#' measure of lumpiness and \code{stability} the measure of stability.
#' @author Earo Wang and Rob J Hyndman
#' @export

lumpiness <- function(x, width = ifelse(frequency(x) > 1,
                        frequency(x), 10
                      )) {
  x <- scalets(x)
  n_obs <- length(x)
  # First and last index of each tile of `width` observations
  tile_start <- seq(1, n_obs, by = width)
  tile_end <- seq(width, n_obs + width, by = width)
  n_tiles <- n_obs / width
  # Variance within each tile
  tile_vars <- map_dbl(seq_len(n_tiles), function(k) {
    var(x[tile_start[k]:tile_end[k]], na.rm = TRUE)
  })
  # Series shorter than two tiles are given a lumpiness of zero
  if (n_obs < 2 * width) {
    return(c(lumpiness = 0))
  }
  c(lumpiness = var(tile_vars, na.rm = TRUE))
}

#' @rdname lumpiness
#' @export

stability <- function(x, width = ifelse(frequency(x) > 1,
                        frequency(x), 10
                      )) {
  x <- scalets(x)
  n_obs <- length(x)
  # First and last index of each tile of `width` observations
  tile_start <- seq(1, n_obs, by = width)
  tile_end <- seq(width, n_obs + width, by = width)
  n_tiles <- n_obs / width
  # Mean within each tile
  tile_means <- map_dbl(seq_len(n_tiles), function(k) {
    mean(x[tile_start[k]:tile_end[k]], na.rm = TRUE)
  })
  # Series shorter than two tiles are given a stability of zero
  if (n_obs < 2 * width) {
    return(c(stability = 0))
  }
  c(stability = var(tile_means, na.rm = TRUE))
}

#' Time series features based on sliding windows
#'
#' Computes feature of a time series based on sliding (overlapping) windows.
#' \code{max_level_shift} finds the largest mean shift between two consecutive windows.
#' \code{max_var_shift} finds the largest var shift between two consecutive windows.
#' \code{max_kl_shift} finds the largest shift in Kullback-Leibler divergence between
#' two consecutive windows.
#'
#' Computes the largest level shift and largest variance shift in sliding mean calculations
#' @param x a univariate time series
#' @param width size of sliding window
#' @return A vector of 2 values: the size of the shift, and the time index of the shift.
#' @author Earo Wang and Rob J Hyndman
#' @export

max_level_shift <- function(x, width = ifelse(frequency(x) > 1,
                              frequency(x), 10
                            )) {
  # Rolling means over windows of `width` observations; a failure of the
  # rolling computation yields NA features rather than an error
  rollmean <- suppressWarnings(
    try(RcppRoll::roll_mean(x, width, na.rm = TRUE), silent = TRUE)
  )
  if (inherits(rollmean, "try-error")) {
    return(c(max_level_shift = NA_real_, time_level_shift = NA_real_))
  }
  # Absolute change between rolling means that are `width` positions apart
  means <- abs(diff(rollmean, width))
  if (length(means) == 0L) {
    maxmeans <- 0
    maxidx <- NA_real_
  } else if (all(is.na(means))) {
    maxmeans <- NA_real_
    maxidx <- NA_real_
  } else {
    maxmeans <- max(means, na.rm = TRUE)
    maxidx <- which.max(means) + width - 1L
  }
  c(max_level_shift = maxmeans, time_level_shift = maxidx)
}

#' @rdname max_level_shift
#' @export

max_var_shift <- function(x, width = ifelse(frequency(x) > 1,
                            frequency(x), 10
                          )) {
  # Rolling variances over windows of `width` observations; a failure of the
  # rolling computation yields NA features rather than an error
  rollvar <- suppressWarnings(
    try(RcppRoll::roll_var(x, width, na.rm = TRUE), silent = TRUE)
  )
  if (inherits(rollvar, "try-error")) {
    return(c(max_var_shift = NA_real_, time_var_shift = NA_real_))
  }
  # Absolute change between rolling variances that are `width` positions apart
  vars <- abs(diff(rollvar, width))
  if (length(vars) == 0L) {
    maxvar <- 0
    maxidx <- NA_real_
  } else if (all(is.na(vars))) {
    maxvar <- NA_real_
    maxidx <- NA_real_
  } else {
    maxvar <- max(vars, na.rm = TRUE)
    maxidx <- which.max(vars) + width - 1L
  }
  c(max_var_shift = maxvar, time_var_shift = maxidx)
}

#' @rdname max_level_shift
#' @export

max_kl_shift <- function(x, width = ifelse(frequency(x) > 1,
                           frequency(x), 10
                         )) {
  lenx <- length(x)
  # The series must be longer than two windows. This is checked before any
  # density work so that very short series return NA instead of failing in
  # the bandwidth calculation.
  if (lenx <= (2 * width)) {
    return(c(max_kl_shift = NA_real_, time_kl_shift = NA_real_))
  }
  gw <- 100 # grid width
  xgrid <- seq(min(x, na.rm = TRUE), max(x, na.rm = TRUE), length = gw)
  grid <- xgrid[2L] - xgrid[1L]
  tmpx <- x[!is.na(x)] # Remove NA to calculate bw
  bw <- bw.nrd0(tmpx)
  # Using binning algorithm to achieve efficiency but obscure exact positions.
  # lastrep <- ceiling(lenx/5)
  # group <- rep(1:lastrep, each = 5)[1:lenx]
  # midpoints <- aggregate(x, by = list(group), function(y) y[3L])[, 2]
  # dens.mat <- matrix(, nrow = lastrep, ncol = gw)
  # for (i in 1L:lastrep) {
  #   dens.mat[i, ] <- dnorm(xgrid, mean = midpoints[i], sd = bw)
  # }
  # One row per observation: a normal density centred on that observation,
  # evaluated over the grid
  dens.mat <- matrix(NA_real_, nrow = lenx, ncol = gw)
  for (i in seq_len(lenx)) {
    dens.mat[i, ] <- dnorm(xgrid, mean = x[i], sd = bw)
  }
  # Floor the densities at the tiny positive value dnorm(38); presumably this
  # keeps the logarithms below finite
  dens.mat <- pmax(dens.mat, dnorm(38))
  rmean <- RcppRoll::roll_mean(dens.mat,
    n = width, na.rm = TRUE, fill = NA,
    align = "right"
  ) # by column
  # lo <- seq(1, lastrep - width + 1)
  # hi <- seq(width + 1, lastrep)
  lo <- seq(1, lenx - width + 1)
  hi <- seq(width + 1, lenx)
  seqidx <- min(length(lo), length(hi))
  # Divergence between the rolling density at row lo[i] and the one `width`
  # rows later; NA terms are dropped from the sum
  kl <- vapply(
    seq_len(seqidx),
    function(i) {
      sum(rmean[lo[i], ] *
        (log(rmean[lo[i], ]) - log(rmean[hi[i], ])) *
        grid, na.rm = TRUE)
    },
    numeric(1)
  )
  diffkl <- diff(kl)
  if (length(diffkl) == 0L) {
    diffkl <- 0
    maxidx <- NA_real_
  } else {
    maxidx <- which.max(diffkl) + width - 1L
  }
  c(max_kl_shift = max(diffkl, na.rm = TRUE), time_kl_shift = maxidx)
}

#' Number of crossing points
#'
#' Computes the number of times a time series crosses the median.
#' @param x a univariate time series
#' @return A numeric value.
#' @author Earo Wang and Rob J Hyndman
#' @export
crossing_points <- function(x) {
  lenx <- length(x)
  # With fewer than two observations there is no consecutive pair to compare
  if (lenx < 2L) {
    return(c(crossing_points = 0L))
  }
  midline <- median(x, na.rm = TRUE)
  # TRUE where the series is at or below the median
  ab <- x <= midline
  p1 <- ab[-lenx]
  p2 <- ab[-1L]
  # A crossing occurs when consecutive observations fall on different sides;
  # pairs involving a missing value are not counted
  cross <- p1 != p2
  c(crossing_points = sum(cross, na.rm = TRUE))
}

#' Longest flat spot
#'
#' "Flat spots" are computed by dividing the sample space of a time series into ten equal-sized intervals, and computing the maximum run length within any single interval.
#' @param x a univariate time series
#' @return A numeric value.
#' @author Earo Wang and Rob J Hyndman
#' @export

flat_spots <- function(x) {
  # Assign every observation to one of ten intervals
  cutx <- try(cut(x, breaks = 10, include.lowest = TRUE, labels = FALSE),
    silent = TRUE
  )
  # If the binning fails, return a named NA so that the result has the same
  # name as in the successful case
  if (inherits(cutx, "try-error")) {
    return(c(flat_spots = NA_integer_))
  }
  rlex <- rle(cutx)
  # Any flat spot
  c(flat_spots = max(rlex$lengths))
  # Low flat spots
  # ones <- (rlex$values == 1)
  # return(max(rlex$lengths[ones]))
}

# shapes <- function(x, width, scale = TRUE, FUN = mean, ...){
#   nr <- length(x)
#   if (nr %% width != 0) {
#     stop("width must be a divisor of the length of the series.")
#   }
#   shapes <- matrix(x, ncol = width, byrow= TRUE)
#   if(scale){
#     dtotal <- apply(shapes, 1, sum)
#     idremove <- which(dtotal == 0)
#     if(length(idremove) > 0){
#       shapes <- shapes[-idremove, ]
#       dtotal <- dtotal[-idremove]
#     }
#     shapes <- t(t(shapes) / dtotal)
#   }
#   xprofile <- apply(shapes, 2, FUN, ...)
#   return(c(shapes=xprofile))
# }

#' Hurst coefficient
#'
#' Computes the Hurst coefficient indicating the level of fractional differencing
#' of a time series.
#' @param x a univariate time series.
#' If missing values are present, the largest
#' contiguous portion of the time series is used.
#' @return A numeric value.
#' @author Rob J Hyndman
#' @export

hurst <- function(x) {
  # Hurst = d + 0.5 where d is the fractional differencing order estimated
  # on the longest stretch of x without missing values.
  d_hat <- suppressWarnings(
    fracdiff::fracdiff(na.contiguous(x), 0, 0)[["d"]]
  )
  c(hurst = d_hat + 0.5)
}

#' Unit Root Test Statistics
#'
#' \code{unitroot_kpss} computes the statistic for the Kwiatkowski et al. unit root test
#' using the default settings for the \code{\link[urca]{ur.kpss}} function.
#' \code{unitroot_pp} computes the statistic for the Phillips-Perron unit root test
#' using the default settings for the \code{\link[urca]{ur.pp}} function.
#' @param x a univariate time series.
#' @param ... Other arguments are passed to the \code{\link[urca]{ur.kpss}} or
#' \code{\link[urca]{ur.pp}} functions.
#' @return A numeric value
#' @author Pablo Montero-Manso
#' @export
unitroot_kpss <- function(x, ...) {
  # A failing test is downgraded to a warning and a numeric NA; the original
  # error message is kept in the warning so the cause is not lost
  tryCatch(
    urca::ur.kpss(x, ...)@teststat,
    error = function(e) {
      warning("Error in unitroot_kpss: ", conditionMessage(e), call. = FALSE)
      NA_real_
    }
  )
}

#' @rdname unitroot_kpss
#' @export
unitroot_pp <- function(x, ...) {
  # A failing test is downgraded to a warning and a numeric NA; the original
  # error message is kept in the warning so the cause is not lost
  tryCatch(
    urca::ur.pp(x, ...)@teststat,
    error = function(e) {
      warning("Error in unitroot_pp: ", conditionMessage(e), call. = FALSE)
      NA_real_
    }
  )
}

-------------------------------------------------------------------------------- /vignettes/tsfeatures.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Introduction to the tsfeatures package" 3 | author: "Yangzhuoran Yang and Rob J Hyndman" 4 | date: "`r Sys.Date()`" 5 | output: 6 | rmarkdown::html_vignette: 7 | toc: true 8 | toc_depth: 3 9 | vignette: > 10 | %\VignetteIndexEntry{Introduction to the tsfeatures package} 11 | %\VignetteEngine{knitr::rmarkdown} 12 | %\VignetteEncoding{UTF-8} 13 | --- 14 | 15 | ```{r setup, include = FALSE} 16 | knitr::opts_chunk$set( 17 | collapse = TRUE, 18 | comment = "#>", 19 | warning = FALSE, 20 | fig.align = "center" 21 | ) 22 | library(tsfeatures) 23 | ``` 24 | 25 | # tsfeatures 26 | 27 | The R package *tsfeatures* provides methods for extracting various features from time series data. 28 | 29 | ## Installation 30 | 31 | The **stable** version is on CRAN and can be installed in the usual way: 32 | 33 | ```{r cran-installation, eval = FALSE} 34 | install.packages("tsfeatures") 35 | ``` 36 | 37 | You can install the **development** version from [Github](https://github.com/robjhyndman/tsfeatures) with: 38 | 39 | ```{r gh-installation, eval = FALSE} 40 | # install.packages("devtools") 41 | devtools::install_github("robjhyndman/tsfeatures") 42 | ``` 43 | 44 | ## Usage 45 | 46 | The function `tsfeatures()` computes a tibble of time series features from a list of time series. 47 | 48 | ```{r} 49 | mylist <- list(sunspot.year, WWWusage, AirPassengers, USAccDeaths) 50 | tsfeatures(mylist) 51 | ``` 52 | 53 | The default functions that `tsfeatures` uses to compute features are `frequency`, `stl_features`, `entropy` and `acf_features`. Each of them can produce one or more features.
Detailed information of features included in the *tsfeatures* package are described below. Functions from other packages, or user-defined functions, may also be used. 54 | 55 | ```{r} 56 | # Function from outside of tsfeatures package being used 57 | is.monthly <- function(x){ 58 | frequency(x) == 12 59 | } 60 | tsfeatures(mylist, features = "is.monthly") 61 | ``` 62 | 63 | ## List of features 64 | 65 | ### acf_features {#acf_features} 66 | 67 | We compute the autocorrelation function of the series, the differenced series, and the twice-differenced series. `acf_features` produces a vector comprising the first autocorrelation coefficient in each case, and the sum of squares of the first 10 autocorrelation coefficients in each case. 68 | 69 | ```{r} 70 | acf_features(AirPassengers) 71 | ``` 72 | 73 | ### arch_stat {#arch_stat} 74 | 75 | `arch_stat` Computes a statistic based on the Lagrange Multiplier (LM) test of Engle ([1982](#ref)) for autoregressive conditional heteroscedasticity (ARCH). The statistic returned is the $R^2$ value of an autoregressive model of order specified as lags applied to $x^2$. 76 | 77 | ```{r} 78 | arch_stat(AirPassengers) 79 | ``` 80 | 81 | ### autocorr_features 82 | 83 | The autocorrelation feature set from software package hctsa 84 | 85 | ```{r} 86 | autocorr_features(AirPassengers) 87 | ``` 88 | 89 | * `ac_9` is the autocorrelation at lag 9. 90 | * `embed2_incircle` gives proportion of points inside a given circular boundary in a 2-d embedding space. 91 | * `firstmin_ac` returns the time of first minimum in the autocorrelation function. 92 | * `trev_num` returns the numerator of the trev function of a time series, a normalized nonlinear autocorrelation. The time lag is set to 1. 93 | * `motiftwo_entro3` finds local motifs in a binary symbolization of the time series. Coarse-graining is performed. Time-series values above its mean are given 1, and those below the mean are 0. 
`motiftwo_entro3` returns the entropy of words in the binary alphabet of length 3. 94 | * `walker_propcross` simulates a hypothetical walker moving through the time domain. The hypothetical particle (or 'walker') moves in response to values of the time series at each point. The walker narrows the gap between its value and that of the time series by 10. `walker_propcross` returns the fraction of time series length that walker crosses time series. 95 | 96 | ### binarize_mean {#binarize_mean} 97 | 98 | `binarize_mean` converts an input vector into a binarized version. Time-series values above its mean are given 1, and those below the mean are 0. 99 | 100 | ```{r} 101 | str(binarize_mean(AirPassengers)) 102 | ``` 103 | 104 | ### compengine feature set {#compengine} 105 | 106 | `compengine` calculates the features that have been used in the [CompEngine](https://www.comp-engine.org/) database, using a method introduced in package `hctsa`. 107 | 108 | The features involved can be grouped as autocorrelation, prediction, stationarity, distribution, and scaling, which can be computed using `autocorr_features`, `pred_features`, `station_features`, `dist_features`, and `scal_features`. 109 | 110 | ```{r} 111 | comp <- compengine(AirPassengers) 112 | knitr::kable(comp) 113 | ``` 114 | 115 | ### crossing_points {#crossing_points} 116 | 117 | `crossing points` are defined as the number of times a time series crosses the median line. 118 | 119 | ```{r} 120 | crossing_points(AirPassengers) 121 | ``` 122 | 123 | ### dist_features 124 | 125 | The distribution feature set from the hctsa package. 126 | 127 | 128 | The scaling feature set from `hctsa`. 129 | 130 | ```{r} 131 | dist_features(AirPassengers) 132 | ``` 133 | 134 | * `histogram_mode` measures the mode of the data vector using histograms with a given number of bins (default to 10) as suggestion.
135 | * `outlierinclude_mdrmd` measures the median as more and more outliers are included in the calculation according to a specified rule, of outliers being furthest from the mean. The threshold for including time-series data points in the analysis increases from zero to the maximum deviation, in increments of 0.01*sigma (by default), where sigma is the standard deviation of the time series. At each threshold, proportion of time series points included and median are calculated, and outputs from the algorithm measure how these statistical quantities change as more extreme points are included in the calculation. `outlierinclude_mdrmd` essentially returns the median of the median of range indices. 136 | 137 | ### entropy {#entropy} 138 | 139 | The spectral `entropy` is the Shannon entropy 140 | $$ 141 | -\int^\pi_{-\pi}\hat{f}(\lambda)\log\hat{f}(\lambda) d\lambda, 142 | $$ 143 | where $\hat{f}(\lambda)$ is an estimate of the spectral density of the data. This measures the “forecastability” of a time series, where low values indicate a high signal-to-noise ratio, and large values occur when a series is difficult to forecast. 144 | 145 | ```{r} 146 | entropy(AirPassengers) 147 | ``` 148 | 149 | ### firstzero_ac {#firstzero_ac} 150 | 151 | `firstzero_ac` returns the first zero crossing of the autocorrelation function. 152 | 153 | ```{r} 154 | firstzero_ac(AirPassengers) 155 | ``` 156 | 157 | ### flat_spots {#flat_spots} 158 | 159 | `flat_spots` are computed by dividing the sample space of a time series into ten equal-sized intervals, and computing the maximum run length within any single interval. 160 | 161 | ```{r} 162 | flat_spots(AirPassengers) 163 | ``` 164 | 165 | ### heterogeneity {#heterogeneity} 166 | 167 | The `heterogeneity` features measure the heterogeneity of the time series. 168 | First, we pre-whiten the time series to remove the mean, trend, and autoregressive (AR) information (Barbour & Parker [2014](#ref)). 
Then we fit a $GARCH(1,1)$ model to the pre-whitened time series, $x_t$, to measure for autoregressive conditional heteroskedasticity (ARCH) effects. The residuals from this model, $z_t$, are also measured for ARCH effects using a second $GARCH(1,1)$ model. 169 | 170 | * `arch_acf` is the sum of squares of the first 12 autocorrelations of $\{x^2_t\}$. 171 | * `garch_acf` is the sum of squares of the first 12 autocorrelations of $\{z^2_t\}$. 172 | * `arch_r2` is the $R^2$ value of an AR model applied to $\{x^2_t\}$. 173 | * `garch_r2` is the $R^2$ value of an AR model applied to $\{z^2_t\}$. 174 | 175 | The statistics obtained from $\{x^2_t\}$ are the ARCH effects, while those from $\{z^2_t\}$ are the GARCH effects. Note that the two $R^2$ values are used in the Lagrange-multiplier test of Engle ([1982](#ref)), and the sum of squared autocorrelations are used in the Ljung-Box test proposed by Ljung & Box ([1978](#ref)). 176 | 177 | ```{r} 178 | heterogeneity(AirPassengers) 179 | ``` 180 | 181 | ### holt_parameters and hw_parameters {#holt_hw} 182 | 183 | `holt_parameters` Estimate the smoothing parameter for the level-alpha and the smoothing parameter for the trend-beta of Holt's linear trend method. `hw_parameters` considers additive seasonal trend: ETS(A,A,A) model, returning a vector of 3 values: alpha, beta and gamma. 184 | 185 | ```{r} 186 | holt_parameters(AirPassengers) 187 | hw_parameters(AirPassengers) 188 | ``` 189 | 190 | ### hurst {#hurst} 191 | 192 | We use a measure of the long-term memory of a time series (`hurst`), computed as 0.5 plus the maximum likelihood estimate of the fractional differencing order $d$ given by Haslett & Raftery ([1989](#ref)). We add 0.5 to make it consistent with the Hurst coefficient. Note that the fractal dimension can be estimated as $D = 2 - \text{hurst}$. 
193 | 194 | ```{r} 195 | hurst(AirPassengers) 196 | ``` 197 | 198 | ### lumpiness and stability {#lumpiness_stability} 199 | 200 | `Stability` and `lumpiness` are two time series features based on tiled (non-overlapping) windows. Means or variances are produced for all tiled windows. Then `stability` is the variance of the means, while `lumpiness` is the variance of the variances. 201 | 202 | ```{r} 203 | stability(AirPassengers) 204 | lumpiness(AirPassengers) 205 | ``` 206 | 207 | ### max_level_shift, max_var_shift and max_kl_shift {#max_shift} 208 | 209 | These three functions compute features of a time series based on sliding (overlapping) windows. 210 | `max_level_shift` finds the largest mean shift between two consecutive windows. 211 | `max_var_shift` finds the largest variance shift between two consecutive windows. 212 | `max_kl_shift` finds the largest shift in Kullback-Leibler divergence between two consecutive windows. 213 | Each feature returns a vector of 2 values: the size of the shift, and the time index of the shift. 214 | 215 | ```{r} 216 | max_level_shift(AirPassengers) 217 | max_var_shift(AirPassengers) 218 | max_kl_shift(AirPassengers) 219 | ``` 220 | 221 | ### nonlinearity {#nonlinearity} 222 | 223 | The `nonlinearity` coefficient is computed using a modification of the statistic used in Teräsvirta’s nonlinearity test. Teräsvirta’s test uses a statistic $X^2=T\log(\text{SSE}1/\text{SSE}0)$ where SSE1 and SSE0 are the sum of squared residuals from a nonlinear and linear autoregression respectively. This is non-ergodic, so instead, we define it as $10X^2/T$ which will converge to a value indicating the extent of nonlinearity as $T\rightarrow\infty$. This takes large values when the series is nonlinear, and values around 0 when the series is linear.
224 | 225 | ```{r} 226 | nonlinearity(AirPassengers) 227 | ``` 228 | 229 | ### pacf_features {#pacf_features} 230 | 231 | We compute the partial autocorrelation function of the series, the differenced series, and the second-order differenced series. Then `pacf_features` produces a vector comprising the sum of squares of the first 5 partial autocorrelation coefficients in each case. 232 | 233 | ```{r} 234 | pacf_features(AirPassengers) 235 | ``` 236 | 237 | ### pred_features 238 | 239 | The prediction feature set from the `hctsa` package. The first two elements are obtained from `localsimple_taures` with different forecast methods (the mean, and an LS fit). The third is from `sampen_first`. 240 | 241 | ```{r} 242 | pred_features(AirPassengers) 243 | ``` 244 | 245 | * Simple predictors using the past trainLength values of the time series to predict its next value. `localsimple_taures` returns the first zero crossing of the autocorrelation function of the residuals from this simple local time-series forecast. 246 | * `sampen_first` returns the first Sample Entropy of a time series where the embedding dimension is set to 5 and the threshold is set to 0.3. `sampenc` is the underlying function to calculate the first sample entropy with optional dimension and threshold settings. 247 | 248 | ```{r} 249 | sampenc(AirPassengers, M = 5, r = 0.3) 250 | ``` 251 | 252 | ### scal_features 253 | 254 | The scaling feature set from `hctsa`. 255 | 256 | ```{r} 257 | scal_features(AirPassengers) 258 | ``` 259 | 260 | * `fluctanal_prop_r1` implements fluctuation analysis. It fits a polynomial of order 1 and then returns the range. The order of fluctuations is 2, corresponding to root mean square fluctuations. 261 | 262 | ### station_features 263 | 264 | The stationarity feature set from `hctsa`. 265 | 266 | ```{r} 267 | station_features(AirPassengers) 268 | ``` 269 | 270 | * `std1st_der` returns the standard deviation of the first derivative of the time series.
271 | * 100 time-series segments of length l are selected at random from the time series and the mean of the first zero-crossings of the autocorrelation function in each segment is calculated using `spreadrandomlocal_meantaul`. 272 | 273 | ### stl_features {#stl_features} 274 | 275 | `stl_features` Computes various measures of trend and seasonality of a time series based on an STL decomposition. The `mstl` function is used to do the decomposition. 276 | 277 | `nperiods` is the number of seasonal periods in the data (determined by the frequency of observation, not the observations themselves) and set to 1 for non-seasonal data. `seasonal_period` is a vector of seasonal periods and set to 1 for non-seasonal data. 278 | 279 | The size and location of the peaks and troughs in the seasonal component are used to compute strength of peaks (`peak`) and strength of trough (`trough`). 280 | 281 | The rest of the features are modifications of features used in Kang, Hyndman & Smith-Miles ([2017](#ref)). We extend the STL decomposition approach (Cleveland et al. [1990](#ref)) to handle multiple seasonalities. Thus, the decomposition contains a trend, up to $M$ seasonal components and a remainder component: 282 | $$ 283 | x_t=f_t+s_{1,t}+\cdots+s_{M,t}+e_t, 284 | $$ 285 | where $f_t$ is the smoothed trend component, $s_{i,t}$ is the $i$th seasonal component and $e_t$ is a remainder component. The components are estimated iteratively. Let $s^{(k)}_{i,t}$ be the estimate of $s_{i,t}$ at the $k$th iteration, with initial values given as $s^{(0)}_{i,t}=0$. Then we apply an STL decomposition to $x_t-\sum_{j=1,\,j\neq i}^{M}s^{(k-1)}_{j,t}$ to obtain updated estimates $s^{(k)}_{i,t}$ for $k=1,2,\ldots$. In practice, this converges quickly and only two iterations are required. To allow the procedure to be applied automatically, we set the seasonal window span for STL to be 21 in all cases.
For a non-seasonal time series, we simply estimate $x_t=f_t+e_t$ where $f_t$ is computed using Friedman’s “super smoother” (Friedman [1984](#ref)). 286 | 287 | Strength of trend (`trend`) and strength of seasonality (`seasonal.strength`) are defined as 288 | $$ 289 | \text{trend} = 1-\frac{\text{Var}(e_t)}{\text{Var}(f_t+e_t)}\quad \text{and}\quad \text{seasonal.strength}=1-\frac{\text{Var}(e_t)}{\text{Var}(s_{i,t}+e_t)}. 290 | $$ 291 | If their values are less than 0, they are set to 0, while values greater than 1 are set to 1. For non-seasonal time series `seasonal.strength` is 0. For seasonal time series, `seasonal.strength` is an M-vector, where M is the number of periods. This is analogous to the way the strength of trend and seasonality were defined in Wang, Smith & Hyndman ([2006](#ref)), Hyndman, Wang & Laptev ([2015](#ref)) and Kang, Hyndman & Smith-Miles ([2017](#ref)). 292 | 293 | `spike` measures the “spikiness” of a time series, and is computed as the variance of the leave-one-out variances of the remainder component $e_t$. 294 | 295 | `linearity` and `curvature` measure the linearity and curvature of a time series calculated based on the coefficients of an orthogonal quadratic regression. 296 | 297 | We compute the autocorrelation function of $e_t$, and `e_acf1` and `e_acf10` contain the first autocorrelation coefficient and the sum of the first ten squared autocorrelation coefficients. 298 | 299 | ```{r} 300 | stl_features(AirPassengers) 301 | ``` 302 | 303 | ### unitroot_kpss and unitroot_pp {#unitroot} 304 | 305 | `unitroot_kpss` is a vector comprising the statistic for the KPSS unit root test with linear trend and lag one, and `unitroot_pp` is the statistic for the “Z-alpha” version of PP unit root test with constant trend and lag one. 306 | 307 | ```{r} 308 | unitroot_kpss(AirPassengers) 309 | unitroot_pp(AirPassengers) 310 | ``` 311 | 312 | ### zero_proportion 313 | 314 | Computes the proportion of zeros in a time series.
315 | 316 | ```{r} 317 | zero_proportion(AirPassengers) 318 | ``` 319 | 320 | ## Reproducing papers 321 | 322 | ### Hyndman, Wang and Laptev (ICDM 2015) 323 | 324 | Here we replicate the analysis in Hyndman, Wang & Laptev (ICDM 2015). However, note that crossing_points, peak and trough are defined differently in the *tsfeatures* package than in the Hyndman et al (2015) paper. Other features are the same. 325 | 326 | ```{r yahoo, message=FALSE} 327 | library(tsfeatures) 328 | library(dplyr) 329 | 330 | yahoo <- yahoo_data() 331 | ``` 332 | 333 | ```{r hwl, eval=FALSE} 334 | hwl <- bind_cols( 335 | tsfeatures(yahoo, 336 | c("acf_features","entropy","lumpiness", 337 | "flat_spots","crossing_points")), 338 | tsfeatures(yahoo,"stl_features", s.window='periodic', robust=TRUE), 339 | tsfeatures(yahoo, "max_kl_shift", width=48), 340 | tsfeatures(yahoo, 341 | c("mean","var"), scale=FALSE, na.rm=TRUE), 342 | tsfeatures(yahoo, 343 | c("max_level_shift","max_var_shift"), trim=TRUE)) %>% 344 | select(mean, var, x_acf1, trend, linearity, curvature, 345 | seasonal_strength, peak, trough, 346 | entropy, lumpiness, spike, max_level_shift, max_var_shift, flat_spots, 347 | crossing_points, max_kl_shift, time_kl_shift) 348 | ``` 349 | 350 | ```{r hwlsave, eval=FALSE, echo=FALSE} 351 | # Now store the computed results for later use 352 | save(hwl, file="../extra-data/hwl.rda") 353 | ``` 354 | 355 | ```{r hwlquick, include=FALSE} 356 | # This replicates the above but uses pre-stored data to speed things up 357 | tmp <- tempfile() 358 | utils::download.file("https://github.com/robjhyndman/tsfeatures/raw/master/extra-data/hwl.rda", tmp) 359 | load(tmp) 360 | ``` 361 | 362 | ```{r yahoographics} 363 | # 2-d Feature space 364 | library(ggplot2) 365 | hwl_pca <- hwl %>% 366 | na.omit() %>% 367 | prcomp(scale=TRUE) 368 | hwl_pca$x %>% 369 | as_tibble() %>% 370 | ggplot(aes(x=PC1, y=PC2)) + 371 | geom_point() 372 | ``` 373 | 374 | ### Kang, Hyndman & Smith-Miles (IJF 2017) 375 | 376 | Compute 
the features used in Kang, Hyndman & Smith-Miles (IJF 2017). 377 | Note that the trend and ACF1 are computed differently for non-seasonal data in the *tsfeatures* package than in the Kang et al (2017). `tsfeatures` uses `mstl` which uses `supsmu` for the trend calculation with non-seasonal data, whereas Kang et al used a penalized regression spline computed using `mgcv` instead. Other features are the same. 378 | 379 | ```{r ijf2017, message=FALSE} 380 | library(tsfeatures) 381 | library(dplyr) 382 | library(tidyr) 383 | library(forecast) 384 | 385 | M3data <- purrr::map(Mcomp::M3, 386 | function(x) { 387 | tspx <- tsp(x$x) 388 | ts(c(x$x,x$xx), start=tspx[1], frequency=tspx[3]) 389 | }) 390 | khs_stl <- function(x,...) { 391 | lambda <- BoxCox.lambda(x, lower=0, upper=1, method='loglik') 392 | y <- BoxCox(x, lambda) 393 | c(stl_features(y, s.window='periodic', robust=TRUE, ...), lambda=lambda) 394 | } 395 | ``` 396 | 397 | ```{r khs, eval=FALSE} 398 | khs <- bind_cols( 399 | tsfeatures(M3data, c("frequency", "entropy")), 400 | tsfeatures(M3data, "khs_stl", scale=FALSE)) %>% 401 | select(frequency, entropy, trend, seasonal_strength, e_acf1, lambda) %>% 402 | replace_na(list(seasonal_strength=0)) %>% 403 | rename( 404 | Frequency = frequency, 405 | Entropy = entropy, 406 | Trend = trend, 407 | Season = seasonal_strength, 408 | ACF1 = e_acf1, 409 | Lambda = lambda) %>% 410 | mutate(Period = as.factor(Frequency)) 411 | ``` 412 | 413 | ```{r khssave, eval=FALSE, echo=FALSE} 414 | # Now store the computed results for later use 415 | save(khs, file="../extra-data/khs.rda") 416 | ``` 417 | 418 | ```{r khsquick, include=FALSE} 419 | # This replicates the above but uses pre-stored data to speed things up 420 | tmp <- tempfile() 421 | utils::download.file("https://github.com/robjhyndman/tsfeatures/raw/master/extra-data/khs.rda", tmp) 422 | load(tmp) 423 | ``` 424 | 425 | ```{r ijf2017graphs, message=FALSE} 426 | # Fig 1 of paper 427 | khs %>% 428 | select(Period, Entropy, 
Trend, Season, ACF1, Lambda) %>% 429 | GGally::ggpairs() 430 | 431 | # 2-d Feature space (Top of Fig 2) 432 | khs_pca <- khs %>% 433 | select(-Period) %>% 434 | prcomp(scale=TRUE) 435 | khs_pca$x %>% 436 | as_tibble() %>% 437 | bind_cols(Period=khs$Period) %>% 438 | ggplot(aes(x=PC1, y=PC2)) + 439 | geom_point(aes(col=Period)) 440 | ``` 441 | 442 | ## Resources {#ref} 443 | 444 | [Barbour, A. J., & Parker, R. L. (2014). psd: Adaptive, sine multitaper power spectral density estimation for R. Computers & Geosciences, 63, 1-8.](https://doi.org/10.1016/j.cageo.2013.09.015) 445 | 446 | [Cleveland, R. B., Cleveland, W. S., McRae, J. E., & Terpenning, I. (1990). STL: A Seasonal-Trend Decomposition. Journal of Official Statistics, 6(1), 3-73.](https://www.proquest.com/docview/1266805989) 447 | 448 | [Engle, R. F. (1982). Autoregressive conditional heteroscedasticity with estimates of the variance of United Kingdom inflation. Econometrica: Journal of the Econometric Society, 987-1007.](https://doi.org/10.2307/1912773) 449 | 450 | [Friedman, JH (1984). _A variable span scatterplot smoother_. Technical Report 5. Laboratory for Computational Statistics, Stanford University.](https://www.slac.stanford.edu/pubs/slacpubs/3250/slac-pub-3477.pdf) 451 | 452 | [Haslett, J., & Raftery, A. E. (1989). Space-time modelling with long-memory dependence: Assessing Ireland's wind power resource. Applied Statistics, 1-50.](https://doi.org/10.2307/2347679 ) 453 | 454 | [Hyndman, R. J., Wang, E., & Laptev, N. (2015, November). Large-scale unusual time series detection. In Data Mining Workshop (ICDMW), 2015 IEEE International Conference on (pp. 1616-1619). IEEE.](https://doi.org/10.1109/ICDMW.2015.104) 455 | 456 | [Kang, Y., Hyndman, R. J., & Li, F. (2018). GRATIS: GeneRAting TIme Series with diverse and controllable characteristics.](https://robjhyndman.com/publications/gratis/) 457 | 458 | [Kang, Y., Hyndman, R. J., & Smith-Miles, K. (2017). 
Visualising forecasting algorithm performance using time series instance spaces. International Journal of Forecasting, 33(2), 345-358.](https://doi.org/10.1016/j.ijforecast.2016.09.004) 459 | 460 | [Ljung, G. M., & Box, G. E. (1978). On a measure of lack of fit in time series models. Biometrika, 65(2), 297-303.](https://doi.org/10.1093/biomet/65.2.297 ) 461 | 462 | [Wang, X, KA Smith & RJ Hyndman (2006). Characteristic-based clustering for time series data. Data Mining and Knowledge Discovery 13(3), 335–364.](https://doi.org/10.1007/s10618-005-0039-x) 463 | 464 | ## License 465 | 466 | This package is free and open source software, licensed under GPL-3. 467 | -------------------------------------------------------------------------------- /R/compengine.R: -------------------------------------------------------------------------------- 1 | #' CompEngine feature set 2 | #' 3 | #' Calculate the features that have been used in CompEngine database, using method introduced in package 4 | #' \code{hctsa}. 5 | #' 6 | #' The features involved can be grouped as \code{autocorrelation}, 7 | #' \code{prediction}, \code{stationarity}, \code{distribution}, and \code{scaling}. 8 | #' 9 | #' @param x the input time series 10 | #' @return a vector with CompEngine features 11 | #' @seealso \code{\link{autocorr_features}} 12 | #' @seealso \code{\link{pred_features}} 13 | #' @seealso \code{\link{station_features}} 14 | #' @seealso \code{\link{dist_features}} 15 | #' @seealso \code{\link{scal_features}} 16 | #' @references B.D. Fulcher and N.S. Jones. hctsa: A computational framework for automated time-series phenotyping using massive feature extraction. Cell Systems 5, 527 (2017). 17 | #' @references B.D. Fulcher, M.A. Little, N.S. Jones Highly comparative time-series analysis: the empirical structure of time series and their methods. J. Roy. Soc. Interface 10, 83 (2013). 
#' @author Yangzhuoran Yang
#' @export
compengine <- function(x) {
  # Run every CompEngine feature group on the same series, in the documented
  # order, and splice the named results into one vector.
  feature_sets <- list(
    autocorr_features,
    pred_features,
    station_features,
    dist_features,
    scal_features
  )
  do.call(c, lapply(feature_sets, function(feature_set) feature_set(x)))
}

#' The autocorrelation feature set from software package \code{hctsa}
#'
#' Calculate the features that grouped as autocorrelation set,
#' which have been used in CompEngine database, using method introduced in package \code{hctsa}.
#'
#' Features in this set are \code{embed2_incircle_1},
#' \code{embed2_incircle_2},
#' \code{ac_9},
#' \code{firstmin_ac},
#' \code{trev_num},
#' \code{motiftwo_entro3},
#' and \code{walker_propcross}.
#'
#' @param x the input time series
#' @return a vector with autocorrelation features
#' @seealso \code{\link{embed2_incircle}}
#' @seealso \code{\link{ac_9}}
#' @seealso \code{\link{firstmin_ac}}
#' @seealso \code{\link{trev_num}}
#' @seealso \code{\link{motiftwo_entro3}}
#' @seealso \code{\link{walker_propcross}}
#' @references B.D. Fulcher and N.S. Jones. hctsa: A computational framework for automated time-series phenotyping using massive feature extraction. Cell Systems 5, 527 (2017).
#' @references B.D. Fulcher, M.A. Little, N.S. Jones Highly comparative time-series analysis: the empirical structure of time series and their methods. J. Roy. Soc. Interface 10, 83 (2013).
#' @author Yangzhuoran Yang
#' @export
autocorr_features <- function(x) {
  # Compute the full-length autocorrelation once and hand it to every feature
  # that accepts a precomputed `acfv`, so it is not recomputed per feature.
  full_acf <- stats::acf(x, length(x) - 1, plot = FALSE, na.action = na.pass)
  c(
    embed2_incircle_1 = embed2_incircle(x, boundary = 1, acfv = full_acf),
    embed2_incircle_2 = embed2_incircle(x, boundary = 2, acfv = full_acf),
    ac_9 = ac_9(x, acfv = full_acf),
    firstmin_ac = firstmin_ac(x, acfv = full_acf),
    trev_num = trev_num(x),
    motiftwo_entro3 = motiftwo_entro3(x),
    walker_propcross = walker_propcross(x)
  )
}

#' The prediction feature set from software package \code{hctsa}
#'
#' Calculate the features that grouped as prediction set,
#' which have been used in CompEngine database, using method introduced in package \code{hctsa}.
#'
#' Features in this set are \code{localsimple_mean1},
#' \code{localsimple_lfitac},
#' and \code{sampen_first}.
#'
#' @param x the input time series
#' @return a vector with prediction features
#' @seealso \code{\link{localsimple_taures}}
#' @seealso \code{\link{sampen_first}}
#' @references B.D. Fulcher and N.S. Jones. hctsa: A computational framework for automated time-series phenotyping using massive feature extraction. Cell Systems 5, 527 (2017).
#' @references B.D. Fulcher, M.A. Little, N.S. Jones Highly comparative time-series analysis: the empirical structure of time series and their methods. J. Roy. Soc. Interface 10, 83 (2013).
#' @author Yangzhuoran Yang
#' @export
pred_features <- function(x) {
  # Residual-ACF zero crossings for the two local forecasters, followed by the
  # sample-entropy feature.
  c(
    localsimple_mean1 = localsimple_taures(x, forecastMeth = "mean"),
    localsimple_lfitac = localsimple_taures(x, forecastMeth = "lfit"),
    sampen_first = sampen_first(x)
  )
}

#' The stationarity feature set from software package \code{hctsa}
#'
#' Calculate the features that grouped as stationarity set,
#' which have been used in CompEngine database, using method introduced in package \code{hctsa}.
#'
#' Features in this set are \code{std1st_der},
#' \code{spreadrandomlocal_meantaul_50},
#' and \code{spreadrandomlocal_meantaul_ac2}.
#'
#' @param x the input time series
#' @return a vector with stationarity features
#' @seealso \code{\link{std1st_der}}
#' @seealso \code{\link{spreadrandomlocal_meantaul}}
#' @references B.D. Fulcher and N.S. Jones. hctsa: A computational framework for automated time-series phenotyping using massive feature extraction. Cell Systems 5, 527 (2017).
#' @references B.D. Fulcher, M.A. Little, N.S. Jones Highly comparative time-series analysis: the empirical structure of time series and their methods. J. Roy. Soc. Interface 10, 83 (2013).
#' @author Yangzhuoran Yang
#' @export
station_features <- function(x) {
  # The two bootstrap features draw random segments, so they are evaluated in
  # this fixed order (fixed length first, then the ACF-based length).
  c(
    std1st_der = std1st_der(x),
    spreadrandomlocal_meantaul_50 = spreadrandomlocal_meantaul(x, l = 50),
    spreadrandomlocal_meantaul_ac2 = spreadrandomlocal_meantaul(x, l = "ac2")
  )
}

#' The distribution feature set from software package \code{hctsa}
#'
#' Calculate the features that grouped as distribution set,
#' which have been used in CompEngine database, using method introduced in package \code{hctsa}.
#'
#' Features in this set are \code{histogram_mode_10}
#' and \code{outlierinclude_mdrmd}.
#'
#' @param x the input time series
#' @return a vector with distribution features
#' @seealso \code{\link{histogram_mode}}
#' @seealso \code{\link{outlierinclude_mdrmd}}
#' @references B.D. Fulcher and N.S. Jones. hctsa: A computational framework for automated time-series phenotyping using massive feature extraction. Cell Systems 5, 527 (2017).
#' @references B.D. Fulcher, M.A. Little, N.S. Jones Highly comparative time-series analysis: the empirical structure of time series and their methods. J. Roy. Soc. Interface 10, 83 (2013).
#' @author Yangzhuoran Yang
#' @export
dist_features <- function(x) {
  # histogram_mode() is called with its own default number of bins.
  c(
    histogram_mode_10 = histogram_mode(x),
    outlierinclude_mdrmd = outlierinclude_mdrmd(x)
  )
}

#' The scaling feature set from software package \code{hctsa}
#'
#' Calculate the features that grouped as scaling set,
#' which have been used in CompEngine database, using method introduced in package \code{hctsa}.
#'
#' Feature in this set is \code{fluctanal_prop_r1}.
#'
#' @param x the input time series
#' @return a vector with scaling features
#' @seealso \code{\link{fluctanal_prop_r1}}
#' @references B.D. Fulcher and N.S. Jones. hctsa: A computational framework for automated time-series phenotyping using massive feature extraction. Cell Systems 5, 527 (2017).
#' @references B.D. Fulcher, M.A. Little, N.S. Jones Highly comparative time-series analysis: the empirical structure of time series and their methods. J. Roy. Soc. Interface 10, 83 (2013).
#' @author Yangzhuoran Yang
#' @export
scal_features <- function(x) {
  # Single-feature set; the element is named so it combines cleanly with the
  # other feature groups.
  c(fluctanal_prop_r1 = fluctanal_prop_r1(x))
}

# autocorr ----------------------------------------------------------------

# CO_Embed2_Basic_tau_incircle_1
# CO_Embed2_Basic_tau_incircle_2
#' Points inside a given circular boundary in a 2-d embedding space from software package \code{hctsa}
#'
#' The time lag is set to the first zero crossing of the autocorrelation function
#' (see \code{\link{firstzero_ac}}). A point \eqn{(y_t, y_{t+\tau})} is counted when
#' \eqn{y_t^2 + y_{t+\tau}^2} is strictly less than \code{boundary}.
#'
#' @param y the input time series
#' @param boundary the given circular boundary, setting to 1 or 2 in CompEngine.
#' If \code{NULL} (the default), 1 is used and a warning is issued.
#' @param acfv autocorrelation object as returned by \code{stats::acf} (its \code{acf}
#' element is used); supply it to avoid repeated computation.
#' @return the proportion of points inside a given circular boundary
#' @references B.D. Fulcher and N.S. Jones. hctsa: A computational framework for automated time-series phenotyping using massive feature extraction. Cell Systems 5, 527 (2017).
#' @references B.D. Fulcher, M.A. Little, N.S. Jones Highly comparative time-series analysis: the empirical structure of time series and their methods. J. Roy. Soc. Interface 10, 83 (2013).
#' @author Yangzhuoran Yang
#' @export
embed2_incircle <- function(y, boundary = NULL, acfv = stats::acf(y, length(y) - 1, plot = FALSE, na.action = na.pass)) {
  if (is.null(boundary)) {
    warning("`embed2_incircle()` using `boundary = 1`. Set value with `boundary`.")
    boundary <- 1
  }
  tau <- firstzero_ac(y, acfv)
  N <- length(y) - tau            # number of (y_t, y_{t + tau}) pairs
  xt <- y[1:N]                    # part of the time series
  xtp <- y[(1 + tau):length(y)]   # time-lagged time series

  # CIRCLES (points inside a given circular boundary); pairs containing a
  # missing value are not counted but still contribute to the denominator.
  sum(xtp^2 + xt^2 < boundary, na.rm = TRUE) / N
}

# CO_firstzero_ac
#' The first zero crossing of the autocorrelation function from software package \code{hctsa}
#'
#' Searches the supplied autocorrelations (by default all lags up to the length
#' of the time series minus one) for the first lag with a negative value.
#'
#' @param y the input time series
#' @param acfv autocorrelation object as returned by \code{stats::acf} (its \code{acf}
#' element is used); supply it to avoid repeated computation.
#' @return The first lag at which the autocorrelation is negative, or 0 when no
#' lag has a negative autocorrelation (including when all autocorrelations are missing).
#' @references B.D. Fulcher and N.S. Jones. hctsa: A computational framework for automated time-series phenotyping using massive feature extraction. Cell Systems 5, 527 (2017).
#' @references B.D. Fulcher, M.A. Little, N.S. Jones Highly comparative time-series analysis: the empirical structure of time series and their methods. J. Roy. Soc. Interface 10, 83 (2013).
#' @author Yangzhuoran Yang
#' @export
firstzero_ac <- function(y, acfv = stats::acf(y, N - 1, plot = FALSE, na.action = na.pass)) {
  # `N` has to exist before `acfv` is first touched: the default value of
  # `acfv` refers to it and is only evaluated lazily on the next line.
  N <- length(y)
  # Drop lag 0; which() ignores missing autocorrelations and yields positive
  # lag indices only, so an empty result is the sole "not found" case.
  negative_lags <- which(acfv$acf[-1] < 0)
  if (length(negative_lags) == 0L) {
    return(0)
  }
  negative_lags[1]
}

# ac_9
#' Autocorrelation at lag 9. Included for completion and consistency.
#'
#' @param y the input time series
#' @param acfv autocorrelation object as returned by \code{stats::acf} (its \code{acf} element is used); supply it to avoid repeated computation.
#' @return autocorrelation at lag 9
#' @references B.D. Fulcher and N.S. Jones. hctsa: A computational framework for automated time-series phenotyping using massive feature extraction. Cell Systems 5, 527 (2017).
#' @references B.D. Fulcher, M.A. Little, N.S. Jones Highly comparative time-series analysis: the empirical structure of time series and their methods. J. Roy. Soc. Interface 10, 83 (2013).
#' @author Yangzhuoran Yang
#' @export
ac_9 <- function(y, acfv = stats::acf(y, 9, plot = FALSE, na.action = na.pass)) {
  # The stored autocorrelations start at lag 0, so lag 9 is the 10th entry.
  # `y` is only needed to build the default `acfv`.
  autocorrelations <- acfv$acf
  autocorrelations[10]
}

# CO_firstmin_ac
#' Time of first minimum in the autocorrelation function from software package \code{hctsa}
#'
#'
#' @param x the input time series
#' @param acfv vector of autocorrelation, if exist, used to avoid repeated computation.
#' @return The lag of the first minimum
#' @references B.D. Fulcher and N.S. Jones. hctsa: A computational framework for automated time-series phenotyping using massive feature extraction. Cell Systems 5, 527 (2017).
#' @references B.D. Fulcher, M.A. Little, N.S. Jones Highly comparative time-series analysis: the empirical structure of time series and their methods. J. Roy. Soc. Interface 10, 83 (2013).
#' @author Yangzhuoran Yang
#' @examples
#' firstmin_ac(WWWusage)
#' @export
firstmin_ac <- function(x, acfv = stats::acf(x, lag.max = N - 1, plot = FALSE, na.action = na.pass)) {
  # hctsa uses autocorr in MatLab to calculate autocorrelation
  # `N` has to exist before `acfv` is first touched: the default value of
  # `acfv` refers to it and is only evaluated lazily below.
  N <- length(x)
  # Autocorrelations at lags 1, 2, ... (lag 0 dropped).
  # Possible delay when sample size is too big.
  autoCorr <- as.numeric(acfv$acf[-1])
  if (length(autoCorr) == 0L) {
    # A series with a single observation has no positive lag to inspect;
    # report it the same way as an undefined autocorrelation.
    warning("No minimum was found.")
    return(NA)
  }
  for (i in seq_along(autoCorr)) {
    if (is.na(autoCorr[i])) {
      warning("No minimum was found.")
      return(NA)
    }
    if (i == 2 && autoCorr[2] > autoCorr[1]) {
      # The ACF already rises after lag 1, so lag 1 is the first minimum.
      return(1)
    } else if (i > 2 && autoCorr[i - 2] > autoCorr[i - 1] && autoCorr[i - 1] < autoCorr[i]) {
      # Lag i - 1 is lower than both of its neighbours.
      return(i - 1)
    }
  }
  # No local minimum found: fall back to the largest lag.
  N - 1
}

# CO_trev_1_num
#' Normalized nonlinear autocorrelation, the numerator of the trev function of a time series from software package \code{hctsa}
#'
#' Calculates the numerator of the trev function, a normalized nonlinear autocorrelation,
#' The time lag is set to 1.
#'
#'
#' @param y the input time series
#' @return the numerator of the trev function of a time series
#' @references B.D. Fulcher and N.S. Jones. hctsa: A computational framework for automated time-series phenotyping using massive feature extraction. Cell Systems 5, 527 (2017).
#' @references B.D. Fulcher, M.A. Little, N.S. Jones Highly comparative time-series analysis: the empirical structure of time series and their methods. J. Roy. Soc. Interface 10, 83 (2013).
#' @author Yangzhuoran Yang
#' @examples
#' trev_num(WWWusage)
#' @export
trev_num <- function(y) {
  n_obs <- length(y)
  # Lag-1 increments y[t + 1] - y[t], formed from two shifted copies of the series.
  increments <- y[2:n_obs] - y[1:(n_obs - 1)]
  # Mean cubed increment; increments involving missing values are ignored.
  mean(increments^3, na.rm = TRUE)
}

# SB_MotifTwo_mean_hhh
#' Local motifs in a binary symbolization of the time series from software package \code{hctsa}
#'
#'
#' Coarse-graining is performed. Time-series values above its mean are given 1,
#' and those below the mean are 0.
#'
#' @param y the input time series
#' @return Entropy of words in the binary alphabet of length 3.
#' @references B.D. Fulcher and N.S. Jones. hctsa: A computational framework for automated time-series phenotyping using massive feature extraction. Cell Systems 5, 527 (2017).
#' @references B.D. Fulcher, M.A. Little, N.S. Jones Highly comparative time-series analysis: the empirical structure of time series and their methods. J. Roy. Soc. Interface 10, 83 (2013).
#' @author Yangzhuoran Yang
#' @examples
#' motiftwo_entro3(WWWusage)
#' @export
#'
motiftwo_entro3 <- function(y) {
  bits <- binarize_mean(y)
  n_bits <- length(bits)
  if (n_bits < 5) warning("Time series too short")

  # Trim the final position so a mask lines up with the next symbol.
  drop_last <- function(v) v[1:(length(v) - 1)]

  # One-symbol masks: does the word start with "down" (0) or "up" (1)?
  down <- drop_last(bits == 0)
  up <- drop_last(bits == 1)

  # Two-symbol masks, in the order dd, du, ud, uu.
  second <- bits[2:n_bits]
  pairs <- list(
    dd = down & second == 0,
    du = down & second == 1,
    ud = up & second == 0,
    uu = up & second == 1
  )
  pairs <- lapply(pairs, drop_last)

  # Extend each pair by the third symbol and record the relative frequency of
  # every three-symbol word: ddd, ddu, dud, duu, udd, udu, uud, uuu.
  third <- bits[3:n_bits]
  word_freq <- unlist(
    lapply(pairs, function(mask) {
      c(mean(mask & third == 0), mean(mask & third == 1))
    }),
    use.names = FALSE
  )

  f_entropy(word_freq)
}

# BF_BF_binarize_mean
#' Converts an input vector into a binarized version from software package \code{hctsa}
#'
#' @param y the input time series
#' @return Time-series values above its mean are given 1, and those below the mean are 0.
#' @references B.D. Fulcher and N.S. Jones. hctsa: A computational framework for automated time-series phenotyping using massive feature extraction. Cell Systems 5, 527 (2017).
#' @references B.D. Fulcher, M.A. Little, N.S. Jones Highly comparative time-series analysis: the empirical structure of time series and their methods. J. Roy. Soc. Interface 10, 83 (2013).
#' @author Yangzhuoran Yang
#' @export
binarize_mean <- function(y) {
  # Positions strictly above the mean; values equal to the mean (and any
  # position whose comparison is missing) stay at 0.
  above_mean <- which(y - mean(y) > 0)
  binary <- rep(0, length(y))
  binary[above_mean] <- 1
  binary
}

f_entropy <- function(x) {
  # Entropy of a set of proportions; zero (and negative) entries are skipped,
  # which implements the convention 0 * log(0) = 0.
  positive <- x[x > 0]
  -sum(positive * log(positive))
}

# PH_Walker_prop_01_sw_propcross
#' Simulates a hypothetical walker moving through the time domain from software package \code{hctsa}
#'
#' The hypothetical particle (or 'walker') moves in response to values of the
#' time series at each point.
#' The walker narrows the gap between its value and that
#' of the time series by 10%.
#'
#'
#' @param y the input time series
#' @return fraction of time series length that walker crosses time series
#' @references B.D. Fulcher and N.S. Jones. hctsa: A computational framework for automated time-series phenotyping using massive feature extraction. Cell Systems 5, 527 (2017).
#' @references B.D. Fulcher, M.A. Little, N.S. Jones Highly comparative time-series analysis: the empirical structure of time series and their methods. J. Roy. Soc. Interface 10, 83 (2013).
#' @author Yangzhuoran Yang
#' @export
#'
#'
walker_propcross <- function(y) {
  N <- length(y)
  # At least two observations are needed to have one step to cross on; fail
  # with a clear message instead of an obscure assignment error.
  if (N < 2) stop("Time series is too short to compute walker crossings")
  p <- 0.1
  # walker starts at zero and narrows the gap between its position
  # and the time series value at that point by 0.1, to give the value at the subsequent time step
  w <- numeric(N) # w[1] = 0: start at zero
  for (i in seq_len(N - 1) + 1) {
    w[i] <- w[i - 1] + p * (y[i - 1] - w[i - 1])
  }
  # A crossing happens when the walker-minus-series gap changes sign between
  # consecutive time points; steps involving missing values are not counted.
  gap <- w - y
  sum(gap[-N] * gap[-1] < 0, na.rm = TRUE) / (N - 1)
}

# pred --------------------------------------------------------------------

# FC_localsimple_mean1_taures
# FC_localsimple_lfit_taures
#' The first zero crossing of the autocorrelation function of the residuals from Simple local time-series forecasting from software package \code{hctsa}
#'
#' Simple predictors using the past trainLength values of the time series to
#' predict its next value.
#'
#' @param y the input time series
#' @param forecastMeth the forecasting method, default to \code{mean}.
#' \code{mean}: local mean prediction using the past trainLength time-series values.
#' \code{lfit}: local linear prediction using the past trainLength time-series values.
#' @param trainLength the number of time-series values to use to forecast the next value.
#' Default to 1 when using method \code{mean} and to the first zero crossing of the
#' autocorrelation function of \code{y} when using method \code{lfit}.
#' @return The first zero crossing of the autocorrelation function of the residuals
#' @export
localsimple_taures <- function(y, forecastMeth = c("mean", "lfit"), trainLength = NULL) {
  forecastMeth <- match.arg(forecastMeth)

  # Training window length: use the caller's value when supplied, otherwise
  # the method-specific default (1 for "mean", first ACF zero crossing for "lfit").
  if (is.null(trainLength)) {
    lp <- switch(forecastMeth, mean = 1, lfit = firstzero_ac(y))
  } else {
    valid_length <- is.numeric(trainLength) && length(trainLength) == 1L &&
      !is.na(trainLength) && trainLength >= 1 && trainLength == round(trainLength)
    if (!valid_length) {
      stop("`trainLength` must be a single positive whole number")
    }
    lp <- trainLength
  }

  N <- length(y)
  if (lp >= N) {
    stop("Time series too short for forecasting in `localsimple_taures`")
  }

  # Indices of the observations that are forecast from their `lp` predecessors.
  evalr <- (lp + 1):N
  res <- numeric(length(evalr))

  if (forecastMeth == "mean") {
    for (i in seq_along(evalr)) {
      # prediction (mean of the previous lp values) - value
      res[i] <- mean(y[(evalr[i] - lp):(evalr[i] - 1)]) - y[evalr[i]]
    }
  }
  if (forecastMeth == "lfit") {
    # NOTE(review): with the default window, firstzero_ac(y) can be 0, in which
    # case `1:lp` is c(1, 0); that pre-existing behaviour is left unchanged
    # here -- confirm whether a minimum window should be enforced.
    a <- 1:lp
    for (i in seq_along(evalr)) {
      # Fit a straight line to the previous lp values and extrapolate one step.
      b <- y[(evalr[i] - lp):(evalr[i] - 1)]
      lm.ab <- stats::lm(b ~ a, data = data.frame(a, b))
      # prediction - value
      res[i] <- stats::predict(lm.ab, newdata = data.frame(a = lp + 1)) - y[evalr[i]]
    }
  }

  firstzero_ac(res)
}

# EN_SampEn_5_03_sampen1
#' Second Sample Entropy of a time series from software package \code{hctsa}
#'
#' Modified from the Ben Fulcher's \code{EN_SampEn} which uses code from PhysioNet.
#' The publicly-available PhysioNet Matlab code, sampenc (renamed here to
#' RN_sampenc) is available from:
#' http://www.physionet.org/physiotools/sampen/matlab/1.1/sampenc.m
#'
#' Embedding dimension is set to 5.
#' The threshold is set to 0.3.
#'
#'
#' @param y the input time series
#' @references cf. "Physiological time-series analysis using approximate entropy and sample
#' entropy", J. S. Richman and J. R. Moorman, Am. J. Physiol. Heart Circ.
#' Physiol., 278(6) H2039 (2000)
#' @references B.D. Fulcher and N.S. Jones. hctsa: A computational framework for automated time-series phenotyping using massive feature extraction. Cell Systems 5, 527 (2017).
#' @references B.D. Fulcher, M.A. Little, N.S. Jones Highly comparative time-series analysis: the empirical structure of time series and their methods. J. Roy. Soc. Interface 10, 83 (2013).
#' @return the value of \code{sampenc(y, M = 6, r = 0.3)}
#' @author Yangzhuoran Yang
#' @export
sampen_first <- function(y) {
  embedding_dim <- 5
  threshold <- 0.3
  # sampenc() is asked for one template length more than the embedding dimension.
  sampenc(y, embedding_dim + 1, threshold)
}

# PN_sampenc
#' Second Sample Entropy from software package \code{hctsa}
#'
#' Modified from the Ben Fulcher version of original code sampenc.m from
#' http://physionet.org/physiotools/sampen/
#' http://www.physionet.org/physiotools/sampen/matlab/1.1/sampenc.m
#' Code by DK Lake (dlake@virginia.edu), JR Moorman and Cao Hanqing.
#'
#'
#' @param y the input time series
#' @param M embedding dimension
#' @param r threshold
#'
#' @references cf. "Physiological time-series analysis using approximate entropy and sample
#' entropy", J. S. Richman and J. R. Moorman, Am. J. Physiol. Heart Circ.
#' Physiol., 278(6) H2039 (2000)
#' @references B.D. Fulcher and N.S. Jones. hctsa: A computational framework for automated time-series phenotyping using massive feature extraction. Cell Systems 5, 527 (2017).
#' @references B.D. Fulcher, M.A. Little, N.S. Jones Highly comparative time-series analysis: the empirical structure of time series and their methods. J. Roy. Soc. Interface 10, 83 (2013).
488 | #' @author Yangzhuoran Yang 489 | #' @export 490 | sampenc <- function(y, M = 6, r = 0.3) { 491 | N <- length(y) 492 | lastrun <- numeric(N) # zeros(1,N) 493 | run <- numeric(N) # zeros(1,N) 494 | A <- numeric(M) # zeros(M,1) 495 | B <- numeric(M) # zeros(M,1) 496 | # Get counting: 497 | for (i in 1:(N - 1)) { # go through each point in the time series, counting matches 498 | y1 <- y[i] 499 | for (jj in 1:(N - i)) { # compare to points through the rest of the time series 500 | # Compare to future index, j: 501 | j <- i + jj 502 | # This future point, j, matches the time-series value at i: 503 | if (isTRUE(abs(y[j] - y1) < r)) { 504 | run[jj] <- lastrun[jj] + 1 # increase run count for this lag 505 | M1 <- min(M, run[jj]) 506 | 507 | A[1:M1] <- A[1:M1] + 1 508 | if (j < N) B[1:M1] <- B[1:M1] + 1 509 | } else { 510 | run[jj] <- 0 511 | } 512 | } 513 | for (j in 1:(N - i)) { 514 | lastrun[j] <- run[j] 515 | } 516 | } 517 | # Calculate for m <- 2 518 | # NN <- N*(N-1)/2 519 | p <- A[2] / B[1] 520 | e <- -log(p) 521 | return(e) 522 | } 523 | 524 | # stationarity ------------------------------------------------------------ 525 | 526 | # SY_StdNthDer_1 527 | #' Standard deviation of the first derivative of the time series from software package \code{hctsa} 528 | #' 529 | #' Modified from \code{SY_StdNthDer} in \code{hctsa}. Based on an idea by Vladimir Vassilevsky. 530 | #' 531 | #' @param y the input time series. Missing values will be removed. 532 | #' @return Standard deviation of the first derivative of the time series. 533 | #' @references cf. http://www.mathworks.de/matlabcentral/newsreader/view_thread/136539 534 | #' @references B.D. Fulcher and N.S. Jones. hctsa: A computational framework for automated time-series phenotyping using massive feature extraction. Cell Systems 5, 527 (2017). 535 | #' @references B.D. Fulcher, M.A. Little, N.S. Jones Highly comparative time-series analysis: the empirical structure of time series and their methods. J. Roy. Soc. 
#' Interface 10, 83 (2013).
#' @author Yangzhuoran Yang
#' @export
std1st_der <- function(y) {
  # At least two observations are needed to form one difference.
  if (length(y) < 2) {
    stop("Time series is too short to compute differences")
  }
  # Missing differences are dropped before taking the standard deviation.
  sd(diff(y), na.rm = TRUE)
}

# SY_SpreadRandomLocal_50_100_meantaul
# SY_SpreadRandomLocal_ac2_100_meantaul
#' Bootstrap-based stationarity measure from software package \code{hctsa}
#'
#' 100 time-series segments of length \code{l} are selected at random from the time series and
#' the mean of the first zero-crossings of the autocorrelation function in each segment is calculated.
#'
#' Segments may overlap. If \code{l} exceeds 90\% of the series length a
#' warning is issued and \code{NA} is returned.
#'
#' @param y the input time series
#' @param l the length of local time-series segments to analyse as a positive
#' integer. Can also be a specified character string: "ac2": twice the first
#' zero-crossing of the autocorrelation function
#' @return mean of the first zero-crossings of the autocorrelation function
#' @references B.D. Fulcher and N.S. Jones. hctsa: A computational framework
#' for automated time-series phenotyping using massive feature extraction.
#' Cell Systems 5, 527 (2017).
#' @references B.D. Fulcher, M.A. Little, N.S. Jones Highly comparative
#' time-series analysis: the empirical structure of time series and their
#' methods. J. Roy. Soc. Interface 10, 83 (2013).
#' @author Yangzhuoran Yang
#' @export
spreadrandomlocal_meantaul <- function(y, l = 50) {
  if (is.character(l) && "ac2" %in% l) l <- 2 * firstzero_ac(y)
  if (!is.numeric(l)) stop("Unknown specifier `l`")
  numSegs <- 100 # number of random segments to average over
  N <- length(y)
  if (l > 0.9 * N) {
    warning("This time series is too short. Specify proper segment length in `l`")
    return(NA_real_)
  }

  # Start indices are drawn from 1..(N - 1 - l). For short series that upper
  # bound can fall below 1, and sample() would then hand back the bound
  # itself (e.g. 0), so index 0 is silently dropped and the segment loses a
  # point. Start index 1 is always valid here because l <= 0.9 * N, so the
  # bound is clamped to 1.
  maxStart <- max(1, N - 1 - l)

  qs <- numeric(numSegs)
  for (j in seq_len(numSegs)) {
    ist <- sample.int(maxStart, 1) # random start point; ranges CAN overlap
    rs <- ist:(ist + l - 1) # indices of a segment of length l
    qs[j] <- firstzero_ac(y[rs])
  }
  mean(qs, na.rm = TRUE)
}

# distribution ------------------------------------------------------------

# DN_histogram_mode_10
#' Mode of a data vector from software package \code{hctsa}
#'
#' Measures the mode of the data vector using histograms with a given number of bins as suggestion.
#' The value calculated is different from \code{hctsa} and \code{CompEngine} as the histogram edges are calculated differently.
#'
#' @param y the input data vector
#' @param numBins the number of bins to use in the histogram.
#' @return the mode
#' @references B.D. Fulcher and N.S. Jones. hctsa: A computational framework
#' for automated time-series phenotyping using massive feature extraction.
#' Cell Systems 5, 527 (2017).
#' @references B.D. Fulcher, M.A. Little, N.S. Jones Highly comparative
#' time-series analysis: the empirical structure of time series and their
#' methods. J. Roy. Soc. Interface 10, 83 (2013).
#' @details If several bins share the highest count, the mean of their
#' centres is returned.
#' @author Yangzhuoran Yang
#' @export
#' @importFrom graphics hist
#' @importFrom stats predict

histogram_mode <- function(y, numBins = 10) {
  if (!is.numeric(numBins)) {
    stop("Unknown format for numBins")
  }

  # Build the histogram without drawing it; `mids` holds the bin centres.
  histdata <- hist(y, plot = FALSE, breaks = numBins)
  counts <- histdata$counts

  # Mean position of the maxima. which.max() would keep only the first of
  # several tied bins, so every bin that reaches the maximum is selected.
  mean(histdata$mids[counts == max(counts)])
}

# DN_OutlierInclude_abs_001_mdrmd
#' How the median depends on distributional outliers from software package \code{hctsa}
#'
#' Measures median as more and
#' more outliers are included in the calculation according to a specified rule,
#' of outliers being furthest from the mean.
#'
#' The threshold for including time-series data points in the analysis increases
#' from zero to the maximum deviation, in increments of 0.01*sigma (by default),
#' where sigma is the standard deviation of the time series.
#'
#' At each threshold, proportion of time series points
#' included and median are calculated, and outputs from the
#' algorithm measure how these statistical quantities change as more extreme
#' points are included in the calculation.
#'
#' Outliers are defined as furthest from the mean.
#'
#' @param y the input time series (ideally z-scored)
#' @param zscored Should y be z-scored before computing the statistic. Default: TRUE
#' @return median of the median of range indices
#' @references B.D. Fulcher and N.S. Jones. hctsa: A computational framework
#' for automated time-series phenotyping using massive feature extraction.
#' Cell Systems 5, 527 (2017).
#' @references B.D. Fulcher, M.A.
#' Little, N.S. Jones Highly comparative time-series analysis: the empirical
#' structure of time series and their methods. J. Roy. Soc. Interface 10, 83
#' (2013).
#' @author Yangzhuoran Yang
#' @export
#' @importFrom stats ts tsp sd

outlierinclude_mdrmd <- function(y, zscored = TRUE) {
  if (length(unique(y)) == 1L) {
    stop("The time series is a constant!")
  }

  # The threshold grid advances in steps of 0.01 * sigma. After z-scoring
  # sigma is taken as 1; otherwise it is the sample standard deviation.
  if (zscored) {
    scaled <- ts(c(scale(y)))
    tsp(scaled) <- tsp(y) # carry the time attributes of the input over
    y <- scaled
    sigma <- 1
  } else {
    sigma <- sd(y, na.rm = TRUE)
  }

  n_obs <- length(y)
  abs_y <- abs(y)
  thresholds <- seq(from = 0, to = max(abs_y, na.rm = TRUE), by = 0.01 * sigma)
  if (length(thresholds) == 0) stop("peculiar time series")

  # For each threshold look at the positions whose magnitude reaches it:
  #   row 1: median position rescaled by N / 2 and shifted by -1
  #   row 2: number of gaps between those positions, as a percentage of N
  per_threshold <- vapply(
    thresholds,
    function(th) {
      hits <- which(abs_y >= th)
      c(median(hits) / (n_obs / 2) - 1, length(diff(hits)) / n_obs * 100)
    },
    numeric(2)
  )
  median_shift <- per_threshold[1, ]
  pct_included <- per_threshold[2, ]

  # Keep thresholds up to the last one that still covers more than 2% of the
  # data; beyond that point too few observations remain.
  informative <- which(pct_included > 2)
  if (length(informative) == 0) {
    stop("the statistic power is lacking: less than 2% of data included")
  }
  last_kept <- informative[length(informative)]

  median(median_shift[1:last_kept])
}

# scaling
# ----------------------------------------------------------------

# SC_FluctAnal_2_rsrangefit_50_1_logi_prop_r1
#' Implements fluctuation analysis from software package \code{hctsa}
#'
#' Fits a polynomial of order 1 and then returns the
#' range. The order of fluctuations is 2, corresponding to root mean
#' square fluctuations.
#'
#' Missing values are treated as zero when the series is accumulated.
#'
#' @param x the input time series (or any vector)
#' @return The position of the best break point between two straight-line
#' fits of the log-log fluctuation function, divided by the number of scales
#' analysed. \code{NA} (with a warning) when there are too few distinct
#' scales for any break point to leave enough scales on both sides. An error
#' is raised when fewer than 8 distinct scales are available.
#' @references B.D. Fulcher and N.S. Jones. hctsa: A computational framework
#' for automated time-series phenotyping using massive feature extraction.
#' Cell Systems 5, 527 (2017).
#' @references B.D. Fulcher, M.A. Little, N.S. Jones Highly comparative
#' time-series analysis: the empirical structure of time series and their
#' methods. J. Roy. Soc. Interface 10, 83 (2013).
#' @author Yangzhuoran Yang
#' @export
fluctanal_prop_r1 <- function(x) {
  q <- 2 # order of the fluctuation function (2 = root mean square)
  tauStep <- 50 # number of log-spaced candidate scales
  minPoints <- 6 # break points are searched in minPoints..(ntau - minPoints)

  N <- length(x)
  # Cumulative sum of the series, with missing values counted as zero
  y <- cumsum(ifelse(!is.na(x), x, 0))

  # Distinct integer scales, log-spaced between 5 and half the series length
  taur <- unique(round(exp(seq(from = log(5), to = log(floor(N / 2)), length.out = tauStep))))
  ntau <- length(taur)
  if (ntau < 8) { # fewer than 8 points
    stop("This time series is too short to analyse using this fluctuation analysis")
  }
  # The break-point search below runs over minPoints..(ntau - minPoints).
  # When that range is empty, `a:b` would count backwards and fit regimes
  # with fewer than minPoints scales, so stop here with a missing value.
  if (ntau < 2 * minPoints) {
    warning("This time series is too short to locate a break point between two scaling regimes")
    return(NA_real_)
  }

  Fl <- numeric(ntau)
  for (i in seq_len(ntau)) {
    tau <- taur[i] # the scale on which to compute fluctuations
    # Cut the accumulated series into consecutive windows of length tau
    y_buff <- split(y, ceiling(seq_along(y) / tau))
    if (length(y_buff) > floor(N / tau)) { # drop the incomplete trailing window
      y_buff <- y_buff[-length(y_buff)]
    }

    tt <- seq_len(tau) # faux time range
    # Remove a linear trend from each window and record the range of what
    # is left.
    y_dt <- vapply(
      y_buff,
      function(seg) {
        detrended <- residuals(lm(lmy ~ tt, data = data.frame(tt, lmy = seg)))
        diff(range(detrended))
      },
      numeric(1)
    )

    # Fluctuation function at this scale
    Fl[i] <- (mean(y_dt^q))^(1 / q)
  }
  logtt <- log(taur)
  logFF <- log(Fl)

  ## Try assuming two components (2 distinct scaling regimes)
  # Move through, and fit a straight line to loglog before and after each
  # candidate point; the end points stay NA and are never chosen.
  sserr <- rep(NA_real_, ntau)
  for (i in minPoints:(ntau - minPoints)) {
    r1 <- 1:i
    r2 <- i:ntau
    p1 <- lm(lf ~ lt, data = data.frame(lt = logtt[r1], lf = logFF[r1]))
    p2 <- lm(lf ~ lt, data = data.frame(lt = logtt[r2], lf = logFF[r2]))
    # Sum of the residual norms from fitting lines to both segments
    sserr[i] <- norm(-residuals(p1), type = "2") + norm(-residuals(p2), type = "2")
  }

  # breakPt is the point where it's best to fit a line before and another
  # line after
  breakPt <- which.min(sserr)
  r1 <- 1:breakPt

  length(r1) / ntau
}