├── .github
    ├── .gitignore
    └── workflows
    │   └── R-CMD-check.yaml
├── src
    ├── .gitignore
    ├── RcppExports.cpp
    └── code.cpp
├── .gitignore
├── revdep
    ├── .gitignore
    └── email.yml
├── .Rbuildignore
├── R
    ├── tsfknn-package.R
    ├── RcppExports.R
    ├── utilities.R
    ├── print_knnForecast.R
    ├── rolling_origin.R
    ├── knn.R
    ├── plotting.R
    └── forecasting.R
├── tests
    ├── testthat
    │   ├── test_knn_model.R
    │   ├── test_n_training_examples.R
    │   ├── test_predict.R
    │   ├── test_regression.R
    │   ├── test_rolling_origin.R
    │   ├── test_knn_forecasting.R
    │   └── test_build_examples.R
    └── testthat.R
├── cran-comments.md
├── tsfknn.Rproj
├── NAMESPACE
├── README.md
├── man
    ├── knn_examples.Rd
    ├── plot.knnForecastRO.Rd
    ├── nearest_neighbors.Rd
    ├── n_training_examples.Rd
    ├── tsfknn-package.Rd
    ├── predict.knnForecast.Rd
    ├── autoplot.knnForecast.Rd
    ├── rolling_origin.Rd
    └── knn_forecasting.Rd
├── inst
    └── CITATION
├── DESCRIPTION
├── README.Rmd
├── NEWS.md
└── vignettes
    └── tsfknn.Rmd


/.github/.gitignore:
--------------------------------------------------------------------------------
1 | *.html
2 | 


--------------------------------------------------------------------------------
/src/.gitignore:
--------------------------------------------------------------------------------
1 | *.o
2 | *.so
3 | *.dll
4 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .Rproj.user
2 | .Rhistory
3 | .RData
4 | .Ruserdata
5 | inst/doc
6 | 


--------------------------------------------------------------------------------
/revdep/.gitignore:
--------------------------------------------------------------------------------
1 | checks
2 | library
3 | checks.noindex
4 | library.noindex
5 | cloud.noindex
6 | data.sqlite
7 | *.html
8 | 


--------------------------------------------------------------------------------
/revdep/email.yml:
--------------------------------------------------------------------------------
1 | release_date: ???
2 | rel_release_date: ???
3 | my_news_url: ???
4 | release_version: ???
5 | release_details: ???
6 | 


--------------------------------------------------------------------------------
/.Rbuildignore:
--------------------------------------------------------------------------------
 1 | ^.*\.Rproj$
 2 | ^\.Rproj\.user$
 3 | ^cran-comments\.md$
 4 | ^README\.Rmd$
 5 | ^README-.*\.png$
 6 | ^CRAN-RELEASE$
 7 | ^\.github$
 8 | ^revdep$
 9 | ^CRAN-SUBMISSION$
10 | 


--------------------------------------------------------------------------------
/R/tsfknn-package.R:
--------------------------------------------------------------------------------
1 | #' @keywords internal
2 | "_PACKAGE"
3 | 
4 | ## usethis namespace: start
5 | #' @useDynLib tsfknn
6 | #' @importFrom Rcpp sourceCpp
7 | ## usethis namespace: end
8 | NULL
9 | 


--------------------------------------------------------------------------------
/tests/testthat/test_knn_model.R:
--------------------------------------------------------------------------------
1 | test_that("errors calling knnmodel function", {
2 |   expect_error(knn_model(ts(1:5), lags = 0:3, k = 5)) # presumably rejected because lag 0 is not a valid lag - TODO confirm
3 |   expect_error(knn_model(ts(1:5), lags = 3:5, k = 1)) # presumably rejected because the largest lag equals the series length - TODO confirm
4 |   expect_error(knn_model(ts(1:5), lags = 2:4, k = 2)) # presumably rejected because fewer than k examples can be built - TODO confirm
5 | })
6 | 


--------------------------------------------------------------------------------
/cran-comments.md:
--------------------------------------------------------------------------------
 1 | ## Test environments
 2 | * local Windows install, R 4.3.2
 3 | * Windows Server 
 4 | * Ubuntu Linux 
 5 | * MacOS
 6 | 
 7 | ## R CMD check results
 8 | There were no ERRORs, WARNINGs or NOTEs.
 9 | 
10 | ## Downstream dependencies
11 | The changes made to this package have no effect on downstream dependencies.
12 | 


--------------------------------------------------------------------------------
/tests/testthat/test_n_training_examples.R:
--------------------------------------------------------------------------------
 1 | test_that("number of examples with MIMO strategy", {
 2 |   expect_equal(n_training_examples(ts(1:10), h = 2, lags = 1:3, msas = "MIMO"),
 3 |                6)
 4 | })
 5 | 
 6 | test_that("number of examples with recursive strategy", {
 7 |   expect_equal(
 8 |     n_training_examples(ts(1:10), h = 2, lags = 1:3, msas = "recursive"),
 9 |     7
10 |   )
11 | })
12 | 


--------------------------------------------------------------------------------
/tests/testthat.R:
--------------------------------------------------------------------------------
 1 | # This file is part of the standard setup for testthat.
 2 | # It is recommended that you do not modify it.
 3 | #
 4 | # Where should you do additional test configuration?
 5 | # Learn more about the roles of various files in:
 6 | # * https://r-pkgs.org/testing-design.html#sec-tests-files-overview
 7 | # * https://testthat.r-lib.org/articles/special-files.html
 8 | 
 9 | library(testthat)
10 | library(tsfknn)
11 | 
12 | test_check("tsfknn")
13 | 


--------------------------------------------------------------------------------
/tsfknn.Rproj:
--------------------------------------------------------------------------------
 1 | Version: 1.0
 2 | 
 3 | RestoreWorkspace: Default
 4 | SaveWorkspace: Default
 5 | AlwaysSaveHistory: Default
 6 | 
 7 | EnableCodeIndexing: Yes
 8 | UseSpacesForTab: Yes
 9 | NumSpacesForTab: 2
10 | Encoding: UTF-8
11 | 
12 | RnwWeave: Sweave
13 | LaTeX: pdfLaTeX
14 | 
15 | AutoAppendNewline: Yes
16 | StripTrailingWhitespace: Yes
17 | 
18 | BuildType: Package
19 | PackageUseDevtools: Yes
20 | PackageInstallArgs: --no-multiarch --with-keep.source
21 | PackageRoxygenize: rd,collate,namespace
22 | 


--------------------------------------------------------------------------------
/tests/testthat/test_predict.R:
--------------------------------------------------------------------------------
 1 | # A MIMO model can only forecast the horizon it was built for
 2 | test_that("MIMO strategy rejects a different forecasting horizon", {
 3 |   # Build the model outside expect_error so that only predict() may fail
 4 |   model <- knn_forecasting(ts(1:30), msas = "MIMO", h = 3)
 5 |   expect_error(predict(model, h = 4))
 6 | })
 7 | 
 8 | # The strategy is requested explicitly: it is not the default one
 9 | test_that("MIMO strategy predicts fine", {
10 |   model <- knn_forecasting(ts(1:30), h = 3, msas = "MIMO")
11 |   expect_equal(model$prediction, predict(model, h = 3)$prediction)
12 | })
13 | 
14 | # With the recursive strategy the horizon can be changed after fitting
15 | test_that("recursive strategy predicts fine", {
16 |   expected <- knn_forecasting(ts(1:30), h = 6, msas = "recursive")$prediction
17 |   model <- knn_forecasting(ts(1:30), h = 3, msas = "recursive")
18 |   expect_equal(expected, predict(model, h = 6)$prediction)
19 | })
20 | 


--------------------------------------------------------------------------------
/NAMESPACE:
--------------------------------------------------------------------------------
 1 | # Generated by roxygen2: do not edit by hand
 2 | 
 3 | S3method(autoplot,knnForecast)
 4 | S3method(plot,knnForecast)
 5 | S3method(plot,knnForecastRO)
 6 | S3method(predict,knnForecast)
 7 | S3method(print,knnForecast)
 8 | S3method(print,summary.knnForecast)
 9 | S3method(summary,knnForecast)
10 | export(knn_examples)
11 | export(knn_forecasting)
12 | export(n_training_examples)
13 | export(nearest_neighbors)
14 | export(rolling_origin)
15 | importFrom(Rcpp,sourceCpp)
16 | importFrom(ggplot2,autoplot)
17 | importFrom(graphics,plot)
18 | importFrom(stats,predict)
19 | useDynLib(tsfknn)
20 | 


--------------------------------------------------------------------------------
/R/RcppExports.R:
--------------------------------------------------------------------------------
 1 | # Generated by using Rcpp::compileAttributes() -> do not edit by hand
 2 | # Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393
 3 | 
 4 | first_n <- function(m, v, k) {
 5 |     .Call('_tsfknn_first_n', PACKAGE = 'tsfknn', m, v, k)
 6 | }
 7 | 
 8 | build_examples2 <- function(timeS, lags, nt) {
 9 |     .Call('_tsfknn_build_examples2', PACKAGE = 'tsfknn', timeS, lags, nt)
10 | }
11 | 
12 | build_examples_m <- function(timeS, lags, nt) {
13 |     .Call('_tsfknn_build_examples_m', PACKAGE = 'tsfknn', timeS, lags, nt)
14 | }
15 | 
16 | build_examples_a <- function(timeS, lags, nt) {
17 |     .Call('_tsfknn_build_examples_a', PACKAGE = 'tsfknn', timeS, lags, nt)
18 | }
19 | 
20 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # tsfknn
 2 | 
 3 | The goal of tsfknn is to forecast time series using KNN regression.
 4 | 
 5 | ## Installation
 6 | 
 7 | You can install tsfknn from GitHub with:
 8 | 
 9 | ``` r
10 | # install.packages("devtools")
11 | devtools::install_github("franciscomartinezdelrio/tsfknn")
12 | ```
13 | 
14 | ## Example
15 | 
16 | This is a basic example which shows how to forecast with tsfknn:
17 | 
18 | ``` r
19 | library(tsfknn)
20 | pred <- knn_forecasting(USAccDeaths, h = 12, k = 3)
21 | pred$prediction # To see a time series with the forecasts
22 | plot(pred) # To see a plot with the forecast
23 | library(ggplot2)
24 | autoplot(pred, highlight = "neighbors")  # To see the nearest neighbors
25 | ```
26 | 


--------------------------------------------------------------------------------
/man/knn_examples.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/forecasting.R
 3 | \name{knn_examples}
 4 | \alias{knn_examples}
 5 | \title{Examples of the model associated with a prediction}
 6 | \usage{
 7 | knn_examples(forecast)
 8 | }
 9 | \arguments{
10 | \item{forecast}{A \code{knnForecast} object.}
11 | }
12 | \value{
13 | A matrix including the features and targets of the examples
14 | associated with the model of a \code{knnForecast} object.
15 | }
16 | \description{
17 | It allows you to see the examples of the model associated with a
18 | \code{knnForecast} object.
19 | }
20 | \examples{
21 | pred <- knn_forecasting(ts(1:8), h = 1, lags = 1:2, k = 2)
22 | knn_examples(pred)
23 | }
24 | 


--------------------------------------------------------------------------------
/man/plot.knnForecastRO.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/rolling_origin.R
 3 | \name{plot.knnForecastRO}
 4 | \alias{plot.knnForecastRO}
 5 | \title{Plot a prediction of a test set}
 6 | \usage{
 7 | \method{plot}{knnForecastRO}(x, h = NULL, ...)
 8 | }
 9 | \arguments{
10 | \item{x}{the object obtained from a call to \code{\link{rolling_origin}}.}
11 | 
12 | \item{h}{an integer. The forecasting horizon. If \code{NULL}, the maximum
13 | forecasting horizon of all the test sets is used.}
14 | 
15 | \item{...}{Other plotting parameters to affect the plot.}
16 | }
17 | \description{
18 | It uses a test set generated with the function \code{\link{rolling_origin}}
19 | and plots its forecast.
20 | }
21 | 


--------------------------------------------------------------------------------
/man/nearest_neighbors.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/forecasting.R
 3 | \name{nearest_neighbors}
 4 | \alias{nearest_neighbors}
 5 | \title{Nearest neighbors associated with predictions}
 6 | \usage{
 7 | nearest_neighbors(forecast)
 8 | }
 9 | \arguments{
10 | \item{forecast}{A \code{knnForecast} object.}
11 | }
12 | \value{
13 | A list including the new instances used in KNN regression and their
14 | nearest neighbors.
15 | }
16 | \description{
17 | It allows you to check the new instances and their nearest neighbors used in a
18 | prediction associated with a "knnForecast" object.
19 | }
20 | \examples{
21 | pred <- knn_forecasting(UKgas, h = 4, lags = 1:4, k = 2, msas = "MIMO")
22 | nearest_neighbors(pred)
23 | }
24 | 


--------------------------------------------------------------------------------
/R/utilities.R:
--------------------------------------------------------------------------------
 1 | # Append further observations to a time series
 2 | #
 3 | # @param ts1 a time series; it supplies the start and frequency of the result
 4 | # @param ts2 a time series or numeric vector placed after \code{ts1}
 5 | #
 6 | # @return A time series with the values of \code{ts1} followed by those of
 7 | #   \code{ts2}
 8 | combine <- function(ts1, ts2) {
 9 |   joined <- c(ts1, ts2)
10 |   stats::ts(joined, start = stats::start(ts1),
11 |             frequency = stats::frequency(ts1))
12 | }
13 | 
14 | # Split a time series into a training set and a test set
15 | #
16 | # @param timeS a time series
17 | # @param h number of trailing observations that form the test set
18 | #
19 | # @return A list with the components \code{training} and \code{test}
20 | train_test <- function(timeS, h) {
21 |   freq <- stats::frequency(timeS)
22 |   train_part <- stats::ts(utils::head(timeS, -h),
23 |                           start = stats::start(timeS), frequency = freq)
24 |   # The end of a two-point series starting at the last training time is the
25 |   # time of the first test observation
26 |   bridge <- stats::ts(1:2, start = stats::end(train_part), frequency = freq)
27 |   test_part <- stats::ts(utils::tail(timeS, h),
28 |                          start = stats::end(bridge), frequency = freq)
29 |   list(training = train_part,
30 |        test = test_part)
31 | }
32 | 


--------------------------------------------------------------------------------
/tests/testthat/test_regression.R:
--------------------------------------------------------------------------------
 1 | test_that("knn regression with one target", {
 2 |   model <- knn_model(ts(c(2, 3, 1, 5, 4, 0, 7, 1, 2)), lags = 1:2, k = 2,
 3 |                      transform = "none")
 4 |   r <- list(
 5 |     prediction = 3,
 6 |     neighbors = c(3, 4)
 7 |   )
 8 |   expect_equal(regression(model, c(1, 2), k = 2), r)
 9 | })
10 | 
11 | test_that("knn regression with multiple targets", {
12 |   model <- knn_model(ts(c(2, 3, 1, 5, 4, 0, 7, 1, 2)), lags = 1:2, k = 2, nt = 2,
13 |                      transform = "none")
14 |   r <- list(
15 |     prediction = c(3, 4.5),
16 |     neighbors = c(3, 4)
17 |   )
18 |   expect_equal(regression(model, c(1, 2), k = 2), r)
19 | })
20 | 
21 | test_that("knn regression with weighted combination and equal neighbor", {
22 |   model <- knn_model(ts(c(1, 2, 1, 5, 4, 0, 7, 1, 2)), lags = 1:2, k = 2,
23 |                      nt = 1, cf = "weighted", transform = "none")
24 |   r <- list(
25 |     prediction = 1,
26 |     neighbors = c(3, 4)
27 |   )
28 |   expect_equal(regression(model, c(1, 2), k = 2), r)
29 | })
30 | 


--------------------------------------------------------------------------------
/man/n_training_examples.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/forecasting.R
 3 | \name{n_training_examples}
 4 | \alias{n_training_examples}
 5 | \title{Number of training examples}
 6 | \usage{
 7 | n_training_examples(timeS, h, lags, msas = c("MIMO", "recursive"))
 8 | }
 9 | \arguments{
10 | \item{timeS}{A numeric vector or time series of class \code{ts}.}
11 | 
12 | \item{h}{A positive integer. Number of values to forecast.}
13 | 
14 | \item{lags}{An integer vector in increasing order expressing the lags used
15 | as autoregressive variables.}
16 | 
17 | \item{msas}{A string indicating the Multiple-Step Ahead Strategy used when
18 | more than one value is predicted. It can be "recursive" or "MIMO" (the
19 | default).}
20 | }
21 | \value{
22 | An integer.
23 | }
24 | \description{
25 | It computes the number of training examples that a KNN model with the
26 | specified parameters would have.
27 | }
28 | \examples{
29 | n_training_examples(ts(1:10), h = 2, lags = 1:3, msas = "MIMO")
30 | n_training_examples(ts(1:10), h = 2, lags = 1:3, msas = "recursive")
31 | }
32 | 


--------------------------------------------------------------------------------
/man/tsfknn-package.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/tsfknn-package.R
 3 | \docType{package}
 4 | \name{tsfknn-package}
 5 | \alias{tsfknn}
 6 | \alias{tsfknn-package}
 7 | \title{tsfknn: Time Series Forecasting Using Nearest Neighbors}
 8 | \description{
 9 | Allows forecasting time series using nearest neighbors regression Francisco Martinez, Maria P. Frias, Maria D. Perez-Godoy and Antonio J. Rivera (2019) \doi{10.1007/s10462-017-9593-z}. When the forecasting horizon is higher than 1, two multi-step ahead forecasting strategies can be used. The model built is autoregressive, that is, it is only based on the observations of the time series. The nearest neighbors used in a prediction can be consulted and plotted.
10 | }
11 | \seealso{
12 | Useful links:
13 | \itemize{
14 |   \item \url{https://github.com/franciscomartinezdelrio/tsfknn}
15 |   \item Report bugs at \url{https://github.com/franciscomartinezdelrio/tsfknn/issues}
16 | }
17 | 
18 | }
19 | \author{
20 | \strong{Maintainer}: Francisco Martinez \email{fmartin@ujaen.es}
21 | 
22 | }
23 | \keyword{internal}
24 | 


--------------------------------------------------------------------------------
/inst/CITATION:
--------------------------------------------------------------------------------
 1 | year <- sub("-.*", "", meta$Date)  # keep what precedes the first dash of meta$Date
 2 | if(!length(year))  # nothing obtained from meta$Date: fall back to the current year
 3 |   year <- substr(Sys.Date(),1,4)
 4 | vers <- meta$Version
 5 | if(is.null(vers))  # no version in meta: query the installed package instead
 6 |   vers <- packageVersion("tsfknn")
 7 | vers <- paste("R package version", vers)  # NOTE(review): year and vers are not referenced by the bibentry below
 8 | 
 9 | # Grab authors from DESCRIPTION file
10 | # authors <- eval(parse(text=as.list(read.dcf("../DESCRIPTION")[1, ])$`Authors@R`))
11 | # authors <- authors[sapply(authors$role, function(roles) "aut" %in% roles)]
12 | # authors <- sapply(authors, function(author) paste(author$given, author$family))
13 | # authors <- paste(authors, collapse = " and ")
14 | 
15 | citHeader("To cite the tsfknn package in publications, please use:")
16 | 
17 | bibentry(bibtype = "Article",
18 |   title     = "{Time Series Forecasting with KNN in R: the tsfknn Package}",
19 |   author    = c(person("Francisco", "Martinez"),
20 |                        person(c("Maria", "P."),"Frias"),
21 |                        person("Francisco", "Charte"),
22 |                        person(c("Antonio","J."), "Rivera")),
23 |   journal   = "{The R Journal}",
24 |   volume    =  11,
25 |   number    =  2,
26 |   pages     = "229--242",
27 |   year      =  2019)
28 | 


--------------------------------------------------------------------------------
/man/predict.knnForecast.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/knn.R
 3 | \name{predict.knnForecast}
 4 | \alias{predict.knnForecast}
 5 | \title{Predict method for KNN models for time series forecasting.}
 6 | \usage{
 7 | \method{predict}{knnForecast}(object, h, ...)
 8 | }
 9 | \arguments{
10 | \item{object}{a \code{knnForecast} object obtained by a call to the
11 | \code{\link{knn_forecasting}} function.}
12 | 
13 | \item{h}{an integer. The forecasting horizon.}
14 | 
15 | \item{...}{further arguments passed to or from other methods.}
16 | }
17 | \value{
18 | a \code{knnForecast} object with the prediction and information
19 | about the KNN model, see the documentation of \code{\link{knn_forecasting}}
20 | for the structure of \code{knnForecast} objects.
21 | }
22 | \description{
23 | Predicted values based on a KNN model for time series forecasting.
24 | }
25 | \details{
26 | If the model uses the MIMO strategy for multiple-step ahead prediction,
27 | the forecasting horizon is fixed to the model forecasting horizon.
28 | }
29 | \examples{
30 | pred <- knn_forecasting(UKgas, h = 4, k = 1, msas = "recursive")
31 | new_pred <- predict(pred, h = 6)
32 | print(new_pred$prediction)
33 | plot(new_pred) # To see a plot with the forecast
34 | 
35 | }
36 | 


--------------------------------------------------------------------------------
/DESCRIPTION:
--------------------------------------------------------------------------------
 1 | Package: tsfknn
 2 | Type: Package
 3 | Title: Time Series Forecasting Using Nearest Neighbors
 4 | Version: 0.6.0
 5 | Authors@R: person("Francisco", "Martinez", email = "fmartin@ujaen.es",
 6 |     role = c("aut", "cre"))
 7 | Description: Allows forecasting time series using nearest neighbors regression,
 8 |     see Francisco Martinez, Maria P. Frias, Maria D. Perez-Godoy and Antonio J.
 9 |     Rivera (2019) <doi:10.1007/s10462-017-9593-z>. When the forecasting horizon
10 |     is higher than 1, two multi-step ahead forecasting strategies can be used.
11 |     The model built is autoregressive, that is, it is only based on the 
12 |     observations of the time series. The nearest neighbors used in a prediction
13 |     can be consulted and plotted.
14 | Maintainer: Francisco Martinez <fmartin@ujaen.es>
15 | License: GPL-2
16 | Encoding: UTF-8
17 | Roxygen: list(markdown = TRUE)
18 | RoxygenNote: 7.2.3
19 | Depends: R (>= 3.6.0)
20 | Suggests:
21 |     knitr,
22 |     rmarkdown,
23 |     testthat (>= 3.0.0)
24 | Imports:
25 |     ggplot2 (>= 3.1.1),
26 |     graphics,
27 |     Rcpp,
28 |     stats,
29 |     utils
30 | VignetteBuilder: knitr
31 | URL: https://github.com/franciscomartinezdelrio/tsfknn
32 | BugReports: https://github.com/franciscomartinezdelrio/tsfknn/issues
33 | LinkingTo: 
34 |     Rcpp
35 | Config/testthat/edition: 3
36 | 


--------------------------------------------------------------------------------
/README.Rmd:
--------------------------------------------------------------------------------
 1 | ---
 2 | output: github_document
 3 | ---
 4 | <!-- badges: start -->
 5 | [![R-CMD-check](https://github.com/franciscomartinezdelrio/tsfknn/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/franciscomartinezdelrio/tsfknn/actions/workflows/R-CMD-check.yaml)
 6 | <!-- badges: end -->
 7 | 
 8 | <!-- README.md is generated from README.Rmd. Please edit that file -->
 9 | 
10 | <!-- After editing README.Rmd, re-knit it (e.g. with devtools::build_readme()) to regenerate README.md -->
11 | 
12 | ```{r, echo = FALSE}
13 | knitr::opts_chunk$set(
14 |   collapse = TRUE,
15 |   comment = "#>",
16 |   eval = FALSE,
17 |   fig.path = "README-"
18 | )
19 | ```
20 | 
21 | # tsfknn
22 | 
23 | The goal of tsfknn is to forecast time series using KNN regression.
24 | 
25 | ## Installation
26 | 
27 | You can install tsfknn from GitHub with:
28 | 
29 | ```{r gh-installation, eval = FALSE}
30 | # install.packages("devtools")
31 | devtools::install_github("franciscomartinezdelrio/tsfknn")
32 | ```
33 | 
34 | ## Example
35 | 
36 | This is a basic example which shows how to forecast with tsfknn:
37 | 
38 | ```{r example}
39 | library(tsfknn)
40 | pred <- knn_forecasting(USAccDeaths, h = 12, k = 3)
41 | pred$prediction # To see a time series with the forecasts
42 | plot(pred) # To see a plot with the forecast
43 | library(ggplot2)
44 | autoplot(pred, highlight = "neighbors")  # To see the nearest neighbors
45 | ```
46 | 


--------------------------------------------------------------------------------
/tests/testthat/test_rolling_origin.R:
--------------------------------------------------------------------------------
 1 | # Rolling origin evaluation of a recursive model fitted to 1, 2, ..., 30.
 2 | # The columns of every expected matrix are the forecasting horizons.
 3 | pred <- knn_forecasting(ts(1:30), h = 4, msas = "recursive", transform = "none")
 4 | ro <- rolling_origin(pred, h = 4)
 5 | 
 6 | m <- matrix(c(27, 28, 29, 30, 28, 29, 30, NA, 29, 30, NA, NA, 30, NA, NA, NA),
 7 |             nrow = 4, byrow = TRUE)
 8 | colnames(m) <- paste("h=", 1:4, sep = "")
 9 | 
10 | test_that("Test set is built correctly", {
11 |   expect_equal(ro$test_sets, m)
12 | })
13 | 
14 | p <- matrix(c(24, 24, 24, 24, 25, 25, 25, NA, 26, 26, NA, NA, 27, NA, NA, NA),
15 |             nrow = 4, byrow = TRUE)
16 | colnames(p) <- paste("h=", 1:4, sep = "")
17 | 
18 | test_that("Predictions are OK", {
19 |   expect_equal(ro$prediction, p)
20 | })
21 | 
22 | # The expected errors are the test values minus the predictions (m - p)
23 | e <- matrix(c(3, 4, 5, 6, 3, 4, 5, NA, 3, 4, NA, NA, 3, NA, NA, NA),
24 |             nrow = 4, byrow = TRUE)
25 | colnames(e) <- paste("h=", 1:4, sep = "")
26 | 
27 | test_that("Errors are OK", {
28 |   expect_equal(ro$errors, e)
29 | })
30 | 
31 | # With rolling = FALSE only the first of the test sets above is expected
32 | ro <- rolling_origin(pred, h = 4, rolling = FALSE)
33 | 
34 | m <- matrix(c(27, 28, 29, 30), nrow = 1, byrow = TRUE)
35 | colnames(m) <- paste("h=", 1:4, sep = "")
36 | 
37 | test_that("Test set is built correctly without rolling", {
38 |   expect_equal(ro$test_sets, m)
39 | })
40 | 
41 | p <- matrix(c(24, 24, 24, 24), nrow = 1, byrow = TRUE)
42 | colnames(p) <- paste("h=", 1:4, sep = "")
43 | 
44 | test_that("Predictions are OK without rolling", {
45 |   expect_equal(ro$prediction, p)
46 | })
47 | 


--------------------------------------------------------------------------------
/man/autoplot.knnForecast.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/plotting.R
 3 | \name{autoplot.knnForecast}
 4 | \alias{autoplot.knnForecast}
 5 | \title{Create a ggplot object from a knnForecast object}
 6 | \usage{
 7 | \method{autoplot}{knnForecast}(object, ...)
 8 | }
 9 | \arguments{
10 | \item{object}{An object of class \code{knnForecast}.}
11 | 
12 | \item{...}{additional parameter, see details.}
13 | }
14 | \value{
15 | The ggplot object representing a plotting with the forecast.
16 | }
17 | \description{
18 | It uses a knnForecast object to create a ggplot object that plots a time
19 | series and its forecast using KNN regression.
20 | }
21 | \details{
22 | Commonly used parameters are:
23 | \itemize{
24 | \item \code{highlight}. A character string indicating what elements should be highlighted. Possible values are
25 | \code{"none"}, \code{"points"} and \code{"neighbors"}. The default value is \code{"none"}.
26 | \item \code{faceting}. Logical. This applies only if the \code{highlight} parameter is
27 | set to \code{"neighbors"}. It indicates whether the different nearest neighbors
28 | should be seen in different plots (\code{TRUE}, the default value) or in one
29 | plot.
30 | }
31 | }
32 | \examples{
33 | pred <- knn_forecasting(USAccDeaths, h = 12, lags = 1:12, k = 2)
34 | library(ggplot2)
35 | autoplot(pred)
36 | autoplot(pred, highlight = "neighbors")
37 | }
38 | 


--------------------------------------------------------------------------------
/.github/workflows/R-CMD-check.yaml:
--------------------------------------------------------------------------------
 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples
 2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
 3 | on:
 4 |   push:
 5 |     branches: [main, master]
 6 |   pull_request:
 7 |     branches: [main, master]
 8 | 
 9 | name: R-CMD-check
10 | 
11 | jobs:
12 |   R-CMD-check:
13 |     runs-on: ${{ matrix.config.os }}
14 | 
15 |     name: ${{ matrix.config.os }} (${{ matrix.config.r }})
16 | 
17 |     strategy:
18 |       fail-fast: false
19 |       matrix:
20 |         config:
21 |           - {os: macos-latest,   r: 'release'}
22 |           - {os: windows-latest, r: 'release'}
23 |           - {os: ubuntu-latest,   r: 'devel', http-user-agent: 'release'}
24 |           - {os: ubuntu-latest,   r: 'release'}
25 |           - {os: ubuntu-latest,   r: 'oldrel-1'}
26 | 
27 |     env:
28 |       GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
29 |       R_KEEP_PKG_SOURCE: yes
30 | 
31 |     steps:
32 |       - uses: actions/checkout@v3
33 | 
34 |       - uses: r-lib/actions/setup-pandoc@v2
35 | 
36 |       - uses: r-lib/actions/setup-r@v2
37 |         with:
38 |           r-version: ${{ matrix.config.r }}
39 |           http-user-agent: ${{ matrix.config.http-user-agent }}
40 |           use-public-rspm: true
41 | 
42 |       - uses: r-lib/actions/setup-r-dependencies@v2
43 |         with:
44 |           extra-packages: any::rcmdcheck
45 |           needs: check
46 | 
47 |       - uses: r-lib/actions/check-r-package@v2
48 |         with:
49 |           upload-snapshots: true
50 | 


--------------------------------------------------------------------------------
/tests/testthat/test_knn_forecasting.R:
--------------------------------------------------------------------------------
 1 | test_that("errors calling knn_forecasting", {
 2 |   expect_error(knn_forecasting(ts(1:5), h = 1, lags = 3:1, k = 2))
 3 |   expect_error(knn_forecasting(ts(1:5), h = 1, lags = 0:2, k = 2))
 4 |   expect_error(knn_forecasting(ts(1:5), h = 1, lags = 3:5, k = 1))
 5 |   expect_error(knn_forecasting(ts(1:5), h = 1, lags = 1, k = 1, transform = "additive"))
 6 |   expect_error(knn_forecasting(ts(1:5), h = 1, lags = 1, k = 1, transform = "multiplicative"))
 7 | })
 8 | 
 9 | pred <- knn_forecasting(ts(c(2, 3, 1, 5, 4, 0, 7, 1, 2)), h = 2, lags = 1:2, k = 2,
10 |                         msas = "MIMO", transform = "none")
11 | test_that("MIMO strategy predicts fine", {
12 |   expect_equal(as.vector(pred$prediction), c(3, 4.5))
13 | })
14 | 
15 | pred <- knn_forecasting(UKgas, h = 4, lags = 1:4, k = 2, msas = "MIMO",
16 |                         transform = "none")
17 | nn <- nearest_neighbors(pred)
18 | 
19 | test_that("MIMO strategy predicts fine", {
20 |   expect_equal(as.vector(pred$prediction), c(1125.45, 573.9, 314.6, 785.2))
21 | })
22 | 
23 | test_that("nearest_neighbors returns the new instance", {  # distinct name: this checks nn$instance, not the forecast
24 |   expect_equal(unname(nn$instance), c(1163.9, 613.1, 347.4, 782.8))
25 | })
26 | 
27 | n <- data.frame("Lag 4" = c(1087.0, 989.4),
28 |                 "Lag 3" = c(534.7, 477.1),
29 |                 "Lag 2" = c(281.8, 233.7),
30 |                 "Lag 1" = c(787.6, 730.0),
31 |                 "H1"    = c(1163.9, 1087.0),
32 |                 "H2"    = c(613.1, 534.7),
33 |                 "H3"    = c(347.4, 281.8),
34 |                 "H4"    = c(782.8, 787.6), check.names = FALSE
35 | )
36 | 
37 | test_that("nearest_neighbors returns the neighbors and their targets", {  # distinct name: this checks nn$nneighbors
38 |   expect_equal(nn$nneighbors, n)
39 | })
40 | 


--------------------------------------------------------------------------------
/NEWS.md:
--------------------------------------------------------------------------------
 1 | # tsfknn 0.6.0
 2 | 
 3 | * Bug fixed computing weights when neighbors are weighted by distance
 4 | * When lags are selected automatically, selecting only one lag together
 5 |   with the additive or multiplicative transformation is not allowed
 6 | * Using only one autoregressive lag together with the additive or
 7 |   multiplicative transformation is not allowed
 8 | * More information in the vignette about transformations  
 9 | 
10 | # tsfknn 0.5.2
11 | 
12 | * bug fixed in rolling_origin
13 | * modifying tsfknn-package.R to comply with CRAN
14 | 
15 | # tsfknn 0.5.1
16 | 
17 | * autoplot.knnForecast has been modified to comply with CRAN
18 | 
19 | # tsfknn 0.5.0
20 | 
21 | * The default Multi-step ahead strategy is recursive
22 | * An optional transformation to the training samples has been added. It improves forecast accuracy for time series with a trend
23 | * When several k are used, only those k that are equal to or lower than
24 |   the number of training samples are admitted
25 | 
26 | # tsfknn 0.4.0
27 | 
28 | * Using Rcpp for faster computation of nearest neighbors
29 | 
30 | # tsfknn 0.3.1
31 | 
32 | * Fix calculation of rolling origin prediction with recursive strategy
33 | 
34 | # tsfknn 0.3.0
35 | 
36 | * Now it is possible to assess the model using rolling origin evaluation
37 | * A predict method has been added to generate new forecasts based on a
38 |   previously built model
39 | 
40 | # tsfknn 0.2.0
41 | 
42 | * summary and print.summary methods are added for "knnForecast" objects
43 | * String parameters are processed with match.arg
44 | * Fix calculation of how many KNN examples the model has in knn_forecasting
45 | * Weighted combination of the targets of nearest neighbors is implemented
46 | * A function that computes the number of training instances that a model
47 |   would have has been added
48 | 


--------------------------------------------------------------------------------
/tests/testthat/test_build_examples.R:
--------------------------------------------------------------------------------
 1 | test_that("build_examples with one target", {
 2 |   patterns <- rbind(1:2, 2:3, 3:4)
 3 |   colnames(patterns) <- paste0("Lag", 2:1)
 4 |   targets <- matrix(3:5, ncol = 1)
 5 |   colnames(targets) <- "H1"
 6 |   targetsI <- 3:5
 7 |   result <- list(
 8 |     patterns = patterns,
 9 |     targets = targets,
10 |     targetsI = targetsI
11 |   )
12 |   expect_equal(build_examples(ts(1:5), 2:1, transform = "none"), result)
13 | })
14 | 
15 | 
# Two targets, no transformation: each example is followed by the next two
# observations (H1, H2), so one fewer example fits in the series.
test_that("build_examples with two targets", {
  expected <- list(
    patterns = matrix(
      c(1L, 2L,
        2L, 3L),
      ncol = 2, byrow = TRUE,
      dimnames = list(NULL, c("Lag2", "Lag1"))
    ),
    targets = matrix(
      c(3L, 4L,
        4L, 5L),
      ncol = 2, byrow = TRUE,
      dimnames = list(NULL, c("H1", "H2"))
    ),
    targetsI = 3:4
  )
  actual <- build_examples(ts(1:5), 2:1, nt = 2, transform = "none")
  expect_equal(actual, expected)
})
29 | 
# Additive transformation: the mean of the lagged values of each example is
# subtracted from both its features and its target.
test_that("build_examples with additive transformation", {
  series <- ts(c(1, 3, 7, 9, 10, 12))
  expected_patterns <- rbind(
    c(-1,   1),
    c(-2,   2),
    c(-1,   1),
    c(-0.5, 0.5)
  )
  dimnames(expected_patterns) <- list(NULL, c("Lag2", "Lag1"))
  expected <- list(
    patterns = expected_patterns,
    targets = matrix(c(5, 4, 2, 2.5), ncol = 1, dimnames = list(NULL, "H1")),
    targetsI = 3:6
  )
  expect_equal(build_examples(series, 2:1, transform = "additive"), expected)
})
43 | 
# Multiplicative transformation: both the features and the target of each
# example are divided by the mean of its lagged values.
test_that("build_examples with multiplicative transformation", {
  series <- ts(c(1, 3, 7, 9, 10, 12))
  expected_patterns <- rbind(
    c(0.5,        1.5),
    c(0.6,        1.4),
    c(0.875,      1.125),
    c(0.94736842, 1.05263158)
  )
  dimnames(expected_patterns) <- list(NULL, c("Lag2", "Lag1"))
  expected <- list(
    patterns = expected_patterns,
    targets = matrix(c(3.5, 1.8, 1.25, 1.2631579), ncol = 1,
                     dimnames = list(NULL, "H1")),
    targetsI = 3:6
  )
  expect_equal(build_examples(series, 2:1, transform = "multiplicative"), expected)
})
57 | 


--------------------------------------------------------------------------------
/man/rolling_origin.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/rolling_origin.R
 3 | \name{rolling_origin}
 4 | \alias{rolling_origin}
 5 | \title{Assessing forecasting accuracy with rolling origin}
 6 | \usage{
 7 | rolling_origin(knnf, h = NULL, rolling = TRUE)
 8 | }
 9 | \arguments{
10 | \item{knnf}{A \code{knnForecast} object.}
11 | 
12 | \item{h}{A positive integer. The forecast horizon. If \code{NULL} the
13 | prediction horizon of the \code{knnForecast} object is used.}
14 | 
15 | \item{rolling}{A logical. If \code{TRUE} (the default), forecasting
16 | horizons from 1 to \code{h} are used. Otherwise, only horizon
17 | \code{h} is used.}
18 | }
19 | \value{
20 | A list containing at least the following fields:
21 | 
22 | \item{\code{test_sets}}{a matrix containing the test sets used in the
23 | evaluation. Every row contains a different test set.}
24 | \item{\code{predictions}}{The predictions for the test sets.}
25 | \item{\code{errors}}{The errors for the test sets.}
26 | \item{\code{global_accu}}{Different measures of accuracy applied to all the
27 | errors.}
28 | \item{\code{h_accu}}{Different measures of accuracy applied to all the
29 | errors for every forecasting horizon.}
30 | }
31 | \description{
32 | It uses the model and the time series associated with the \code{knnForecast}
33 | object to assess the forecasting accuracy of the model using the last
34 | \code{h} values of the time series to build test sets applying a rolling
35 | origin evaluation.
36 | }
37 | \details{
38 | This function assesses the forecast accuracy of the model used by the
39 | \code{knnForecast} object. It uses \code{h} different test and training
40 | sets. The first test set consists of the last \code{h} values of the time
41 | series (the training set is formed by the previous values). The next test
42 | set consists of the last \eqn{h - 1} values of the time series and so on
43 | (the last test set is formed by the last value of the time series).
44 | }
45 | \examples{
46 | pred <- knn_forecasting(UKgas, h = 4, lags = 1:4, k = 2)
47 | ro <- rolling_origin(pred)
48 | print(ro$global_accu)
49 | }
50 | 


--------------------------------------------------------------------------------
/man/knn_forecasting.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/forecasting.R
 3 | \name{knn_forecasting}
 4 | \alias{knn_forecasting}
 5 | \title{Time series forecasting using KNN regression}
 6 | \usage{
 7 | knn_forecasting(
 8 |   timeS,
 9 |   h,
10 |   lags = NULL,
11 |   k = c(3, 5, 7),
12 |   msas = c("recursive", "MIMO"),
13 |   cf = c("mean", "median", "weighted"),
14 |   transform = c("additive", "multiplicative", "none")
15 | )
16 | }
17 | \arguments{
18 | \item{timeS}{A numeric vector or time series of class \code{ts}.}
19 | 
20 | \item{h}{A positive integer. Number of values to forecast.}
21 | 
22 | \item{lags}{An integer vector in increasing order expressing the lags used
23 | as autoregressive variables.}
24 | 
25 | \item{k}{A positive integer. The k parameter in KNN regression. A vector of
26 | k values can also be used. In that case, the forecast is the average
27 | of the forecasts produced by the different models with the different k
28 | parameters.}
29 | 
30 | \item{msas}{A string indicating the Multiple-Step Ahead Strategy used when
31 | more than one value is predicted. It can be "MIMO" or "recursive" (the
32 | default).}
33 | 
34 | \item{cf}{A string. It indicates the combination function used to aggregate
35 | the targets associated with the nearest neighbors. It can be "median",
36 | "weighted" or "mean" (the default).}
37 | 
38 | \item{transform}{A character value indicating whether the training samples
39 | are transformed. A transformation is recommended if the time series has a
40 | trend. The default is \code{"additive"} (additive transformation). A
41 | multiplicative transformation or no transformation is also possible.}
42 | }
43 | \value{
44 | An object of class \code{"knnForecast"}. The
45 | function \code{\link[base]{summary}} can be used to obtain or print a
46 | summary of the results.
47 | 
48 | An object of class \code{"knnForecast"} is a list containing at least
49 | the following components:
50 | 
51 | 
52 | \item{\code{call}}{the matched call.}
53 | \item{\code{msas}}{the Multi-Step Ahead Strategy.}
54 | \item{\code{prediction}}{a time series with the forecast.}
55 | \item{\code{model}}{an object of class \code{"knnModel"} with the KNN
56 | model}
57 | }
58 | \description{
59 | It applies KNN regression to forecast the future values of a time series.
60 | The lags used as autoregressive variables are set with the \code{lags}
61 | parameter. If the user does not set the number of nearest neighbors or
62 | the lags, these values are selected automatically.
63 | }
64 | \examples{
65 | pred <- knn_forecasting(USAccDeaths, h = 12, lags = 1:12, k = 2)
66 | pred$prediction # To see a time series with the forecasts
67 | plot(pred) # To see a plot with the forecast
68 | }
69 | 


--------------------------------------------------------------------------------
/R/print_knnForecast.R:
--------------------------------------------------------------------------------
 1 | #' @export
 2 | print.knnForecast <- function (x, ...) {
 3 |   cat("\nCall:  ",
 4 |       paste(deparse(x$call),
 5 |             sep = "\n",
 6 |             collapse = "\n"
 7 |       ),
 8 |       "\n\n",
 9 |       sep = ""
10 |   )
11 |   cat("Multiple-Step Ahead Strategy:", x$msas, "\n")
12 |   if (length(x$model$k) == 1) {
13 |     cat("K (number of nearest neighbors):", x$model$k, "\n")
14 |   } else {
15 |     cat("K (number of nearest neighbors):",
16 |         length(x$model$k),
17 |         "models with ")
18 |     for (ind in seq_along(x$model$k)) {
19 |       if (ind == 1) {
20 |         cat(x$model$k[ind])
21 |       } else if (ind == length(x$model$k)) {
22 |         cat (" and", x$model$k[ind])
23 |       } else {
24 |         cat (",", x$model$k[ind])
25 |       }
26 |     }
27 |     cat(" neighbors respectively\n")
28 |   }
29 |   cat("Autoregressive lags:", rev(x$model$lags), "\n")
30 |   cat("Number of examples:", nrow(x$model$examples$patterns), "\n")
31 |   cat("Targets are combined using ")
32 |   if (x$model$cf %in% c("mean", "median")) {
33 |     cat("the", x$model$cf, "function.\n")
34 |   } else {
35 |     cat("a weighted average.\n")
36 |   }
37 |   invisible(x)
38 | }
39 | 
#' @export
summary.knnForecast <- function (object, ...) {
  # Collect the fields that print.summary.knnForecast reports.
  model <- object$model
  res <- list(
    call = object$call,
    k = model$k,
    msas = object$msas,
    # Despite its name, this field holds the number of training examples.
    nneighbors = nrow(model$examples$patterns),
    # Lags are stored reversed in the model; report them the other way round.
    lags = rev(model$lags),
    prediction = object$prediction,
    cf = model$cf
  )
  class(res) <- "summary.knnForecast"
  res
}
55 | 
#' @export
print.summary.knnForecast <- function (x, ...) {
  stopifnot(inherits(x, "summary.knnForecast"))

  # Header: the call that produced the forecast, framed by blank lines.
  call_text <- paste(deparse(x$call), sep = "\n", collapse = "\n")
  cat("\nCall:  ", call_text, "\n\n", sep = "")

  cat("Multiple-Step Ahead Strategy:", x$msas, "\n")

  # One model prints its k directly; several models print "a, b and c".
  ks <- x$k
  n_models <- length(ks)
  if (n_models == 1) {
    cat("K (number of nearest neighbors):", ks, "\n")
  } else {
    cat("K (number of nearest neighbors):", n_models, "models with ")
    for (pos in seq_along(ks)) {
      value <- ks[pos]
      if (pos > 1 && pos < n_models) {
        cat (",", value)
      } else if (pos > 1) {
        cat (" and", value)
      } else {
        cat(value)
      }
    }
    cat(" neighbors respectively\n")
  }

  cat("Autoregressive lags:", x$lags, "\n")
  cat("Number of examples:", x$nneighbors, "\n")

  cat("Targets are combined using ")
  if (!(x$cf %in% c("mean", "median"))) {
    cat("a weighted average.\n")
  } else {
    cat("the", x$cf, "function.\n")
  }

  # The horizon is the length of the stored forecast.
  cat("Forecasting horizon:", length(x$prediction), "\n")
  cat("Forecast:\n")
  print(x$prediction)
  invisible(x)
}
98 | 
99 | 


--------------------------------------------------------------------------------
/src/RcppExports.cpp:
--------------------------------------------------------------------------------
 1 | // Generated by using Rcpp::compileAttributes() -> do not edit by hand
 2 | // Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393
 3 | 
 4 | #include <Rcpp.h>
 5 | 
 6 | using namespace Rcpp;
 7 | 
 8 | #ifdef RCPP_USE_GLOBAL_ROSTREAM
 9 | Rcpp::Rostream<true>&  Rcpp::Rcout = Rcpp::Rcpp_cout_get();
10 | Rcpp::Rostream<false>& Rcpp::Rcerr = Rcpp::Rcpp_cerr_get();
11 | #endif
12 | 
// first_n
// Forward declaration of the C++ implementation called by the wrapper below.
List first_n(NumericMatrix m, NumericVector v, int k);
// Entry point listed in CallEntries as "_tsfknn_first_n". It receives its
// three arguments as SEXPs, converts them to the C++ parameter types, calls
// first_n() and returns the wrapped result.
RcppExport SEXP _tsfknn_first_n(SEXP mSEXP, SEXP vSEXP, SEXP kSEXP) {
BEGIN_RCPP
    // NOTE(review): BEGIN_RCPP/END_RCPP are Rcpp macros that bracket the body,
    // presumably to turn C++ exceptions into R errors -- confirm in Rcpp docs.
    Rcpp::RObject rcpp_result_gen;
    // NOTE(review): presumably synchronises R's RNG state around the call.
    Rcpp::RNGScope rcpp_rngScope_gen;
    Rcpp::traits::input_parameter< NumericMatrix >::type m(mSEXP);
    Rcpp::traits::input_parameter< NumericVector >::type v(vSEXP);
    Rcpp::traits::input_parameter< int >::type k(kSEXP);
    rcpp_result_gen = Rcpp::wrap(first_n(m, v, k));
    return rcpp_result_gen;
END_RCPP
}
// build_examples2
// Forward declaration of the C++ implementation called by the wrapper below.
List build_examples2(NumericVector timeS, NumericVector lags, int nt);
// Entry point listed in CallEntries as "_tsfknn_build_examples2". It converts
// the three SEXP arguments, calls build_examples2() and wraps the result.
RcppExport SEXP _tsfknn_build_examples2(SEXP timeSSEXP, SEXP lagsSEXP, SEXP ntSEXP) {
BEGIN_RCPP
    // NOTE(review): BEGIN_RCPP/END_RCPP are Rcpp macros that bracket the body,
    // presumably to turn C++ exceptions into R errors -- confirm in Rcpp docs.
    Rcpp::RObject rcpp_result_gen;
    // NOTE(review): presumably synchronises R's RNG state around the call.
    Rcpp::RNGScope rcpp_rngScope_gen;
    Rcpp::traits::input_parameter< NumericVector >::type timeS(timeSSEXP);
    Rcpp::traits::input_parameter< NumericVector >::type lags(lagsSEXP);
    Rcpp::traits::input_parameter< int >::type nt(ntSEXP);
    rcpp_result_gen = Rcpp::wrap(build_examples2(timeS, lags, nt));
    return rcpp_result_gen;
END_RCPP
}
// build_examples_m
// Forward declaration of the C++ implementation called by the wrapper below.
List build_examples_m(NumericVector timeS, NumericVector lags, int nt);
// Entry point listed in CallEntries as "_tsfknn_build_examples_m". It converts
// the three SEXP arguments, calls build_examples_m() and wraps the result.
RcppExport SEXP _tsfknn_build_examples_m(SEXP timeSSEXP, SEXP lagsSEXP, SEXP ntSEXP) {
BEGIN_RCPP
    // NOTE(review): BEGIN_RCPP/END_RCPP are Rcpp macros that bracket the body,
    // presumably to turn C++ exceptions into R errors -- confirm in Rcpp docs.
    Rcpp::RObject rcpp_result_gen;
    // NOTE(review): presumably synchronises R's RNG state around the call.
    Rcpp::RNGScope rcpp_rngScope_gen;
    Rcpp::traits::input_parameter< NumericVector >::type timeS(timeSSEXP);
    Rcpp::traits::input_parameter< NumericVector >::type lags(lagsSEXP);
    Rcpp::traits::input_parameter< int >::type nt(ntSEXP);
    rcpp_result_gen = Rcpp::wrap(build_examples_m(timeS, lags, nt));
    return rcpp_result_gen;
END_RCPP
}
// build_examples_a
// Forward declaration of the C++ implementation called by the wrapper below.
List build_examples_a(NumericVector timeS, NumericVector lags, int nt);
// Entry point listed in CallEntries as "_tsfknn_build_examples_a". It converts
// the three SEXP arguments, calls build_examples_a() and wraps the result.
RcppExport SEXP _tsfknn_build_examples_a(SEXP timeSSEXP, SEXP lagsSEXP, SEXP ntSEXP) {
BEGIN_RCPP
    // NOTE(review): BEGIN_RCPP/END_RCPP are Rcpp macros that bracket the body,
    // presumably to turn C++ exceptions into R errors -- confirm in Rcpp docs.
    Rcpp::RObject rcpp_result_gen;
    // NOTE(review): presumably synchronises R's RNG state around the call.
    Rcpp::RNGScope rcpp_rngScope_gen;
    Rcpp::traits::input_parameter< NumericVector >::type timeS(timeSSEXP);
    Rcpp::traits::input_parameter< NumericVector >::type lags(lagsSEXP);
    Rcpp::traits::input_parameter< int >::type nt(ntSEXP);
    rcpp_result_gen = Rcpp::wrap(build_examples_a(timeS, lags, nt));
    return rcpp_result_gen;
END_RCPP
}
65 | 
// Table of the routines passed to R_registerRoutines() in R_init_tsfknn().
// Each entry gives the registered name, the function pointer and the number
// of arguments (3 for every wrapper); the all-NULL entry terminates the table.
static const R_CallMethodDef CallEntries[] = {
    {"_tsfknn_first_n", (DL_FUNC) &_tsfknn_first_n, 3},
    {"_tsfknn_build_examples2", (DL_FUNC) &_tsfknn_build_examples2, 3},
    {"_tsfknn_build_examples_m", (DL_FUNC) &_tsfknn_build_examples_m, 3},
    {"_tsfknn_build_examples_a", (DL_FUNC) &_tsfknn_build_examples_a, 3},
    {NULL, NULL, 0}
};
73 | 
// Initialisation routine for the tsfknn shared library.
// NOTE(review): by R's naming convention R_init_<pkg> is presumably invoked
// when the package DLL is loaded -- confirm in "Writing R Extensions".
RcppExport void R_init_tsfknn(DllInfo *dll) {
    // Register the routines of CallEntries; the other tables are not used.
    R_registerRoutines(dll, NULL, CallEntries, NULL, NULL);
    // Turn off dynamic symbol lookup for this DLL.
    R_useDynamicSymbols(dll, FALSE);
}
78 | 


--------------------------------------------------------------------------------
/src/code.cpp:
--------------------------------------------------------------------------------
  1 | #include <Rcpp.h>
  2 | #include <algorithm>
  3 | using namespace Rcpp;
  4 | 
  5 | /* This function finds the indexes of the k nearest neighbors and computes
  6 |  * the distances to all the examples
  7 |  */
  8 | // [[Rcpp::export]]
  9 | List first_n(NumericMatrix m, NumericVector v, int k) {
 10 |   NumericVector d(m.nrow());
 11 |   NumericVector d_copy(m.nrow());
 12 |   for (int i = 0; i < m.nrow(); ++i) {
 13 |     d[i] = sum(pow(m(i, _ ) - v, 2));
 14 |     d_copy[i] = sum(pow(m(i, _ ) - v, 2));
 15 |   }
 16 |   double maxi = max(d);
 17 |   IntegerVector ind(k);
 18 |   for (int i = 0; i < k; ++i) {
 19 |     NumericVector::iterator it = std::min_element(d.begin(), d.end());
 20 |     ind[i] = std::distance(d.begin(), it) + 1;
 21 |     *it = maxi + 1;
 22 |   }
 23 |   List ret;
 24 |   ret["indexes"] = ind;
 25 |   ret["distances"] = d_copy;
 26 |   return ret;
 27 | }
 28 | 
 29 | // [[Rcpp::export]]
 30 | List build_examples2(NumericVector timeS, NumericVector lags, int nt) {
 31 |   const int MAXLAG = lags[0];
 32 |   const int NCOL   = lags.size();
 33 |   const int NROW   = timeS.size() - MAXLAG - nt + 1;
 34 |   NumericMatrix patterns(NROW, NCOL);
 35 |   NumericMatrix targets(NROW, nt);
 36 |   IntegerVector targetsI(NROW);
 37 |   int row = 0;
 38 |   for (int ind = MAXLAG + nt -1; ind < timeS.size(); ++ind) {
 39 |     for (int col = 0; col < NCOL; ++col)
 40 |       patterns(row, col) = timeS[ind - nt + 1 - lags[col]];
 41 |     targets(row, _) = timeS[Range(ind - nt + 1, ind + 1)];
 42 |     targetsI[row] = ind - nt + 2;
 43 |     row++;
 44 |   }
 45 |   List ret;
 46 |   ret["patterns"] = patterns;
 47 |   ret["targets"]  = targets;
 48 |   ret["targetsI"] = targetsI;
 49 |   return ret;
 50 | }
 51 | 
 52 | // [[Rcpp::export]]
 53 | List build_examples_m(NumericVector timeS, NumericVector lags, int nt) {
 54 |   const int MAXLAG = lags[0];
 55 |   const int NCOL   = lags.size();
 56 |   const int NROW   = timeS.size() - MAXLAG - nt + 1;
 57 |   NumericMatrix patterns(NROW, NCOL);
 58 |   NumericMatrix targets(NROW, nt);
 59 |   IntegerVector targetsI(NROW);
 60 |   int row = 0;
 61 |   for (int ind = MAXLAG + nt -1; ind < timeS.size(); ++ind) {
 62 |     double sum = 0;
 63 |     for (int col = 0; col < NCOL; ++col) {
 64 |       patterns(row, col) = timeS[ind - nt + 1 - lags[col]];
 65 |       sum += patterns(row, col);
 66 |     }
 67 |     sum /= NCOL;
 68 |     for (int col = 0; col < NCOL; ++col)
 69 |       patterns(row, col) /= sum;
 70 |     targets(row, _) = timeS[Range(ind - nt + 1, ind + 1)] / sum;
 71 |     targetsI[row] = ind - nt + 2;
 72 |     row++;
 73 |   }
 74 |   List ret;
 75 |   ret["patterns"] = patterns;
 76 |   ret["targets"]  = targets;
 77 |   ret["targetsI"] = targetsI;
 78 |   return ret;
 79 | }
 80 | 
 81 | // [[Rcpp::export]]
 82 | List build_examples_a(NumericVector timeS, NumericVector lags, int nt) {
 83 |   const int MAXLAG = lags[0];
 84 |   const int NCOL   = lags.size();
 85 |   const int NROW   = timeS.size() - MAXLAG - nt + 1;
 86 |   NumericMatrix patterns(NROW, NCOL);
 87 |   NumericMatrix targets(NROW, nt);
 88 |   IntegerVector targetsI(NROW);
 89 |   int row = 0;
 90 |   for (int ind = MAXLAG + nt -1; ind < timeS.size(); ++ind) {
 91 |     double sum = 0;
 92 |     for (int col = 0; col < NCOL; ++col) {
 93 |       patterns(row, col) = timeS[ind - nt + 1 - lags[col]];
 94 |       sum += patterns(row, col);
 95 |     }
 96 |     sum /= NCOL;
 97 |     for (int col = 0; col < NCOL; ++col)
 98 |       patterns(row, col) -= sum;
 99 |     targets(row, _) = timeS[Range(ind - nt + 1, ind + 1)] - sum;
100 |     targetsI[row] = ind - nt + 2;
101 |     row++;
102 |   }
103 |   List ret;
104 |   ret["patterns"] = patterns;
105 |   ret["targets"]  = targets;
106 |   ret["targetsI"] = targetsI;
107 |   return ret;
108 | }
109 | 


--------------------------------------------------------------------------------
/R/rolling_origin.R:
--------------------------------------------------------------------------------
  1 | #' Assessing forecasting accuracy with rolling origin
  2 | #'
  3 | #' It uses the model and the time series associated with the \code{knnForecast}
  4 | #' object to asses the forecasting accuracy of the model using the last
  5 | #' \code{h} values of the time series to build test sets applying a rolling
  6 | #' origin evaluation.
  7 | #'
  8 | #' This function assesses the forecast accuracy of the model used by the
  9 | #' \code{knnForecast} object. It uses \code{h} different test and training
 10 | #' sets. The first test set consists of the last \code{h} values of the time
 11 | #' series (the training set is formed by the previous values). The next test
 12 | #' set consists of the last \eqn{h - 1} values of the time series and so on
 13 | #' (the last test set is formed by the last value of the time series).
 14 | #'
 15 | #' @param knnf A \code{knnForecast} object.
 16 | #' @param h A positive integer. The forecast horizon. If \code{NULL} the
 17 | #'    prediction horizon of the \code{knnForecast} object is used.
 18 | #' @param rolling A logical. If \code{TRUE} (the default), forecasting
 19 | #'    horizons from 1 to \code{h} are used. Otherwise, only horizon
 20 | #'    \code{h} is used.
 21 | #' @return A list containing at least the following fields:
 22 | #'
 23 | #'  \item{\code{test_sets}}{a matrix containing the test sets used in the
 24 | #'   evaluation. Every row contains a different test set.}
 25 | #'  \item{\code{predictions}}{The predictions for the test sets.}
 26 | #'  \item{\code{errors}}{The errors for the test sets.}
 27 | #'  \item{\code{global_accu}}{Different measures of accuracy applied to all the
 28 | #'  errors.}
 29 | #'  \item{\code{h_accu}}{Different measures of accuracy applied to all the
 30 | #'  errors for every forecasting horizon.}
 31 | #'
 32 | #' @examples
 33 | #' pred <- knn_forecasting(UKgas, h = 4, lags = 1:4, k = 2)
 34 | #' ro <- rolling_origin(pred)
 35 | #' print(ro$global_accu)
 36 | #' @export
 37 | rolling_origin <- function(knnf, h = NULL, rolling = TRUE) {
 38 |   # Check knnf parameter
 39 |   stopifnot(class(knnf) == "knnForecast")
 40 | 
 41 |   # Check h parameter
 42 |   if (is.null(h)) h <- length(knnf$prediction)
 43 |   stopifnot(is.numeric(h), length(h) == 1, h >= 1)
 44 | 
 45 |   max_k <- utils::tail(knnf$model$k, 1)
 46 |   nte <-  n_training_examples(stats::ts(utils::head(knnf$model$ts, -h),
 47 |                                  start = stats::start(knnf$model$ts),
 48 |                                  frequency = stats::frequency(knnf$model$ts)),
 49 |                               h = h,
 50 |                               lags = rev(knnf$model$lags),
 51 |                               msas = knnf$msas
 52 |   )
 53 |   if (max_k > nte)
 54 |     stop(paste("Impossible to create", max_k, "examples"))
 55 | 
 56 |   # Check rolling parameter
 57 |   stopifnot(is.logical(rolling), length(rolling) == 1)
 58 | 
 59 |   if (rolling) {
 60 |     horizons <- seq(h)
 61 |   } else {
 62 |     horizons <- h
 63 |   }
 64 |   timeS <- knnf$model$ts
 65 |   test_sets <- matrix(NA, nrow = length(horizons), ncol = h)
 66 |   predictions <- test_sets
 67 |   ind <- nrow(test_sets)
 68 |   for (hor in horizons) {
 69 |     tt <- train_test(timeS, hor)
 70 |     test_sets[ind, 1:hor] <- tt$test
 71 | #   if (knnf$msas == "MIMO" || length(horizons) == 1) {
 72 |     pred <- knn_forecasting(tt$training,
 73 |                             h = hor,
 74 |                             lags = rev(knnf$model$lags),
 75 |                             k = knnf$model$k,
 76 |                             msas = knnf$msas,
 77 |                             cf = knnf$model$cf,
 78 |                             transform = knnf$transform)
 79 |     predictions[ind, 1:hor] <- pred$prediction
 80 |     # } else { # optimization for recursive forecasting
 81 |     #   knnf$model$examples$patterns <- knnf$model$examples$patterns[1:(nrow(knnf$model$examples$patterns) - 1), , drop = FALSE]
 82 |     #   knnf$model$examples$targets <- knnf$model$examples$targets[1:(nrow(knnf$model$examples$targets) - 1), , drop = FALSE ]
 83 |     #   predictions[ind, 1:hor] <- predict(knnf, h = hor)$prediction
 84 |     # }
 85 |     ind <- ind - 1
 86 |   }
 87 |   colnames(test_sets)   <-  paste("h=", 1:h, sep = "")
 88 |   colnames(predictions) <-  paste("h=", 1:h, sep = "")
 89 |   errors <- test_sets - predictions
 90 |   g_rmse <- sqrt(mean(errors ^ 2, na.rm = TRUE))
 91 |   g_mae  <- mean(abs(errors), na.rm = TRUE)
 92 |   g_mape <- mean(abs(100*errors/test_sets), na.rm = TRUE)
 93 |   global_accu <- c(g_rmse, g_mae, g_mape)
 94 |   names(global_accu) <- c("RMSE", "MAE", "MAPE")
 95 | 
 96 |   accu <- function(c) {
 97 |     rmse <- sqrt(mean(errors[, c] ^ 2, na.rm = TRUE))
 98 |     mae  <- mean(abs(errors[, c]), na.rm = TRUE)
 99 |     mape <- mean(abs(100*errors[, c]/test_sets[, c]), na.rm = TRUE)
100 |     c(rmse, mae, mape)
101 |   }
102 |   h_accu <- sapply(1:h, accu)
103 |   colnames(h_accu) <-  paste("h=", 1:h, sep = "")
104 |   rownames(h_accu) <- c("RMSE", "MAE", "MAPE")
105 | 
106 |   structure(
107 |     list(
108 |       knnf = knnf,
109 |       test_sets = test_sets,
110 |       predictions = predictions,
111 |       errors = test_sets - predictions,
112 |       global_accu = global_accu,
113 |       h_accu = h_accu
114 |     ),
115 |     class = "knnForecastRO"
116 |   )
117 | }
118 | 
#' Plot a prediction of a test set
#'
#' It plots the time series stored in an object returned by
#' \code{\link{rolling_origin}} together with the forecast of one of its
#' test sets.
#'
#' @param x the object obtained from a call to \code{\link{rolling_origin}}.
#'
#' @param h an integer. The forecasting horizon. If \code{NULL}, the maximum
#'    forecasting horizon of all the test sets is used.
#' @param ... Other plotting parameters to affect the plot.
#'
#' @export
plot.knnForecastRO <- function(x, h = NULL, ...) {
  # Check h parameter
  if (is.null(h)) {
    h <- ncol(x$test_sets)
  }
  stopifnot(is.numeric(h), length(h) == 1, h >= 1, h <= ncol(x$test_sets))
  if (nrow(x$test_sets) == 1) stopifnot(h == ncol(x$test_sets))

  # With several test sets, the one of horizon h is h - 1 rows above the
  # last row.
  the_row <- if (nrow(x$test_sets) == 1) 1 else nrow(x$test_sets) - h + 1

  timeS <- x$knnf$model$ts
  n_obs <- length(timeS)
  graphics::plot(timeS, type = "o", pch = 20, ylab = "")

  # Copy of the series holding only the forecast in its last h positions.
  overlay <- timeS
  overlay[1:(n_obs - 1)] <- rep(NA, n_obs - 1)
  overlay[(n_obs - h + 1):n_obs] <- x$predictions[the_row, 1:h]

  graphics::lines(overlay, col = my_colours("red"))
  graphics::points(overlay, col = my_colours("red"), pch = 20)
}
153 | 
154 | 


--------------------------------------------------------------------------------
/R/knn.R:
--------------------------------------------------------------------------------
  1 | # Build the examples.
  2 | #
  3 | # Build the examples for a KNN model to forecast a time series using
  4 | # lags values of the series as autoregressive features.
  5 | #
  6 | # @param timeS The time series.
  7 | # @param lags An integer vector with the lags used as feature vector in
  8 | #             decreasing order.
  9 | # @param nt The number of targets.
 10 | #
 11 | # @return A list with two fields: 1) a matrix with the features of the
 12 | #         examples and 2) a matrix with the targets of the examples
 13 | # @examples
 14 | # build_examples(ts(1:5), lags = 2:1)
 15 | # build_examples(ts(1:5), lags = 2:1, nt = 2)
 16 | # @export
 17 | build_examples <- function(timeS, lags, nt = 1, transform) {
 18 |   # MAXLAG   <- lags[1]
 19 |   # NCOL     <- length(lags)
 20 |   # NROW     <- length(timeS) - MAXLAG - nt + 1
 21 |   # patterns <- matrix(0, nrow = NROW, ncol = NCOL)
 22 |   # targets  <- matrix(0, nrow = NROW, ncol = nt)
 23 |   # row <- 1
 24 |   # for (ind in seq(MAXLAG + nt, length(timeS))) {
 25 |   #   the_mean <- mean(timeS[ind - nt + 1 - lags])
 26 |   #   patterns[row, ] <- timeS[ind - nt + 1 - lags] / the_mean
 27 |   #   targets[row, ] <- timeS[(ind - nt + 1):ind] / the_mean
 28 |   #   row <- row + 1
 29 |   # }
 30 |   if (transform == "none") {
 31 |     r <- build_examples2(timeS, lags, nt)
 32 |   } else if (transform == "multiplicative") {
 33 |     r <- build_examples_m(timeS, lags, nt)
 34 |   }else if (transform == "additive") {
 35 |     r <- build_examples_a(timeS, lags, nt)
 36 |   }
 37 |   colnames(r$patterns) <- paste0("Lag", lags)
 38 |   colnames(r$targets)  <- paste0("H", 1:nt)
 39 |   r
 40 |   # list(
 41 |   #   patterns = patterns,
 42 |   #   targets = targets
 43 |   # )
 44 | }
 45 | 
 46 | # Create a KNN model.
 47 | #
 48 | # Build a KNN model to forecast a time series using autoregressive features.
 49 | #
 50 | # @param timeS The time series.
 51 | # @param lags An integer vector with the lags used as feature vector in
 52 | #             increasing order.
 53 | # @param k The k parameter.
 54 | # @param nt The number of targets (amount of horizons to be forecast).
 55 | # @param cf The combination function used to aggregate the targets of
 56 | #     the nearest neighbors.
 57 | # @return An object of type knnModel.
 58 | #
 59 | # @export
 60 | knn_model <- function(timeS, lags, k, nt = 1, cf = "mean", transform) {
 61 |   lags <- rev(lags)
 62 |   stopifnot(utils::tail(lags, 1) >= 1)
 63 |   MAXLAG <- lags[1]
 64 |   if (MAXLAG + nt > length(timeS)) stop("Impossible to create one example")
 65 |   examples <- build_examples(timeS, lags, nt, transform)
 66 |   if (utils::tail(k, 1) > nrow(examples$patterns))
 67 |     stop("k > number of examples")
 68 |   structure(
 69 |     list(
 70 |       ts = timeS,
 71 |       lags = lags,
 72 |       examples = examples,
 73 |       k = k,
 74 |       cf = cf
 75 |     ),
 76 |     class = "knnModel"
 77 |   )
 78 | }
 79 | 
 80 | # Predicts one example doing KNN regression.
 81 | #
 82 | # @param model The KNN model (its class should be knnModel).
 83 | # @param ex The features of the example whose target is to be predicted.
 84 | #
 85 | # @export
 86 | # @examples
 87 | # model <- knn_model(ts(c(2, 3, 1, 5, 4, 0, 7, 1, 2)), lags = 1:2, k = 2)
 88 | # regression(model, c(1, 2), k = 2)
 89 | regression <- function(model, example, k) {
 90 |   r <- first_n(model$examples$patterns, example, k)
 91 |   values <- model$examples$targets[r$indexes, , drop = F]
 92 |   if (model$cf == "mean") {
 93 |     prediction <- unname(colMeans(values))
 94 |   } else if (model$cf == "median") {
 95 |     prediction <- apply(values, 2, stats::median)
 96 |   } else if (model$cf == "weighted") {
 97 |     if (r$distances[r$indexes[1]] == 0) {
 98 |       prediction <- unname(values[1, ])
 99 |     } else {
100 |       reciprocal_d <- 1 / sqrt(r$distances[r$indexes])
101 |       prediction <- numeric(ncol(model$example$targets))
102 |       for (k_ in seq(k)) {
103 |         prediction <- prediction + values[k_, ] * reciprocal_d[k_]
104 |       }
105 |       prediction <- prediction / sum(reciprocal_d)
106 |     }
107 |   }
108 |   list(
109 |     prediction = prediction,
110 |     neighbors = model$examples$targetsI[r$indexes]
111 |   )
112 | }
113 | 
#' Predict method for KNN models for time series forecasting.
#'
#' Predicted values based on a KNN model for time series forecasting.
#'
#' If the model uses the MIMO strategy for multiple-step ahead prediction,
#' the forecasting horizon is fixed to the model forecasting horizon.
#'
#' @param object a \code{knnForecast} object obtained by a call to the
#'    \code{\link{knn_forecasting}} function.
#' @param h an integer. The forecasting horizon.
#' @param ... further arguments passed to or from other methods.
#'
#' @return a \code{knnForecast} object with the prediction and information
#' about the KNN model, see the documentation of \code{\link{knn_forecasting}}
#' for the structure of \code{knnForecast} objects.
#'
#' @examples
#' pred <- knn_forecasting(UKgas, h = 4, k = 1, msas = "recursive")
#' new_pred <- predict(pred, h = 6)
#' print(new_pred$prediction)
#' plot(new_pred) # To see a plot with the forecast
#'
#' @importFrom stats predict
#' @export
predict.knnForecast <- function(object, h, ...) {
  # Check h parameter
  stopifnot(is.numeric(h), length(h) == 1, h >= 1)

  k_values <- object$model$k
  series <- object$model$ts
  if (object$msas != "recursive") { # MIMO
    model_h <- ncol(object$model$examples$targets)
    if (h != model_h)
      stop(paste("The model only predicts horizon", model_h))
    # Feature vector: the last values of the series at the model lags
    example <- as.vector(series[(length(series) + 1) - object$model$lags])
    transformed <- object$transformation != "none"
    if (transformed) {
      the_mean <- mean(example)
      example <- if (object$transformation == "multiplicative") {
        example / the_mean
      } else {
        example - the_mean
      }
    }
    total <- numeric(h)
    for (n_neighbors in k_values) {
      reg <- regression(object$model, example, k = n_neighbors)
      if (transformed) {
        # Undo the transformation applied to the example
        reg$prediction <- if (object$transformation == "multiplicative") {
          reg$prediction * the_mean
        } else {
          reg$prediction + the_mean
        }
      }
      total <- total + reg$prediction
    }
    # Only the neighbors found with the last k are reported
    neighbors <- reg$neighbors
  } else {
    total <- numeric(h)
    for (n_neighbors in k_values) {
      pred <- recPrediction(object, h = h, k = n_neighbors)
      total <- total + pred$prediction
    }
    # Only the neighbors found with the last k are reported
    neighbors <- pred$neighbors
  }
  # The forecast is the average of the forecasts of the different k
  averaged <- total / length(k_values)

  # A two-point series starting at the end of the original one: its end is
  # the time of the first forecast value.
  two_steps <- stats::ts(1:2,
                         start = stats::end(series),
                         frequency = stats::frequency(series))
  object$prediction <- stats::ts(averaged,
                                 start = stats::end(two_steps),
                                 frequency = stats::frequency(series))
  object$neighbors <- neighbors
  object
}
191 | 
# Recursive (iterative) multi-step forecast with a single k value.
#
# The one-step ahead model is applied h times. After each step the forecast
# is appended to the working series, so later steps can use earlier
# forecasts as lagged values.
#
# Returns a list with `prediction` (numeric vector of length h) and
# `neighbors` (h x k matrix, one row per forecast step, filled with the
# neighbors returned by regression()).
recPrediction <- function(object, h, k) {
  fit <- object$model
  forecasts <- numeric(h)
  nn <- matrix(nrow = h, ncol = k)
  history <- as.vector(fit$ts)
  for (step in 1:h) {
    # Instance: lagged values counted from the end of the working series
    instance <- history[(length(history) + 1) - fit$lags]
    if (object$transformation != "none") {
      centre <- mean(instance)
      instance <- if (object$transformation == "multiplicative") {
        instance / centre
      } else {
        instance - centre
      }
    }
    reg <- regression(fit, instance, k)
    forecasts[step] <- reg$prediction
    if (object$transformation != "none") {
      # Undo the transformation applied to the instance
      forecasts[step] <- if (object$transformation == "multiplicative") {
        forecasts[step] * centre
      } else {
        forecasts[step] + centre
      }
    }
    nn[step, ] <- reg$neighbors
    history <- c(history, forecasts[step])
  }
  list(prediction = forecasts, neighbors = nn)
}
222 | 


--------------------------------------------------------------------------------
/R/plotting.R:
--------------------------------------------------------------------------------
  1 | my_colours <- function(name) {
  2 |   col_l <- list("blue" = "#000099",
  3 |                 "red" = "#CC0000",
  4 |                 "green" = "#339900",
  5 |                 "orange" = "#CC79A7"
  6 |   )
  7 |   return(col_l[[name]])
  8 | }
  9 | 
 10 | #' @importFrom graphics plot
 11 | #' @export
 12 | plot.knnForecast <- function(x, y, ...) {
 13 |   timeS <- combine(x$model$ts, x$prediction)
 14 |   graphics::plot(timeS, type = "n", ylab = "")
 15 |   graphics::lines(x$model$ts, type = "o", pch = 20)
 16 |   graphics::lines(x$prediction, type = "o",
 17 |                   col = my_colours("red"),
 18 |                   pch = 20)
 19 | }
 20 | 
 21 | #' Create a ggplot object from a knnForecast object
 22 | #'
 23 | #' It uses a knnForecast object to create a ggplot object that plots a time
 24 | #' series and its forecast using KNN regression.
 25 | #'
 26 | #' @details Commonly used parameters are:
 27 | #'
 28 | #' * `highlight`. A character string indicating what elements should be highlighted. Possible values are
 29 | #'   `"none"`, `"points"` and `"neighbors"`. The default value is `"none"`.
 30 | #' * `faceting`. Logical. This applies only if the `highlight` parameter is
 31 | #'   set to `"neighbors"`. It indicates whether the different nearest neighbors
 32 | #'   should be seen in different plots (`TRUE`, the default value) or in one
 33 | #'   plot.
 34 | #'
 35 | #' @param object An object of class `knnForecast`.
 36 | #' @param ... additional parameter, see details.
 37 | #'
 38 | #' @return The ggplot object representing a plotting with the forecast.
 39 | #'
 40 | #' @examples
 41 | #' pred <- knn_forecasting(USAccDeaths, h = 12, lags = 1:12, k = 2)
 42 | #' library(ggplot2)
 43 | #' autoplot(pred)
 44 | #' autoplot(pred, highlight = "neighbors")
 45 | #' @export
 46 | #' @importFrom ggplot2 autoplot
 47 | autoplot.knnForecast <- function(object, ...) {
 48 |   # check ... parameter
 49 |   l <- list(...)
 50 |   if (length(l) > 0) {
 51 |     valid_n <- c("highlight", "faceting") # valid parameter names, apart from object
 52 |     if(! all(names(l) %in% valid_n))
 53 |       stop(paste0("Parameters ", setdiff(names(l), valid_n), " not supported"))
 54 |     if ("highlight" %in% names(l) && (!is.character(l$highlight) || length(l$highlight) > 1))
 55 |       stop("highlight parameter should be character string of length 1")
 56 |     if ("faceting" %in% names(l) && (!is.logical(l$faceting) || length(l$faceting) > 1))
 57 |       stop("faceting parameter should be a logical value")
 58 |   }
 59 | 
 60 |   forecast <- object
 61 |   highlight <- if ("highlight" %in% names(l)) l$highlight else "none"
 62 |   faceting <- if ("faceting" %in% names(l)) l$faceting else TRUE
 63 | 
 64 |   # extract the time series
 65 |   timeS <- data.frame(
 66 |     x = as.vector(stats::time(forecast$model$ts)),
 67 |     y = as.vector(forecast$model$ts)
 68 |   )
 69 | 
 70 |   # extract the forecast
 71 |   pred <- data.frame(
 72 |     x = as.vector(stats::time(forecast$prediction)),
 73 |     y = as.vector(forecast$prediction)
 74 |   )
 75 | 
 76 |   if (highlight %in% c("neighbours", "neighbors")) {
 77 |     if (length(forecast$model$k) > 1) {
 78 |       warning("When several k are used it is not possible to see the
 79 |               neighbors")
 80 |     } else if (forecast$msas == "recursive") {
 81 |       return(plot_recursive(timeS, pred, forecast, faceting))
 82 |     } else {
 83 |       return(plot_mimo(timeS, pred, forecast, faceting))
 84 |     }
 85 |   }
 86 | 
 87 |   p <- ggplot2::ggplot(timeS, ggplot2::aes_string('x', 'y'))
 88 |   p <- p + ggplot2::geom_line(ggplot2::aes(colour = "Original"))
 89 |   p <- p + ggplot2::geom_line(ggplot2::aes(colour = "Forecast"), data = pred)
 90 |   if (highlight == "points") {
 91 |     p <- p + ggplot2::geom_point(ggplot2::aes(colour = "Original"))
 92 |     p <- p + ggplot2::geom_point(ggplot2::aes(colour = "Forecast"), data = pred)
 93 |   }
 94 |   breaks <- c("Original", "Forecast")
 95 |   colours <- c("Original" = "black", "Forecast" = my_colours("red"))
 96 |   p <- p + ggplot2::scale_colour_manual(values = colours, breaks = breaks)
 97 |   p <- p + ggplot2::labs(x = "Time", y = NULL, colour = "Time series")
 98 |   p
 99 | }
100 | 
# Neighbor plots for a forecast produced with the recursive strategy.
#
# One ggplot is printed per forecast step (row of predS). par(ask = TRUE) is
# set while the plots are printed and restored on exit. `timeS` and `predS`
# are data frames with columns x (time) and y (value).
plot_recursive <- function(timeS, predS, forecast, faceting) {
  old_par <- graphics::par(ask = TRUE)
  on.exit(graphics::par(old_par), add = TRUE)

  lags <- forecast$model$lags
  target_offsets <- seq(ncol(forecast$model$examples$targets)) - 1
  # historical values followed by the forecasts: an instance may use both
  extended <- rbind(timeS, predS)

  for (step in 1:nrow(predS)) {
    # instance whose target is predicted at this step
    instance <- extended[nrow(timeS) + step - lags, ]

    # accumulate features and targets of the K nearest neighbours
    nn_features <- data.frame(matrix(ncol = 3, nrow = 0))
    colnames(nn_features) <- c("x", "y", "k")
    nn_targets <- data.frame(matrix(ncol = 3, nrow = 0))
    colnames(nn_targets) <- c("x", "y", "k")
    for (k in seq(forecast$model$k)) {
      d <- forecast$neighbors[step, k]

      feature <- timeS[d - lags, ]
      feature$k <- rep(k, nrow(feature))
      nn_features <- rbind(nn_features, feature)

      target <- timeS[d + target_offsets, ]
      target$k <- rep(k, nrow(target))
      nn_targets <- rbind(nn_targets, target)
    }

    print(plot_neighbours(timeS, predS, predS[step, ], instance,
                          nn_features, nn_targets, faceting))
  }
}
127 | 
# Neighbor plot for a forecast produced with the MIMO strategy.
#
# `timeS` and `predS` are data frames with columns x (time) and y (value).
# For every neighbor stored in forecast$neighbors its feature rows and
# target rows are taken from timeS and tagged with the neighbor rank k.
# Returns the ggplot object built by plot_neighbours().
plot_mimo <- function(timeS, predS, forecast, faceting) {
  lags <- forecast$model$lags
  target_offsets <- seq_len(ncol(forecast$model$examples$targets)) - 1

  # extract the example
  example <- timeS[nrow(timeS) + 1 - lags, ]

  # extract the K nearest neighbours
  features <- data.frame(matrix(ncol = 3, nrow = 0))
  colnames(features) <- c("x", "y", "k")
  targets <- data.frame(matrix(ncol = 3, nrow = 0))
  colnames(targets) <- c("x", "y", "k")
  # seq_along(), not seq(): with a single neighbor d, seq(d) would expand
  # to 1:d and index past the end of forecast$neighbors.
  for (k in seq_along(forecast$neighbors)) {
    d <- forecast$neighbors[k]
    feature <- timeS[d - lags, ]
    feature$k <- rep(k, nrow(feature))
    features <- rbind(features, feature)
    target <- timeS[d + target_offsets, ]
    target$k <- rep(k, nrow(target))
    targets <- rbind(targets, target)
  }
  plot_neighbours(timeS, predS, predS, example, features, targets, faceting)
}
148 | 
# Build the ggplot that shows a series, its forecast, the instance used in
# the prediction and the features/targets of its nearest neighbors.
#
# All data arguments are data frames with columns x and y; `features` and
# `targets` also carry a column k that is used for faceting. `pred` is
# drawn as a line (only when it has more than one row) and `pred2` as
# points.
plot_neighbours <- function(timeS, pred, pred2, example, features, targets,
                            faceting) {
  # the time series
  p <- ggplot2::ggplot(timeS, ggplot2::aes_string('x', 'y')) +
    ggplot2::geom_line()

  # the forecast: a line needs at least two points
  if (nrow(pred) > 1) {
    p <- p + ggplot2::geom_line(data = pred, colour = my_colours("red"))
  }

  # forecast points, the instance and the K nearest neighbours
  p <- p +
    ggplot2::geom_point(ggplot2::aes(colour = "Forecast", shape = "Forecast"),
                        data = pred2) +
    ggplot2::geom_point(ggplot2::aes(colour = "Instance", shape = "Instance"),
                        data = example, size = 2) +
    ggplot2::geom_point(ggplot2::aes(colour = "NN Features",
                                     shape = "NN Features"),
                        size = 2, data = features) +
    ggplot2::geom_point(ggplot2::aes(colour = "NN Targets",
                                     shape = "NN Targets"),
                        size = 2, data = targets)

  if (faceting) {
    p <- p + ggplot2::facet_grid(k ~ .)
  }

  # one shared legend for colour and shape
  legend_order <- c("NN Features", "NN Targets", "Instance", "Forecast")
  point_shapes <- c("NN Features" = 1, "NN Targets" = 0, "Instance" = 18,
                    "Forecast" = 16)
  point_colours <- c("NN Features" = my_colours("blue"),
                     "NN Targets" = my_colours("green"),
                     "Instance" = my_colours("orange"),
                     "Forecast" = my_colours("red"))
  legend <- ggplot2::guide_legend("Data point")

  p +
    ggplot2::scale_shape_manual(values = point_shapes, breaks = legend_order) +
    ggplot2::scale_colour_manual(values = point_colours,
                                 breaks = legend_order) +
    ggplot2::guides(colour = legend, shape = legend) +
    ggplot2::labs(x = "Time", y = NULL)
}
198 | 


--------------------------------------------------------------------------------
/R/forecasting.R:
--------------------------------------------------------------------------------
  1 | #' Time series forecasting using KNN regression
  2 | #'
  3 | #' It applies KNN regression to forecast the future values of a time series.
  4 | #' The lags used as autoregressive variables are set with the \code{lags}
  5 | #' parameter. If the user does not set the number of nearest neighbors or
  6 | #' the lags, these values are selected automatically.
  7 | #'
  8 | #' @param timeS A numeric vector or time series of class \code{ts}.
  9 | #' @param h A positive integer. Number of values to forecast.
 10 | #' @param lags An integer vector in increasing order expressing the lags used
 11 | #'     as autoregressive variables.
 12 | #' @param k A positive integer. The k parameter in KNN regression. A vector of
 13 | #'     k values can also be used. In that case, the forecast is the average
 14 | #'     of the forecasts produced by the different models with the different k
 15 | #'     parameters.
 16 | #' @param msas A string indicating the Multiple-Step Ahead Strategy used when
 17 | #'     more than one value is predicted. It can be "recursive" or "MIMO" (the
 18 | #'     default).
 19 | #' @param cf A string. It indicates the combination function used to aggregate
 20 | #'     the targets associated with the nearest neighbors. It can be "median",
 21 | #'     "weighted" or "mean" (the default).
 22 | #' @param transform A character value indicating whether the training samples
 23 | #'   are transformed. If the time series has a trend it is recommended. By
 24 | #'   default is \code{"multiplicative"} (multiplicative transformation). It is also
 25 | #'   possible a multiplicative transformation or no transformation.
 26 | #' @return An object of class \code{"knnForecast"}. The
 27 | #'     function \code{\link[base]{summary}} can be used to obtain or print a
 28 | #'     summary of the results.
 29 | #'
 30 | #'     An object of class \code{"knnForecast"} is a list containing at least
 31 | #'     the following components:
 32 | #'
 33 | #'  \item{\code{call}}{the matched call.}
 34 | #'  \item{\code{msas}}{the Multi-Step Ahead Strategy.}
 35 | #'  \item{\code{prediction}}{a time series with the forecast.}
 36 | #'  \item{\code{model}}{an object of class \code{"knnModel"} with the KNN
 37 | #'                      model}
 38 | #'
 39 | #' @examples
 40 | #' pred <- knn_forecasting(USAccDeaths, h = 12, lags = 1:12, k = 2)
 41 | #' pred$prediction # To see a time series with the forecasts
 42 | #' plot(pred) # To see a plot with the forecast
 43 | #' @export
 44 | knn_forecasting <- function(timeS, h, lags = NULL, k = c(3, 5, 7),
 45 |                             msas = c("recursive", "MIMO"),
 46 |                             cf = c("mean", "median", "weighted"),
 47 |                             transform = c("additive", "multiplicative", "none")) {
 48 |   # Check timeS parameter
 49 |   stopifnot(stats::is.ts(timeS) || is.vector(timeS, mode = "numeric"))
 50 |   if (! stats::is.ts(timeS))
 51 |     timeS <- stats::as.ts(timeS)
 52 | 
 53 |   # Check h parameter
 54 |   stopifnot(is.numeric(h), length(h) == 1, h >= 1)
 55 | 
 56 |   # msas parameter
 57 |   msas <- match.arg(msas)
 58 | 
 59 |   # Check transform parameter
 60 |   transform <- match.arg(transform)
 61 | 
 62 |   # Check lags parameter
 63 |   stopifnot(is.null(lags) || is.vector(lags, mode = "numeric"))
 64 |   if (is.null(lags)) {
 65 |     if (stats::frequency(timeS) > 1) {
 66 |       lags <- 1:stats::frequency(timeS)
 67 |     } else {
 68 |       partial <- stats::pacf(timeS, plot = FALSE)
 69 |       lags <- which(partial$acf > 2/ sqrt(length(timeS)))
 70 |       if (length(lags) == 0 ||
 71 |           (length(lags) == 1 && transform %in% c("additive", "multiplicative"))) {
 72 |           lags = 1:5
 73 |       }
 74 |     }
 75 |   }
 76 | 
 77 |   if (is.unsorted(lags)) stop("lags should be a vector in increasing order")
 78 |   stopifnot(lags[1] >= 1)
 79 | 
 80 |   if ((length(lags) == 1 && transform %in% c("additive", "multiplicative"))) {
 81 |     stop("It does not make sense to use only 1 autoregressive lag with the additive or multiplicative transformation")
 82 |   }
 83 | 
 84 |   # Check k parameter
 85 |   stopifnot(is.numeric(k))
 86 |   k <- sort(k)
 87 |   if (k[1] < 1) stop("k values should be positive")
 88 |   if (k[1] > n_training_examples(timeS, h, lags, msas)) {
 89 |     stop(paste("Impossible to create", k[1], "examples"))
 90 |   } else {
 91 |     tmp <- k
 92 |     k <- NULL
 93 |     for (x in tmp)
 94 |       if (x <= n_training_examples(timeS, h, lags, msas)) {
 95 |         k <- c(k, x)
 96 |       } else {
 97 |         warning(paste("k =", x, "rejected: impossible to create",
 98 |                       x, "examples"))
 99 |       }
100 |   }
101 | 
102 |   # cf parameter
103 |   cf <- match.arg(cf)
104 | 
105 |   if (msas == "recursive") {
106 |     fit <- knn_model(timeS, lags = lags, k = k, nt = 1, cf = cf, transform)
107 |   } else { # MIMO
108 |     fit <- knn_model(timeS, lags = lags, k = k, nt = h, cf = cf, transform)
109 |   }
110 |   fit$k <- k
111 |   r <- structure(
112 |     list(
113 |       call = match.call(),
114 |       model = fit,
115 |       msas = msas,
116 |       transformation = transform
117 |     ),
118 |     class = "knnForecast"
119 |   )
120 |   predict(r, h)
121 | }
122 | 
#' Number of training examples
#'
#' It computes the number of training examples that a KNN model built with
#' the specified parameters would have.
#'
#' @inheritParams knn_forecasting
#' @return An integer.
#'
#' @examples
#' n_training_examples(ts(1:10), h = 2, lags = 1:3, msas = "MIMO")
#' n_training_examples(ts(1:10), h = 2, lags = 1:3, msas = "recursive")
#' @export
n_training_examples <- function(timeS, h, lags,
                                msas = c("MIMO", "recursive")) {
  # timeS: a ts object or a plain numeric vector (converted to ts)
  stopifnot(stats::is.ts(timeS) || is.vector(timeS, mode = "numeric"))
  timeS <- if (stats::is.ts(timeS)) timeS else stats::as.ts(timeS)

  # h: a single number, at least 1
  stopifnot(is.numeric(h), length(h) == 1, h >= 1)

  # lags: numeric vector in increasing order starting at 1 or more
  stopifnot(is.vector(lags, mode = "numeric"))

  msas <- match.arg(msas)

  if (is.unsorted(lags)) stop("lags should be a vector in increasing order")
  stopifnot(lags[1] >= 1)

  # An example spans the largest lag plus its targets: h targets with MIMO,
  # a single one with the recursive strategy.
  max_lag <- lags[length(lags)]
  n_targets <- if (msas == "MIMO") h else 1
  if (max_lag + n_targets > length(timeS))
    stop("Impossible to create one example")

  length(timeS) - max_lag - n_targets + 1
}
158 | 
#' Nearest neighbors associated with predictions
#'
#' It allows you to check the new instances and their nearest neighbors used
#' in a prediction associated with a "knnForecast" object.
#'
#' @param forecast A \code{knnForecast} object.
#' @return A list including the new instances used in KNN regression and their
#'    nearest neighbors.
#'
#' @examples
#' pred <- knn_forecasting(UKgas, h = 4, lags = 1:4, k = 2, msas = "MIMO")
#' nearest_neighbors(pred)
#' @export
nearest_neighbors <- function(forecast) {
  # inherits() also accepts objects that carry additional classes, for which
  # a comparison with class() would yield a vector and fail the check
  stopifnot(inherits(forecast, "knnForecast"))

  if (forecast$msas == "recursive") {
    return(nearest_neighbors_recursive(forecast))
  } else {
    return(nearest_neighbors_mimo(forecast))
  }
}
181 | 
# Instances and nearest neighbors of a forecast made with the recursive
# strategy.
#
# Returns a list with one element per forecast step (row of
# forecast$neighbors). Each element is a list with `instance` (the lagged
# values used as features, named "Lag <l>") and `nneighbors` (a data frame
# with one row per neighbor: its lagged values and its target "H1").
nearest_neighbors_recursive <- function(forecast) {
  timeS <- forecast$model$ts
  lags <- forecast$model$lags
  n_lags <- length(lags)
  lag_names <- paste("Lag", lags)
  target_offsets <- seq_len(ncol(forecast$model$examples$targets)) - 1
  # One column of forecast$neighbors per neighbor. This is used instead of
  # forecast$model$k, which is a vector when several k values were given.
  n_neighbors <- ncol(forecast$neighbors)
  # historical values followed by the forecasts: an instance may use both
  temp <- c(timeS, forecast$prediction)

  result <- list()
  for (h in seq_len(nrow(forecast$neighbors))) {
    # extract the example
    example <- temp[length(timeS) + h - lags]
    names(example) <- lag_names

    r <- data.frame(matrix(
      ncol = ncol(forecast$model$examples$patterns) + 1,
      nrow = n_neighbors
    ))
    colnames(r) <- c(lag_names, "H1")
    # seq_len(), not seq() on the row: with a single neighbor d, seq(d)
    # would expand to 1:d and index past the last column.
    for (k in seq_len(n_neighbors)) {
      d <- forecast$neighbors[h, k]
      r[k, 1:n_lags] <- timeS[d - lags]
      r[k, (n_lags + 1):ncol(r)] <- timeS[d + target_offsets]
    }
    result[[h]] <- list(
      instance = example,
      nneighbors = r
    )
  }
  return(result)
}
209 | 
# Instance and nearest neighbors of a forecast made with the MIMO strategy.
#
# Returns a list with `instance` (the lagged values counted from the end of
# the series, named "Lag <l>") and `nneighbors` (a data frame with one row
# per neighbor: its lagged values followed by its targets "H1", "H2", ...).
nearest_neighbors_mimo <- function(forecast) {
  timeS <- forecast$model$ts
  lags <- forecast$model$lags
  n_lags <- length(lags)
  n_targets <- ncol(forecast$model$examples$targets)
  target_offsets <- seq_len(n_targets) - 1
  # One row per stored neighbor. This is used instead of forecast$model$k,
  # which is a vector when several k values were given.
  n_neighbors <- length(forecast$neighbors)

  example <- timeS[length(timeS) + 1 - lags]
  names(example) <- paste("Lag", lags)

  r <- data.frame(matrix(
    ncol = ncol(forecast$model$examples$patterns) + n_targets,
    nrow = n_neighbors
  ))
  colnames(r) <- c(paste("Lag", lags), paste0("H", 1:n_targets))
  # seq_along(), not seq(): with a single neighbor d, seq(d) would expand
  # to 1:d and produce rows of missing values.
  for (k in seq_along(forecast$neighbors)) {
    d <- forecast$neighbors[k]
    r[k, 1:n_lags] <- timeS[d - lags]
    r[k, (n_lags + 1):ncol(r)] <- timeS[d + target_offsets]
  }
  return(list(
    instance = example,
    nneighbors = r
  ))
}
232 | 
#' Examples of the model associated with a prediction
#'
#' It allows you to see the examples of the model associated with a
#' \code{knnForecast} object.
#'
#' @param forecast A \code{knnForecast} object.
#' @return A matrix including the features and targets of the examples
#'    associated with the model of a \code{knnForecast} object.
#'
#' @examples
#' pred <- knn_forecasting(ts(1:8), h = 1, lags = 1:2, k = 2)
#' knn_examples(pred)
#' @export
knn_examples <- function(forecast) {
  # inherits() also accepts objects that carry additional classes, for which
  # a comparison with class() would yield a vector and fail the check
  stopifnot(inherits(forecast, "knnForecast"))
  # features first, targets last
  cbind(forecast$model$examples$patterns, forecast$model$examples$targets)
}
250 | 
251 | 


--------------------------------------------------------------------------------
/vignettes/tsfknn.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: "Time Series Forecasting with KNN in R: the tsfknn Package"
  3 | author: "Francisco Martinez, Maria P. Frias, Francisco Charte, Antonio J. Rivera"
  4 | date: "`r Sys.Date()`"
  5 | output: 
  6 |   html_document:
  7 |     number_sections: yes
  8 | vignette: >
  9 |   %\VignetteEncoding{UTF-8}
 10 |   %\VignetteIndexEntry{Time Series Forecasting with KNN in R: the tsfknn Package}
 11 |   %\VignetteEngine{knitr::rmarkdown}
 12 | ---
 13 | 
 14 | ```{r, echo = FALSE}
 15 | knitr::opts_chunk$set(warning = FALSE, message = FALSE)
 16 | 
 17 | ```
 18 | 
 19 | 
 20 | In this paper the **tsfknn** package for time series forecasting using KNN
 21 | regression is described. The package allows, with only one function, specifying
 22 | the KNN model and generating the forecasts. The user can choose among different
 23 | multi-step ahead strategies and among different functions to aggregate the targets of the nearest neighbors. It is also possible to consult the model used in the prediction and to obtain a graph including the forecast and the nearest neighbors used by KNN.
 24 | 
 25 | # Introduction
 26 | 
 27 | Time series forecasting has been performed traditionally using statistical methods such as ARIMA models or exponential smoothing. 
 28 | However, the last decades have witnessed the use of computational intelligence
techniques to forecast time series. Although artificial neural networks are the
 30 | most prominent machine learning technique used in time series forecasting,
 31 | other approaches, such as Gaussian Process or KNN, have also been applied.
 32 | Compared with classical statistical models, computational intelligence methods exhibit interesting features, such as their nonlinearity or the lack of an underlying model, that is, they are non-parametric.
 33 | 
 34 | Statistical methodologies for time series forecasting are present in CRAN as excellent packages. For example, the **forecast** package includes implementations of ARIMA, exponential smoothing, the theta method or basic techniques, such as the naive approach, that can be used as benchmark methods. On the other hand, although a great variety of computational intelligence approaches for regression are available in R (see, for example, the **caret** package), these approaches cannot be directly applied to time series forecasting. Fortunately, some new packages are filling this gap. For example,  the **nnfor** package or the `nnetar` function from the **forecast** package allows us to predict time series using artificial neural networks.
 35 | 
 36 | KNN is a very popular algorithm used in classification and regression.
 37 | This algorithm simply stores a collection of examples. Each
 38 | example consists of a vector of features (describing the example) and its associated
 39 | class (for classification) or numeric value (for prediction). Given a new 
 40 | example, KNN finds its *k* most similar examples (called nearest neighbors),
 41 | according to a distance metric (such as the Euclidean distance), and predicts
 42 | its class as the majority class of its nearest neighbors or, in the case of regression, as an aggregation of the target values associated with its nearest
 43 | neighbors. In this paper we describe the **tsfknn** R package for univariate time
 44 | series forecasting using KNN regression.
 45 | 
 46 | The rest of the paper is organized as follows. Section 2 explains how
 47 | KNN regression can be applied in a time series forecasting context using the
 48 | **tsfknn** package. In Section
 49 | 3 the different multi-step ahead strategies implemented in our package are
explained. Section 4 discusses some additional features of our package. Section 5 describes how the forecast accuracy of a KNN model can be assessed using a rolling origin evaluation. Finally, Section 6 draws some conclusions.
 51 | 
 52 | # Time series forecasting with KNN regression
 53 | 
 54 | In this section we explain how KNN regression can be applied to forecast time
 55 | series. To this end, we will use some functionality of the package **tsfknn**.
 56 | Let
 57 | us start with a simple time series: $t = \{ 1, 2, 3, 4, 5, 6, 7, 8 \}$ and suppose that we want to predict its next future value. First, we have to determine how the KNN examples are built, that is, we have to decide what are
 58 | the features and the targets associated with an example. The target of an
 59 | example is a value of the time series and its features are lagged values of
 60 | the target. For example, if we use lags 1-2 as features, the examples
 61 | associated with the time series $t$ are:
 62 | 
 63 | | Features | Target |
 64 | |:---------|:-------|
 65 | |1, 2      | 3      |
 66 | |2, 3      | 4      |
|3, 4      | 5      |
|4, 5      | 6      |
 68 | |5, 6      | 7      |
 69 | |6, 7      | 8      |
 70 | 
 71 | In our package, you can consult the examples associated with a KNN model used for time series forecasting with the `knn_examples` function:
 72 | 
 73 | ```{r}
 74 | library(tsfknn)
 75 | pred <- knn_forecasting(ts(1:8), h = 1, lags = 1:2, k = 2, transform = "none")
 76 | knn_examples(pred)
 77 | ```
 78 | 
 79 | Before consulting the examples, you have to build the model. This is done with
 80 | the function `knn_forecasting` that builds a model associated with a time 
 81 | series and uses the model to predict the future values of the time series.
 82 | Let us see the main arguments of this function:
 83 | 
 84 | * `timeS`: the time series to be forecast.
 85 | * `h`: the forecast horizon, that is, the number of future values to be predicted.
 86 | * `lags`: an integer vector indicating the lagged values of the target used as features in the examples (for instance, 1:2 means that lagged values 1 and 2 should be used).
 87 | * `k`: the number of nearest neighbors used by the KNN model.
 88 | * `transform`: set the kind of transformation applied to the examples and their targets. In general, it is useful to forecast time series with a trend. It will be explained later.
 89 |     
 90 | `knn_forecasting` is very handy because, as mentioned above, it builds the
 91 | KNN model and then uses the model to predict the time series. This function
 92 | returns a `knnForecast` object with information of the model and its 
 93 | prediction. As we have seen above, you can use the function `knn_examples` to see the examples associated with the model. You can also consult the prediction or get a plot through the `knnForecast` object:
 94 | 
 95 | ```{r}
 96 | pred$prediction
 97 | plot(pred)
 98 | ```
 99 | 
100 | You can also consult how the prediction was made. That is, you can consult the instance whose target was predicted and its nearest neighbors. This information is obtained with the `nearest_neighbors` function applied to a `knnForecast` object:
101 | 
102 | ```{r}
103 | nearest_neighbors(pred)
104 | ```
105 | 
106 | Because we have used lags 1-2 as features, the features associated with the
107 | next future value of the time series are the last two values of the time series (vector 
108 | $[7, 8]$). The two most similar examples (nearest neighbors) of this instance are vectors
109 | $[6, 7]$ and $[5, 6]$, whose targets (8 and 7) are averaged to produce the
110 | prediction 7.5. You can obtain a nice plot including the instance, its  nearest neighbors and the prediction:
111 | 
112 | ```{r}
113 | library(ggplot2)
114 | autoplot(pred, highlight = "neighbors")
115 | ```
116 | 
As can be observed, each nearest neighbor has been plotted in a different plot (you can also choose to show all the nearest neighbors in the same plot). The neighbors in the plots are sorted according to their distance to the instance, with the neighbor in the top plot being the nearest one.
118 | 
119 | By the way, this artificial example of a time series with a constant linear trend
120 | illustrates the fact that KNN is not suitable for predicting time series with a global
121 | trend. This is because KNN predicts an aggregation of historical
122 | values of the time series. Therefore, in order to predict a time series with
123 | global trend some detrending scheme should be used.
124 | 
125 | To recapitulate, because we use univariate time series, to specify a KNN model in our package you have to set:
126 | 
127 | * the lags used to build the KNN examples. They determine the lagged values      used as features or autoregressive explanatory variables.
128 | 
129 | * k: the number of nearest neighbors used in the prediction.
130 | 
131 | # Multi-step ahead strategies
132 | 
133 | In the previous section we have seen an example of one-step ahead prediction with KNN. Nonetheless, it is very common to forecast more than one value into
134 | the future. To this end, a multi-step ahead strategy has to be chosen. Our
135 | package implements two common strategies: the MIMO approach and the recursive or iterative approach (when only one future value is predicted both strategies are equivalent). Let us see how they work.
136 | 
137 | ## The Multiple Input Multiple Output strategy
138 | 
139 | This strategy is commonly applied with KNN and it is characterized by the use
140 | of a vector of target values. The length of this vector is equal to the number
141 | of 
142 | periods to forecast. For example, let us suppose that we are working with a
143 | time series of hourly electricity demand and we want to forecast the demand
144 | for the next 24 hours. In this situation, a good choice for the lags
145 | would be 1-24, that is, the demand of 24 consecutive hours. If the MIMO 
146 | strategy is chosen, then an example consists of:
147 | 
148 | * a feature vector with the demand of 24 consecutive hours and
149 | * a target vector with the demand in the next 24
150 | consecutive hours (after the 24 hours of the feature vector). 
151 | 
152 | The new instance would be the demand in the last 24 hours of the time series. This way, we would look for the demands most similar to the last 24 hours in the time series and we would predict an aggregation of their subsequent 24 hours.
153 | 
154 | In the next example we predict the next 12 months of a monthly time series
155 | using the MIMO strategy:
156 | 
157 | ```{r}
158 | pred <- knn_forecasting(USAccDeaths, h = 12, lags = 1:12, k = 2, msas = "MIMO")
159 | autoplot(pred, highlight = "neighbors", faceting = FALSE)
160 | ```
161 | 
162 | The prediction is the average of the target vectors of the two nearest neighbors. As can be observed, we have chosen to see all the nearest neighbors in the
163 | same plot. Because we are working with a monthly time series, we have thought that lags 1-12 are a suitable choice for selecting the features of the examples. In this case, the last 12 values of the time series are the new
164 | instance whose target has to be predicted. The two sequences of 12 consecutive
165 | values most similar to this instance are found (in blue) and their subsequent 12 values (in green) are averaged to obtain the prediction (in red).
166 | 
167 | ## The recursive strategy
168 | 
169 | The recursive or iterative strategy is the approach used by ARIMA or 
170 | exponential smoothing to forecast several periods ahead. Basically, a model
171 | that only forecasts one-step ahead is used, so that the model is applied iteratively to forecast all the future values. When historical observations to be used as features of the new instance are unavailable, previous predictions are used instead. 
172 | 
173 | Because the recursive strategy uses a one-step ahead model, this means that,
174 | in the case of KNN, the target of an example only contains one value. For instance, let us see how the recursive strategy works with the following
175 | example in which the next two future quarters of a quarterly time series are predicted:
176 | 
177 | ```{r}
178 | timeS <- window(UKgas, start = c(1976, 1))
179 | pred <- knn_forecasting(timeS, h = 2, lags = 1:4, k = 2, msas = "recursive")
180 | library(ggplot2)
181 | autoplot(pred, highlight = "neighbors")
182 | ```
183 | 
184 | In this example we have used lags 1-4 to specify the features of an example. To predict the first future point the last 4 values of the time series are used as "its features". To predict the second future point "its features" are the last three values of the time series and the prediction for the first future point. In the plot the prediction for the first future point can be seen. If you reproduce this code snippet you will also see the forecast for the second future point.
185 | 
186 | # Additional features
187 | 
188 | In this section several additional features of our package are described.
189 | 
190 | ## Combination and distance function
191 | 
192 | By default, the targets of the different nearest neighbors are averaged. However, it is possible to combine the targets using other aggregation functions. Currently, our package allows us to choose among the mean, the median and a weighted mean using the `cb` parameter of the `knn_forecasting` function. In the *weighted* mean the targets are weighted by the inverse of their distance. That is, closer neighbors of a query point will have a greater influence than neighbors which are further away.
193 | 
194 | Regarding the distance function applied to compute the nearest neighbors, our package uses the Euclidean distance, although we can implement other distance metrics in the future.
195 | 
196 | ## Combining several models with different k parameters
197 | 
198 | In order to specify a KNN model the user has to select, among other things, the
199 | value of the *k* parameter. Several strategies can be used to choose this value. A first, fast, straightforward solution is to use some heuristic (it is recommended to set *k* to the square root of the number of training
200 | examples). Another approach is to select *k* using an optimization tool on a validation set. *k* should minimize a forecast accuracy measure. The optimization strategy is very time consuming.
201 | 
202 | A third strategy is to use several KNN models with different *k* values. Each
203 | KNN 
204 | model generates its forecasts and the forecasts of the different models are 
205 | averaged to produce the final forecast. This strategy is based on the success of
206 | model combination in time series forecasting. This way, the use of a time consuming optimization tool is avoided and the forecasts are not based on a unique, heuristic *k* value. In our package you can use this strategy by specifying a vector of *k* values:
207 | 
208 | ```{r}
209 | pred <- knn_forecasting(ldeaths, h = 12, lags = 1:12, k = c(2, 4))
210 | pred$prediction
211 | plot(pred)
212 | ```
213 | 
214 | ## Forecasting time series with a trend
215 | 
216 | KNN is not suitable for forecasting a time series with a trend. The reason is simple: KNN predicts an average of historical values of the time series, so it cannot correctly predict values outside the range of the time series. If your time series has a trend we recommend using the parameter `transform` to transform the training samples. Use the value `"additive"` if the trend is additive or `"multiplicative"` for exponential time series:
217 | 
218 | ```{r}
219 | set.seed(5)
220 | timeS <- ts(1:10 + rnorm(10, 0, .2))
221 | pred <- knn_forecasting(timeS, h = 3, transform = "none")
222 | plot(pred)
223 | pred2 <- knn_forecasting(timeS, h = 3, transform = "additive")
224 | plot(pred2)
225 | ```
226 | 
227 | After a lot of experimentation we have observed that, in general, the additive transformation works better than the multiplicative transformation. The additive transformation works this way:
228 | 
229 | * An example is transformed by subtracting the mean of the example from its values.
230 | * The target associated with an example is transformed by subtracting from it the mean of its associated example.
231 | * This way, a prediction is a weighted combination of transformed targets. To back transform a prediction, the mean of the input vector is added to it.
232 | 
233 | It is easy to see an example of additive transformation using the API of the package. For example, let us see the examples of a model with no transformation:
234 | 
235 | ```{r}
236 | timeS <- ts(c(1, 3, 7, 9, 10, 12))
237 | model_n <- knn_forecasting(timeS, h = 1, lags = 1:2, k = 2, transform = "none")
238 | knn_examples(model_n)
239 | plot(model_n)
240 | ```
241 | 
242 | And now let us see the effect of the additive transformation:
243 | 
244 | ```{r}
245 | model_a <- knn_forecasting(timeS, h = 1, lags = 1:2, k = 2, transform = "additive")
246 | knn_examples(model_a)
247 | plot(model_a)
248 | ```
249 | 
250 | The forecast of the additive model is 14.5:
251 | 
252 | ```{r}
253 | model_a$pred
254 | ```
255 | 
256 | Let us see how this forecast is built. The last two values of the series `c(10, 12)` are the instance or query point. This instance is transformed to `c(-1, 1)` by subtracting its mean value. Its two nearest neighbors are the first and third examples. Their targets are 5 and 2 respectively. These targets are averaged obtaining 3.5. Finally, we add 3.5 to the mean of the query point, 11, getting the final forecast 14.5.
257 | 
258 | The multiplicative transformation is similar to the additive transformation:
259 | 
260 | * An example is transformed by dividing it by its mean.
261 | * The target associated with an example is transformed by dividing it by the mean of its associated example.
262 | * This way, a prediction is a weighted combination of transformed targets. To back transform a prediction, the prediction is multiplied by the mean of the input vector.
263 | 
264 | 
265 | ## Automatic forecasting
266 | 
267 | Sometimes a great number of time series have to be forecast. In that situation, an automatic way of generating the forecasts is very useful. Our package is able to automatically choose all the KNN parameters. If the user only
268 | specifies the time series and the forecasting horizon the KNN parameters are selected as follows:
269 | 
270 | * As multi-step ahead strategy the recursive strategy is chosen.
271 | * The combination function used to aggregate the targets is the mean.
272 | * *k* is selected as a combination of three models using 3, 5 and 7 nearest neighbors respectively.
273 | * If `frequency(ts) == f` where `ts` is the time series to be forecast and $f > 1$ then the lags used as autoregressive features are 1:*f*. For example, the lags for quarterly data are 1:4 and for monthly data 1:12. 
274 | * If `frequency(ts) == 1`, then:
275 |     * The lags with significant autocorrelation in the partial autocorrelation function (PACF) are selected. 
276 |     * If no lag has a significant autocorrelation, then lags 1:5 are chosen.
277 |     * If only one lag has significant autocorrelation, then lags 1:5 are chosen. This is done because by default the additive transformation is used and it does not make sense to use this transformation with only one autoregressive lag.
278 | * The additive transformation is applied to the samples, so that a series with a trend can be properly forecast.
279 | 
280 | # Evaluating the model
281 | 
282 | The function `rolling_origin` uses the rolling origin technique to assess the forecast
283 | accuracy of a KNN model. In order to use this function a KNN model has to be built previously.
284 | Let us see how `rolling_origin` works with the following artificial time series:
285 | 
286 | ```{r}
287 | pred <- knn_forecasting(ts(1:20), h = 4, lags = 1:2, k = 2)
288 | ro <- rolling_origin(pred, h = 4)
289 | ```
290 | 
291 | The function `rolling_origin` uses the model generated by a `knn_forecasting` call to apply
292 | rolling origin evaluation. The object returned by `rolling_origin` contains the results of the evaluation. For
293 | example, the test sets can be seen this way:
294 | 
295 | ```{r}
296 | print(ro$test_sets)
297 | ```
298 | 
299 | Every row of the matrix contains a different test set. The first row is a test set with the last `h` values of the time series, the second row a test set with the last `h` - 1 values of the time series and so on. Each test set has an associated training set with all the data in the time series preceding the test set. For every training set a KNN model with the parameters associated with the original model is built and the test set is predicted. You can see the predictions as follows:
300 | 
301 | ```{r}
302 | print(ro$predictions)
303 | ```
304 | 
305 | and also the errors in the predictions:
306 | 
307 | ```{r}
308 | print(ro$errors)
309 | ```
310 | 
311 | Several forecasting accuracy measures applied to all the errors in the different test sets can be consulted:
312 | 
313 | ```{r}
314 | ro$global_accu
315 | ```
316 | 
317 | It is also possible to consult the forecasting accuracy measures for every forecasting horizon:
318 | 
319 | ```{r}
320 | ro$h_accu
321 | ```
322 | 
323 | Finally, a plot with the predictions for a given forecast horizon can be generated:
324 | 
325 | ```{r}
326 | plot(ro, h = 4)
327 | ```
328 | 
329 | The rolling origin technique is very time-consuming. If you want to get a faster assessment of the model you can disable this feature:
330 | 
331 | ```{r}
332 | ro <- rolling_origin(pred, h = 4, rolling = FALSE)
333 | print(ro$test_sets)
334 | print(ro$predictions)
335 | ```
336 | 
337 | 
338 | # Conclusions
339 | 
340 | In R, just a few packages apply regression methods based on computational intelligence to time series forecasting. In this paper we have presented the **tsfknn** package that allows forecasting a time series using KNN regression. The interface of the
341 | package is quite simple: with only one function the user can specify a KNN model
342 | and predict a time series. Furthermore, several graphs can be generated illustrating how the prediction has been computed and the forecasting accuracy of the model can be assessed using hold-out data.
343 | 
344 | # References
345 | 
346 | If you want to learn more about this package or univariate time series forecasting using KNN we suggest:
347 | 
348 | * [Martínez, F., Frías, M.P., Pérez, M.D., Rivera, A.J. A methodology for applying k-nearest neighbor to time series forecasting. Artif Intell Rev 52, 2019–2037 (2019)]( https://doi.org/10.1007/s10462-017-9593-z)
349 | 
350 | * [Martínez, F., Frías, M.P., Charte, F., Rivera, A.J. Time Series Forecasting with KNN in R: the tsfknn Package. The R Journal 11(2), 229–242 (2019)]( https://doi.org/10.32614/RJ-2019-004)
351 | 
352 | 


--------------------------------------------------------------------------------