├── .Rbuildignore
├── .gitignore
├── src
│   ├── cfit.o
│   ├── RcppExports.o
│   ├── partialCI.dll
│   ├── partialCI_init.o
│   ├── partialCI_init.c
│   ├── RcppExports.cpp
│   └── cfit.cc
├── vignettes
│   ├── figures
│   │   ├── OLSRES_RSARSB_2006_2016.png
│   │   └── MCSDHIST_RSARSB_2006_2016.png
│   └── pci_vignette.Rmd
├── partialCI.Rproj
├── cran-comments.md
├── R
│   ├── RcppExports.R
│   ├── rand.R
│   ├── support.R
│   ├── extras.R
│   ├── lrtables.R
│   ├── fit_pci.R
│   ├── fit_pci_wo_alpha.R
│   ├── fit_pci_with_alpha.R
│   └── hedge.R
├── man
│   ├── multigetYahooPrices.Rd
│   ├── yfit.pci.Rd
│   ├── yhedge.pci.Rd
│   ├── statehistory.pci.Rd
│   ├── which.hypothesis.pcitest.Rd
│   ├── likelihood_ratio.pci.Rd
│   ├── rpci.Rd
│   ├── loglik.pci.Rd
│   ├── test.pci.Rd
│   ├── partialCI-package.Rd
│   ├── fit.pci.Rd
│   └── hedge.pci.Rd
├── DESCRIPTION
├── NAMESPACE
├── README.md
└── tests
    └── tests.R

/.Rbuildignore:
--------------------------------------------------------------------------------
 1 | ^.*\.Rproj$
 2 | ^\.Rproj\.user$
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | .Rproj.user
 2 | .Rhistory
 3 | .RData
 4 | .Ruserdata
--------------------------------------------------------------------------------
/src/cfit.o:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/matthewclegg/partialCI/HEAD/src/cfit.o
--------------------------------------------------------------------------------
/src/RcppExports.o:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/matthewclegg/partialCI/HEAD/src/RcppExports.o
--------------------------------------------------------------------------------
/src/partialCI.dll:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/matthewclegg/partialCI/HEAD/src/partialCI.dll
--------------------------------------------------------------------------------
/src/partialCI_init.o:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/matthewclegg/partialCI/HEAD/src/partialCI_init.o
--------------------------------------------------------------------------------
/vignettes/figures/OLSRES_RSARSB_2006_2016.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/matthewclegg/partialCI/HEAD/vignettes/figures/OLSRES_RSARSB_2006_2016.png
--------------------------------------------------------------------------------
/vignettes/figures/MCSDHIST_RSARSB_2006_2016.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/matthewclegg/partialCI/HEAD/vignettes/figures/MCSDHIST_RSARSB_2006_2016.png
--------------------------------------------------------------------------------
/partialCI.Rproj:
--------------------------------------------------------------------------------
 1 | Version: 1.0
 2 | 
 3 | RestoreWorkspace: Default
 4 | SaveWorkspace: Default
 5 | AlwaysSaveHistory: Default
 6 | 
 7 | EnableCodeIndexing: Yes
 8 | UseSpacesForTab: Yes
 9 | NumSpacesForTab: 2
10 | Encoding: UTF-8
11 | 
12 | RnwWeave: Sweave
13 | LaTeX: pdfLaTeX
14 | 
15 | BuildType: Package
16 | PackageUseDevtools: Yes
17 | PackageInstallArgs: --no-multiarch --with-keep.source
--------------------------------------------------------------------------------
/cran-comments.md:
-------------------------------------------------------------------------------- 1 | ## Test environments 2 | * local Windows 10 install, R 3.3.2 and R-devel 3 | * local Windows 7 install, R 3.3.2 and R-devel 4 | * local ubuntu 16.04, R 3.3.2 5 | * win-builder (devel and release) 6 | 7 | ## R CMD check results 8 | 9 | 0 errors | 0 warnings | 1 note 10 | 11 | * This is a new release. 12 | 13 | * checking CRAN incoming feasibility ... NOTE 14 | Maintainer: 'Jonas Rende ' 15 | 16 | ## Reverse dependencies 17 | 18 | This is a new release, so there are no reverse dependencies. 19 | 20 | --- 21 | 22 | * I have run R CMD check on downstream dependencies of partialCI. 23 | All packages that I could install passed. 24 | 25 | 26 | * FAILURE SUMMARY 27 | 28 | ## Check results 29 | 0 packages with problems 30 | -------------------------------------------------------------------------------- /R/RcppExports.R: -------------------------------------------------------------------------------- 1 | # Generated by using Rcpp::compileAttributes() -> do not edit by hand 2 | # Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 3 | 4 | lagged_variance_c <- function(X, k, n) { 5 | .Call('partialCI_lagged_variance_c', PACKAGE = 'partialCI', X, k, n) 6 | } 7 | 8 | estimate_rho_par_c <- function(X) { 9 | .Call('partialCI_estimate_rho_par_c', PACKAGE = 'partialCI', X) 10 | } 11 | 12 | estimate_par_c <- function(X, rho_max = 1.0) { 13 | .Call('partialCI_estimate_par_c', PACKAGE = 'partialCI', X, rho_max) 14 | } 15 | 16 | pvmr_par_c <- function(rho, sigma_M, sigma_R) { 17 | .Call('partialCI_pvmr_par_c', PACKAGE = 'partialCI', rho, sigma_M, sigma_R) 18 | } 19 | 20 | kalman_gain_par_mr <- function(rho, sigma_M, sigma_R) { 21 | .Call('partialCI_kalman_gain_par_mr', PACKAGE = 'partialCI', rho, sigma_M, sigma_R) 22 | } 23 | 24 | loglik_par_c <- function(Y, rho, sigma_M, sigma_R, M0, R0) { 25 | .Call('partialCI_loglik_par_c', PACKAGE = 'partialCI', Y, rho, sigma_M, sigma_R, M0, R0) 26 | } 27 | 28 | loglik_par_t_c <- function(Y, rho, sigma_M, sigma_R, M0, R0, nu = 5.0) { 29 | .Call('partialCI_loglik_par_t_c', PACKAGE = 'partialCI', Y, rho, sigma_M, sigma_R, M0, R0, nu) 30 | } 31 | 32 | -------------------------------------------------------------------------------- /src/partialCI_init.c: -------------------------------------------------------------------------------- 1 | // This file was automatically generated by 'Kmisc::registerFunctions()' 2 | 3 | #include 4 | #include 5 | 6 | #include 7 | 8 | SEXP partialCI_lagged_variance_c(SEXP XSEXP, SEXP kSEXP, SEXP nSEXP); 9 | SEXP partialCI_estimate_rho_par_c(SEXP XSEXP); 10 | SEXP partialCI_estimate_par_c(SEXP XSEXP, SEXP rho_maxSEXP); 11 | SEXP partialCI_pvmr_par_c(SEXP rhoSEXP, SEXP sigma_MSEXP, SEXP sigma_RSEXP); 12 | SEXP partialCI_kalman_gain_par_mr(SEXP rhoSEXP, SEXP sigma_MSEXP, SEXP sigma_RSEXP); 13 | SEXP partialCI_loglik_par_c(SEXP YSEXP, SEXP rhoSEXP, SEXP sigma_MSEXP, SEXP sigma_RSEXP, SEXP M0SEXP, SEXP R0SEXP); 14 | SEXP partialCI_loglik_par_t_c(SEXP YSEXP, SEXP rhoSEXP, SEXP sigma_MSEXP, SEXP sigma_RSEXP, SEXP M0SEXP, SEXP R0SEXP, SEXP nuSEXP); 15 | 16 | R_CallMethodDef callMethods[] = { 17 | {"partialCI_lagged_variance_c", (DL_FUNC) &partialCI_lagged_variance_c, 3}, 18 | {"partialCI_estimate_rho_par_c", (DL_FUNC) &partialCI_estimate_rho_par_c, 1}, 19 | {"partialCI_estimate_par_c", (DL_FUNC) &partialCI_estimate_par_c, 2}, 20 | {"partialCI_pvmr_par_c", (DL_FUNC) &partialCI_pvmr_par_c, 3}, 21 | {"partialCI_kalman_gain_par_mr", (DL_FUNC) &partialCI_kalman_gain_par_mr, 
3}, 22 | {"partialCI_loglik_par_c", (DL_FUNC) &partialCI_loglik_par_c, 6}, 23 | {"partialCI_loglik_par_t_c", (DL_FUNC) &partialCI_loglik_par_t_c, 7}, 24 | {NULL, NULL, 0} 25 | }; 26 | 27 | void R_init_partialCI(DllInfo *info) { 28 | R_registerRoutines(info, NULL, callMethods, NULL, NULL); 29 | R_useDynamicSymbols(info, FALSE); 30 | } 31 | 32 | -------------------------------------------------------------------------------- /man/multigetYahooPrices.Rd: -------------------------------------------------------------------------------- 1 | \name{multigetYahooPrices} 2 | \alias{multigetYahooPrices} 3 | \title{ 4 | Fetches closing prices of multiple stock tickers 5 | } 6 | \description{ 7 | Fetches a \code{zoo} \code{data.frame} of daily closing prices of multiple stock tickers. 8 | } 9 | \usage{ 10 | multigetYahooPrices(components, start, end, quiet = FALSE, adjust = TRUE) 11 | } 12 | \arguments{ 13 | \item{components}{Character vector of Yahoo ticker symbols} 14 | \item{start}{First date of desired data in YYYYMMDD format. Default is 15 | earliest date of all series} 16 | \item{end}{Last date of desired data in YYYYMMDD format. Default is the 17 | last date for which data is available} 18 | \item{quiet}{If \code{FALSE}, then information is printed about the 19 | progress of the fetch operation} 20 | \item{adjust}{If \code{TRUE}, then adjusted closing prices are returned. 21 | Otherwise, unadjusted prices are returned.} 22 | } 23 | % \details{ 24 | %% ~~ If necessary, more details than the description above ~~ 25 | % } 26 | \value{ 27 | Returns a \code{\link{zoo}} \code{\link{data.frame}} containing the 28 | closing prices of the series listed in the \code{components} parameter, 29 | one column per price series. 30 | } 31 | \author{ 32 | Matthew Clegg \email{matthewcleggphd@gmail.com} 33 | 34 | Christopher Krauss \email{christopher.krauss@fau.de} 35 | 36 | Jonas Rende \email{jonas.rende@fau.de} 37 | } 38 | 39 | \seealso{\code{\link[TTR]{getYahooData}} 40 | } 41 | \examples{ 42 | \dontrun{ 43 | ##---- Should be DIRECTLY executable !! ---- 44 | ##-- ==> Define data, use random, 45 | ##-- or do help(data=index) for the standard data sets. 46 | 47 | ### Note: you must have a working internet 48 | ### connection for these examples to work! 49 | spy.voo <- multigetYahooPrices(c("SPY","VOO")) 50 | } 51 | } -------------------------------------------------------------------------------- /man/yfit.pci.Rd: -------------------------------------------------------------------------------- 1 | \name{yfit.pci} 2 | \alias{yfit.pci} 3 | \title{ 4 | Fetch series from Yahoo and perform a partial cointegration fit. 5 | } 6 | \description{ 7 | Fetch series from Yahoo and perform a partial cointegration fit. 8 | } 9 | \usage{ 10 | yfit.pci(target, factors, start, end, na.rm=FALSE, ...) 11 | } 12 | \arguments{ 13 | \item{target}{The ticker symbol of the stock price series that is to be modeled. } 14 | \item{factors}{A list of ticker symbols of stock price series to be used in modeling 15 | \code{target}} 16 | \item{start}{The starting date for which data is to be fetched, given in the 17 | format YYYYMMDD. Default: 2 years ago today.} 18 | \item{end}{The ending date for which data is to be fetched, given in the 19 | format YYYYMMDD. Default: today.} 20 | \item{na.rm}{If TRUE, then \code{NA}'s will be removed from the \code{data.frame} 21 | of fetched prices. 
A heuristic approach is used to decide between deleting 22 | securities versus deleting days.} 23 | \item{...}{Additional optional parameters to be passed to \code{\link{fit.pci}}} 24 | } 25 | 26 | %\details{ 27 | %% ~~ If necessary, more details than the description above ~~ 28 | %} 29 | \value{ 30 | An \code{S3} object of class \code{pci.fit} representing the best fit that 31 | was found. 32 | } 33 | \author{ 34 | Matthew Clegg \email{matthewcleggphd@gmail.com} 35 | 36 | Christopher Krauss \email{christopher.krauss@fau.de} 37 | 38 | Jonas Rende \email{jonas.rende@fau.de} 39 | } 40 | 41 | \seealso{ 42 | \code{\link{fit.pci}} 43 | } 44 | 45 | \examples{ 46 | # Compare a cointegration fit Coca-Cola and Pepsi to a partial cointegration fit. 47 | # Note that yegcm(X, Y) has a different parameter ordering than yfit.pci(Y, X) 48 | # yegcm("PEP", "KO", start=as.numeric(format(Sys.Date() - 365*2, "%Y%m%d"))) 49 | # yfit.pci("KO", "PEP") 50 | } 51 | \keyword{ ts } 52 | \keyword{ models } 53 | 54 | -------------------------------------------------------------------------------- /man/yhedge.pci.Rd: -------------------------------------------------------------------------------- 1 | \name{yhedge.pci} 2 | \alias{yhedge.pci} 3 | \title{ 4 | Hedge portfolio for a stock price series 5 | } 6 | \description{ 7 | Computes the hedge of a stock price series fetched form Yahoo! 8 | using one or more other stock price series also fetched form Yahoo! 9 | } 10 | \usage{ 11 | yhedge.pci(target, factors, start, end, na.rm=FALSE, ...) 12 | } 13 | \arguments{ 14 | \item{target}{The ticker symbol of the stock price series that is to be modeled. } 15 | \item{factors}{A list of ticker symbols of stock price series to be used in modeling 16 | \code{target}} 17 | \item{start}{The starting date for which data is to be fetched, given in the 18 | format YYYYMMDD. Default: 2 years ago today.} 19 | \item{end}{The ending date for which data is to be fetched, given in the 20 | format YYYYMMDD. Default: today.} 21 | \item{na.rm}{If TRUE, then \code{NA}'s will be removed from the \code{data.frame} 22 | of fetched prices. A heuristic approach is used to decide between deleting 23 | securities versus deleting days.} 24 | \item{...}{Additional optional parameters to be passed to \code{\link{fit.pci}}} 25 | } 26 | 27 | %\details{ 28 | %% ~~ If necessary, more details than the description above ~~ 29 | %} 30 | \value{ 31 | An \code{S3} object of class \code{pci.hedge} representing the best fit that 32 | was found. 33 | } 34 | \author{ 35 | Matthew Clegg \email{matthewcleggphd@gmail.com} 36 | 37 | Christopher Krauss \email{christopher.krauss@fau.de} 38 | 39 | Jonas Rende \email{jonas.rende@fau.de} 40 | } 41 | 42 | \seealso{ 43 | \code{\link{fit.pci}} 44 | } 45 | 46 | \examples{ 47 | # Compute the best hedge of Coca-Cola using sector ETFS. 
48 | # sectorETFS <- c("XLB","XLE","XLF","XLI","XLK","XLP","XLU","XLV","XLY") 49 | # hedge <- yhedge.pci("KO", sectorETFS) 50 | # hedge 51 | # test.pci(hedge$pci) 52 | # plot(hedge) 53 | } 54 | \keyword{ ts } 55 | \keyword{ models } 56 | 57 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: partialCI 2 | Type: Package 3 | Title: Partial Cointegration 4 | Version: 1.1.1 5 | Date: 2017-04-21 6 | Authors@R: c(person("Matthew", "Clegg", role=c("aut"), 7 | email="matthewcleggphd@gmail.com"), 8 | person("Christopher", "Krauss", role=c("aut"), 9 | email="christopher.krauss@fau.de"), 10 | person("Jonas", "Rende", role=c("cre","aut"), 11 | email="jonas.rende@fau.de")) 12 | Author: Matthew Clegg [aut], 13 | Christopher Krauss [aut], 14 | Jonas Rende [cre, aut] 15 | Maintainer: Jonas Rende 16 | Description: A collection of time series is partially cointegrated if a linear combination of these time series can be found so that the residual spread is partially autoregressive - meaning that it can be represented as a sum of an autoregressive series and a random walk. This concept is useful in modeling certain sets of financial time series and beyond, as it allows for the spread to contain transient and permanent components alike. Partial cointegration has been introduced by Clegg and Krauss (2016) , along with a large-scale empirical application to financial market data. The partialCI package comprises estimation, testing, and simulation routines for partial cointegration models in state space. Clegg et al. (2017) provide an in in-depth discussion of the package functionality as well as illustrating examples in the fields of finance and macroeconomics. 
17 | License: GPL-2 | GPL-3 18 | Depends: 19 | partialAR 20 | Imports: 21 | zoo, 22 | parallel, 23 | ggplot2, 24 | grid, 25 | MASS, 26 | TTR, 27 | data.table, 28 | glmnet, 29 | methods, 30 | Rcpp, 31 | FKF, 32 | xts 33 | Suggests: 34 | egcm, 35 | knitr, 36 | rmarkdown 37 | RoxygenNote: 6.0.1 38 | VignetteBuilder: knitr 39 | LinkingTo: Rcpp 40 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | S3method(as.data.frame,pci.fit) 2 | S3method(plot,pci.fit) 3 | S3method(plot,pci.hedge) 4 | S3method(print,pci.fit) 5 | S3method(print,pci.hedge) 6 | S3method(print,pcitest) 7 | export(fit.pci) 8 | export(hedge.pci) 9 | export(likelihood_ratio.pci) 10 | export(loglik.pci) 11 | export(multigetYahooPrices) 12 | export(rpci) 13 | export(statehistory.pci) 14 | export(test.pci) 15 | export(which.hypothesis.pcitest) 16 | export(yfit.pci) 17 | export(yhedge.pci) 18 | import(glmnet) 19 | importFrom(FKF,fkf) 20 | importFrom(MASS,rlm) 21 | importFrom(Rcpp,sourceCpp) 22 | importFrom(TTR,getYahooData) 23 | importFrom(data.table,data.table) 24 | importFrom(data.table,setkey) 25 | importFrom(data.table,setnames) 26 | importFrom(ggplot2,Facet) 27 | importFrom(ggplot2,aes) 28 | importFrom(ggplot2,geom_hline) 29 | importFrom(ggplot2,geom_line) 30 | importFrom(ggplot2,ggplot) 31 | importFrom(ggplot2,ggtitle) 32 | importFrom(ggplot2,scale_colour_manual) 33 | importFrom(ggplot2,scale_linetype_manual) 34 | importFrom(ggplot2,scale_size_manual) 35 | importFrom(ggplot2,theme) 36 | importFrom(ggplot2,xlab) 37 | importFrom(ggplot2,ylab) 38 | importFrom(graphics,plot) 39 | importFrom(grid,grid.layout) 40 | importFrom(grid,grid.newpage) 41 | importFrom(grid,pushViewport) 42 | importFrom(grid,viewport) 43 | importFrom(methods,is) 44 | importFrom(parallel,mclapply) 45 | importFrom(partialAR,fit.par) 46 | importFrom(partialAR,kalman.gain.par) 47 | importFrom(partialAR,loglik.par) 48 | importFrom(partialAR,pvmr.par) 49 | importFrom(partialAR,rpar) 50 | importFrom(stats,approx) 51 | importFrom(stats,coef) 52 | importFrom(stats,cor) 53 | importFrom(stats,lm) 54 | importFrom(stats,optim) 55 | importFrom(stats,quantile) 56 | importFrom(stats,rnorm) 57 | importFrom(stats,rt) 58 | importFrom(stats,sd) 59 | importFrom(utils,read.table) 60 | importFrom(utils,write.table) 61 | importFrom(zoo,coredata) 62 | importFrom(zoo,index) 63 | importFrom(zoo,is.zoo) 64 | importFrom(zoo,zoo) 65 | import(xts) 66 | useDynLib(partialCI, .registration=TRUE) 67 | -------------------------------------------------------------------------------- /man/statehistory.pci.Rd: -------------------------------------------------------------------------------- 1 | \name{statehistory.pci} 2 | \alias{statehistory.pci} 3 | \title{ 4 | Generates the sequence of inferred states of a partial cointegration model 5 | } 6 | \description{Generates the sequence of inferred states of a partial cointegration model} 7 | \usage{ 8 | statehistory.pci(A, data = A$data, basis = A$basis) 9 | } 10 | %- maybe also 'usage' for other objects documented here. 11 | \arguments{ 12 | \item{A}{ 13 | An object returned by \code{\link{fit.pci}} representing a partial cointegration fit. 14 | } 15 | \item{data}{ 16 | The data history for which the inferred states are to be computed. This should 17 | be a \code{(k+1) x n} matrix, where \code{k} is the number of independent variables 18 | and \code{n} is the number of observations. 
If this is omitted, then uses the 19 | data history that was used in fitting the model \code{A}. 20 | } 21 | \item{basis}{ 22 | The coefficients of the independent variables. This is a vector 23 | of length \code{k}. If this is omitted, 24 | then uses the coefficients that were computed in fitting the model \code{A}. 25 | } 26 | } 27 | \details{ 28 | Computes the expected internal states of the model over the course 29 | of the data history. 30 | } 31 | \value{ 32 | Returns a \code{data.frame} with the following columns: 33 | \item{Y}{The variable being modeled} 34 | \item{X1,...,X_N}{The independent variables} 35 | \item{Z}{The residual series \code{Y - beta \%*\% X}} 36 | \item{M}{The estimated state of the mean reverting component} 37 | \item{R}{The estimated state of the random walk component} 38 | \item{eps_M}{The innovation to the mean reverting component} 39 | \item{eps_R}{The innovation to the random walk component} 40 | } 41 | \author{ 42 | Matthew Clegg \email{matthewcleggphd@gmail.com} 43 | 44 | Christopher Krauss \email{christopher.krauss@fau.de} 45 | 46 | Jonas Rende \email{jonas.rende@fau.de} 47 | } 48 | \seealso{ 49 | \code{\link[egcm:egcm]{egcm}} Engle-Granger cointegration model 50 | 51 | \code{\link[partialAR:partialAR]{partialAR}} Partially autoregressive models 52 | } 53 | \examples{ 54 | ##---- Should be DIRECTLY executable !! ---- 55 | ##-- ==> Define data, use random, 56 | ##-- or do help(data=index) for the standard data sets. 57 | 58 | YX <- rpci(n=1000, alpha=1, beta=c(2,3), sigma_C=c(0.1,0.1), rho=0.9, sigma_M=1, sigma_R=2) 59 | f <- fit.pci(YX[,1], YX[,2:ncol(YX)]) 60 | statehistory.pci(f) 61 | 62 | } 63 | \keyword{ ts } 64 | \keyword{ models } 65 | -------------------------------------------------------------------------------- /man/which.hypothesis.pcitest.Rd: -------------------------------------------------------------------------------- 1 | \name{which.hypothesis.pcitest} 2 | \alias{which.hypothesis.pcitest} 3 | \title{ 4 | Returns the preferred hypothesis when testing for partial cointegration 5 | } 6 | \description{Returns the preferred hypothesis when testing for partial cointegration} 7 | \usage{ 8 | which.hypothesis.pcitest(AT) 9 | } 10 | %- maybe also 'usage' for other objects documented here. 11 | \arguments{ 12 | \item{AT}{ 13 | An object of class \code{"pcitest"} that has been returned by a 14 | previous call to \code{\link{test.pci}}. 15 | } 16 | } 17 | \details{ 18 | Based upon the critical value \code{alpha} that was given in the 19 | call to \code{test.pci} and the p-value that was computed, 20 | determines which hypothesis best fits the data. 21 | } 22 | \value{ 23 | If a non-robust fit was used, then one of the following values is returned: 24 | \item{\code{"PCI"}}{Partially cointegrated. Both the random walk hypothesis 25 | and the AR(1) hypothesis were rejected.} 26 | \item{\code{"RW"}}{Random walk.} 27 | \item{\code{"AR1"}}{Autoregressive of order one.} 28 | 29 | If a robust fit was used, then one of the following values is returned: 30 | \item{\code{"RPCI"}}{Partially cointegrated. 
Both the random walk hypothesis 31 | and the AR(1) hypothesis were rejected.} 32 | \item{\code{"RRW"}}{Random walk.} 33 | \item{\code{"RAR1"}}{Autoregressive of order one.} 34 | 35 | } 36 | \author{ 37 | Matthew Clegg \email{matthewcleggphd@gmail.com} 38 | 39 | Christopher Krauss \email{christopher.krauss@fau.de} 40 | 41 | Jonas Rende \email{jonas.rende@fau.de} 42 | } 43 | \seealso{ 44 | \code{\link{fit.pci}} Fits a partially cointegrated model 45 | 46 | \code{\link{likelihood_ratio.pci}} Computes the likelihood ratio of a PCI model 47 | versus a null model 48 | } 49 | \examples{ 50 | \dontrun{ 51 | # The following should usually return "PCI" 52 | YX <- rpci(n=1000, alpha=1, beta=c(2,3), sigma_C=c(0.1,0.1), rho=0.8, sigma_M=1, sigma_R=1) 53 | which.hypothesis.pcitest(test.pci(YX[,1], YX[,2:ncol(YX)])) 54 | 55 | # The following should usally return "RW" 56 | YX.rw <- rpci(n=1000, alpha=1, beta=c(2,3), sigma_C=c(0.1,0.1), rho=0.8, sigma_M=0, sigma_R=1) 57 | which.hypothesis.pcitest(test.pci(YX.rw[,1], YX.rw[,2:ncol(YX.rw)])) 58 | 59 | # The following should usually return "AR1" 60 | YX.mr <- rpci(n=1000, alpha=1, beta=c(2,3), sigma_C=c(0.1,0.1), rho=0.8, sigma_M=1, sigma_R=0) 61 | which.hypothesis.pcitest(test.pci(YX.mr[,1], YX.mr[,2:ncol(YX.mr)])) 62 | } 63 | } 64 | \keyword{ ts } 65 | \keyword{ models } 66 | 67 | -------------------------------------------------------------------------------- /R/rand.R: -------------------------------------------------------------------------------- 1 | # rand.R -- functions for generating random variates 2 | 3 | # Copyright (C) 2016 Matthew Clegg 4 | 5 | # This program is free software; you can redistribute it and/or modify 6 | # it under the terms of the GNU General Public License as published by 7 | # the Free Software Foundation; either version 2 of the License, or 8 | # (at your option) any later version. 9 | # 10 | # This program is distributed in the hope that it will be useful, 11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | # GNU General Public License for more details. 14 | # 15 | # A copy of the GNU General Public License is available at 16 | # http://www.r-project.org/Licenses/ 17 | 18 | 19 | rpci <- function (n, alpha, beta, sigma_C, rho, sigma_M, sigma_R, 20 | include.state = FALSE, robust=FALSE, nu=5) { 21 | # Generates a random partially cointegrated sequence. On input, n is the 22 | # length of the sequence to be generated. beta is a vector of length k 23 | # representing the coefficients of the factor loadings, and sigma_C is a 24 | # vector of length k representing the standard deviations of the increments 25 | # of the factor loadings. 26 | # 27 | # Generates a random realization of the sequence 28 | # y_t = alpha + beta_1 F_{1,t} + beta_2 F_{2,t} + ... + beta_k F_{k,t} + m_t + r_t 29 | # F_{i,j} = F_{i,j-1} + delta_{i,j} 30 | # m_t = rho m_{t-1} + eps_{M,t} 31 | # r_t = r_{t-1} + eps_{R,t} 32 | # delta_{i,j} ~ N(0, sigma_C^2) 33 | # eps_{M,t} ~ N(0, sigma_M^2) 34 | # eps_{R,t} ~ N(0, sigma_R^2) 35 | # 36 | # If include.state is FALSE, returns an n x (k+1) matrix whose columns 37 | # are y, F_1, F_2, ..., F_k. 
If include.state is TRUE, returns an 38 | # n x (2k + 6) matrix whose columns are 39 | # y, F_1, F_2, ..., F_k, x, m, r, delta_1, delta_2, ..., delta_k, 40 | # eps_M, eps_R 41 | 42 | if (!robust) { 43 | D <- do.call("cbind", lapply(1:length(beta), function(i) rnorm(n,0,sigma_C[i]))) 44 | } else { 45 | D <- do.call("cbind", lapply(1:length(beta), function(i) rt(n,nu) * sigma_C[i])) 46 | } 47 | F <- apply(D, 2, cumsum) 48 | colnames(F) <- paste ("F", 1:ncol(F), sep="_") 49 | PAR <- rpar (n, rho, sigma_M, sigma_R, include.state=TRUE, robust=robust, nu=nu) 50 | Y <- F %*% beta + PAR$X + alpha 51 | DF <- cbind(Y, F) 52 | colnames(DF)[1] <- "Y" 53 | if (include.state) { 54 | colnames(D) <- paste("delta", 1:ncol(D), sep="_") 55 | DF <- cbind(DF, X=PAR$X, M=PAR$M, R=PAR$R, D, eps_M=PAR$eps_M, eps_R=PAR$eps_R) 56 | } 57 | DF 58 | } 59 | -------------------------------------------------------------------------------- /R/support.R: -------------------------------------------------------------------------------- 1 | # support.R -- a miscellaneous collection of supporting functions 2 | 3 | # Copyright (C) 2016 Matthew Clegg 4 | 5 | # This program is free software; you can redistribute it and/or modify 6 | # it under the terms of the GNU General Public License as published by 7 | # the Free Software Foundation; either version 2 of the License, or 8 | # (at your option) any later version. 9 | # 10 | # This program is distributed in the hope that it will be useful, 11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | # GNU General Public License for more details. 14 | # 15 | # A copy of the GNU General Public License is available at 16 | # http://www.r-project.org/Licenses/ 17 | 18 | ctext <- function (str, n) { 19 | # Centers the string str in a field of n blanks 20 | if (nchar(str) == n) return(str) 21 | if (nchar(str) > n) return(substr(str, 1, n)) 22 | 23 | nleft <- floor((n - nchar(str))/2) 24 | nright <- n - (nchar(str) + nleft) 25 | left_pad <- paste(rep(" ", nleft), collapse="") 26 | right_pad <- paste(rep(" ", nright), collapse="") 27 | pstr <- paste(left_pad, str, right_pad, sep="") 28 | pstr 29 | } 30 | 31 | printf <- function (...) { cat(sprintf(...)); } 32 | println <- function (...) { cat(sprintf(...)); cat("\n"); } 33 | 34 | quantile.table.from.samples <- function (colname, samples, 35 | quantiles = seq(0.01, 0.99, by=0.01)) { 36 | # Builds a table of quantile values from the data.frame samples 37 | 38 | sample_sizes <- sort(unique(samples$n)) 39 | 40 | nq <- length(quantiles) 41 | ns <- length(sample_sizes) 42 | 43 | qtab <- matrix(NA, ncol=ns+1, nrow=nq+1) 44 | qtab[2:(nq+1), 1] <- quantiles 45 | qtab[1, 2:(ns+1)] <- sample_sizes 46 | 47 | for (i in 2:(ns+1)) { 48 | n <- sample_sizes[i-1] 49 | qtab[2:(nq+1), i] <- quantile(samples[samples$n == n, colname], quantiles) 50 | } 51 | 52 | qtab 53 | } 54 | 55 | quantile_table_interpolate <- function (qtab, sample_size, stat, stop.on.na=FALSE) { 56 | # On input, qtab is a dataframe of quantiles. Each column corresponds to 57 | # a sample size, and each row corresponds to a quantile value. The sample 58 | # sizes are given in the first row, and the quantiles are given in the 59 | # first column. 
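  # In the lookup below, findInterval() locates the largest tabulated sample
  # size that does not exceed sample_size, approx() converts the observed
  # statistic into a quantile within that column (and within the next column,
  # when one exists), and the two quantiles are then combined by linear
  # interpolation in sample_size. Sample sizes smaller than the smallest
  # tabulated value yield NA (or an error when stop.on.na=TRUE).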
60 | n <- nrow(qtab) 61 | i <- findInterval(sample_size, qtab[1,2:ncol(qtab)])+1 62 | if (i == 1) { 63 | parent_name <- as.character(sys.call(-1)[[1]]) 64 | if (stop.on.na) { 65 | stop (parent_name," requires a minimum of ", qtab[1,2], " observations.") 66 | } else { 67 | warning (parent_name, " requires a minimum of ", qtab[1,2], " observations.") 68 | return(NA) 69 | } 70 | } 71 | y1 <- approx(qtab[2:n, i], qtab[2:n, 1], stat, rule=2)$y 72 | if (i < ncol(qtab)) { 73 | y2 <- approx(qtab[2:n, i+1], qtab[2:n, 1], stat, rule=2)$y 74 | n1 <- qtab[1,i] 75 | n2 <- qtab[1,i+1] 76 | y <- y1 * (n2 - sample_size) / (n2 - n1) + y2 * (sample_size - n1)/(n2 - n1) 77 | } else { 78 | y <- y1 79 | } 80 | y 81 | } 82 | 83 | 84 | -------------------------------------------------------------------------------- /man/likelihood_ratio.pci.Rd: -------------------------------------------------------------------------------- 1 | \name{likelihood_ratio.pci} 2 | \alias{likelihood_ratio.pci} 3 | \title{ 4 | Computes the likelihood ratio of the partially cointegrated model vs the null model 5 | } 6 | \description{ 7 | Computes the likelihood ratio of the partially cointegrated model vs the null model 8 | } 9 | \usage{ 10 | likelihood_ratio.pci(Y, X, 11 | robust = FALSE, 12 | null_model = c("rw", "ar1"), 13 | pci_opt_method = c("jp", "twostep"), 14 | nu = 5) 15 | } 16 | %- maybe also 'usage' for other objects documented here. 17 | \arguments{ 18 | \item{Y}{ 19 | The time series that is to be modeled. A plain or \code{\link{zoo}} vector of length \code{n}. 20 | } 21 | \item{X}{ 22 | A (possibly \code{\link{zoo}}) matrix of dimensions \code{n} x \code{k}. 23 | If \code{k=1}, then this may be a plain or \code{\link{zoo}} vector. 24 | } 25 | \item{robust}{ 26 | If \code{TRUE}, then the residuals are assumed to follow a t-distribution with 27 | \code{nu} degrees of freedom. Default: \code{FALSE}. 28 | } 29 | 30 | \item{null_model}{ 31 | This specifies the model that is assumed under the null hypothesis. 32 | \itemize{ 33 | \item{rw}{Random walk. Assumes sigma_M = rho = 0. Default.} 34 | \item{ar1}{Autoregressive of order one. Assumes sigma_R=0.} 35 | } 36 | } 37 | \item{pci_opt_method}{ 38 | Method to be used for fitting Y to X. 39 | \itemize{ 40 | \item{jp}{The coefficients of Y are jointly optimized 41 | with the parameters of the AAR fit of the residuals. Default.} 42 | \item{twostep}{A modified Engle-Granger procedure is used, where 43 | the coefficients of Y are first estimated, and then an AAR 44 | model is fit to the residuals.} 45 | }} 46 | \item{nu}{ 47 | If \code{robust} is \code{TRUE}, then this is the degrees of freedom 48 | parameter used in fitting the t-distribution. Default: 5. 49 | } 50 | } 51 | \details{ 52 | First searches for the optimal fit under the null model, and computes 53 | the log of the likelihood score of this fit. Then, searches for the optimal 54 | fit under the full model, and computes the log of the likelihood score of 55 | this fit. Returns the difference of the two likelihood scores. Since the 56 | null model is nested in the full model, the log likelihood ratio score 57 | is guaranteed to be negative. 58 | } 59 | \value{The log of the ratio of the likelihoods of the two models. 60 | } 61 | \references{ 62 | Clegg, Matthew, 2015. 63 | Modeling Time Series with Both Permanent and Transient Components 64 | using the Partially Autoregressive Model. 
65 | \emph{Available at SSRN: http://ssrn.com/abstract=2556957} 66 | } 67 | \author{ 68 | Matthew Clegg \email{matthewcleggphd@gmail.com} 69 | 70 | Christopher Krauss \email{christopher.krauss@fau.de} 71 | 72 | Jonas Rende \email{jonas.rende@fau.de} 73 | } 74 | \seealso{ 75 | \code{\link{fit.pci}} Fitting partially cointegrated models 76 | } 77 | \examples{ 78 | YX <- rpci(n=1000, alpha=1, beta=c(2,3,4), sigma_C=c(1,1,1), rho=0.9, sigma_M=0.1, sigma_R=0.2) 79 | likelihood_ratio.pci(YX[,1], YX[,2:ncol(YX)]) 80 | } 81 | % Add one or more standard keywords, see file 'KEYWORDS' in the 82 | % R documentation directory. 83 | \keyword{ ts } 84 | \keyword{ models }% __ONLY ONE__ keyword per line 85 | -------------------------------------------------------------------------------- /man/rpci.Rd: -------------------------------------------------------------------------------- 1 | \name{rpci} 2 | \alias{rpci} 3 | \title{ 4 | Generates a random instance of a partial cointegration model 5 | } 6 | \description{Generates a random instance of a partial cointegration model} 7 | \usage{ 8 | rpci(n, alpha, beta, sigma_C, rho, sigma_M, sigma_R, 9 | include.state = FALSE, robust = FALSE, nu = 5) 10 | } 11 | \arguments{ 12 | \item{n}{ 13 | Number of observations to generate 14 | } 15 | \item{alpha}{ 16 | Constant term of the model 17 | } 18 | \item{beta}{ 19 | A vector of factor loadings 20 | } 21 | \item{sigma_C}{ 22 | A vector of standard deviations 23 | } 24 | \item{rho}{ 25 | The coefficient of mean reversion 26 | } 27 | \item{sigma_R}{ 28 | The standard deviation of the innovations of the random walk portion of the residual series 29 | } 30 | \item{sigma_M}{ 31 | The standard deviation of the innovations of the mean-reverting portion of the residual series 32 | } 33 | \item{include.state}{ 34 | If TRUE, then the output data.frame contains the innovations to the factors and residual 35 | series, as well as the state of the residual series. Default: FALSE 36 | } 37 | \item{robust}{ 38 | If TRUE, then a t-distribution is used to generate the innovations. 39 | Otherwise, the innovations are normally distributed. Default: FALSE. 40 | } 41 | \item{nu}{ 42 | The degrees of freedom parameter used for t-distributed innovations. Default: 5. 43 | } 44 | } 45 | \details{ 46 | Generates a random set of partially cointegrated vectors. On input, \code{n} is the 47 | length of the sequence to be generated. \code{beta} is a vector of length k 48 | representing the coefficients of the factor loadings, and \code{sigma_C} is a 49 | vector of length k representing the standard deviations of the increments 50 | of the factor loadings. 51 | 52 | Generates a random realization of the sequence 53 | \deqn{Y_t = \alpha + \beta_1 F_{1,t} + \beta_2 F_{2,t} + ... + \beta_k F_{k,t} + M_t + R_t} 54 | \deqn{F_{i,j} = F_{i,j-1} + \delta_{i,j}} 55 | \deqn{M_t = \rho m_{t-1} + \epsilon_{M,t}} 56 | \deqn{R_t = r_{t-1} + \epsilon_{R,t}} 57 | \deqn{\delta_{i,j} ~ N(0, \sigma_{C,i}^2)} 58 | \deqn{\epsilon_{M,t} ~ N(0, \sigma_M^2)} 59 | \deqn{\epsilon_{R,t} ~ N(0, \sigma_R^2)} 60 | 61 | } 62 | \value{ 63 | A \code{data.frame} of \code{n} rows representing the realization of the partially 64 | cointegrated sequence. 65 | 66 | If \code{include.state} is \code{FALSE}, returns an \code{n x (k+1)} matrix whose columns 67 | are \code{y, F_1, F_2, ..., F_k}. If include.state is \code{TRUE}, returns an 68 | \code{n x (2k + 6)} matrix whose columns are 69 | \code{y, F_1, F_2, ..., F_k, x, M, R, delta_1, delta_2, ..., delta_k, epsilon_M, epsilon_R}. 
70 | } 71 | \author{ 72 | Matthew Clegg \email{matthewcleggphd@gmail.com} 73 | 74 | Christopher Krauss \email{christopher.krauss@fau.de} 75 | 76 | Jonas Rende \email{jonas.rende@fau.de} 77 | } 78 | 79 | \seealso{ 80 | \code{\link{fit.pci}} 81 | } 82 | \examples{ 83 | rpci(10, alpha =1, beta=1, sigma_C=1, rho=0.9, sigma_R=1, sigma_M=1) 84 | } 85 | % Add one or more standard keywords, see file 'KEYWORDS' in the 86 | % R documentation directory. 87 | \keyword{ ts } 88 | \keyword{ models }% __ONLY ONE__ keyword per line 89 | -------------------------------------------------------------------------------- /man/loglik.pci.Rd: -------------------------------------------------------------------------------- 1 | \name{loglik.pci} 2 | \alias{loglik.pci} 3 | \title{ 4 | Computes the log likelihood of a partially cointegrated model 5 | } 6 | \description{ 7 | Computes the log likelihood of a partially cointegrated model 8 | } 9 | \usage{ 10 | loglik.pci(Y, X, alpha, beta, rho, sigma_M, sigma_R, 11 | M0 = 0, R0 = 0, 12 | calc_method = c("css", "fkf", "ss", "sst", "csst"), 13 | nu = pci.nu.default()) 14 | } 15 | %- maybe also 'usage' for other objects documented here. 16 | \arguments{ 17 | \item{Y}{ 18 | The time series that is to be modeled. A plain or \code{\link{zoo}} vector of length \code{n}. 19 | } 20 | \item{X}{ 21 | A (possibly \code{\link{zoo}}) matrix of dimensions \code{n} x \code{k}. If \code{k=1}, then this may be a plain or \code{\link{zoo}} vector. 22 | } 23 | \item{alpha}{ 24 | The constant term to be used in the fit. 25 | } 26 | \item{beta}{ 27 | A vector of length \code{k} representing the weightings to be given to the components of \code{X}. 28 | } 29 | \item{rho}{ 30 | The coefficient of mean reversion. 31 | } 32 | \item{sigma_M}{ 33 | The standard deviation of the innovations of the mean-reverting component of the model. 34 | } 35 | \item{sigma_R}{ 36 | The standard deviation of the innovations of the random walk component of the model. 37 | } 38 | \item{M0}{ 39 | The initial value of the mean-reverting component. Default = 0. 40 | } 41 | \item{R0}{ 42 | The initial value of the random walk component. Default = 0. 43 | } 44 | \item{calc_method}{ 45 | Specifies the Kalman filter implementation that will be used for computing 46 | the likelihood score: 47 | \itemize{ 48 | \item "ss" Steady-state Kalman filter 49 | \item "css" C++ implementation of steady-state Kalman filter 50 | \item "fkf" Kalman filter implementation of the FKF package 51 | \item "sst" Steady-state Kalman filter using t-distributed innovations 52 | \item "csst" C++ implementation of steady-state Kalman filter using t-distributed innovations 53 | } 54 | Default: \code{css} 55 | } 56 | 57 | \item{nu}{ 58 | The degrees-of-freedom parameter to be used if \code{calc_method} is "sst" or "csst". 59 | } 60 | } 61 | \details{ 62 | The partial cointegration model is given by the equations: 63 | 64 | \deqn{ 65 | Y_t = \alpha + \beta_1 * X_{t,1} + beta_2 * X_{t,2} + ... + beta_k * X_{t,k} + M_t + R_t}{ 66 | Y[t] = alpha + beta[1] * X[t,1] + beta[2] * X[t,2] + ... 
+ beta[k] * X[t,k] + M[t] + R[t] 67 | } 68 | \deqn{M_t = \rho M_{t-1} + \epsilon_{M,t}}{M[t] = rho * M[t-1] + epsilon_M[t]} 69 | \deqn{R_t = R_{t-1} + \epsilon_{R,t}}{R[t] = R[t-1] + epsilon_R[t]} 70 | \deqn{-1 < \rho < 1}{-1 < rho < 1} 71 | \deqn{\epsilon_{M,t} \sim N(0,\sigma_M^2)}{epsilon_M[t] ~ N(0, sigma_M^2)} 72 | \deqn{\epsilon_{R,t} \sim N(0,\sigma_R^2)}{epsilon_R[t] ~ N(0, sigma_R^2)} 73 | 74 | Given the input series 75 | \code{Y} and \code{X}, 76 | and given the parameter values 77 | \code{alpha}, \code{beta}, \code{rho}, \code{M0} and \code{R0}, 78 | the innovations \code{epsilon_M[t]} and \code{epsilon_R[t]} are calculated 79 | using a Kalman filter. Based upon these values, the log-likelihood score 80 | is then computed and returned. 81 | } 82 | \value{ 83 | The log of the likelihood score of the Kalman filter 84 | } 85 | \references{ 86 | Clegg, Matthew, 2015. 87 | Modeling Time Series with Both Permanent and Transient Components 88 | using the Partially Autoregressive Model. 89 | \emph{Available at SSRN: http://ssrn.com/abstract=2556957} 90 | } 91 | \author{ 92 | Matthew Clegg \email{matthewcleggphd@gmail.com} 93 | 94 | Christopher Krauss \email{christopher.krauss@fau.de} 95 | 96 | Jonas Rende \email{jonas.rende@fau.de} 97 | } 98 | \seealso{ 99 | \code{\link[egcm:egcm]{egcm}} Engle-Granger cointegration model 100 | 101 | \code{\link[partialAR:partialAR]{partialAR}} Partially autoregressive models 102 | } 103 | \examples{ 104 | ##---- Should be DIRECTLY executable !! ---- 105 | ##-- ==> Define data, use random, 106 | ##-- or do help(data=index) for the standard data sets. 107 | 108 | set.seed(1) 109 | YX <- rpci(n=500, alpha=1, beta=c(2,3,4), sigma_C=c(1,1,1), rho=0.9, sigma_M=0.1, sigma_R=0.2) 110 | loglik.pci(YX[,1], YX[,2:ncol(YX)], alpha=1, beta=c(2,3,4), rho=0.9, sigma_M=0.1, sigma_R=0.2) 111 | 112 | } 113 | % Add one or more standard keywords, see file 'KEYWORDS' in the 114 | % R documentation directory. 115 | \keyword{ ts } 116 | \keyword{ models } 117 | -------------------------------------------------------------------------------- /man/test.pci.Rd: -------------------------------------------------------------------------------- 1 | \name{test.pci} 2 | \alias{test.pci} 3 | \title{Tests the goodness of fit of a partial cointegration model} 4 | \description{ 5 | Tests the goodness of fit of a partial cointegration model 6 | } 7 | \usage{ 8 | test.pci(Y, X, alpha = 0.05, 9 | null_hyp = c("rw", "ar1"), 10 | robust = FALSE, 11 | pci_opt_method = c("jp", "twostep")) 12 | } 13 | %- maybe also 'usage' for other objects documented here. 14 | \arguments{ 15 | \item{Y}{ 16 | The time series that is to be modeled. A plain or \code{\link{zoo}} vector of length \code{n}. 17 | } 18 | \item{X}{ 19 | A (possibly \code{\link{zoo}}) matrix of dimensions \code{n} x \code{k}. 20 | If \code{k=1}, then this may be a plain or \code{\link{zoo}} vector. 21 | } 22 | \item{alpha}{ 23 | The cutoff value to be used for determining whether or not to accept the 24 | null hypothesis. If the the \code{p-value} computed through the likelihood 25 | ratio test is below this value, then the null hypothesis is rejected. 26 | Default: 0.05. 27 | } 28 | \item{null_hyp}{ 29 | This specifies the null hypothesis. 30 | This can be either \code{"rw"}, \code{"ar1"} or \code{c("rw","ar1")}. 31 | If \code{"rw"}, then the null hypothesis is a random walk. 32 | If \code{"ar1"}, then the null hypothesis is an autoregressive process 33 | of order 1. 
(In this case, the null hypothesis calls for Y and X to be 34 | cointegrated.) If \code{(c("rw","ar1")}, then the null hypothesis is 35 | either a random walk or AR(1) process. Default: both. 36 | } 37 | 38 | \item{robust}{ 39 | If \code{TRUE}, then the residuals are assumed to follow a t-distribution. Default: \code{FALSE}. 40 | } 41 | \item{pci_opt_method}{ 42 | The method that will be used for fitting a partially cointegrated 43 | model to \code{X} and \code{Y}. This can be either \code{"jp"} 44 | (joint penalty) or \code{"twostep"} (Engle-Granger two-step). 45 | See \code{\link{fit.pci}} for a complete explanation. 46 | Default: \code{"jp"}. 47 | } 48 | } 49 | \details{ 50 | The likelihood ratio test is used to determine whether 51 | the null hypothesis should be accepted or whether the alternative 52 | of partial cointegration should be accepted. That is to say, 53 | a search is performed for the best fitting model under the null 54 | hypothesis, and the log likelihood score of this model is computed. 55 | Then a search is performed for the best fitting model under the 56 | alternative hypothesis of partial cointegration, and the log 57 | likelihood score of this model is computed. The associated 58 | p-values have been computed through simulation. 59 | 60 | If the null hypothesis is the union \code{c("rw", "ar1")}, then 61 | a table of p-values is used that has been pre-computed, however 62 | this table is not unique. 63 | } 64 | \value{An object of class \code{"pcitest"} containing the 65 | results of the hypothesis test. 66 | 67 | } 68 | 69 | \references{ 70 | Clegg, Matthew, 2015. 71 | Modeling Time Series with Both Permanent and Transient Components 72 | using the Partially Autoregressive Model. 73 | \emph{Available at SSRN: http://ssrn.com/abstract=2556957} 74 | 75 | Clegg, Matthew and Krauss, Christopher, 2016. Pairs trading with partial cointegration. FAU Discussion Papers in Economics, University of Erlangen-Nuernberg. 76 | \emph{Available at https://www.iwf.rw.fau.de/files/2016/05/05-2016.pdf} 77 | } 78 | 79 | \author{ 80 | Matthew Clegg \email{matthewcleggphd@gmail.com} 81 | 82 | Christopher Krauss \email{christopher.krauss@fau.de} 83 | 84 | Jonas Rende \email{jonas.rende@fau.de} 85 | } 86 | \seealso{ 87 | \code{\link{fit.pci}} Fits a partially cointegrated model 88 | 89 | \code{\link{likelihood_ratio.pci}} Computes the likelihood ratio of a PCI model 90 | versus a null model 91 | } 92 | \examples{ 93 | # The following should reject both the random walk and AR(1) models 94 | 95 | \dontrun{ 96 | YX <- rpci(n=1000, alpha=1, beta=c(2,3), sigma_C=c(0.1,0.1), rho=0.8, sigma_M=1, sigma_R=1) 97 | test.pci(YX[,1], YX[,2:ncol(YX)]) 98 | 99 | # The following should accept the random walk model and reject the AR(1) model 100 | YX.rw <- rpci(n=1000, alpha=1, beta=c(2,3), sigma_C=c(0.1,0.1), rho=0.8, sigma_M=0, sigma_R=1) 101 | test.pci(YX.rw[,1], YX.rw[,2:ncol(YX.rw)]) 102 | 103 | # The following should reject the random walk model and accept the AR(1) model 104 | YX.mr <- rpci(n=1000, alpha=1, beta=c(2,3), sigma_C=c(0.1,0.1), rho=0.8, sigma_M=1, sigma_R=0) 105 | test.pci(YX.mr[,1], YX.mr[,2:ncol(YX.mr)]) 106 | } 107 | } 108 | % Add one or more standard keywords, see file 'KEYWORDS' in the 109 | % R documentation directory. 
110 | \keyword{ ts } 111 | \keyword{ models } 112 | -------------------------------------------------------------------------------- /src/RcppExports.cpp: -------------------------------------------------------------------------------- 1 | // Generated by using Rcpp::compileAttributes() -> do not edit by hand 2 | // Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 3 | 4 | #include 5 | 6 | using namespace Rcpp; 7 | 8 | // lagged_variance_c 9 | double lagged_variance_c(NumericVector X, int k, int n); 10 | RcppExport SEXP partialCI_lagged_variance_c(SEXP XSEXP, SEXP kSEXP, SEXP nSEXP) { 11 | BEGIN_RCPP 12 | Rcpp::RObject rcpp_result_gen; 13 | Rcpp::RNGScope rcpp_rngScope_gen; 14 | Rcpp::traits::input_parameter< NumericVector >::type X(XSEXP); 15 | Rcpp::traits::input_parameter< int >::type k(kSEXP); 16 | Rcpp::traits::input_parameter< int >::type n(nSEXP); 17 | rcpp_result_gen = Rcpp::wrap(lagged_variance_c(X, k, n)); 18 | return rcpp_result_gen; 19 | END_RCPP 20 | } 21 | // estimate_rho_par_c 22 | double estimate_rho_par_c(NumericVector X); 23 | RcppExport SEXP partialCI_estimate_rho_par_c(SEXP XSEXP) { 24 | BEGIN_RCPP 25 | Rcpp::RObject rcpp_result_gen; 26 | Rcpp::RNGScope rcpp_rngScope_gen; 27 | Rcpp::traits::input_parameter< NumericVector >::type X(XSEXP); 28 | rcpp_result_gen = Rcpp::wrap(estimate_rho_par_c(X)); 29 | return rcpp_result_gen; 30 | END_RCPP 31 | } 32 | // estimate_par_c 33 | NumericVector estimate_par_c(NumericVector X, double rho_max); 34 | RcppExport SEXP partialCI_estimate_par_c(SEXP XSEXP, SEXP rho_maxSEXP) { 35 | BEGIN_RCPP 36 | Rcpp::RObject rcpp_result_gen; 37 | Rcpp::RNGScope rcpp_rngScope_gen; 38 | Rcpp::traits::input_parameter< NumericVector >::type X(XSEXP); 39 | Rcpp::traits::input_parameter< double >::type rho_max(rho_maxSEXP); 40 | rcpp_result_gen = Rcpp::wrap(estimate_par_c(X, rho_max)); 41 | return rcpp_result_gen; 42 | END_RCPP 43 | } 44 | // pvmr_par_c 45 | double pvmr_par_c(double rho, double sigma_M, double sigma_R); 46 | RcppExport SEXP partialCI_pvmr_par_c(SEXP rhoSEXP, SEXP sigma_MSEXP, SEXP sigma_RSEXP) { 47 | BEGIN_RCPP 48 | Rcpp::RObject rcpp_result_gen; 49 | Rcpp::RNGScope rcpp_rngScope_gen; 50 | Rcpp::traits::input_parameter< double >::type rho(rhoSEXP); 51 | Rcpp::traits::input_parameter< double >::type sigma_M(sigma_MSEXP); 52 | Rcpp::traits::input_parameter< double >::type sigma_R(sigma_RSEXP); 53 | rcpp_result_gen = Rcpp::wrap(pvmr_par_c(rho, sigma_M, sigma_R)); 54 | return rcpp_result_gen; 55 | END_RCPP 56 | } 57 | // kalman_gain_par_mr 58 | double kalman_gain_par_mr(double rho, double sigma_M, double sigma_R); 59 | RcppExport SEXP partialCI_kalman_gain_par_mr(SEXP rhoSEXP, SEXP sigma_MSEXP, SEXP sigma_RSEXP) { 60 | BEGIN_RCPP 61 | Rcpp::RObject rcpp_result_gen; 62 | Rcpp::RNGScope rcpp_rngScope_gen; 63 | Rcpp::traits::input_parameter< double >::type rho(rhoSEXP); 64 | Rcpp::traits::input_parameter< double >::type sigma_M(sigma_MSEXP); 65 | Rcpp::traits::input_parameter< double >::type sigma_R(sigma_RSEXP); 66 | rcpp_result_gen = Rcpp::wrap(kalman_gain_par_mr(rho, sigma_M, sigma_R)); 67 | return rcpp_result_gen; 68 | END_RCPP 69 | } 70 | // loglik_par_c 71 | double loglik_par_c(NumericVector Y, double rho, double sigma_M, double sigma_R, double M0, double R0); 72 | RcppExport SEXP partialCI_loglik_par_c(SEXP YSEXP, SEXP rhoSEXP, SEXP sigma_MSEXP, SEXP sigma_RSEXP, SEXP M0SEXP, SEXP R0SEXP) { 73 | BEGIN_RCPP 74 | Rcpp::RObject rcpp_result_gen; 75 | Rcpp::RNGScope rcpp_rngScope_gen; 76 | Rcpp::traits::input_parameter< NumericVector 
>::type Y(YSEXP); 77 | Rcpp::traits::input_parameter< double >::type rho(rhoSEXP); 78 | Rcpp::traits::input_parameter< double >::type sigma_M(sigma_MSEXP); 79 | Rcpp::traits::input_parameter< double >::type sigma_R(sigma_RSEXP); 80 | Rcpp::traits::input_parameter< double >::type M0(M0SEXP); 81 | Rcpp::traits::input_parameter< double >::type R0(R0SEXP); 82 | rcpp_result_gen = Rcpp::wrap(loglik_par_c(Y, rho, sigma_M, sigma_R, M0, R0)); 83 | return rcpp_result_gen; 84 | END_RCPP 85 | } 86 | // loglik_par_t_c 87 | double loglik_par_t_c(NumericVector Y, double rho, double sigma_M, double sigma_R, double M0, double R0, double nu); 88 | RcppExport SEXP partialCI_loglik_par_t_c(SEXP YSEXP, SEXP rhoSEXP, SEXP sigma_MSEXP, SEXP sigma_RSEXP, SEXP M0SEXP, SEXP R0SEXP, SEXP nuSEXP) { 89 | BEGIN_RCPP 90 | Rcpp::RObject rcpp_result_gen; 91 | Rcpp::RNGScope rcpp_rngScope_gen; 92 | Rcpp::traits::input_parameter< NumericVector >::type Y(YSEXP); 93 | Rcpp::traits::input_parameter< double >::type rho(rhoSEXP); 94 | Rcpp::traits::input_parameter< double >::type sigma_M(sigma_MSEXP); 95 | Rcpp::traits::input_parameter< double >::type sigma_R(sigma_RSEXP); 96 | Rcpp::traits::input_parameter< double >::type M0(M0SEXP); 97 | Rcpp::traits::input_parameter< double >::type R0(R0SEXP); 98 | Rcpp::traits::input_parameter< double >::type nu(nuSEXP); 99 | rcpp_result_gen = Rcpp::wrap(loglik_par_t_c(Y, rho, sigma_M, sigma_R, M0, R0, nu)); 100 | return rcpp_result_gen; 101 | END_RCPP 102 | } 103 | -------------------------------------------------------------------------------- /R/extras.R: -------------------------------------------------------------------------------- 1 | # extras.R 2 | # Copyright (C) 2016 by Matthew Clegg 3 | 4 | # This program is free software; you can redistribute it and/or modify 5 | # it under the terms of the GNU General Public License as published by 6 | # the Free Software Foundation; either version 2 of the License, or 7 | # (at your option) any later version. 8 | # 9 | # This program is distributed in the hope that it will be useful, 10 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | # GNU General Public License for more details. 13 | # 14 | # A copy of the GNU General Public License is available at 15 | # http://www.r-project.org/Licenses/ 16 | 17 | 18 | # A few extra helper routines for the egcm module. 19 | 20 | # This program is free software; you can redistribute it and/or modify 21 | # it under the terms of the GNU General Public License as published by 22 | # the Free Software Foundation; either version 2 of the License, or 23 | # (at your option) any later version. 24 | # 25 | # This program is distributed in the hope that it will be useful, 26 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 27 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 28 | # GNU General Public License for more details. 29 | # 30 | # A copy of the GNU General Public License is available at 31 | # http://www.r-project.org/Licenses/ 32 | 33 | price_matrix.rm.na <- function (DF, target=1) { 34 | # Heuristically removes NA's from a data.frame representing a matrix of 35 | # prices. 
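  # The heuristic referred to in the yfit.pci/yhedge.pci documentation:
  # rows in which the target series is NA are always dropped; next, rows and
  # then columns containing two or more NAs are removed; finally, any
  # remaining rows with an NA are deleted. A security (column) with several
  # gaps is thus sacrificed in preference to deleting a large number of
  # days (rows).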
36 | 37 | # Remove all rows where the target has an NA 38 | DF <- DF[!is.na(DF[,target]),] 39 | 40 | # Remove all rows where there are two or more NA's 41 | DF <- DF[apply(DF, 1, function(x) sum(is.na(x)) <= 1),] 42 | 43 | # Remove all columns containing two or more NA's 44 | DF <- DF[,apply(DF, 2, function(x) sum(is.na(x)) <= 1),] 45 | 46 | # Remove all rows containing an NA 47 | DF <- DF[apply(DF, 1, function(x) all(!is.na(x))), ] 48 | 49 | DF 50 | } 51 | 52 | multigetYahooPrices <- function ( 53 | components, # Character vector of Yahoo ticker symbols 54 | start, # First date of desired data in YYYYMMDD format. 55 | end, # Last date of desired data in YYYYMMDD format. 56 | quiet=FALSE, # If TRUE, prints information as symbols are fetched 57 | adjust=TRUE # If TRUE, adjusted closing prices are returned. 58 | ) { 59 | # Given a list of ticker symbols, fetches the 60 | # adjusted closing prices of each of the symbols in the 61 | # list from Yahoo!, and creates a zoo data.frame. 62 | 63 | yahooParams <- list(symbol="NA", adjust=adjust, quiet=quiet) 64 | if (!missing(start)) yahooParams["start"] <- start 65 | if (!missing(end)) yahooParams["end"] <- end 66 | 67 | get_ticker <- function (t) { 68 | if (!quiet) cat(t, " ") 69 | p <- NULL 70 | # try (p <- getYahooData (t, startdate, enddate, adjust=adjust) ) 71 | yahooParams["symbol"] <- t 72 | try (p <- do.call("getYahooData", yahooParams)) 73 | if (is.null(dim(p))) return(NULL) 74 | zdf <- zoo(data.frame(p$Close), index(p)) 75 | colnames(zdf) <- t 76 | zdf 77 | } 78 | 79 | all_prices <- lapply(components, get_ticker) 80 | if (!quiet) cat("\n") 81 | not_null <- !c(lapply(all_prices, is.null), recursive=TRUE) 82 | all_prices <- do.call("cbind", all_prices[not_null]) 83 | all_prices 84 | } 85 | 86 | yfit.pci <- function(target, factors, 87 | start=as.numeric(format(Sys.Date()-365*2, "%Y%m%d")), # Starting date 88 | end=as.numeric(format(Sys.Date(), "%Y%m%d")), # Ending date 89 | na.rm=FALSE, 90 | ...) { # Additional parameters to be passed to fit.pci 91 | # Fetches the price series of a target security and set of potential 92 | # factor securities from Yahoo and constructs a partial cointegration 93 | # model from them. 94 | 95 | # require(TTR) 96 | prices <- multigetYahooPrices(c(target, factors), start, end, quiet=TRUE) 97 | if (any(is.na(prices))) { 98 | if (!na.rm && any(is.na(prices))) { 99 | has.any <- colnames(prices)[apply(prices,2,function(x) any(is.na(x)))] 100 | stop("The following tickers have NAs: ", has.any, "\nTry re-running with na.rm=TRUE") 101 | } 102 | } else { 103 | prices <- price_matrix.rm.na(prices) 104 | } 105 | 106 | e <- fit.pci(prices[,1,drop=FALSE], prices[,2:ncol(prices),drop=FALSE], ...) 107 | e 108 | } 109 | 110 | yhedge.pci <- function(target, factors, 111 | start=as.numeric(format(Sys.Date()-365*2, "%Y%m%d")), # Starting date 112 | end=as.numeric(format(Sys.Date(), "%Y%m%d")), # Ending date 113 | na.rm=FALSE, 114 | ...) { # Additional parameters to be passed to hedge.pci 115 | # Fetches the price series of a target security and set of potential 116 | # factor securities from Yahoo and constructs a partial cointegration 117 | # model from them. 
118 | 119 | # require(TTR) 120 | prices <- multigetYahooPrices(c(target, factors), start, end, quiet=TRUE) 121 | if (any(is.na(prices))) { 122 | if (!na.rm && any(is.na(prices))) { 123 | has.any <- colnames(prices)[apply(prices,2,function(x) any(is.na(x)))] 124 | stop("The following tickers have NAs: ", has.any, "\nTry re-running with na.rm=TRUE") 125 | } 126 | } else { 127 | prices <- price_matrix.rm.na(prices) 128 | } 129 | 130 | e <- hedge.pci(prices[,1,drop=FALSE], prices[,2:ncol(prices),drop=FALSE], ...) 131 | e 132 | } 133 | 134 | -------------------------------------------------------------------------------- /man/partialCI-package.Rd: -------------------------------------------------------------------------------- 1 | \name{partialCI-package} 2 | \alias{partialCI-package} 3 | \alias{partialCI} 4 | \docType{package} 5 | \title{ 6 | Partial Cointegration 7 | } 8 | \description{ 9 | A collection of time series is said to be partially cointegrated if they have a linear 10 | combination that is partially autoregressive, e.g., that can be represented as a sum of an 11 | autoregressive series and a random walk. This may be useful in modeling 12 | certain sets of financial time series. 13 | 14 | To find the partially cointegrated model that best fits two series X and Y, use: 15 | 16 | \preformatted{ 17 | > fit.pci(Y, X) 18 | } 19 | 20 | An interface to Yahoo! Finance permits you to find the best fits for two particular 21 | stocks of interest: 22 | 23 | \preformatted{ 24 | > yfit.pci("RDS-B", "RDS-A") 25 | Fitted values for PCI model 26 | Y[t] = alpha + X[t] %*% beta + M[t] + R[t] 27 | M[t] = rho * M[t-1] + eps_M [t], eps_M[t] ~ N(0, sigma_M^2) 28 | R[t] = R[t-1] + eps_R [t], eps_R[t] ~ N(0, sigma_R^2) 29 | 30 | Estimate Std. Err 31 | alpha 0.2063 0.8804 32 | beta_RDS-A 1.0531 0.0133 33 | rho 0.9055 0.0355 34 | sigma_M 0.2431 0.0162 35 | sigma_R 0.0993 0.0350 36 | 37 | -LL = 41.30, R^2[MR] = 0.863 38 | } 39 | 40 | This example was run on 1/7/2016. RDS-A and RDS-B are two classes of shares offered by 41 | Royal Dutch Shell that differ slightly in aspects of their tax treatment. The above fit 42 | shows that the spread between the two shares is mostly mean-reverting but that it contains 43 | a small random walk component. The mean-reverting component accounts for 86.3\% of 44 | the variance of the daily returns. The value of 0.9055 for rho corresponds to a half-life 45 | of mean reversion of about 7 trading days. 46 | 47 | To test the goodness of fit, the \code{test.pci} function can be used: 48 | 49 | \preformatted{ 50 | > h <- yfit.pci("RDS-B", "RDS-A") 51 | > test.pci(h) 52 | 53 | Likelihood ratio test of [Random Walk or CI(1)] vs Almost PCI(1) (joint penalty method) 54 | 55 | data: h 56 | 57 | Hypothesis Statistic p-value 58 | Random Walk -4.94 0.010 59 | AR(1) -4.08 0.010 60 | Combined 0.010 61 | } 62 | 63 | The \code{test.pci} function tests each of two different null hypotheses: 64 | (a) the residual series is purely a random walk, and (b) the residual series is 65 | purely autoregressive. In addition, the union of these hypothesis is 66 | also tested. For practical applications, one is usually most interested in 67 | rejecting the first of these null hypotheses, e.g., that the residual series 68 | is purely a random walk. 69 | 70 | The partialCI package also contains a function for searching for hedging portfolios. 71 | Given a particular stock (or time series), a search can be conducted to find the set 72 | of stocks that best replicate the target stock. 
In the following example, a hedge is 73 | sought for SPY using sector ETF's. 74 | 75 | \preformatted{ 76 | > sectorETFS <- c("XLB", "XLE", "XLF", "XLI", "XLK", "XLP", "XLU", "XLV", "XLY") 77 | > prices <- multigetYahooPrices(c("SPY", sectorETFS), start=20140101) 78 | > hedge.pci(prices[,"SPY"], prices) 79 | -LL LR[rw] p[rw] p[mr] rho R^2[MR] Factor | Factor coefficients 80 | 490.67 -1.7771 0.1782 0.0100 0.9587 0.8246 XLF | 6.8351 81 | 283.26 -4.3988 0.0137 0.0786 0.9642 1.0000 XLK | 3.6209 2.2396 82 | 168.86 -6.4339 0.0100 0.0100 0.7328 0.6619 XLI | 2.3191 1.6542 1.1391 83 | 84 | Fitted values for PCI model 85 | Y[t] = alpha + X[t] %*% beta + M[t] + R[t] 86 | M[t] = rho * M[t-1] + eps_M [t], eps_M[t] ~ N(0, sigma_M^2) 87 | R[t] = R[t-1] + eps_R [t], eps_R[t] ~ N(0, sigma_R^2) 88 | 89 | Estimate Std. Err 90 | alpha 14.2892 1.5598 91 | beta_XLF 2.3191 0.1439 92 | beta_XLK 1.6542 0.0804 93 | beta_XLI 1.1391 0.0662 94 | rho 0.7328 0.1047 95 | sigma_M 0.2678 0.0315 96 | sigma_R 0.2056 0.0401 97 | 98 | -LL = 168.86, R^2[MR] = 0.662 99 | } 100 | 101 | The top table displays the quality of the fit that is found as each new factor is 102 | added to the fit. The best fit consisting of only one factor is found by using XLF 103 | (the financials sector). The negative log likelihod score for this model is 490.67. 104 | However, the random walk hypothesis (p[rw]) cannot be rejected at the 5\% level. 105 | When adding XLK (the technology sector), the negative log likelihood drops to 283.26 106 | and the random walk hypothesis for the spread can now be rejected. This means that SPY 107 | is at least partially cointegrated and possibly fully cointegrated with a portfolio 108 | consisting of XLF and XLK in the right proportions. The best overall fit is obtained 109 | by also adding XLI (industrials) to the hedging portfolio. The final fit is 110 | 111 | \preformatted{ 112 | SPY = $14.29 + 2.32 XLF + 1.65 XLK + 1.14 XLI 113 | } 114 | 115 | For this fit, the proportion of variance attributable to the mean reverting 116 | component is 66.2\%, and the half life of mean reversion is about 2.2 days. 117 | 118 | Please feel free to contact us if you have questions or suggestions. 119 | 120 | Matthew Clegg, Christopher Krauss and Jonas Rende 121 | 122 | Feb 13, 2017 123 | } 124 | \author{ 125 | Matthew Clegg \email{matthewcleggphd@gmail.com} 126 | 127 | Christopher Krauss \email{christopher.krauss@fau.de} 128 | 129 | Jonas Rende \email{jonas.rende@fau.de} 130 | } 131 | 132 | \seealso{ 133 | \code{\link{fit.pci}} 134 | \code{\link{yfit.pci}} 135 | \code{\link{test.pci}} 136 | \code{\link{hedge.pci}} 137 | \code{\link{yhedge.pci}} 138 | } 139 | 140 | \keyword{ ts } 141 | \keyword{ models }% __ONLY ONE__ keyword per line 142 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # partialCI 2 | R package for fitting the partially cointegrated model 3 | 4 | A collection of time series is partially cointegrated if a linear combination of these time series can be found so that the residual spread is partially autoregressive - meaning that it can be represented as a sum of an autoregressive series and a random walk. This concept is useful in modeling certain sets of financial time series and beyond, as it allows for the spread to contain transient and permanent components alike. 
Partial cointegration was introduced by Clegg and Krauss (2016), along with a large-scale empirical application to financial market data. The partialCI package comprises estimation, testing, and simulation routines for partial cointegration models in state space. Clegg et al. (2017) provide an in-depth discussion of the package functionality as well as illustrative examples in the fields of finance and macroeconomics.
5 | If a collection of time series is partially cointegrated, then the spread between
6 | them can be interpreted as a mean-reverting series that has possibly been
7 | contaminated with a (hopefully small) random walk.
8 |
9 | The development version can be found on GitHub. To use the development version of the partialCI package, you will need to start by installing it,
10 | which can be done using devtools:
11 |
12 | ```
13 | > install.packages("devtools") # if devtools is not already installed
14 | > devtools::install_github("matthewclegg/partialCI")
15 | ```
16 |
17 | To find the partially cointegrated model that best fits two series
18 | X and Y, use:
19 |
20 | ```
21 | > fit.pci(Y, X)
22 | ```
23 |
24 | An interface to Yahoo! Finance permits you to find the best fits for
25 | two particular stocks of interest:
26 |
27 | ```
28 | > yfit.pci("RDS-B", "RDS-A")
29 | Fitted values for PCI model
30 | Y[t] = alpha + X[t] %*% beta + M[t] + R[t]
31 | M[t] = rho * M[t-1] + eps_M [t], eps_M[t] ~ N(0, sigma_M^2)
32 | R[t] = R[t-1] + eps_R [t], eps_R[t] ~ N(0, sigma_R^2)
33 |
34 | Estimate Std. Err
35 | alpha 0.2063 0.8804
36 | beta_RDS-A 1.0531 0.0133
37 | rho 0.9055 0.0355
38 | sigma_M 0.2431 0.0162
39 | sigma_R 0.0993 0.0350
40 |
41 | -LL = 41.30, R^2[MR] = 0.863
42 | ```
43 |
44 | This example was run on 1/7/2016. RDS-A and RDS-B are two
45 | classes of shares offered by Royal Dutch Shell that differ slightly
46 | in aspects of their tax treatment. The above fit shows that
47 | the spread between the two shares is mostly mean-reverting but that
48 | it contains a small random walk component. The mean-reverting
49 | component accounts for 86.3% of the variance of the daily returns.
50 | The value of 0.9055 for rho corresponds to a half-life of mean
51 | reversion of about 7 trading days.
52 |
53 | To test the goodness of fit, the test.pci function can be used:
54 |
55 | ```
56 | > h <- yfit.pci("RDS-B", "RDS-A")
57 | > test.pci(h)
58 |
59 | Likelihood ratio test of [Random Walk or CI(1)] vs Almost PCI(1) (joint penalty method)
60 |
61 | data: h
62 |
63 | Hypothesis Statistic p-value
64 | Random Walk -4.94 0.010
65 | AR(1) -4.08 0.010
66 | Combined 0.010
67 |
68 | ```
69 |
70 | The test.pci function tests each of two different null hypotheses:
71 | (a) the residual series is purely a random walk, and (b) the residual series is
72 | purely autoregressive. In addition, the union of these hypotheses is
73 | also tested. For practical applications, one is usually most interested in
74 | rejecting the first of these null hypotheses, i.e., that the residual series
75 | is purely a random walk.
76 |
77 | The partialCI package also contains a function for searching for
78 | hedging portfolios. Given a particular stock (or time series),
79 | a search can be conducted to find the set of stocks that best
80 | replicate the target stock. In the following example, a hedge
81 | is sought for SPY using sector ETFs.
82 | 83 | ``` 84 | > sectorETFS <- c("XLB", "XLE", "XLF", "XLI", "XLK", "XLP", "XLU", "XLV", "XLY") 85 | > prices <- multigetYahooPrices(c("SPY", sectorETFS), start=20140101) 86 | > hedge.pci(prices[,"SPY"], prices) 87 | -LL LR[rw] p[rw] p[mr] rho R^2[MR] Factor | Factor coefficients 88 | 490.67 -1.7771 0.1782 0.0100 0.9587 0.8246 XLF | 6.8351 89 | 283.26 -4.3988 0.0137 0.0786 0.9642 1.0000 XLK | 3.6209 2.2396 90 | 168.86 -6.4339 0.0100 0.0100 0.7328 0.6619 XLI | 2.3191 1.6542 1.1391 91 | 92 | Fitted values for PCI model 93 | Y[t] = alpha + X[t] %*% beta + M[t] + R[t] 94 | M[t] = rho * M[t-1] + eps_M [t], eps_M[t] ~ N(0, sigma_M^2) 95 | R[t] = R[t-1] + eps_R [t], eps_R[t] ~ N(0, sigma_R^2) 96 | 97 | Estimate Std. Err 98 | alpha 14.2892 1.5598 99 | beta_XLF 2.3191 0.1439 100 | beta_XLK 1.6542 0.0804 101 | beta_XLI 1.1391 0.0662 102 | rho 0.7328 0.1047 103 | sigma_M 0.2678 0.0315 104 | sigma_R 0.2056 0.0401 105 | 106 | -LL = 168.86, R^2[MR] = 0.662 107 | ``` 108 | 109 | The top table displays the quality of the fit that is found as each new 110 | factor is added to the fit. The best fit consisting of only one factor 111 | is found by using XLF (the financials sector). The negative log likelihod 112 | score for this model is 490.67. However, the random walk 113 | hypothesis (p[rw]) cannot be rejected at the 5% level. When adding 114 | XLK (the technology sector), the negative log likelihood drops to 283.26 115 | and the random walk hypothesis for the spread can now be rejected. This means 116 | that SPY is at least partially cointegrated and possibly fully cointegrated 117 | with a portfolio consisting of XLF and XLK in the right proportions. The 118 | best overall fit is obtained by also adding XLI (industrials) to the hedging 119 | portfolio. The final fit is 120 | 121 | ``` 122 | SPY = $14.29 + 2.32 XLF + 1.65 XLK + 1.14 XLI 123 | ``` 124 | 125 | For this fit, the proportion of variance attributable to the mean reverting 126 | component is 66.2%, and the half life of mean reversion is about 2.2 days. 127 | 128 | Please feel free to write to us if you have questions or suggestions. 129 | 130 | Matthew Clegg 131 | 132 | matthewcleggphd@gmail.com 133 | 134 | Christopher Krauss 135 | 136 | christopher.krauss@fau.de 137 | 138 | Jonas Rende 139 | 140 | jonas.rende@fau.de 141 | 142 | April 21, 2017 143 | 144 | -------------------------------------------------------------------------------- /R/lrtables.R: -------------------------------------------------------------------------------- 1 | # lrtables.R -- functions for building up tables of likelihood ratios 2 | # Copyright (C) 2016 Matthew Clegg 3 | 4 | # This program is free software; you can redistribute it and/or modify 5 | # it under the terms of the GNU General Public License as published by 6 | # the Free Software Foundation; either version 2 of the License, or 7 | # (at your option) any later version. 8 | # 9 | # This program is distributed in the hope that it will be useful, 10 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | # GNU General Public License for more details. 13 | # 14 | # A copy of the GNU General Public License is available at 15 | # http://www.r-project.org/Licenses/ 16 | 17 | # 1. Start R 18 | # 2. Load the partialCI package 19 | # 3. Create the list of samples and likelihood tables by entering the 20 | # following command: 21 | # partialCI:::build_lrtables() 22 | # On my Macbook Pro, this takes about a week to run. 23 | # 4. 
Reload the set of likelihood tables: 24 | # partialCI:::load_lrtables() 25 | # 5. Regenerate the lrdata.R file 26 | # partialCI:::dump_lrdata() 27 | # 6. Locate the new lrdata.R file on disk and move it to the partialCI/R 28 | # source code directory. 29 | # 7. Rebuild the partialCI package 30 | # R CMD build partialCI 31 | # 8. Reinstall the partialCI package 32 | # R CMD install partialCI.tar.gz 33 | # 9. Delete the temporary directories samples/ and tables/ 34 | 35 | # if(getRversion() >= "2.15.1") utils::globalVariables(c("PAR.SAMPLES", 36 | # "PAR.JOINT.CRITICAL.VALUES.DT", 37 | # "PAR.JOINT.CRITICAL.VALUES.KPSS.DT", 38 | # "PAR.SAMPLES.DT")) 39 | 40 | # if (!exists("PAR.SAMPLES")) PAR.SAMPLES <- NULL 41 | # if (!exists("PAR.SAMPLES.DT")) PAR.SAMPLES.DT <- NULL 42 | # if (!exists("PAR.POWER.SAMPLES")) PAR.POWER.SAMPLES <- NULL 43 | # if (!exists("PAR.POWER.PVMR.SAMPLES")) PAR.POWER.PVMR.SAMPLES <- NULL 44 | # if (!exists("PAR.JOINT.CRITICAL.VALUES.DT")) PAR.JOINT.CRITICAL.VALUES.DT <- NULL 45 | # if (!exists("PAR.JOINT.CRITICAL.VALUES.KPSS.DT")) PAR.JOINT.CRITICAL.VALUES.KPSS.DT <- NULL 46 | 47 | utils::globalVariables(c("PCI.SAMPLES")) 48 | 49 | LR_TABLES_LIST <- c("PCI.RWNULL.JP.LRQT", 50 | "PCI.MRNULL.JP.LRQT", 51 | "PCI.RWNULL.ROB.JP.LRQT", 52 | "PCI.MRNULL.ROB.JP.LRQT", 53 | "PCI.RWNULL.TWOSTEP.LRQT", 54 | "PCI.MRNULL.TWOSTEP.LRQT", 55 | "PCI.RWNULL.ROB.TWOSTEP.LRQT", 56 | "PCI.MRNULL.ROB.TWOSTEP.LRQT", 57 | "PCI.JOINT.CRITICAL.VALUES") 58 | 59 | for (t in LR_TABLES_LIST) { 60 | if (!exists(t)) assign(t, NULL) 61 | } 62 | 63 | build_lrtables <- function (dir="tables", debug=FALSE, nrep=10000, rebuild_samples=TRUE) { 64 | # Rebuilds all of the tables that are contained in this file. 65 | # This function takes several days to a week to run to completion. 66 | # The tables are written to files in the specified directory, 67 | # where they can then be loaded back into R and used to update 68 | # this file. 
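  #
  # Typical invocation (several days to a week of runtime; the arguments shown
  # below are simply the defaults of this function):
  #   partialCI:::build_lrtables(dir="tables", debug=FALSE, nrep=10000, rebuild_samples=TRUE)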
69 | 70 | # Initial definition for PCI.Samples 71 | PCI.SAMPLES<-NULL 72 | 73 | if (debug) { 74 | nr <- 1 75 | } else { 76 | nr <- nrep 77 | } 78 | 79 | if (rebuild_samples) { 80 | cat("Rebuilding PCI likelihood ratio samples ...\n") 81 | pci.generate.likelihood_ratio.samples(nrep=nrep) 82 | } 83 | 84 | pci.load.likelihood_ratio.samples() 85 | SAMPLES <- PCI.SAMPLES 86 | 87 | dir.create(dir, recursive=TRUE, showWarnings=FALSE) 88 | 89 | pci.rwnull.jp.lrqt <- quantile.table.from.samples( 90 | "rw_lrt", 91 | SAMPLES[SAMPLES$sigma_M==0.0 & SAMPLES$robust == FALSE & SAMPLES$pci_opt == "jp",]) 92 | pci.rwnull.twostep.lrqt <- quantile.table.from.samples( 93 | "rw_lrt", 94 | SAMPLES[SAMPLES$sigma_M==0.0 & SAMPLES$robust == FALSE & SAMPLES$pci_opt == "twostep",]) 95 | pci.rwnull.rob.jp.lrqt <- quantile.table.from.samples( 96 | "rw_lrt", 97 | SAMPLES[SAMPLES$sigma_M==0.0 & SAMPLES$robust == TRUE & SAMPLES$pci_opt == "jp",]) 98 | pci.rwnull.rob.twostep.lrqt <- quantile.table.from.samples( 99 | "rw_lrt", 100 | SAMPLES[SAMPLES$sigma_M==0.0 & SAMPLES$robust == TRUE & SAMPLES$pci_opt == "twostep",]) 101 | 102 | pci.mrnull.jp.lrqt <- quantile.table.from.samples( 103 | "mr_lrt", 104 | SAMPLES[SAMPLES$sigma_R==0.0 & SAMPLES$robust == FALSE & SAMPLES$pci_opt == "jp",]) 105 | pci.mrnull.twostep.lrqt <- quantile.table.from.samples( 106 | "mr_lrt", 107 | SAMPLES[SAMPLES$sigma_R==0.0 & SAMPLES$robust == FALSE & SAMPLES$pci_opt == "twostep",]) 108 | pci.mrnull.rob.jp.lrqt <- quantile.table.from.samples( 109 | "mr_lrt", 110 | SAMPLES[SAMPLES$sigma_R==0.0 & SAMPLES$robust == TRUE & SAMPLES$pci_opt == "jp",]) 111 | pci.mrnull.rob.twostep.lrqt <- quantile.table.from.samples( 112 | "mr_lrt", 113 | SAMPLES[SAMPLES$sigma_R==0.0 & SAMPLES$robust == TRUE & SAMPLES$pci_opt == "twostep",]) 114 | 115 | dput (pci.rwnull.jp.lrqt, sprintf("%s/%s", dir, "PCI.RWNULL.JP.LRQT")) 116 | dput (pci.rwnull.twostep.lrqt, sprintf("%s/%s", dir, "PCI.RWNULL.TWOSTEP.LRQT")) 117 | dput (pci.rwnull.rob.jp.lrqt, sprintf("%s/%s", dir, "PCI.RWNULL.ROB.JP.LRQT")) 118 | dput (pci.rwnull.rob.twostep.lrqt, sprintf("%s/%s", dir, "PCI.RWNULL.ROB.TWOSTEP.LRQT")) 119 | 120 | dput (pci.mrnull.jp.lrqt, sprintf("%s/%s", dir, "PCI.MRNULL.JP.LRQT")) 121 | dput (pci.mrnull.twostep.lrqt, sprintf("%s/%s", dir, "PCI.MRNULL.TWOSTEP.LRQT")) 122 | dput (pci.mrnull.rob.jp.lrqt, sprintf("%s/%s", dir, "PCI.MRNULL.ROB.JP.LRQT")) 123 | dput (pci.mrnull.rob.twostep.lrqt, sprintf("%s/%s", dir, "PCI.MRNULL.ROB.TWOSTEP.LRQT")) 124 | 125 | pci.joint.critical.values <- pci.findall.joint.critical.values() 126 | dput (pci.joint.critical.values, sprintf("%s/%s", dir, "PCI.JOINT.CRITICAL.VALUES")) 127 | 128 | printf ("%s done\n", Sys.time()) 129 | } 130 | 131 | load_table <- function (..., dir="tables") { 132 | # Loads a table and stores it in a global variable 133 | for (table_name in list(...)) { 134 | printf("Loading %s\n", table_name) 135 | tab <- dget(sprintf("%s/%s", dir, table_name)) 136 | if (exists(table_name, envir=asNamespace("partialCI"))) { 137 | unlockBinding(table_name, asNamespace("partialCI")) 138 | } 139 | assign(table_name, tab, envir = asNamespace("partialCI")) 140 | } 141 | } 142 | 143 | load_lrtables <- function () { 144 | sapply(LR_TABLES_LIST, function (t) load_table(t)) 145 | 0 146 | } 147 | 148 | dump_lrdata <- function(filename="lrdata.R") { 149 | 150 | dump(LR_TABLES_LIST, filename) 151 | cat("\n\n", file=filename, append=TRUE) 152 | } 153 | 154 | -------------------------------------------------------------------------------- /man/fit.pci.Rd: 
-------------------------------------------------------------------------------- 1 | \name{fit.pci} 2 | \alias{fit.pci} 3 | \title{ 4 | Fits the partial cointegration model to a collection of time series 5 | } 6 | \description{ 7 | Fits the partial cointegration model to a collection of time series 8 | } 9 | \usage{ 10 | fit.pci(Y, X, 11 | pci_opt_method = c("jp", "twostep"), 12 | par_model = c("par", "ar1", "rw"), 13 | lambda = 0, 14 | robust = FALSE, nu = 5, 15 | include_alpha=FALSE) 16 | } 17 | %- maybe also 'usage' for other objects documented here. 18 | \arguments{ 19 | \item{Y}{ 20 | The time series that is to be modeled. A plain or \code{\link{zoo}} vector of length \code{n}. 21 | } 22 | \item{X}{ 23 | A (possibly \code{\link{zoo}}) matrix of dimensions \code{n} x \code{k}. If \code{k=1}, then this may be a plain or \code{\link{zoo}} vector. 24 | } 25 | \item{pci_opt_method}{ 26 | Specifies the method that will be used for finding the best fitting model. One of the following: 27 | \itemize{ 28 | \item "jp" The joint-penalty method (see below) 29 | \item "twostep" The two-step method (see below) 30 | } 31 | Default: \code{jp} 32 | 33 | } 34 | \item{par_model}{ 35 | The model used for the residual series. One of the following: 36 | \itemize{ 37 | \item "par" The residuals are assumed to follow a partially autoregressive model. 38 | \item "ar1" The residuals are assumed to be autoregressive of order one. 39 | \item "rw" The residuals are assumed to follow a random walk. 40 | } 41 | Default: \code{par} 42 | } 43 | \item{lambda}{ 44 | The penalty parameter to be used in the joint-penalty (\code{jp}) estimation method. 45 | Default: 0. 46 | } 47 | \item{robust}{ 48 | If \code{TRUE}, then the residuals are assumed to follow a t-distribution with 49 | \code{nu} degrees of freedom. Default: \code{FALSE}. 50 | } 51 | \item{nu}{ 52 | The degrees-of-freedom parameter to be used in robust estimation. Default: 5. 53 | } 54 | \item{include_alpha}{If TRUE, then a constant term is estimated with the model. 55 | If FALSE, the constant term is omitted. Default: \code{FALSE}. 56 | } 57 | } 58 | \details{ 59 | The partial cointegration model is given by the equations: 60 | 61 | \deqn{ 62 | Y_t = \alpha + \beta_1 * X_{t,1} + beta_2 * X_{t,2} + ... + beta_k * X_{t,k} + M_t + R_t}{ 63 | Y[t] = alpha + beta[1] * X[t,1] + beta[2] * X[t,2] + ... + beta[k] * X[t,k] + M[t] + R[t] 64 | } 65 | \deqn{M_t = \rho M_{t-1} + \epsilon_{M,t}}{M[t] = rho * M[t-1] + epsilon_M[t]} 66 | \deqn{R_t = R_{t-1} + \epsilon_{R,t}}{R[t] = R[t-1] + epsilon_R[t]} 67 | \deqn{-1 < \rho < 1}{-1 < rho < 1} 68 | \deqn{\epsilon_{M,t} \sim N(0,\sigma_M^2)}{epsilon_M[t] ~ N(0, sigma_M^2)} 69 | \deqn{\epsilon_{R,t} \sim N(0,\sigma_R^2)}{epsilon_R[t] ~ N(0, sigma_R^2)} 70 | 71 | Given the input series 72 | \code{Y} and \code{X}, 73 | this function searches for the parameter values 74 | \code{alpha}, \code{beta}, \code{rho} 75 | that give the best fit of this model when using a Kalman filter. 76 | 77 | If \code{pci_opt_method} is \code{twostep}, then a two-step procedure is used. 78 | In the first step, a linear regression is performed of \code{X} on \code{Y} to determine 79 | the parameters \code{alpha} and \code{beta}. From this regression, a series of residuals 80 | is determined. In the second step, a model is fit to the residual series. If 81 | \code{par_model} is \code{par}, then a partially autoregressive model is fit to 82 | the residual series. 
If \code{par_model} is \code{ar1}, then an autoregressive model 83 | is fit to the residual series. If \code{par_model} is \code{rw} then a random walk 84 | model is fit to the residual series. Note that if \code{pci_opt_method} is \code{twostep} 85 | and \code{par_model} is \code{ar1}, then this reduces to the Engle-Granger two-step 86 | procedure. 87 | 88 | If \code{pci_opt_method} is \code{jp}, then the joint-penalty procedure is used. 89 | In this method, the parameters \code{alpha} and \code{beta} are estimated jointly 90 | with the parameter \code{rho} using a gradient-search optimization function. 91 | In addition, a penalty value of 92 | \eqn{\lambda * \sigma_R^2}{lambda * sigma_R^2} 93 | is added to the Kalman filter likelihood score when searching for the 94 | optimum solution. By choosing a positive value for \code{lambda}, you can drive 95 | the solution towards a value that places greater emphasis on the mean-reverting 96 | component. 97 | 98 | Because the joint-penalty method uses gradient search, the final parameter 99 | values found are dependent upon the starting point. There is no guarantee 100 | that a global optimum will be found. However, the joint-penalty method chooses 101 | several different starting points, so as to increase the chance of finding a 102 | global optimum. One of the chosen starting points consists of the parameters 103 | found through the two-step procedure. Because of this, the joint-penalty method 104 | is guaranteed to find parameter values which give a likelihood score at least 105 | as good as those found using the two-step procedure. Sometimes the improvement 106 | over the two-step procedure is substantial. 107 | 108 | The parameter \code{include_alpha} determines whether or not a constant term 109 | is included with the fit. Note that the model \code{alpha = a, R0 = 0} is 110 | equivalent to the model \code{alpha = 0, R0 = a}. Thus, the constant term can 111 | be interpreted as an estimate of the mean of the system at time \code{t=0}. 112 | As the random walk evolves, the mean will drift correspondingly. 113 | } 114 | \value{ 115 | An object of class \code{pci.fit} containing the fit that was found. The following components 116 | may be of interest 117 | \item{alpha}{The constant term of the fit} 118 | \item{alpha.se}{The estimated standard error of \code{alpha}} 119 | \item{beta}{The vector of weights} 120 | \item{beta.se}{The standard errors of the components of \code{beta}} 121 | \item{rho}{The estimated coefficient of mean reversion} 122 | \item{rho.se}{The standard error of \code{rho}} 123 | \item{negloglik}{The negative of the log likelihood} 124 | \item{pvmr}{The proportion of variance attributable to mean reversion} 125 | %% ~Describe the value returned 126 | %% If it is a LIST, use 127 | %% \item{comp1 }{Description of 'comp1'} 128 | %% \item{comp2 }{Description of 'comp2'} 129 | %% ... 130 | } 131 | \references{ 132 | Clegg, Matthew, 2015. 133 | Modeling Time Series with Both Permanent and Transient Components 134 | using the Partially Autoregressive Model. 135 | \emph{Available at SSRN: http://ssrn.com/abstract=2556957} 136 | 137 | Clegg, Matthew and Krauss, Christopher, 2016. Pairs trading with partial cointegration. FAU Discussion Papers in Economics, University of Erlangen-Nuernberg. 
138 | \emph{Available at https://www.iwf.rw.fau.de/files/2016/05/05-2016.pdf} 139 | } 140 | \author{ 141 | Matthew Clegg \email{matthewcleggphd@gmail.com} 142 | 143 | Christopher Krauss \email{christopher.krauss@fau.de} 144 | 145 | Jonas Rende \email{jonas.rende@fau.de} 146 | } 147 | \seealso{ 148 | \code{\link[egcm:egcm]{egcm}} Engle-Granger cointegration model 149 | 150 | \code{\link[partialAR:partialAR]{partialAR}} Partially autoregressive models 151 | } 152 | \examples{ 153 | ##---- Should be DIRECTLY executable !! ---- 154 | ##-- ==> Define data, use random, 155 | ##-- or do help(data=index) for the standard data sets. 156 | 157 | 158 | YX <- rpci(n=1000, alpha=1, beta=c(2,3,4), sigma_C=c(1,1,1), rho=0.9,sigma_M=0.1, sigma_R=0.2) 159 | fit.pci(YX[,1], YX[,2:ncol(YX)]) 160 | 161 | } 162 | % Add one or more standard keywords, see file 'KEYWORDS' in the 163 | % R documentation directory. 164 | \keyword{ ts } 165 | \keyword{ models } 166 | 167 | -------------------------------------------------------------------------------- /man/hedge.pci.Rd: -------------------------------------------------------------------------------- 1 | \name{hedge.pci} 2 | \alias{hedge.pci} 3 | \title{ 4 | Searches for a partially cointegrated hedge for a given time series 5 | } 6 | \description{ 7 | Given a time series and a collection of possible factors, finds a subset of the 8 | factors that provides the best fit to the given time series using the partially 9 | cointegrated model. 10 | } 11 | \usage{ 12 | hedge.pci(Y, X, 13 | maxfact = 10, 14 | lambda = 0, 15 | use.multicore = TRUE, 16 | minimum.stepsize = 0, 17 | verbose = TRUE, 18 | exclude.cols = c(), 19 | search_type = c("lasso", "full", "limited"), 20 | pci_opt_method=c("jp", "twostep"), 21 | ...) 22 | } 23 | \arguments{ 24 | \item{Y}{ 25 | An \code{N x 1} column vector or data \code{data.frame}, 26 | representing the series that is to be hedged. 27 | } 28 | \item{X}{ 29 | An \code{N x L} \code{data.frame}, where each column represents a possible factor to 30 | be used in a partially cointegrated fit. 31 | } 32 | \item{maxfact}{ 33 | The maximum number of columns from \code{X} that will be selected for modeling \code{Y}. 34 | Default: 10 35 | } 36 | \item{lambda}{ 37 | A penalty to be applied to the random walk 38 | portion of the partialAR model. A positive value for \code{lambda} 39 | will drive the model towards a solution with a smaller random walk component. 40 | Default: 0 41 | } 42 | \item{use.multicore}{ 43 | If \code{TRUE}, parallel processing will be used to improve performance. 44 | See \code{\link[parallel:mclapply]{parallel:mclapply}} 45 | Default: \code{TRUE} 46 | } 47 | \item{minimum.stepsize}{ 48 | If this is non-NA, then the search stops if an improvement 49 | cannot be found of at least this much. 50 | Default: 0 51 | } 52 | \item{verbose}{ 53 | If \code{TRUE}, then detailed information is printed about 54 | the execution. 55 | Default: \code{TRUE} 56 | } 57 | \item{exclude.cols}{ 58 | A list of column indexes specifying columns from \code{X} which 59 | should be excluded from consideration. Alternatively, the list of 60 | excluded columns may be given as a list of strings, in which case 61 | they are interepreted as column names. 62 | Default: \code{c()} 63 | } 64 | \item{search_type}{ 65 | If "lasso", then the lasso algorithm (see \code{\link{glmnet}}) is 66 | used to identify the factors that provide the best linear fit to 67 | the target sequence. 
68 | If "full", then a greedy algorithm is used to search for 69 | factors to be used in the hedge. At each step, all possible additions 70 | to the portfolio are considered, and the best one is chosen for inclusion. 71 | If "limited", then at each iteration, a preliminary screening step is performed 72 | to identify the securities with the highest correlations to the residuals 73 | of the currently selected portfolio. The top securities from this list are 74 | then checked for whether they would improve the portfolio, and the best one 75 | included. 76 | } 77 | \item{pci_opt_method}{ 78 | Specifies the method that will be used for finding the best fitting model. One of the following: 79 | \itemize{ 80 | \item \code{"jp"} The joint-penalty method (see \code{\link{fit.pci}}) 81 | \item \code{"twostep"} The two-step method (see \code{\link{fit.pci}}) 82 | } 83 | Default: \code{jp} 84 | 85 | } 86 | \item{\dots}{ 87 | Other parameters to be passed onto the search function. See the source 88 | code. 89 | } 90 | } 91 | \details{ 92 | The hedge is constructed by searching for column indices \code{i1,i2, ..., iN} 93 | from among the columns of \code{X} which yield the best fit to the partially 94 | cointegrated fit: 95 | 96 | \deqn{ 97 | Y_t = \alpha + \beta_1 * X_{t,i1} + beta_2 * X_{t,i2} + ... + beta_N * X_{t,iN} + M_t + R_t}{ 98 | Y[t] = alpha + beta[1] * X[t,i1] + beta[2] * X[t,i2] + ... + beta[k] * X[t,iN] + M[t] + R[t] 99 | } 100 | \deqn{M_t = \rho M_{t-1} + \epsilon_{M,t}}{M[t] = rho * M[t-1] + epsilon_M[t]} 101 | \deqn{R_t = R_{t-1} + \epsilon_{R,t}}{R[t] = R[t-1] + epsilon_R[t]} 102 | \deqn{-1 < \rho < 1}{-1 < rho < 1} 103 | \deqn{\epsilon_{M,t} \sim N(0,\sigma_M^2)}{epsilon_M[t] ~ N(0, sigma_M^2)} 104 | \deqn{\epsilon_{R,t} \sim N(0,\sigma_R^2)}{epsilon_R[t] ~ N(0, sigma_R^2)} 105 | 106 | if \code{search_type="lasso"} is specified, then the lasso algorithm 107 | (see \code{\link{glmnet}}) is used to search for the factors that give 108 | the best linear fit to the target sequence \code{Y}. Having determined 109 | the list of factors, the cutoff point is determined based successive 110 | improvements to the likelihood score of the fitted model. 111 | 112 | Otherwise, a greedy algorithm (\code{search_type="full"}) or a modified greedy algorithm 113 | (\code{search_type="limited"}) is used. This proceeds by searching through all 114 | columns of \code{X} (except those listed in \code{exclude.cols}) to find the 115 | column that gives the best fit to \code{Y}, as determined by 116 | the likelihood score of the partially cointegrated model. This column becomes the initial 117 | hedging portfolio. Having selected columns \code{i1, i2, ..., iK}, the next 118 | column is found by searching through all remaining columns of \code{X} (except those 119 | listed in \code{exclude.cols}) for the column which gives the best improvement 120 | to the partially cointegrated fit. However, if the best improvement is less than 121 | \code{minimum.stepsize}, or if \code{maxfact} columns have already been added, 122 | then the search terminates. 123 | 124 | In the case of the modified greedy algorithm (\code{search_type="limited"}), a 125 | preprocessing step is used at the beginning of each iteration. In this preprocessing 126 | step, the correlation is computed between each unused column of \code{X} and the 127 | residual series of the currently computed best fit. The top \code{B} choices are then 128 | considered for inclusion in the portfolio, where \code{B} is a branching factor. 
129 | The branching factor can be controlled by setting the value of the optional parameter 130 | \code{max.branch}. Its default value is 10. 131 | 132 | The \code{lasso} algorithm is by far the fastest, followed by the \code{limited} greedy search. 133 | So, the best strategy is probably to start by using the \code{lasso}. If it fails to 134 | produce acceptable results, then move on to the \code{limited} greedy algorithm and finally 135 | the \code{full} search. 136 | } 137 | \value{ 138 | Returns an S3 object of class \code{pci.hedge} containing the following fields 139 | \item{pci}{The best partially cointegrated fit that was found} 140 | \item{indexes}{The indexes of the columns from \code{X} that were selected} 141 | \item{index_names}{The names of the columns from \code{X} that were selected} 142 | } 143 | \author{ 144 | Matthew Clegg \email{matthewcleggphd@gmail.com} 145 | 146 | Christopher Krauss \email{christopher.krauss@fau.de} 147 | 148 | Jonas Rende \email{jonas.rende@fau.de} 149 | } 150 | \seealso{ 151 | \code{\link{fit.pci}} Fitting of partially cointegrated models 152 | 153 | \code{\link[partialAR:partialAR]{partialAR}} Partially autoregressive models 154 | 155 | \code{\link[egcm:egcm]{egcm}} Engle-Granger cointegration model 156 | } 157 | \examples{ 158 | ##---- Should be DIRECTLY executable !! ---- 159 | ##-- ==> Define data, use random, 160 | ##-- or do help(data=index) for the standard data sets. 161 | 162 | \dontrun{YX <- rpci(n=1000, alpha=1, beta=c(2,3,4,5,6), 163 | sigma_C=c(0.1,0.1,0.1,0.1,0.1), rho=0.9, sigma_M=1, sigma_R=1) 164 | YXC <- cbind(YX, matrix(rnorm(5000), ncol=5)) 165 | hedge.pci(YX[,1], YX[,2:ncol(YX)]) 166 | hedge.pci(YXC[,1], YXC[,2:ncol(YXC)])} 167 | 168 | } 169 | % Add one or more standard keywords, see file 'KEYWORDS' in the 170 | % R documentation directory. 171 | \keyword{ ts } 172 | \keyword{ models } 173 | -------------------------------------------------------------------------------- /src/cfit.cc: -------------------------------------------------------------------------------- 1 | /* cfit.C -- high-speed fitting of partially AR(1) models */ 2 | /* Copyright 2015 Matthew Clegg */ 3 | 4 | /* 5 | # This program is free software; you can redistribute it and/or modify 6 | # it under the terms of the GNU General Public License as published by 7 | # the Free Software Foundation; either version 2 of the License, or 8 | # (at your option) any later version. 9 | # 10 | # This program is distributed in the hope that it will be useful, 11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | # GNU General Public License for more details. 
14 | # 15 | # A copy of the GNU General Public License is available at 16 | # http://www.r-project.org/Licenses/ 17 | */ 18 | 19 | #include 20 | using namespace Rcpp; 21 | 22 | const double kPi = 3.14159265358979323846; 23 | 24 | // [[Rcpp::export]] 25 | double lagged_variance_c (NumericVector X, int k, int n) { 26 | // Computes the variance of (1-B^k)X[(k+1)..n] 27 | 28 | if (k < 1 || k >= n-2 || n < 4 || n+k > X.size()) return(NA_REAL); 29 | 30 | double s = 0.0; 31 | double ssq = 0.0; 32 | for (int i = 0; i < n; i++) { 33 | double dx = X[i+k] - X[i]; 34 | s += dx; // This is a telescoping sum, but taking advantage of this 35 | // fact is not going to improve execution time noticeably 36 | ssq += dx*dx; 37 | } 38 | double v = (ssq - s*s/n)/(n-1); 39 | return (v); 40 | } 41 | 42 | // [[Rcpp::export]] 43 | 44 | double estimate_rho_par_c (NumericVector X) { 45 | // Computes an estimate of mean reversion for the mean-reverting 46 | // portion of a PAR process. If v[k] = Var[X[t+k]-X[t]], then 47 | // rho is given by the variance formula: 48 | // rho = - (v[1] - 2 * v[2] + v[3]) / (2 * v[1] - v[2]) 49 | 50 | int n = X.size(); 51 | if (n < 5) return(NA_REAL); 52 | 53 | double xv1 = lagged_variance_c(X, 1, n-3); 54 | double xv2 = lagged_variance_c(X, 2, n-3); 55 | double xv3 = lagged_variance_c(X, 3, n-3); 56 | 57 | double rho = -(xv1 - 2 * xv2 + xv3) / (2 * xv1 - xv2); 58 | if (rho < -0.99) rho = -1.0; 59 | if (rho > 0.99) rho = 1.0; 60 | 61 | return rho; 62 | } 63 | 64 | // [[Rcpp::export]] 65 | 66 | NumericVector estimate_par_c (NumericVector X, double rho_max = 1.0) { 67 | /* Estimates the parameters of an partially AR(1) sequence using the 68 | variances of the differences. On input, X is an array of doubles, 69 | representing the time series of the data. p is an output vector 70 | of length 3. On return, the p[1] will contain rho, p[2] will 71 | contain sigma_M and p[3] will contain sigma_R. 72 | */ 73 | 74 | int n = X.size(); 75 | if (n < 5) return(NumericVector::create(NA_REAL, NA_REAL, NA_REAL)); 76 | 77 | double xv1 = lagged_variance_c(X, 1, n-3); 78 | double xv2 = lagged_variance_c(X, 2, n-3); 79 | double xv3 = lagged_variance_c(X, 3, n-3); 80 | 81 | double rho = -(xv1 - 2 * xv2 + xv3) / (2 * xv1 - xv2); 82 | if (rho > rho_max) rho = rho_max; 83 | if (rho < -0.99) { 84 | return NumericVector::create(-1.0, sqrt(xv1), 0.0); 85 | } else if (rho > 0.99) { 86 | return NumericVector::create(0.0, 0.0, sqrt(xv1)); 87 | } 88 | 89 | double sigma2_M = 0.5 * ((rho + 1.0)/(rho - 1.0)) * (xv2 - 2.0 * xv1); 90 | if (sigma2_M > 0.5 * xv2) sigma2_M = 0.5 * xv2; // Dead code? 
91 | if (sigma2_M < 0.0) sigma2_M = 0.0; 92 | 93 | double sigma2_R = 0.5 * (xv2 - 2.0 * sigma2_M); 94 | 95 | double sigma_M = sqrt(sigma2_M); 96 | double sigma_R = sqrt(sigma2_R); 97 | 98 | return NumericVector::create(rho, sigma_M, sigma_R); 99 | } 100 | 101 | // [[Rcpp::export]] 102 | 103 | double pvmr_par_c(double rho, double sigma_M, double sigma_R) { 104 | // Returns the proportion of variance attributable to mean reversion 105 | // for a PAR process with parameters rho, sigma_M and sigma_R 106 | 107 | if ((rho < -1.0) || (rho > 1.0)) return(NA_REAL); 108 | if (sigma_M < 0.0) return(NA_REAL); 109 | if (sigma_R < 0.0) return(NA_REAL); 110 | if ((sigma_M == 0.0) && (sigma_R == 0.0)) return(NA_REAL); 111 | 112 | double sigma2_M = sigma_M * sigma_M; 113 | double sigma2_R = sigma_R * sigma_R; 114 | double r2 = (2 * sigma2_M) / (2 * sigma2_M + (1 + rho) * sigma2_R); 115 | return r2; 116 | } 117 | 118 | // [[Rcpp::export]] 119 | 120 | double kalman_gain_par_mr (double rho, double sigma_M, double sigma_R) { 121 | // Computes the values of the steady state Kalman gain K_mr 122 | // of the mean-reverting portion of the state equation 123 | // based on the parameters rho, sigma_M and sigma_R given 124 | // as input. The Kalman gain of the random walk portion of the 125 | // state equation is 1 - K_mr. 126 | 127 | if ((rho < -1.0) || (rho > 1.0)) return(NA_REAL); 128 | if (sigma_M < 0.0) return(NA_REAL); 129 | if (sigma_R < 0.0) return(NA_REAL); 130 | if ((sigma_M == 0.0) && (sigma_R == 0.0)) return(NA_REAL); 131 | 132 | if (sigma_M == 0.0) return 0.0; 133 | if (sigma_R == 0.0) return 1.0; 134 | 135 | double sigma2_M = sigma_M * sigma_M; 136 | double sigma2_R = sigma_R * sigma_R; 137 | double rad = sqrt((rho + 1.0)*(rho + 1.0) * sigma2_R + 4.0 * sigma2_M); 138 | 139 | double num = 2.0 * sigma2_M; 140 | double den = sigma_R*(rad + (1.0 + rho) * sigma_R) + 2.0 * sigma2_M; 141 | double K_mr = num / den; 142 | 143 | return K_mr; 144 | } 145 | 146 | // [[Rcpp::export]] 147 | 148 | double loglik_par_c (NumericVector Y, double rho, double sigma_M, double sigma_R, double M0, double R0) { 149 | /* Computes the log likelihood of Y given parameters p using 150 | a steady state Kalman filter. Returns the negative log 151 | likelihood value. 152 | */ 153 | 154 | if ((rho < -1.0) || (rho > 1.0)) return(NA_REAL); 155 | if (sigma_M < 0.0) return(NA_REAL); 156 | if (sigma_R < 0.0) return(NA_REAL); 157 | if ((sigma_M == 0.0) && (sigma_R == 0.0)) return(NA_REAL); 158 | 159 | int n = Y.size(); 160 | if (n < 1) return(NA_REAL); 161 | 162 | double K_mr = kalman_gain_par_mr (rho, sigma_M, sigma_R); 163 | double K_rw = 1.0 - K_mr; 164 | 165 | double esumsq = 0.0; 166 | double M = M0; 167 | double R = R0; 168 | double tvar = sigma_M*sigma_M + sigma_R*sigma_R; 169 | 170 | for (int i=0; i < n; i++) { 171 | double xhat = rho * M + R; 172 | double e = Y[i] - xhat; 173 | esumsq += e*e; 174 | M = rho * M + e * K_mr; 175 | R += e * K_rw; 176 | } 177 | 178 | double nll = (n/2.0)*log(tvar * 2.0*kPi) + esumsq/(2.0*tvar); 179 | return nll; 180 | } 181 | 182 | // [[Rcpp::export]] 183 | 184 | double loglik_par_t_c (NumericVector Y, double rho, double sigma_M, double sigma_R, 185 | double M0, double R0, double nu=5.0) { 186 | /* Computes the log likelihood of Y given parameters p using 187 | a steady state Kalman filter using the assumption that the 188 | error terms will be t-distributed. Returns the negative log 189 | likelihood value. 
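     Each one-step prediction error e is scored with a Student-t density with
     nu degrees of freedom and scale tsd = sqrt(sigma_M^2 + sigma_R^2), i.e.
       log f(e) = lgamma((nu+1)/2) - lgamma(nu/2) - 0.5*log(nu*pi) - log(tsd)
                  - ((nu+1)/2) * log(1 + (e/tsd)^2 / nu),
     and the function returns -sum log f(e) over all observations.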
190 | */ 191 | 192 | if ((rho < -1.0) || (rho > 1.0)) return(NA_REAL); 193 | if (sigma_M < 0.0) return(NA_REAL); 194 | if (sigma_R < 0.0) return(NA_REAL); 195 | if ((sigma_M == 0.0) && (sigma_R == 0.0)) return(NA_REAL); 196 | if (nu < 2) return(NA_REAL); 197 | 198 | int n = Y.size(); 199 | if (n < 1) return(NA_REAL); 200 | 201 | double K_mr = kalman_gain_par_mr (rho, sigma_M, sigma_R); 202 | double K_rw = 1.0 - K_mr; 203 | 204 | double esum = 0.0; 205 | double M = M0; 206 | double R = R0; 207 | // double tvar = (sigma_M*sigma_M + sigma_R*sigma_R) * (nu - 2.0) / nu; 208 | double tvar = (sigma_M*sigma_M + sigma_R*sigma_R); 209 | double tsd = sqrt(tvar); 210 | double const_term = n * (lgamma((nu+1.0)*0.5) - 0.5*log(nu * kPi) - 211 | lgamma(nu*0.5) - log(tsd)); 212 | 213 | for (int i=0; i < n; i++) { 214 | double xhat = rho * M + R; 215 | double e = Y[i] - xhat; 216 | esum += log(1 + (e*e/tvar)/nu); 217 | M = rho * M + e * K_mr; 218 | R += e * K_rw; 219 | } 220 | 221 | double nll = ((nu + 1.0) * 0.5) * esum - const_term; 222 | return nll; 223 | } 224 | -------------------------------------------------------------------------------- /R/fit_pci.R: -------------------------------------------------------------------------------- 1 | # Functions pertaining to fitting partially cointegrated (PCI) series 2 | # Copyright (C) 2016 Matthew Clegg 3 | # This program is free software; you can redistribute it and/or modify 4 | # it under the terms of the GNU General Public License as published by 5 | # the Free Software Foundation; either version 2 of the License, or 6 | # (at your option) any later version. 7 | # 8 | # This program is distributed in the hope that it will be useful, 9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | # GNU General Public License for more details. 12 | # 13 | # A copy of the GNU General Public License is available at 14 | # http://www.r-project.org/Licenses/ 15 | 16 | fit.pci.twostep <- function (Y, X, par_model=c("par", "ar1", "rw"), robust=FALSE, nu=5, 17 | include_alpha=FALSE) { 18 | # On input, Y is an n x 1 column vector, and X is an n x k matrix. 19 | # Fits the model 20 | # 21 | # Y[t] = alpha + beta[1] X[t,1] + beta[2] X[t,2] + ... + beta [k] X[t,k] 22 | # + m[t] + r[t] 23 | # 24 | # where 25 | # 26 | # m[t] = rho * m[t-1] + eps_M[t] 27 | # r[t] = r[t-1] + eps_R[t] 28 | # eps_M,t ~ N(0,1) 29 | # eps_R,t ~ N(0,1) 30 | # m[0] = r[0] = 0 31 | # 32 | # Estimates the values of alpha, beta, rho, sigma_M and sigma_R using 33 | # the two step procedure: 34 | # 1. Perform a linear regression of Y on X to obtain alpha and beta 35 | # 2. Determine rho, sigma_M and sigma_R through fitting a PAR model 36 | # to the residuals of the linear regression performed in step 1. 37 | # 38 | # If include_alpha is FALSE, then the parameter alpha is omitted from 39 | # the model, and we take r[0] = Y[0] - beta * X[0]. 40 | # 41 | # The parameter par_model specifies which parameters of the PAR model are 42 | # to be optimized. 43 | 44 | par_model <- match.arg(par_model) 45 | 46 | if (include_alpha) { 47 | fit.pci.twostep.a (Y, X, par_model, robust, nu) 48 | } else { 49 | fit.pci.twostep.a0 (Y, X, par_model, robust, nu) 50 | } 51 | } 52 | 53 | pci.jointpenalty.guess <- function (Y, X, include_alpha=FALSE) { 54 | # On input, Y is an n x 1 column vector, and X is an n x k matrix. 55 | # Generates a guess for the starting point of optimization for an PCI 56 | # fit as follows: 57 | # 1. 
Estimates beta by fitting the differenced X series to the 58 | # differenced Y series. 59 | # 2. Estimates alpha by a Y[1] - X[1,] %*% beta 60 | # 3. Estimates rho, sigma_M and sigma_R by fitting a PAR series 61 | # to Y - X %*% beta - alpha 62 | # Returns the guess that is found. 63 | # 64 | # If include_alpha is FALSE, then the parameter alpha is omitted 65 | # from the model. 66 | 67 | if (include_alpha) { 68 | pci.jointpenalty.guess.a(Y, X) 69 | } else { 70 | pci.jointpenalty.guess.a0(Y, X) 71 | } 72 | } 73 | 74 | fit.pci.jointpenalty.rw <- function (Y, X, 75 | lambda=0, 76 | p0=pci.jointpenalty.guess(Y,X,include_alpha), 77 | robust=FALSE, 78 | nu=5, 79 | pgtol=1e-8, 80 | include_alpha=FALSE) { 81 | # On input, Y is an n x 1 column vector, and X is an n x k matrix. 82 | # Fits an PCI model to Y,X where the residual series is modeled as 83 | # a random walk. Returns a three-component list: 84 | # par: The parameter estimates 85 | # se: The estimated standard error of the parameter estimates 86 | # nll: The negative log likelihood of the fit 87 | 88 | if (include_alpha) { 89 | fit.pci.jointpenalty.rw.a (Y, X, lambda, p0, robust, nu, pgtol) 90 | } else { 91 | fit.pci.jointpenalty.rw.a0 (Y, X, lambda, p0, robust, nu, pgtol) 92 | } 93 | } 94 | 95 | fit.pci.jointpenalty.mr <- function (Y, X, 96 | lambda=0, 97 | p0=pci.jointpenalty.guess(Y,X,include_alpha), 98 | robust=FALSE, 99 | nu=5, 100 | pgtol=1e-8, 101 | include_alpha=FALSE) { 102 | # On input, Y is an n x 1 column vector, and X is an n x k matrix. 103 | # Fits an PCI model to Y,X where the residual series is modeled as 104 | # a pure AR(1) series. Returns a three-component list: 105 | # par: The parameter estimates 106 | # se: The estimated standard error of the parameter estimates 107 | # nll: The negative log likelihood of the fit 108 | 109 | if (include_alpha) { 110 | fit.pci.jointpenalty.mr.a (Y, X, lambda, p0, robust, nu, pgtol) 111 | } else { 112 | fit.pci.jointpenalty.mr.a0 (Y, X, lambda, p0, robust, nu, pgtol) 113 | } 114 | } 115 | 116 | fit.pci.jointpenalty.both <- function (Y, X, 117 | lambda=0, 118 | p0=pci.jointpenalty.guess(Y,X,include_alpha), 119 | robust=FALSE, 120 | nu=5, 121 | pgtol=1e-8, 122 | include_alpha=FALSE) { 123 | # On input, Y is an n x 1 column vector, and X is an n x k matrix. 124 | # Fits an PCI model to Y,X. Returns a three-component list: 125 | # par: The parameter estimates 126 | # se: The estimated standard error of the parameter estimates 127 | # nll: The negative log likelihood of the fit 128 | 129 | if (include_alpha) { 130 | fit.pci.jointpenalty.both.a (Y, X, lambda, p0, robust, nu, pgtol) 131 | } else { 132 | fit.pci.jointpenalty.both.a0 (Y, X, lambda, p0, robust, nu, pgtol) 133 | } 134 | } 135 | 136 | fit.pci.jointpenalty <- function (Y, X, 137 | par_model=c("par", "ar1", "rw"), 138 | lambda=0, 139 | robust=FALSE, 140 | nu=5, 141 | include_alpha=FALSE) { 142 | # On input, Y is an n x 1 column vector, and X is an n x k matrix. 143 | # lambda is a penalty value that drives the optimization towards a 144 | # solution where sigma_R has the lowest possible value. 145 | # 146 | # Fits the model 147 | # 148 | # Y[t] = alpha + beta[1] X[t,1] + beta[2] X[t,2] + ... 
+ beta [k] X[t,k] 149 | # + m[t] + r[t] 150 | # 151 | # where 152 | # 153 | # m[t] = rho * m[t-1] + eps_M[t] 154 | # r[t] = r[t-1] + eps_R[t] 155 | # eps_M,t ~ N(0,sigma_M^2) 156 | # eps_R,t ~ N(0,sigma_R^2) 157 | # m[0] = r[0] = 0 158 | # 159 | # Estimates the values of alpha, beta, rho, sigma_M and sigma_R using 160 | # the following procedure: 161 | # 1. Initial estimates of beta are obtained by regressing the 162 | # first differences of X[,i] on the first difference of Y. 163 | # 2. Given these estimates for beta, an initial estimate for 164 | # alpha is obtained as alpha = Y[1] - beta X[1,]. 165 | # 3. Initial estimates of rho, sigma_M and sigma_R are obtained by 166 | # fitting a PAR model to the residuals 167 | # 4. Having obtained these initial estimates for all of the parameters, 168 | # the following cost function is maximized to final parameter 169 | # estimates: 170 | # C[rho, sigma_M, sigma_R] = -LL[rho, sigma_M, sigma_R] + lambda * sigma_R^2 171 | # In the above, LL is the log likelihood function for the steady 172 | # state Kalman filter with parameters rho, sigma_M and sigma_R. 173 | # 174 | # The parameter par_model specifies which parameters of the PAR model are 175 | # to be optimized. If par == "par", then rho, sigma_M and sigma_R are each 176 | # estimated. If par == "ar1", then rho and sigma_M are estimated, while 177 | # sigma_R is fixed at 0. If par == "rw", then sigma_R is estimated, while 178 | # rho and sigma_M are fixed at 0. 179 | # 180 | # If include_alpha is FALSE, then the parameter alpha is omitted from 181 | # the model, and we take r[0] = Y[0] - beta * X[0]. 182 | 183 | par_model <- match.arg(par_model) 184 | 185 | if (include_alpha) { 186 | fit.pci.jointpenalty.a (Y, X, lambda, p0, robust, nu) 187 | } else { 188 | fit.pci.jointpenalty.a0 (Y, X, lambda, p0, robust) 189 | } 190 | } 191 | 192 | fit.pci <- function (Y, X, 193 | pci_opt_method=c("jp", "twostep"), 194 | par_model=c("par", "ar1", "rw"), 195 | lambda=0, 196 | robust=FALSE, 197 | nu=5, 198 | include_alpha=FALSE) { 199 | # On input, Y is an n x 1 column vector, and X is an n x k matrix. 200 | # lambda is a penalty value that drives the optimization towards a 201 | # solution where sigma_R has the lowest possible value. 202 | # 203 | # Fits the model 204 | # 205 | # Y[t] = alpha + beta[1] X[t,1] + beta[2] X[t,2] + ... + beta [k] X[t,k] 206 | # + m[t] + r[t] 207 | # 208 | # where 209 | # 210 | # m[t] = rho * m[t-1] + eps_M[t] 211 | # r[t] = r[t-1] + eps_R[t] 212 | # eps_M,t ~ N(0,sigma_M^2) 213 | # eps_R,t ~ N(0,sigma_R^2) 214 | # m[0] = r[0] = 0 215 | # 216 | # Fits the model using either the two step procedure or the joint penalty 217 | # procedure, according to the value of the parameter 'pci_opt_method'. Returns 218 | # an S3 object of class "pci.fit" representing the fit that was obtained. 219 | # 220 | # The parameter par_model specifies which parameters of the PAR model are 221 | # to be optimized. If par == "par", then rho, sigma_M and sigma_R are each 222 | # estimated. If par == "ar1", then rho and sigma_M are estimated, while 223 | # sigma_R is fixed at 0. If par == "rw", then sigma_R is estimated, while 224 | # rho and sigma_M are fixed at 0. 225 | # 226 | # If include_alpha is FALSE, then the parameter alpha is omitted from 227 | # the model, and we take r[0] = Y[0] - beta * X[0]. 
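  #
  # Usage sketch (simulated data; the parameter values mirror the example in
  # man/fit.pci.Rd and are purely illustrative):
  #   YX <- rpci(n=1000, alpha=1, beta=c(2,3,4), sigma_C=c(1,1,1),
  #              rho=0.9, sigma_M=0.1, sigma_R=0.2)
  #   fit.pci(YX[,1], YX[,2:ncol(YX)], pci_opt_method="jp", par_model="par")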
228 | 229 | pci_opt_method <- match.arg(pci_opt_method) 230 | par_model <- match.arg(par_model) 231 | 232 | Yorig <- Y 233 | if (!is.null(dim(Y))) { 234 | if (dim(Y)[2] > 1) { 235 | if (missing(X)) { 236 | X <- Y[,2:ncol(Y),drop=FALSE] 237 | } else { 238 | stop("Y must be a single column") 239 | } 240 | } 241 | Y <- Y[,1] 242 | } 243 | 244 | if (include_alpha) { 245 | fit.pci.a (Y, X, pci_opt_method, par_model, lambda, robust, nu) 246 | } else { 247 | fit.pci.a0 (Y, X, pci_opt_method, par_model, lambda, robust, nu) 248 | } 249 | 250 | 251 | } 252 | 253 | print.pci.fit <- function (x, ...) { 254 | # Given a PCI structure A, prints it in summary form 255 | print.internal.pci.fit(x) 256 | } 257 | 258 | print.internal.pci.fit <- function (A, ...) { 259 | # Prints a PCI fit 260 | cat("Fitted values for PCI model\n") 261 | if ("alpha" %in% names(A)) { 262 | cat(" Y[t] = alpha + X[t] %*% beta + M[t] + R[t]\n") 263 | } else { 264 | cat(" Y[t] = X[t] %*% beta + M[t] + R[t]\n") 265 | } 266 | cat(" M[t] = rho * M[t-1] + eps_M [t], eps_M[t] ~ N(0, sigma_M^2)\n") 267 | cat(" R[t] = R[t-1] + eps_R [t], eps_R[t] ~ N(0, sigma_R^2)\n\n") 268 | 269 | cat(sprintf("%-10s %8s %8s\n", "", "Estimate", "Std. Err")) 270 | p <- function(n, v, s) { cat(sprintf("%-10s %8.4f %8.4f\n", n, v, s)) } 271 | p("alpha", A$alpha, A$alpha.se) 272 | for (i in 1:length(A$beta)) { 273 | p(names(A$beta[i]), A$beta[i], A$beta.se[i]) 274 | } 275 | p("rho", A$rho, A$rho.se) 276 | p("sigma_M", A$sigma_M, A$sigma_M.se) 277 | p("sigma_R", A$sigma_R, A$sigma_R.se) 278 | # p("M0", A$M0, A$M0.se) 279 | # p("R0", A$R0, A$R0.se) 280 | 281 | cat(sprintf("\n-LL = %.2f, R^2[MR] = %.3f\n", A$negloglik, A$pvmr)) 282 | } 283 | 284 | statehistory.pci <- function(A, data=A$data, basis=A$basis) { 285 | # On input, A is an pci.fit object as produced by fit.pci. 286 | # Creates a data.frame containing the inferred values of 287 | # the states of the mean-revering and random walk components 288 | # of the process, based upon the model parameters that were fit. 289 | # 290 | # Returns a data.frame containing the following columns: 291 | # Y: The value of the process at this time 292 | # X: The value of the hedge at this time 293 | # M: The inferred state of the mean reverting component 294 | # R: The inferred state of the random walk component 295 | # eps_M: The inferred shock to the mean reverting component 296 | # eps_R: The inferred shock to the random walk component 297 | 298 | if (is.null(dim(basis))) basis <- as.matrix(basis, ncol=1) 299 | if ("alpha" %in% names(A)) { 300 | Yhat <- basis %*% A$beta + A$alpha 301 | } else { 302 | Yhat <- basis %*% A$beta 303 | } 304 | Z <- data - Yhat 305 | 306 | n <- length(data) 307 | M <- numeric(n) 308 | R <- numeric(n) 309 | eps_M <- numeric(n) 310 | eps_R <- numeric(n) 311 | 312 | # Mprev <- A$M0 313 | # Mprev <- 0 314 | # Rprev <- A$R0 315 | Mprev <- 0 316 | Rprev <- data[1] - Yhat[1] 317 | 318 | K <- kalman.gain.par(rho=A$rho, sigma_M=A$sigma_M, sigma_R=A$sigma_R) 319 | 320 | for (i in 1:n) { 321 | xhat <- A$rho * Mprev + Rprev 322 | e <- Z[i] - xhat 323 | eps_M[i] <- e * K[1] 324 | eps_R[i] <- e * K[2] 325 | M[i] <- A$rho * Mprev + eps_M[i] 326 | R[i] <- Rprev + eps_R[i] 327 | Mprev <- M[i] 328 | Rprev <- R[i] 329 | } 330 | 331 | df <- data.frame(Y=data, Yhat=Yhat, Z=Z, M=M, R=R, eps_M=eps_M, eps_R=eps_R) 332 | colnames(df) <- c("Y", "Yhat", "Z", "M", "R", "eps_M", "eps_R") 333 | df 334 | } 335 | 336 | plot.pci.fit <- function (x, ...) { 337 | # Given a PCI structure A, plots it. 
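  # Note: this is the S3 plot method for objects of class "pci.fit", so a fitted
  # model can be plotted directly, e.g. plot(fit.pci(YX[,1], YX[,2:ncol(YX)])),
  # where YX is an illustrative data matrix such as one simulated with rpci().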
338 | 339 | plot.internal.pci.fit(x) 340 | } 341 | 342 | plot.internal.pci.fit <- function (A, ...) { 343 | # Plots an partially cointegrated model 344 | 345 | # Initial definition for Date, Value, Label 346 | Date <- Value <- Label<- NULL 347 | 348 | 349 | sh <- statehistory.pci(A) 350 | n <- nrow(sh) 351 | RW <- sh$Yhat + sh$R 352 | df1.1 <- data.frame(Date=A$index, Label="Actual", Value=sh$Y) 353 | df1.2 <- data.frame(Date=A$index, Label="Hedge", Value=sh$Yhat) 354 | df1.3 <- data.frame(Date=A$index, Label="Model", Value=RW) 355 | df1 <- rbind(df1.1, df1.2, df1.3) 356 | # p1 <- ggplot (df1, aes(x=Date, y=Value, colour=Label)) + geom_line () + 357 | # ylab("Price") + xlab("") + theme(legend.position="top") + 358 | # scale_colour_discrete(name="") + 359 | # scale_size_manual(values=c(2,0.5,0.5)) + 360 | # ggtitle("Price Series") 361 | 362 | labels <- c("Target", "Hedge", "Model") 363 | if (A$target_name != "Y") { 364 | labels[1] <- A$target_name 365 | } 366 | if (A$factor_names[1] != "X1") { 367 | factor_strings <- sapply(1:length(A$beta), 368 | function(i) sprintf("%5.2f %s", 369 | A$beta[i], A$factor_names[i])) 370 | labels[2] <- paste(factor_strings, collapse=" +") 371 | } 372 | 373 | p1 <- ggplot (df1, aes(x=Date, y=Value, colour=Label, size=Label, linetype=Label)) + geom_line () + 374 | ylab("Price") + xlab("") + theme(legend.position="top") + 375 | scale_colour_manual(name="", 376 | breaks=c("Actual", "Hedge", "Model"), 377 | labels=labels, 378 | # values=c("Black", "#0054A6", "#00AEEF")) + # Black, Blue, Cyan 379 | values=c("Black", "#00A651", "#00AEEF")) + # Black, Green, Cyan 380 | scale_size_manual(name="", 381 | breaks=c("Actual", "Hedge", "Model"), 382 | labels=labels, 383 | values=c(1.0,1.0,0.5)) + 384 | # values=c(0.75,0.75,1)) + 385 | scale_linetype_manual(name="", 386 | breaks=c("Actual", "Hedge", "Model"), 387 | labels=labels, 388 | values=c("solid", "solid", "solid"))+ 389 | # values=c("solid", "solid", "dashed"))+ 390 | ggtitle("Price Series") 391 | 392 | 393 | df2 <- data.frame(Date=A$index, Label="M[t]", Value=sh$M) 394 | sdR <- sd(sh$M) 395 | hlines <- data.frame(Value=c(2 * sdR, sdR, -sdR, -2 * sdR), 396 | Facet=c("two","one","one","two")) 397 | p2 <- ggplot(df2, aes(x=Date, y=Value)) + geom_line() + 398 | ggtitle("Mean Reverting Component") + ylab("Price") + xlab("") + 399 | geom_hline(data=hlines, aes(yintercept=Value, colour=Facet), linetype="dashed") + 400 | theme(legend.position="none") 401 | 402 | grid.newpage() 403 | pushViewport(viewport(layout=grid.layout(9, 1))) 404 | print(p1, vp=viewport(layout.pos.row=1:5, layout.pos.col=1)) 405 | print(p2, vp=viewport(layout.pos.row=6:9, layout.pos.col=1)) 406 | 407 | } 408 | 409 | as.data.frame.pci.fit <- function (x, row.names, optional, ...) { 410 | as.data.frame.internal.pci.fit (x) 411 | } 412 | 413 | 414 | 415 | as.data.frame.internal.pci.fit <- function (A, ...) 
{ 416 | # Given an partially cointegrated model A, converts it into a one row data.frame 417 | # containing the following values from the model: 418 | # alpha 419 | # beta 420 | # rho 421 | # sigma_M 422 | # sigma_R 423 | 424 | if ("alpha" %in% names(A)) { 425 | df1 <- data.frame(alpha = A$alpha, beta=A$beta) 426 | } else { 427 | df1 <- data.frame(beta = A$beta) 428 | } 429 | df2 <- data.frame(rho = A$rho, sigma_M=A$sigma_M, sigma_R=A$sigma_R, M0=A$M0, R0=A$R0) 430 | if ("alpha.se" %in% names(A)) { 431 | df3 <- data.frame(alpha.se = A$alpha.se, beta.se = A$beta.se) 432 | } else { 433 | df3 <- data.frame(beta.se = A$beta.se) 434 | } 435 | df4 <- data.frame(rho.se=A$rho.se, sigma_M.se=A$sigma_M.se, sigma_R.se=A$sigma_R.se, 436 | M0.se=A$M0.se, R0.se=A$R0.se) 437 | df5 <- data.frame(negloglik=A$negloglik, pvmr=A$pvmr) 438 | df <- cbind(df1, df2, df3, df4, df5) 439 | df 440 | } 441 | 442 | -------------------------------------------------------------------------------- /vignettes/pci_vignette.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "A partialCI Guide" 3 | author: 4 | - "Matthew Clegg" 5 | - "Christopher Krauss" 6 | - "Jonas Rende" 7 | date: "`r Sys.Date()`" 8 | output: rmarkdown::html_vignette 9 | bibliography: rende.bib 10 | vignette: > 11 | %\VignetteIndexEntry{A partialCI Guide} 12 | %\VignetteEngine{knitr::rmarkdown} 13 | %\VignetteEncoding{UTF-8} 14 | --- 15 | 16 | 17 | 18 | ## Introduction 19 | 20 | The **partialCI** package fits a partial cointegration model to describe a time series. Partial cointegration (PCI) is a weakening of cointegration, allowing for the residual series to contain a mean-reverting and a random walk component. Analytically, this residual series is described by a partially autoregressive process (PAR -- see @Summers.1986, @Poterba.1988, and @Clegg.2015a), consisting of a stationary AR-process and a random walk. Whereas classic cointegration in the sense of @Engle.1987 requires all shocks to be transient, PCI is more flexible and allows for permanent shocks as well -- a realistic assumption across many (macro)economic applications. Even though neither the residual series, nor its mean-reverting and permanent component are directly observable, estimation is still possible in state space -- see @Brockwell.2010 and @Durbin.2012. 21 | 22 | ## The partial cointegration framework 23 | 24 | ### Model definition 25 | 26 | Based on @Engle.1987, @Clegg.2016 define the concept of partial cointegration as follows: \dfn{Definition}: "The components of the vector $X_t$ are said to be partially cointegrated of order $d$, $b$, denoted $X_t \sim PCI\left(d,b\right)$, if (i) all components of $X_t$ are $I\left(d\right)$; (ii) there exists a vector $\alpha$ so that $Z_t = \alpha \prime X_t$ and $Z_t$ can be decomposed as a sum $Z_t = R_t + M_t$, where $R_t \sim I\left(d\right)$ and $M_t \sim I\left(d - b\right)$." 27 | 28 | Let $Y_t$ denote the target time series and $X_{j,t}$ the $j^{th}$ factor time series at time $t$, where $j = \lbrace 1, 2, \dots, k \rbrace$. The target time series and the $k$ factor time series are partially cointegrated, if a parameter vector $\iota = \left\lbrace\beta_1, \beta_2, \dots, \beta_k, \rho, \sigma_M, \sigma_R, M_0, M_R\right\rbrace$ exists such that the subsequent model equations are satisfied: 29 | 30 | $$ 31 | \begin{aligned} 32 | Y_{t} &= \beta_1 X_{1,t} + \beta_2 X_{2,t} + ... 
+ \beta_k X_{k,t} + W_t \\ 33 | W_t &= M_t + R_t \\ 34 | M_t &= \rho M_{t-1} + \varepsilon_{M,t}\\ 35 | R_t &= R_{t-1} + \varepsilon_{R,t}\\ 36 | \varepsilon_{M,t} &\sim \mathcal{N}\left(0, \sigma^2_M\right)\\ 37 | \varepsilon_{R,t} &\sim \mathcal{N}\left(0, \sigma^2_R\right)\\ 38 | \beta_j \in \mathbb{R}; \rho &\in \left(-1, 1\right);\sigma^2_M, \sigma^2_R \in \mathbb{R}_0^+. \\ 39 | \end{aligned} 40 | $$ 41 | Thereby, $W_t$ denotes the partially autoregressive process, $R_t$ the permanent component, $M_t$ the transient component and $\beta = \lbrace \beta_1, \beta_2, \dots, \beta_k \rbrace$ is the partially cointegrating vector. The permanent component is modeled as a random walk and the transient component as an AR(1)-process with $AR(1)$-coefficient $\rho$. The corresponding error terms $\varepsilon_{M,t}$ and $\varepsilon_{R,t}$ are assumed to follow mutually independent, normally distributed white noise processes with mean zero and variances $\sigma^2_M$ and $\sigma^2_R$. A key advantage of modeling the cointegrating process as a partially autoregressive process is that we are able to calculate the proportion of variance attributable to mean-reversion (PVMR), defined as (@Clegg.2016), 42 | $$ 43 | R^2_{MR} = \frac{VAR\left[\left(1-B\right)M_t\right]}{VAR\left[\left(1-B\right)W_t\right]} = \frac{2\sigma^2_M}{2\sigma^2_M + \left(1+\rho\right)\sigma^2_R} , \hspace{0.2cm} R^2_{MR} \in \left[0,1\right], 44 | $$ 45 | where $B$ denotes the backshift operator. The statistic $R^2_{MR}$ is useful to assess how close the cointegration process is to either a pure random walk $\left(R^2_{MR} = 0\right)$ or a pure AR(1)-process $\left(R^2_{MR} = 1\right)$. 46 | 47 | ### State space represenation 48 | 49 | The applied state space transformation is in line with @Clegg.2016. Given that the PAR process $W_t$ is not observable, we convert the PCI model into the following state space model, consisting of an observation and a state equation: 50 | $$ 51 | \begin{align} 52 | X_t &= H Z_t \\ 53 | Z_t &= FZ_{t-1} + W_t. 54 | \end{align} 55 | $$ 56 | Thereby, $Z_t$ denotes the state which is assumed to be influenced linearly by the state in the last period and a noise term $W_t$. The matrix $F$ is assumed to be time invariant. The observable part is denoted by $X_t$. By assumption, there is a linear dependence between $X_t$ and $Z_t$, captured in the time invariant matrix $H$. 57 | 58 | ### Estimation of a partial cointegration model 59 | Parameters are estimated via the maximum likelihood (ML) method. Using a quasi-Newton algorithm, the ML method searches for the parameters $\rho$, $\sigma^2_M$, $\sigma^2_R$ and the parameter vector $\beta$ which maximizes the likelihood function of the associated Kalman filter. 60 | 61 | 62 | 63 | ### A likelihood ratio test routine for partial cointegration} 64 | The likelihood ratio test (LRT) implemented in the **partialCI** package adopts the LRT routine for PAR models proposed by @Clegg.2015a. In a PCI scenario the null hypothesis consists of two conditions -- namely the hypothesis that the residual series is a pure random walk ($\mathcal{H}^R_0$) or a pure AR(1)-process $(\mathcal{H}^M_0)$. The two conditions are separately tested. Only if both, $\mathcal{H}^R_0$ and $\mathcal{H}^M_0$ are individually rejected, the null hypothesis of no partial cointegration is rejected. 65 | 66 | ## Using the PCI package 67 | 68 | The main functions of the **partialCI** package are fit.pci(), test.pci(), statehistory.pci(), and hedge.pci(). 
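
Before turning to the individual functions, the following self-contained sketch (it uses no partialCI functions and all parameter values are chosen arbitrarily for illustration) simulates a pair of series that satisfies the PCI model equations above and computes the PVMR implied by the chosen parameters:

```
set.seed(1)
n       <- 1000
rho     <- 0.9    # AR(1) coefficient of the transient component
sigma_M <- 1.0    # innovation sd of the transient component
sigma_R <- 0.5    # innovation sd of the permanent component
beta    <- 1.2    # partially cointegrating coefficient

X <- cumsum(rnorm(n))                          # I(1) factor series
M <- as.numeric(stats::filter(rnorm(n, sd = sigma_M), rho,
                              method = "recursive"))   # transient AR(1), M0 = 0
R <- cumsum(rnorm(n, sd = sigma_R))            # permanent random-walk component
Y <- beta * X + M + R                          # partially cointegrated target

# PVMR implied by the chosen parameters
2 * sigma_M^2 / (2 * sigma_M^2 + (1 + rho) * sigma_R^2)
# [1] 0.8080808
```

A series simulated in this way can be passed to fit.pci() and test.pci() as described below.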
69 | 70 | ### fit.pci() 71 | 72 | The function **fit.pci()** fits a partial cointegration model to a given collection of time series. 73 | 74 | ``` 75 | fit.pci(Y, X, pci_opt_method = c("jp", "twostep"), par_model = c("par", "ar1", "rw"), lambda = 0, robust = FALSE, nu = 5, include_alpha=FALSE)} 76 | ``` 77 | * `Y`: Denotes the target time series and `X` is a matrix containing the `k` factors used to model `Y`. 78 | * `pci_opt_method`: Specifies, whether the joint-penalty method (`"jp"`) or the (`"twostep"`) method is applied to obtain the model with the best fit. If `pci_opt_method` is specified as `"twostep"`, a two-step procedure similar to the method introduced by @Engle.1987 is performed. Which model is fitted to the residual series, depends on the specification for the argument `par_model`. In case of `"par"`, a partial autoregressive model is used, in case of `"ar1"`, an AR(1)-process and in case of `"rw"` a random walk (default: `par_model = "par"`). On the other hand, if the `pci_opt_method` is specified as `"jp"`, the joint-penalty method is applied, to estimate $\beta$, $\rho$, $\sigma_M^2$ and $\sigma_R^2$ jointly via ML. The likelihood score of the associated Kalman filter is extended by a penalty value $\lambda\sigma_R^2$ (default: `lambda = 0`), where $\lambda \in \mathbb{R}_0^+$ (default: `pci_opt_method = "jp"`). 79 | * `robust`: Determines whether the residuals are assumed to be normally (`FALSE`) or $t$-distributed (`TRUE`) (default: `robust = TRUE`). If `robust` is set to `TRUE` the degrees of freedom can be specified, using the argument `nu` (default: `nu = 5`). 80 | * `include_alpha`: If `TRUE`, an intercept $\alpha$ is added to the PCI relationship (default: `include_alpha = FALSE`). 81 | 82 | 83 | ### test.pci() 84 | 85 | The **test.pci()** function tests the goodness of fit of a PCI model. 86 | 87 | ``` 88 | test.pci(Y, X, alpha = 0.05, null_hyp = c("rw", "ar1"), robust = FALSE, pci_opt_method = c("jp", "twostep"))} 89 | ``` 90 | * `alpha`: Determines at which significance level the null hypothesis is rejected (default: `alpha = 0.05`). 91 | * `null_hyp`: Specifies whether the null hypothesis is a random walk (`"rw"`), an AR(1)-process (`"ar1"`) or a union of both hypotheses (`c("rw", "ar1")`) (default: `null_hyp = c("rw", "ar1")`). 92 | 93 | ### statehistory.pci() 94 | 95 | To estimate the sequence of hidden states the **statehistory.pci()** function can be applied. 96 | 97 | ``` 98 | statehistory.pci(A, data = A\$data, basis = A\$basis)} 99 | ``` 100 | * `A`: Denotes a fit.pci() object. 101 | * `data`: Is a matrix consisting of the target time series and the `k` factor time series (default: `data = A\$data`). 102 | * `code{basis`: Captures the coefficients of the factor time series (default: `basis = A\$basis`). 103 | 104 | ###hedge.pci() 105 | 106 | The function **hedge.pci()** finds those `k` factors from a predefined set of factors which yield the best fit to the target time series. 107 | 108 | ``` 109 | hedge.pci(Y, X, maxfact = 10, lambda = 0, use.multicore = TRUE, minimum.stepsize = 0, verbose = TRUE, exclude.cols = c(), search_type = c("lasso", "full", "limited"), pci_opt_method=c("jp", "twostep"))} 110 | ``` 111 | 112 | * `maxfact`: Denotes the maximum number of considered factors (default: * `maxfact = 10`). 113 | * `use.multicore`: If `TRUE`, parallel processing is activated (default: * `use.multicore = TRUE`). 114 | * `verbose`: Controls whether detailed information are printed (default: `verbose = TRUE`). 
115 | * `exclude.cols`: Defines a set of factors which should be excluded from the search routine (default: `exclude.cols = c()`). 116 | * `search_type`: Determines the search algorithm applied to find the model that fits best to the target time series. The likelihood ratio score (LRT score) is used to compare the model fits, whereby lower scores are associated with better fits. If the option `"lasso"` is specified the lasso algorithm as implemented in the R package **glmnet** [@Friedman.2010] is deployed to search for the portfolio of factors that yields the best linear fit to the target time series. If the option `"full"` is specified, then at each step, all possible additions to the portfolio are considered and the one which yields the highest likelihood score improvement is chosen. If the option `"limited"` is specified, then at each step, the correlation of the residuals of the current portfolio is computed with respect to each of the candidate series in the input set $X$, and the top $B$ series are chosen for further consideration. Among these top $B$ candidates, the one which improves the likelihood score by the greatest amount is chosen. The parameter $B$ can be controled via `maxfact` (default: `search_type = "lasso"`). 117 | 118 | ## Example 119 | 120 | As an introductory example, we explore the relationship between Royal Dutch Shell plc A (RDS-A) and Royal Dutch Shell plc B (RDS-B), using daily (closing) price data from 1 January 2006 to 1 December 2016.RDS-A (@RDSA.2016) and RDS-B (@RDSB.2016) data are downloaded from Yahoo Finance. To download the price data we use the getYahooData() function, implemented in the R package **TTR** (@Ulrich.2016). 121 | 122 | ``` 123 | library(partialCI) 124 | library(TTR) 125 | 126 | RDSA<-getYahooData("RDS-A", 20060101, 20161201)$Close 127 | RDSB<-getYahooData("RDS-B", 20060101, 20161201)$Close 128 | ``` 129 | 130 | A classic cointegration analysis yields that the two time series are not cointegrated. 131 | 132 | ``` 133 | library(egcm) 134 | 135 | egcm_finance <- egcm(RDSA,RDSB,include.const = FALSE) 136 | ``` 137 | 138 | In particular, we apply the two-step approach of @Engle.1987 implemented in the R package **egcm** (@Clegg.2015c). 139 | 140 | The following residual plot (code: `plot(egcm\_finance\$residuals,type = "l")`) suggests that the residual series is not purely mean-reverting, but rather shows a stochastical trend as well as a mean-reverting behavior. 141 | 142 | 143 | ![Residual plot classic cointegration: RDS-A and RDS-B (1.01.2006 - 1.12.2016, daily)](figures/OLSRES_RSARSB_2006_2016.png) 144 | 145 | Hence, it is not suprising that RDS-A and RDS-B are not cointegrated. Using the PCI framework, we are able to fit a PCI model to RDS-A and RDS-B. 146 | 147 | ``` 148 | PCI_RDSA_RDSB<-fit.pci(RDSA, RDSB, pci_opt_method = c("jp"), par_model =c("par"), lambda = 0, robust = FALSE, nu = 5, include_alpha = FALSE)) 149 | ``` 150 | 151 | The R output is given as 152 | 153 | ``` 154 | Fitted values for PCI model 155 | Y[t] = X[t] %*% beta + M[t] + R[t] 156 | M[t] = rho * M[t-1] + eps_M [t], eps_M[t] ~ N(0, sigma_M^2) 157 | R[t] = R[t-1] + eps_R [t], eps_R[t] ~ N(0, sigma_R^2) 158 | 159 | Estimate Std. Err 160 | beta_Close 0.9274 0.0038 161 | rho 0.3959 0.0965 162 | sigma_M 0.1081 0.0083 163 | sigma_R 0.1195 0.0076 164 | 165 | -LL = -1117.29, R^2[MR] = 0.540, 166 | ``` 167 | where `beta_Close` denotes the partially cointegrating coefficient. The PVMR of 0.54 suggests that the spread time series also exhibits a clear mean-reverting behavior. 
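
As a quick sanity check, the reported PVMR can be reproduced directly from the parameter estimates shown in the output above, using the formula for $R^2_{MR}$ given earlier:

```
rho     <- 0.3959
sigma_M <- 0.1081
sigma_R <- 0.1195
2 * sigma_M^2 / (2 * sigma_M^2 + (1 + rho) * sigma_R^2)
# approximately 0.540, matching the reported R^2[MR]
```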
168 | 169 | In the subsequent step, we utilize the test.pci() function to check whether RDS-A and RDS-B are partially cointegrated. 170 | 171 | The R code 172 | 173 | ``` 174 | test.pci(RDSA, RDSB, alpha = 0.05, null_hyp = c("rw", "ar1"), robust = FALSE, pci_opt_method = c("jp")) 175 | ``` 176 | 177 | leads to the following output: 178 | 179 | ``` 180 | Likelihood ratio test of [Random Walk or CI(1)] vs Almost PCI(1) 181 | (joint penalty method) 182 | 183 | data: StockA 184 | 185 | Hypothesis Statistic p-value 186 | Random Walk -55.09 0.010 187 | AR(1) -52.88 0.010 188 | Combined 0.010. 189 | ``` 190 | A time series is classified as partially cointegrated, if and only if the random walk as well as the AR(1)-hypotheses are rejected. The $p$-value of 0.010 for the combined null hypothesis indicates that RDS-A and RDS-B are partially cointegrated in the considered period of time. 191 | 192 | Next, we demonstrate the use of the statehistory.pci() function which allows to estimate and extract the hidden states. 193 | The R code, 194 | 195 | ``` 196 | statehistory.pci(PCI_RDSA_RDSB)}, 197 | 198 | ``` 199 | 200 | results in the R output: 201 | ``` 202 | Y Yhat Z M R eps_M eps_R 203 | 2006-01-03 35.87002 35.26781 0.6022031 0.00000000 0.6022031 0.00000000 0.00000000 204 | 2006-01-04 36.23993 35.57175 0.6681755 0.02030490 0.6478706 0.02030490 0.04566752 205 | 2006-01-05 35.80276 35.24161 0.5611509 -0.02112621 0.5822771 -0.02916450 -0.06559352 206 | 2006-01-06 36.48653 35.83377 0.6527591 0.01590352 0.6368556 0.02426695 0.05457850 207 | ... 208 | 2016-11-25 50.18000 49.52231 0.6576906 -0.08762384 0.7453144 -0.07643882 -0.17191764 209 | 2016-11-28 49.20000 48.22397 0.9760311 0.04699758 0.9290335 0.08168603 0.18371909 210 | 2016-11-29 49.06000 48.02922 1.0307808 0.04419468 0.9865862 0.02558931 0.05755262 211 | 2016-11-30 51.10000 50.23639 0.8636066 -0.02573955 0.8893462 -0.04323530 -0.09724000 212 | 2016-12-01 51.78000 51.15450 0.6254956 -0.08826115 0.7137567 -0.07807140 -0.17558945. 213 | ``` 214 | The latter table covers the estimates of the hidden states $M$ and $R$ as well as the corresponding error terms `eps_M` and `eps_R`. `Z` is equal to the sum of $M$ and $R$. The estimate of the target time series is denoted by `Yhat`. 215 | 216 | The subsequent figure illustrates a plot of the extracted mean-reverting component of the spread associated with the RDS-A and RDS-B price time series (``plot(statehistory.pci(PCI_RDSA_RDSB)[,4],type = "l",ylab = "", xlab = "")`). 217 | 218 | ![Mean-reverting component RDS-A and RDS-B (1.01.2006 - 1.12.2016, daily)](figures/MCSDHIST_RSARSB_2006_2016.png) 219 | 220 | 221 | The horizontal blue lines are equal to two times the historical standard deviation in absolute terms of the mean-reverting component. A pairs trading strategy could exploit the mean-reverting behavior of $M_t$. Note that this example is in-sample; for a true out-of-sample application see @Clegg.2016. 222 | 223 | We continue with using hedge.pci() to find the set of sector ETFs forming the best hedging portfolio for the SPY index (S&P500 index). 
224 | 225 | Thereby, the R code, 226 | ```` 227 | sectorETFS <- c("XLB", "XLE", "XLF", "XLI", "XLK", "XLP", "XLU", "XLV", "XLY") 228 | prices <- multigetYahooPrices(c("SPY", sectorETFS), start=20060101) 229 | 230 | hedge.pci(prices[,"SPY"], prices), 231 | ```` 232 | 233 | results in the subsequent output: 234 | ```` 235 | -LL LR[rw] p[rw] p[mr] rho R^2[MR] Factor | Factor coefficients 236 | 2320.00 -23.3743 0.0100 0.0100 0.5759 0.4526 XLI | 3.1106 237 | 1765.50 -46.5925 0.0100 0.0100 0.3170 0.4713 XLY | 1.8951 1.1989 238 | 1494.95 -53.7256 0.0100 0.0100 0.3244 0.5038 XLV | 1.6999 0.9106 0.6619 239 | 972.58 -65.9058 0.0100 0.0100 0.4060 0.5904 XLK | 1.3089 0.4933 0.5320. 1.5182 240 | ```` 241 | The table summarizes information about the best hedging portfolio, where each row corresponds to an increasing number of factors. Row 1: The best single-factor hedging portfolio comprises XLI (industrials) as only factor. Row 2: The best two-factor hedging portfolio consists of XLI and XLY (consumer discretionary). As such, XLY leads to the best improvement of the LRT score among all remaining factors. Row 3 includes XLV (health care) for the three-factor portfolio and row 4 XLK (technology) for the best four-factor portfolio. The last row corresponds to the overall best fit out of the nine potential sector ETFs, based on the LRT score. Note that for all rows, the union of random walk and AR(1)-null hypothesis is rejected at the 5 percent significant level, so we find a PCI model at each step. 242 | 243 | ## References -------------------------------------------------------------------------------- /R/fit_pci_wo_alpha.R: -------------------------------------------------------------------------------- 1 | # Functions pertaining to fitting partially cointegrated (PCI) series 2 | # Copyright (C) 2016 Matthew Clegg 3 | 4 | # This program is free software; you can redistribute it and/or modify 5 | # it under the terms of the GNU General Public License as published by 6 | # the Free Software Foundation; either version 2 of the License, or 7 | # (at your option) any later version. 8 | # 9 | # This program is distributed in the hope that it will be useful, 10 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | # GNU General Public License for more details. 13 | # 14 | # A copy of the GNU General Public License is available at 15 | # http://www.r-project.org/Licenses/ 16 | 17 | utils::globalVariables("p0") 18 | 19 | 20 | 21 | fit.pci.twostep.a0 <- function (Y, X, par_model=c("par", "ar1", "rw"), robust=FALSE, nu=5) { 22 | # On input, Y is an n x 1 column vector, and X is an n x k matrix. 23 | # Fits the model 24 | # 25 | # Y[t] = beta[1] X[t,1] + beta[2] X[t,2] + ... + beta [k] X[t,k] 26 | # + m[t] + r[t] 27 | # 28 | # where 29 | # 30 | # m[t] = rho * m[t-1] + eps_M[t] 31 | # r[t] = r[t-1] + eps_R[t] 32 | # eps_M,t ~ N(0,1) 33 | # eps_R,t ~ N(0,1) 34 | # m[0] = r[0] = 0 35 | # 36 | # Estimates the values of beta, rho, sigma_M and sigma_R using 37 | # the two step procedure: 38 | # 1. Perform a linear regression of Y on X to obtain beta 39 | # 2. Determine rho, sigma_M and sigma_R through fitting a PAR model 40 | # to the residuals of the linear regression performed in step 1. 41 | # 42 | # The parameter par_model specifies which parameters of the PAR model are 43 | # to be optimized. 
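  # Returns an S3 object of class "pci.fit" containing the estimated
  # coefficients (beta, rho, sigma_M, sigma_R, M0, R0), their standard errors,
  # the negative log likelihood of the fit and the PVMR. In this no-intercept
  # variant, alpha is fixed at 0.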
44 | 45 | par_model <- match.arg(par_model) 46 | if (robust) { 47 | L <- summary(rlm(Y~X)) 48 | } else { 49 | L <- summary(lm(Y~X)) 50 | } 51 | PAR <- fit.par(L$residuals, model=par_model, robust=robust, nu=nu) 52 | 53 | fit <- structure(list( 54 | data=Y, 55 | basis=X, 56 | residuals=L$residuals, 57 | index=1:length(Y), 58 | alpha = 0, 59 | beta = coef(L)[2:nrow(coef(L)),1], 60 | rho = PAR$rho, 61 | sigma_M = PAR$sigma_M, 62 | sigma_R = PAR$sigma_R, 63 | M0 = PAR$par[4], 64 | R0 = PAR$par[5], 65 | beta.se = coef(L)[2:nrow(coef(L)),2], 66 | rho.se = PAR$stderr[1], 67 | sigma_M.se = PAR$stderr[2], 68 | sigma_R.se = PAR$stderr[3], 69 | M0.se = PAR$stderr[4], 70 | R0.se = PAR$stderr[5], 71 | negloglik = c(negloglik=PAR$negloglik), 72 | pvmr = PAR$pvmr, 73 | par.fit = PAR, 74 | par_model=par_model, 75 | robust=robust, 76 | pci.fit="twostep"), 77 | class="pci.fit") 78 | 79 | names(fit$beta) <- paste("beta_", colnames(X), sep="") 80 | names(fit$beta.se) <- paste(paste("beta_", colnames(X), sep=""), ".se", sep="") 81 | 82 | fit 83 | } 84 | 85 | pci.jointpenalty.guess.a0 <- function (Y, X) { 86 | # On input, Y is an n x 1 column vector, and X is an n x k matrix. 87 | # Generates a guess for the starting point of optimization for an PCI 88 | # fit as follows: 89 | # 1. Estimates beta by fitting the differenced X series to the 90 | # differenced Y series. 91 | # 2. Estimates rho, sigma_M and sigma_R by fitting a PAR series 92 | # to Y - X %*% beta - alpha 93 | # Returns the guess that is found 94 | 95 | X <- as.matrix(X) 96 | DY <- diff(Y) 97 | DX <- apply(X, 2, diff) 98 | beta0 <- coef(lm(DY ~ DX + 0)) 99 | res0 <- Y - X %*% beta0 100 | PAR0 <- fit.par(res0) 101 | rho0 <- PAR0$rho 102 | sigma_M0 <- PAR0$sigma_M 103 | sigma_R0 <- PAR0$sigma_R 104 | M00 <- 0 105 | p0 <- c(beta0, rho0, sigma_M0, sigma_R0, M00, res0[1]) 106 | names(p0)[1:ncol(X)] <- paste("beta_", colnames(X), sep="") 107 | names(p0)[length(p0)-1] <- "M0" 108 | names(p0)[length(p0)] <- "R0" 109 | p0 110 | } 111 | 112 | fit.pci.jointpenalty.rw.a0 <- function (Y, X, lambda=0, p0=pci.jointpenalty.guess(Y,X), 113 | robust=FALSE, nu=5, pgtol=1e-8) { 114 | # On input, Y is an n x 1 column vector, and X is an n x k matrix. 115 | # Fits an PCI model to Y,X where the residual series is modeled as 116 | # a random walk. 
Returns a three-component list: 117 | # par: The parameter estimates 118 | # se: The estimated standard error of the parameter estimates 119 | # nll: The negative log likelihood of the fit 120 | 121 | if (is.null(dim(Y))) Y <- as.matrix(Y, ncol=1) 122 | if (is.null(dim(X))) X <- as.matrix(X, ncol=1) 123 | 124 | n <- ncol(X) 125 | highval <- NA 126 | ll_calc_method <- if (robust) "csst" else "css" 127 | 128 | rw.ofun <- function (p) { 129 | beta <- p[1:n] 130 | sigma_R <- p[n+1] 131 | if (sigma_R <= 0.0) return(highval) 132 | Z <- Y - X %*% beta 133 | R0 <- Z[1] 134 | loglik.par (Z, 0, 0, sigma_R, 0, R0, ll_calc_method, nu=nu) + lambda * sigma_R^2 135 | } 136 | 137 | beta0 <- p0[1:n] 138 | res0 <- Y - X %*% beta0 139 | sdmax <- 2.0 * sd(diff(Y)) 140 | rw.p0 <- c(beta0, sdmax * 0.5) 141 | rw.pmin <- c(rep(-Inf, n), 0.0) 142 | rw.pmax <- c(rep(Inf, n), sdmax) 143 | highval <- rw.ofun(rw.p0) + 1.0 144 | 145 | rw.val <- optim(rw.p0, rw.ofun, method="L-BFGS-B", lower=rw.pmin, upper=rw.pmax, hessian=TRUE, control=list(pgtol=pgtol)) 146 | 147 | R0.par <- (Y - X %*% rw.val$par[1:n]) [1] 148 | rw.par <- c(rw.val$par[1:n], rho=0, sigma_M=0, sigma_R=rw.val$par[n+1], M0=0, R0=R0.par) 149 | names(rw.par)[1:n] <- paste("beta_", colnames(X), sep="") 150 | rw.se <- rep(NA_real_, nrow(rw.val$hessian)) 151 | suppressWarnings(try(rw.se <- sqrt(diag(solve(rw.val$hessian))), silent=TRUE)) 152 | rw.se <- c(rw.se[1:n], rho=NA, sigma_M=NA, sigma_R=rw.se[n+1], M0=NA, R0=NA) 153 | names(rw.se)[1:n] <- paste("beta_", colnames(X), sep="") 154 | 155 | return(list(par=rw.par, se=rw.se, nll=c(negloglik=rw.val$value))) 156 | } 157 | 158 | fit.pci.jointpenalty.mr.a0 <- function (Y, X, lambda=0, p0=pci.jointpenalty.guess.a0(Y,X), 159 | robust=FALSE, nu=5, pgtol=1e-8) { 160 | # On input, Y is an n x 1 column vector, and X is an n x k matrix. 161 | # Fits an PCI model to Y,X where the residual series is modeled as 162 | # a pure AR(1) series. 
Returns a three-component list: 163 | # par: The parameter estimates 164 | # se: The estimated standard error of the parameter estimates 165 | # nll: The negative log likelihood of the fit 166 | 167 | if (is.null(dim(Y))) Y <- as.matrix(Y, ncol=1) 168 | if (is.null(dim(X))) X <- as.matrix(X, ncol=1) 169 | 170 | n <- ncol(X) 171 | highval <- NA 172 | ll_calc_method <- if (robust) "csst" else "css" 173 | 174 | mr.ofun <- function (p) { 175 | beta <- p[1:n] 176 | rho <- p[n+1] 177 | sigma_M <- p[n+2] 178 | if (rho < -1 || rho > 1 || sigma_M <= 0.0) return(highval) 179 | Z <- Y - X %*% beta 180 | loglik.par (Z, rho, sigma_M, 0, 0, Z[1], ll_calc_method, nu=nu) 181 | } 182 | 183 | rw.fit <- fit.pci.jointpenalty.rw.a0 (Y, X, lambda, p0, robust=robust, nu=nu) 184 | rw.p0 <- c(rw.fit$par[1:n], rho=1, rw.fit$par[n+3]) 185 | names(rw.p0)[n+2] <- "sigma_M" 186 | 187 | beta0 <- p0[1:n] 188 | res0 <- Y - X %*% beta0 189 | PAR0.mr <- fit.par(res0, model="ar1", robust=robust, nu=nu) 190 | mr.p0 <- c(beta0, PAR0.mr$rho, PAR0.mr$sigma_M) 191 | names(mr.p0)[n+2] <- "sigma_M" 192 | 193 | mr.pmin <- c(rep(-Inf, n), -1, 0) 194 | mr.pmax <- c(rep(Inf, n), 1, 2.0 * rw.p0[n+3]) 195 | 196 | highval <- max(mr.ofun(rw.p0), mr.ofun(mr.p0)) + 1 197 | fit1 <- optim(rw.p0, mr.ofun, method="L-BFGS-B", lower=mr.pmin, upper=mr.pmax, hessian=TRUE, control=list(pgtol=pgtol)) 198 | fit2 <- optim(mr.p0, mr.ofun, method="L-BFGS-B", lower=mr.pmin, upper=mr.pmax, hessian=TRUE, control=list(pgtol=pgtol)) 199 | fit <- if(fit1$value < fit2$value) fit1 else fit2 200 | 201 | fit.R0 <- (Y - X %*% fit$par[1:n]) [1] 202 | mr.par <- c(fit$par[1:(n+2)], sigma_R=0, M0=0, R0=fit.R0) 203 | 204 | mr.se <- rep(NA_real_, nrow(fit$hessian)) 205 | suppressWarnings(try(mr.se <- sqrt(diag(solve(fit$hessian))), silent=TRUE)) 206 | mr.se <- c(mr.se[1:(n+2)], sigma_R=NA, M0=NA, R0=NA) 207 | 208 | return(list(par=mr.par, se=mr.se, nll=c(negloglik=fit$value))) 209 | } 210 | 211 | fit.pci.jointpenalty.both.a0 <- function (Y, X, lambda=0, p0=pci.jointpenalty.guess.a0(Y,X), 212 | robust=FALSE, nu=5, pgtol=1e-8) { 213 | # On input, Y is an n x 1 column vector, and X is an n x k matrix. 214 | # Fits an PCI model to Y,X. 
Returns a three-component list: 215 | # par: The parameter estimates 216 | # se: The estimated standard error of the parameter estimates 217 | # nll: The negative log likelihood of the fit 218 | 219 | if (is.null(dim(Y))) Y <- as.matrix(Y, ncol=1) 220 | if (is.null(dim(X))) X <- as.matrix(X, ncol=1) 221 | 222 | n <- ncol(X) 223 | highval <- NA 224 | ll_calc_method <- if (robust) "csst" else "css" 225 | 226 | fit.rw <- fit.pci.jointpenalty.rw.a0 (Y, X, lambda, p0, robust=robust, nu=nu) 227 | fit.mr <- fit.pci.jointpenalty.mr.a0 (Y, X, lambda, p0, robust=robust, nu=nu) 228 | fit.twostep <- fit.pci.twostep.a0 (Y, X, robust=robust, nu=nu) 229 | twostep.par <- c(fit.twostep$beta, 230 | fit.twostep$rho, fit.twostep$sigma_M, fit.twostep$sigma_R) 231 | 232 | start_list <- list(p0[1:(n+3)], fit.rw$par[1:(n+3)], fit.mr$par[1:(n+3)], twostep.par) 233 | 234 | objective <- function (p) { 235 | # print(p) 236 | beta <- p[1:n] 237 | rho <- p[n+1] 238 | sigma_M <- p[n+2] 239 | sigma_R <- p[n+3] 240 | if (rho < -1 || rho > 1) return(highval) 241 | if ((sigma_M < 0) || (sigma_R < 0)) return(highval) 242 | if ((sigma_M == 0) && (sigma_R == 0)) return(highval) 243 | Z <- Y - X %*% beta 244 | M0 <- 0 245 | R0 <- Z[1] 246 | ll <- loglik.par (Z, rho, sigma_M, sigma_R, M0, R0, ll_calc_method, nu=nu) + lambda * sigma_R^2 247 | # print(c(p, ll)) 248 | ll 249 | } 250 | # debug(objective) 251 | 252 | maxsig <- fit.rw$par[["sigma_R"]] 253 | pmin <- c(rep(-Inf, n), -1, 0, 0) 254 | pmax <- c(rep(Inf, n), 1, 2.0 * maxsig, 2.0 * maxsig) 255 | 256 | best_value <- objective(start_list[[1]])+1 257 | for (start in start_list) { 258 | highval <- objective(start) + 1 259 | rfit <- optim(start, objective, hessian=TRUE, method="L-BFGS-B", lower=pmin, upper=pmax,control=list(pgtol=pgtol)) 260 | if (rfit$value < best_value) { 261 | bestfit <- rfit 262 | best_value <- rfit$value 263 | # cat(sprintf("r %6.2f rho %8.4f sigma_M %8.4f sigma_R %8.4f -> %8.4f\n", 264 | # rrho, bestfit$par[1], bestfit$par[2], bestfit$par[3], bestfit$value)) 265 | } 266 | } 267 | 268 | bestfit.R0 <- (Y - X %*% bestfit$par[1:n]) [1] 269 | bestfit.par <- c(bestfit$par[1:(n+3)], M0=0, R0=bestfit.R0) 270 | bestfit.se <- rep(NA_real_, nrow(bestfit$hessian)) 271 | suppressWarnings(try(bestfit.se <- sqrt(diag(solve(bestfit$hessian))), silent=TRUE)) 272 | bestfit.se <- c(bestfit.se[1:(n+3)], M0=NA, R0=NA) 273 | 274 | # names(bestfit.par)[n+6] <- "R0" 275 | # names(bestfit.se)[n+6] <- "R0" 276 | 277 | return(list(par=bestfit.par, se=bestfit.se, nll=c(negloglik=bestfit$value))) 278 | 279 | } 280 | 281 | fit.pci.jointpenalty.a0 <- function (Y, X, par_model=c("par", "ar1", "rw"), lambda=0, 282 | robust=FALSE, nu=5) { 283 | # On input, Y is an n x 1 column vector, and X is an n x k matrix. 284 | # lambda is a penalty value that drives the optimization towards a 285 | # solution where sigma_R has the lowest possible value. 286 | # 287 | # Fits the model 288 | # 289 | # Y[t] = beta[1] X[t,1] + beta[2] X[t,2] + ... + beta [k] X[t,k] 290 | # + m[t] + r[t] 291 | # 292 | # where 293 | # 294 | # m[t] = rho * m[t-1] + eps_M[t] 295 | # r[t] = r[t-1] + eps_R[t] 296 | # eps_M,t ~ N(0,sigma_M^2) 297 | # eps_R,t ~ N(0,sigma_R^2) 298 | # m[0] = 0 299 | # r[0] = Y[0] - beta[1] X[0,1] - beta[2] X[0,2] - ... - beta[k] X[0,k] 300 | # 301 | # Estimates the values of beta, rho, sigma_M and sigma_R using 302 | # the following procedure: 303 | # 1. Initial estimates of beta are obtained by regressing the 304 | # first differences of X[,i] on the first difference of Y. 305 | # 2. 
Initial estimates of rho, sigma_M and sigma_R are obtained by 306 | # fitting a PAR model to the residuals 307 | # 3. Having obtained these initial estimates for all of the parameters, 308 | # the following cost function is maximized to final parameter 309 | # estimates: 310 | # C[rho, sigma_M, sigma_R] = -LL[rho, sigma_M, sigma_R] + lambda * sigma_R^2 311 | # In the above, LL is the log likelihood function for the steady 312 | # state Kalman filter with parameters rho, sigma_M and sigma_R. 313 | # 314 | # The parameter par_model specifies which parameters of the PAR model are 315 | # to be optimized. 316 | 317 | if (is.null(dim(Y))) Y <- as.matrix(Y, ncol=1) 318 | if (is.null(dim(X))) X <- as.matrix(X, ncol=1) 319 | 320 | par_model <- match.arg(par_model) 321 | p0 = pci.jointpenalty.guess.a0(Y,X) 322 | res <- switch (par_model, 323 | rw = fit.pci.jointpenalty.rw.a0(Y, X, lambda, p0, robust=robust, nu=nu), 324 | ar1 = fit.pci.jointpenalty.mr.a0(Y, X, lambda, p0, robust=robust, nu=nu), 325 | par = fit.pci.jointpenalty.both.a0(Y, X, lambda, p0, robust=robust, nu=nu)) 326 | n <- ncol(X) 327 | 328 | beta <- res$par[1:n] 329 | R <- Y - X %*% beta 330 | 331 | fit <- structure(list( 332 | data=Y, 333 | basis=X, 334 | residuals=R, 335 | index=1:length(Y), 336 | beta = beta, 337 | rho = res$par[n+1], 338 | sigma_M = res$par[n+2], 339 | sigma_R = res$par[n+3], 340 | M0 = res$par[n+4], 341 | R0 = res$par[n+5], 342 | beta.se = res$se[1:n], 343 | rho.se = res$se[n+1], 344 | sigma_M.se = res$se[n+2], 345 | sigma_R.se = res$se[n+3], 346 | M0.se = res$se[n+4], 347 | R0.se = res$se[n+5], 348 | negloglik = res$nll, 349 | pvmr = pvmr.par(res$par[n+1], res$par[n+2], res$par[n+3]), 350 | par.model=par_model, 351 | robust=robust, 352 | pci.fit="jointpenalty"), 353 | class="pci.fit") 354 | 355 | names(fit$beta) <- paste("beta_", colnames(X), sep="") 356 | names(fit$beta.se) <- paste(paste("beta_", colnames(X), sep=""), ".se", sep="") 357 | 358 | fit 359 | } 360 | 361 | fit.pci.a0 <- function (Y, X, pci_opt_method=c("jp", "twostep"), par_model=c("par", "ar1", "rw"), 362 | lambda=0, robust=FALSE, nu=5) { 363 | # On input, Y is an n x 1 column vector, and X is an n x k matrix. 364 | # lambda is a penalty value that drives the optimization towards a 365 | # solution where sigma_R has the lowest possible value. 366 | # 367 | # Fits the model 368 | # 369 | # Y[t] = beta[1] X[t,1] + beta[2] X[t,2] + ... + beta [k] X[t,k] 370 | # + m[t] + r[t] 371 | # 372 | # where 373 | # 374 | # m[t] = rho * m[t-1] + eps_M[t] 375 | # r[t] = r[t-1] + eps_R[t] 376 | # eps_M,t ~ N(0,sigma_M^2) 377 | # eps_R,t ~ N(0,sigma_R^2) 378 | # m[0] = 0 379 | # r[0] = Y[0] - beta[1] X[0,1] - beta[2] X[0,2] - ... - beta[k] X[0,k] 380 | # 381 | # Fits the model using either the two step procedure or the joint penalty 382 | # procedure, according to the value of the parameter 'pci_opt_method'. Returns 383 | # an S3 object of class "pci.fit" representing the fit that was obtained. 384 | # 385 | # The parameter par_model specifies which parameters of the PAR model are 386 | # to be optimized. 
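  # Note: the ".a0" functions in this file fit the model without an intercept
  # (alpha is fixed at 0); the corresponding variants with an intercept are the
  # ".a" functions in fit_pci_with_alpha.R.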
387 | 388 | pci_opt_method <- match.arg(pci_opt_method) 389 | par_model <- match.arg(par_model) 390 | 391 | Yorig <- Y 392 | if (!is.null(dim(Y))) { 393 | if (dim(Y)[2] > 1) { 394 | if (missing(X)) { 395 | X <- Y[,2:ncol(Y),drop=FALSE] 396 | } else { 397 | stop("Y must be a single column") 398 | } 399 | } 400 | Y <- Y[,1] 401 | } 402 | Y <- coredata(Y) 403 | 404 | Xorig <- X 405 | X <- as.matrix(X) 406 | 407 | A <- switch (pci_opt_method, 408 | twostep = fit.pci.twostep.a0(Y, X, par_model=par_model, robust=robust, nu=nu), 409 | jp = fit.pci.jointpenalty.a0(Y, X, par_model=par_model, lambda, robust=robust, nu=nu)) 410 | 411 | if (is.zoo(Yorig)) { 412 | A$index <- index(Yorig) 413 | } else { 414 | A$index <- 1:length(Y) 415 | } 416 | 417 | if (!is.null(names(Yorig))) { 418 | A$target_name <- names(Yorig)[1] 419 | } else if (!is.null(colnames(Yorig))) { 420 | A$target_name <- colnames(Yorig)[1] 421 | } else { 422 | A$target_name <- "Y" 423 | } 424 | 425 | if (!is.null(colnames(X))) { 426 | A$factor_names <- colnames(X) 427 | } else if (!is.null(names(X))) { 428 | A$factor_names <- names(X) 429 | } else { 430 | A$factor_names <- paste("X", 1:ncol(X), sep="") 431 | } 432 | 433 | A 434 | } 435 | 436 | -------------------------------------------------------------------------------- /R/fit_pci_with_alpha.R: -------------------------------------------------------------------------------- 1 | # Functions pertaining to fitting partially cointegrated (PCI) series 2 | # Copyright (C) 2016 Matthew Clegg 3 | 4 | # This program is free software; you can redistribute it and/or modify 5 | # it under the terms of the GNU General Public License as published by 6 | # the Free Software Foundation; either version 2 of the License, or 7 | # (at your option) any later version. 8 | # 9 | # This program is distributed in the hope that it will be useful, 10 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | # GNU General Public License for more details. 13 | # 14 | # A copy of the GNU General Public License is available at 15 | # http://www.r-project.org/Licenses/ 16 | 17 | utils::globalVariables("p0") 18 | 19 | fit.pci.twostep.a <- function (Y, X, par_model=c("par", "ar1", "rw"), robust=FALSE, nu=5) { 20 | # On input, Y is an n x 1 column vector, and X is an n x k matrix. 21 | # Fits the model 22 | # 23 | # Y[t] = alpha + beta[1] X[t,1] + beta[2] X[t,2] + ... + beta [k] X[t,k] 24 | # + m[t] + r[t] 25 | # 26 | # where 27 | # 28 | # m[t] = rho * m[t-1] + eps_M[t] 29 | # r[t] = r[t-1] + eps_R[t] 30 | # eps_M,t ~ N(0,1) 31 | # eps_R,t ~ N(0,1) 32 | # m[0] = r[0] = 0 33 | # 34 | # Estimates the values of alpha, beta, rho, sigma_M and sigma_R using 35 | # the two step procedure: 36 | # 1. Perform a linear regression of Y on X to obtain alpha and beta 37 | # 2. Determine rho, sigma_M and sigma_R through fitting a PAR model 38 | # to the residuals of the linear regression performed in step 1. 39 | # 40 | # The parameter par_model specifies which parameters of the PAR model are 41 | # to be optimized. 
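  # Returns an S3 object of class "pci.fit" with the same fields as the
  # no-intercept variant, plus the estimated intercept "alpha" and its
  # standard error "alpha.se".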
42 | 43 | par_model <- match.arg(par_model) 44 | if (robust) { 45 | L <- summary(rlm(Y~X)) 46 | } else { 47 | L <- summary(lm(Y~X)) 48 | } 49 | PAR <- fit.par(L$residuals, model=par_model, robust=robust, nu=nu) 50 | 51 | fit <- structure(list( 52 | data=Y, 53 | basis=X, 54 | residuals=L$residuals, 55 | index=1:length(Y), 56 | alpha = c(alpha=coef(L)[1,1]), 57 | beta = coef(L)[2:nrow(coef(L)),1], 58 | rho = PAR$rho, 59 | sigma_M = PAR$sigma_M, 60 | sigma_R = PAR$sigma_R, 61 | M0 = PAR$par[4], 62 | R0 = PAR$par[5], 63 | alpha.se = c(alpha.se=coef(L)[1,2]), 64 | beta.se = coef(L)[2:nrow(coef(L)),2], 65 | rho.se = PAR$stderr[1], 66 | sigma_M.se = PAR$stderr[2], 67 | sigma_R.se = PAR$stderr[3], 68 | M0.se = PAR$stderr[4], 69 | R0.se = PAR$stderr[5], 70 | negloglik = c(negloglik=PAR$negloglik), 71 | pvmr = PAR$pvmr, 72 | par.fit = PAR, 73 | par_model=par_model, 74 | robust=robust, 75 | pci.fit="twostep"), 76 | class="pci.fit") 77 | 78 | names(fit$beta) <- paste("beta_", colnames(X), sep="") 79 | names(fit$beta.se) <- paste(paste("beta_", colnames(X), sep=""), ".se", sep="") 80 | 81 | fit 82 | } 83 | 84 | pci.jointpenalty.guess.a <- function (Y, X) { 85 | # On input, Y is an n x 1 column vector, and X is an n x k matrix. 86 | # Generates a guess for the starting point of optimization for an PCI 87 | # fit as follows: 88 | # 1. Estimates beta by fitting the differenced X series to the 89 | # differenced Y series. 90 | # 2. Estimates alpha by a Y[1] - X[1,] %*% beta 91 | # 3. Estimates rho, sigma_M and sigma_R by fitting a PAR series 92 | # to Y - X %*% beta - alpha 93 | # Returns the guess that is found 94 | 95 | X <- as.matrix(X) 96 | DY <- diff(Y) 97 | DX <- apply(X, 2, diff) 98 | beta0 <- coef(lm(DY ~ DX + 0)) 99 | alpha0 <- as.numeric(Y[1] - X[1,] %*% beta0) 100 | res0 <- Y - X %*% beta0 - alpha0 101 | PAR0 <- fit.par(res0) 102 | rho0 <- PAR0$rho 103 | sigma_M0 <- PAR0$sigma_M 104 | sigma_R0 <- PAR0$sigma_R 105 | M00 <- 0 106 | p0 <- c(alpha0, beta0, rho0, sigma_M0, sigma_R0, M00, 0) 107 | names(p0)[1] <- "alpha" 108 | names(p0)[2:(ncol(X)+1)] <- paste("beta_", colnames(X), sep="") 109 | names(p0)[length(p0)-1] <- "M0" 110 | names(p0)[length(p0)] <- "R0" 111 | p0 112 | } 113 | 114 | fit.pci.jointpenalty.rw.a <- function (Y, X, lambda=0, p0=pci.jointpenalty.guess.a(Y,X), 115 | robust=FALSE, nu=5, pgtol=1e-8) { 116 | # On input, Y is an n x 1 column vector, and X is an n x k matrix. 117 | # Fits an PCI model to Y,X where the residual series is modeled as 118 | # a random walk. 
Returns a three-component list: 119 | # par: The parameter estimates 120 | # se: The estimated standard error of the parameter estimates 121 | # nll: The negative log likelihood of the fit 122 | 123 | if (is.null(dim(Y))) Y <- as.matrix(Y, ncol=1) 124 | if (is.null(dim(X))) X <- as.matrix(X, ncol=1) 125 | 126 | n <- ncol(X) 127 | highval <- NA 128 | ll_calc_method <- if (robust) "csst" else "css" 129 | 130 | rw.ofun <- function (p) { 131 | alpha <- p[1] 132 | beta <- p[2:(n+1)] 133 | sigma_R <- p[n+2] 134 | if (sigma_R <= 0.0) return(highval) 135 | Z <- Y - X %*% beta - alpha 136 | loglik.par (Z, 0, 0, sigma_R, 0, 0, ll_calc_method, nu=nu) + lambda * sigma_R^2 137 | } 138 | 139 | alpha0 <- p0[1] 140 | beta0 <- p0[2:(ncol(X)+1)] 141 | res0 <- Y - X %*% beta0 - alpha0 142 | sdmax <- 2.0 * sd(diff(Y)) 143 | rw.p0 <- c(alpha0, beta0, sdmax * 0.5) 144 | rw.pmin <- c(-Inf, rep(-Inf, n), 0.0) 145 | rw.pmax <- c(Inf, rep(Inf, n), sdmax) 146 | highval <- rw.ofun(rw.p0) + 1.0 147 | 148 | rw.val <- optim(rw.p0, rw.ofun, method="L-BFGS-B", lower=rw.pmin, upper=rw.pmax, hessian=TRUE, control=list(pgtol=pgtol)) 149 | rw.par <- c(rw.val$par[1:(n+1)], rho=0, sigma_M=0, sigma_R=rw.val$par[n+2], M0=0, R0=0) 150 | rw.se <- rep(NA_real_, nrow(rw.val$hessian)) 151 | suppressWarnings(try(rw.se <- sqrt(diag(solve(rw.val$hessian))), silent=TRUE)) 152 | rw.se <- c(rw.se[1:(n+1)], rho=NA, sigma_M=NA, sigma_R=rw.se[n+2], M0=NA, R0=NA) 153 | 154 | return(list(par=rw.par, se=rw.se, nll=c(negloglik=rw.val$value))) 155 | } 156 | 157 | fit.pci.jointpenalty.mr.a <- function (Y, X, lambda=0, p0=pci.jointpenalty.guess.a(Y,X), 158 | robust=FALSE, nu=5, pgtol=1e-8) { 159 | # On input, Y is an n x 1 column vector, and X is an n x k matrix. 160 | # Fits an PCI model to Y,X where the residual series is modeled as 161 | # a pure AR(1) series. 
Returns a three-component list: 162 | # par: The parameter estimates 163 | # se: The estimated standard error of the parameter estimates 164 | # nll: The negative log likelihood of the fit 165 | 166 | if (is.null(dim(Y))) Y <- as.matrix(Y, ncol=1) 167 | if (is.null(dim(X))) X <- as.matrix(X, ncol=1) 168 | 169 | n <- ncol(X) 170 | highval <- NA 171 | ll_calc_method <- if (robust) "csst" else "css" 172 | 173 | mr.ofun <- function (p) { 174 | alpha <- p[1] 175 | beta <- p[2:(n+1)] 176 | rho <- p[n+2] 177 | sigma_M <- p[n+3] 178 | if (rho < -1 || rho > 1 || sigma_M <= 0.0) return(highval) 179 | Z <- Y - X %*% beta - alpha 180 | loglik.par (Z, rho, sigma_M, 0, 0, 0, ll_calc_method, nu=nu) 181 | } 182 | 183 | rw.fit <- fit.pci.jointpenalty.rw.a (Y, X, lambda, p0, robust=robust, nu=nu) 184 | rw.p0 <- c(rw.fit$par[1:(n+1)], rho=1, rw.fit$par[n+4]) 185 | names(rw.p0)[n+3] <- "sigma_M" 186 | 187 | alpha0 <- p0[1] 188 | beta0 <- p0[2:(ncol(X)+1)] 189 | res0 <- Y - X %*% beta0 - alpha0 190 | PAR0.mr <- fit.par(res0, model="ar1", robust=robust, nu=nu) 191 | mr.p0 <- c(alpha0, beta0, PAR0.mr$rho, PAR0.mr$sigma_M) 192 | names(mr.p0)[n+3] <- "sigma_M" 193 | 194 | mr.pmin <- c(-Inf, rep(-Inf, n), -1, 0) 195 | mr.pmax <- c(Inf, rep(Inf, n), 1, 2.0 * rw.p0[n+3]) 196 | 197 | highval <- max(mr.ofun(rw.p0), mr.ofun(mr.p0)) + 1 198 | fit1 <- optim(rw.p0, mr.ofun, method="L-BFGS-B", lower=mr.pmin, upper=mr.pmax, hessian=TRUE, control=list(pgtol=pgtol)) 199 | fit2 <- optim(mr.p0, mr.ofun, method="L-BFGS-B", lower=mr.pmin, upper=mr.pmax, hessian=TRUE, control=list(pgtol=pgtol)) 200 | fit <- if(fit1$value < fit2$value) fit1 else fit2 201 | 202 | mr.par <- c(fit$par[1:(n+3)], sigma_R=0, M0=0, R0=0) 203 | 204 | mr.se <- rep(NA_real_, nrow(fit$hessian)) 205 | suppressWarnings(try(mr.se <- sqrt(diag(solve(fit$hessian))), silent=TRUE)) 206 | mr.se <- c(mr.se[1:(n+3)], sigma_R=NA, M0=NA, R0=NA) 207 | 208 | return(list(par=mr.par, se=mr.se, nll=c(negloglik=fit$value))) 209 | } 210 | 211 | fit.pci.jointpenalty.both.a <- function (Y, X, lambda=0, p0=pci.jointpenalty.guess.a(Y,X), 212 | robust=FALSE, nu=5, pgtol=1e-8) { 213 | # On input, Y is an n x 1 column vector, and X is an n x k matrix. 214 | # Fits an PCI model to Y,X. 
Returns a three-component list: 215 | # par: The parameter estimates 216 | # se: The estimated standard error of the parameter estimates 217 | # nll: The negative log likelihood of the fit 218 | 219 | if (is.null(dim(Y))) Y <- as.matrix(Y, ncol=1) 220 | if (is.null(dim(X))) X <- as.matrix(X, ncol=1) 221 | 222 | n <- ncol(X) 223 | highval <- NA 224 | ll_calc_method <- if (robust) "csst" else "css" 225 | 226 | fit.rw <- fit.pci.jointpenalty.rw.a (Y, X, lambda, p0, robust=robust, nu=nu) 227 | fit.mr <- fit.pci.jointpenalty.mr.a (Y, X, lambda, p0, robust=robust, nu=nu) 228 | fit.twostep <- fit.pci.twostep.a (Y, X, robust=robust, nu=nu) 229 | twostep.par <- c(fit.twostep$alpha, fit.twostep$beta, 230 | fit.twostep$rho, fit.twostep$sigma_M, fit.twostep$sigma_R) 231 | 232 | start_list <- list(p0[1:(n+4)], fit.rw$par[1:(n+4)], fit.mr$par[1:(n+4)], twostep.par) 233 | 234 | objective <- function (p) { 235 | # print(p) 236 | alpha <- p[1] 237 | beta <- p[2:(n+1)] 238 | rho <- p[n+2] 239 | sigma_M <- p[n+3] 240 | sigma_R <- p[n+4] 241 | if (rho < -1 || rho > 1) return(highval) 242 | if ((sigma_M < 0) || (sigma_R < 0)) return(highval) 243 | if ((sigma_M == 0) && (sigma_R == 0)) return(highval) 244 | M0 <- 0 245 | R0 <- 0 246 | Z <- Y - X %*% beta - alpha 247 | ll <- loglik.par (Z, rho, sigma_M, sigma_R, M0, R0, ll_calc_method, nu=nu) + lambda * sigma_R^2 248 | # print(c(p, ll)) 249 | ll 250 | } 251 | # debug(objective) 252 | 253 | maxsig <- fit.rw$par[["sigma_R"]] 254 | pmin <- c(-Inf, rep(-Inf, n), -1, 0, 0) 255 | pmax <- c(Inf, rep(Inf, n), 1, 2.0 * maxsig, 2.0 * maxsig) 256 | 257 | best_value <- objective(start_list[[1]])+1 258 | for (start in start_list) { 259 | highval <- objective(start) + 1 260 | rfit <- optim(start, objective, hessian=TRUE, method="L-BFGS-B", lower=pmin, upper=pmax,control=list(pgtol=pgtol)) 261 | if (rfit$value < best_value) { 262 | bestfit <- rfit 263 | best_value <- rfit$value 264 | # cat(sprintf("r %6.2f rho %8.4f sigma_M %8.4f sigma_R %8.4f -> %8.4f\n", 265 | # rrho, bestfit$par[1], bestfit$par[2], bestfit$par[3], bestfit$value)) 266 | } 267 | } 268 | 269 | bestfit.par <- c(bestfit$par[1:(n+4)], M0=0, R0=0) 270 | bestfit.se <- rep(NA_real_, nrow(bestfit$hessian)) 271 | suppressWarnings(try(bestfit.se <- sqrt(diag(solve(bestfit$hessian))), silent=TRUE)) 272 | bestfit.se <- c(bestfit.se[1:(n+4)], M0=NA, R0=NA) 273 | 274 | # names(bestfit.par)[n+6] <- "R0" 275 | # names(bestfit.se)[n+6] <- "R0" 276 | 277 | return(list(par=bestfit.par, se=bestfit.se, nll=c(negloglik=bestfit$value))) 278 | 279 | } 280 | 281 | fit.pci.jointpenalty.a <- function (Y, X, par_model=c("par", "ar1", "rw"), lambda=0, 282 | robust=FALSE, nu=5) { 283 | # On input, Y is an n x 1 column vector, and X is an n x k matrix. 284 | # lambda is a penalty value that drives the optimization towards a 285 | # solution where sigma_R has the lowest possible value. 286 | # 287 | # Fits the model 288 | # 289 | # Y[t] = alpha + beta[1] X[t,1] + beta[2] X[t,2] + ... + beta [k] X[t,k] 290 | # + m[t] + r[t] 291 | # 292 | # where 293 | # 294 | # m[t] = rho * m[t-1] + eps_M[t] 295 | # r[t] = r[t-1] + eps_R[t] 296 | # eps_M,t ~ N(0,sigma_M^2) 297 | # eps_R,t ~ N(0,sigma_R^2) 298 | # m[0] = r[0] = 0 299 | # 300 | # Estimates the values of alpha, beta, rho, sigma_M and sigma_R using 301 | # the following procedure: 302 | # 1. Initial estimates of beta are obtained by regressing the 303 | # first differences of X[,i] on the first difference of Y. 304 | # 2. 
Given these estimates for beta, an initial estimate for 305 | # alpha is obtained as alpha = Y[1] - beta X[1,]. 306 | # 3. Initial estimates of rho, sigma_M and sigma_R are obtained by 307 | # fitting a PAR model to the residuals 308 | # 4. Having obtained these initial estimates for all of the parameters, 309 | # the following cost function is maximized to final parameter 310 | # estimates: 311 | # C[rho, sigma_M, sigma_R] = -LL[rho, sigma_M, sigma_R] + lambda * sigma_R^2 312 | # In the above, LL is the log likelihood function for the steady 313 | # state Kalman filter with parameters rho, sigma_M and sigma_R. 314 | # 315 | # The parameter par_model specifies which parameters of the PAR model are 316 | # to be optimized. 317 | 318 | if (is.null(dim(Y))) Y <- as.matrix(Y, ncol=1) 319 | if (is.null(dim(X))) X <- as.matrix(X, ncol=1) 320 | 321 | par_model <- match.arg(par_model) 322 | p0 = pci.jointpenalty.guess(Y,X) 323 | res <- switch (par_model, 324 | rw = fit.pci.jointpenalty.rw.a(Y, X, lambda, p0, robust=robust, nu=nu), 325 | ar1 = fit.pci.jointpenalty.mr.a(Y, X, lambda, p0, robust=robust, nu=nu), 326 | par = fit.pci.jointpenalty.both.a(Y, X, lambda, p0, robust=robust, nu=nu)) 327 | n <- ncol(X) 328 | 329 | alpha <- res$par[1] 330 | beta <- res$par[2:(n+1)] 331 | R <- Y - alpha - X %*% beta 332 | 333 | fit <- structure(list( 334 | data=Y, 335 | basis=X, 336 | residuals=R, 337 | index=1:length(Y), 338 | alpha = alpha, 339 | beta = beta, 340 | rho = res$par[n+2], 341 | sigma_M = res$par[n+3], 342 | sigma_R = res$par[n+4], 343 | M0 = res$par[n+5], 344 | R0 = res$par[n+6], 345 | alpha.se = res$se[1], 346 | beta.se = res$se[2:(n+1)], 347 | rho.se = res$se[n+2], 348 | sigma_M.se = res$se[n+3], 349 | sigma_R.se = res$se[n+4], 350 | M0.se = res$se[n+5], 351 | R0.se = res$se[n+6], 352 | negloglik = res$nll, 353 | pvmr = pvmr.par(res$par[n+2], res$par[n+3], res$par[n+4]), 354 | par.model=par_model, 355 | robust=robust, 356 | pci.fit="jointpenalty"), 357 | class="pci.fit") 358 | 359 | names(fit$beta) <- paste("beta_", colnames(X), sep="") 360 | names(fit$beta.se) <- paste(paste("beta_", colnames(X), sep=""), ".se", sep="") 361 | 362 | fit 363 | } 364 | 365 | fit.pci.a <- function (Y, X, pci_opt_method=c("jp", "twostep"), par_model=c("par", "ar1", "rw"), 366 | lambda=0, robust=FALSE, nu=5) { 367 | # On input, Y is an n x 1 column vector, and X is an n x k matrix. 368 | # lambda is a penalty value that drives the optimization towards a 369 | # solution where sigma_R has the lowest possible value. 370 | # 371 | # Fits the model 372 | # 373 | # Y[t] = alpha + beta[1] X[t,1] + beta[2] X[t,2] + ... + beta [k] X[t,k] 374 | # + m[t] + r[t] 375 | # 376 | # where 377 | # 378 | # m[t] = rho * m[t-1] + eps_M[t] 379 | # r[t] = r[t-1] + eps_R[t] 380 | # eps_M,t ~ N(0,sigma_M^2) 381 | # eps_R,t ~ N(0,sigma_R^2) 382 | # m[0] = r[0] = 0 383 | # 384 | # Fits the model using either the two step procedure or the joint penalty 385 | # procedure, according to the value of the parameter 'pci_opt_method'. Returns 386 | # an S3 object of class "pci.fit" representing the fit that was obtained. 387 | # 388 | # The parameter par_model specifies which parameters of the PAR model are 389 | # to be optimized. 
390 | 391 | pci_opt_method <- match.arg(pci_opt_method) 392 | par_model <- match.arg(par_model) 393 | 394 | Yorig <- Y 395 | if (!is.null(dim(Y))) { 396 | if (dim(Y)[2] > 1) { 397 | if (missing(X)) { 398 | X <- Y[,2:ncol(Y),drop=FALSE] 399 | } else { 400 | stop("Y must be a single column") 401 | } 402 | } 403 | Y <- Y[,1] 404 | } 405 | Y <- coredata(Y) 406 | 407 | Xorig <- X 408 | X <- as.matrix(X) 409 | 410 | A <- switch (pci_opt_method, 411 | twostep = fit.pci.twostep.a(Y, X, par_model=par_model, robust=robust, nu=nu), 412 | jp = fit.pci.jointpenalty.a(Y, X, par_model=par_model, lambda, robust=robust, nu=nu)) 413 | 414 | if (is.zoo(Yorig)) { 415 | A$index <- index(Yorig) 416 | } else { 417 | A$index <- 1:length(Y) 418 | } 419 | 420 | if (!is.null(names(Yorig))) { 421 | A$target_name <- names(Yorig)[1] 422 | } else if (!is.null(colnames(Yorig))) { 423 | A$target_name <- colnames(Yorig)[1] 424 | } else { 425 | A$target_name <- "Y" 426 | } 427 | 428 | if (!is.null(colnames(X))) { 429 | A$factor_names <- colnames(X) 430 | } else if (!is.null(names(X))) { 431 | A$factor_names <- names(X) 432 | } else { 433 | A$factor_names <- paste("X", 1:ncol(X), sep="") 434 | } 435 | 436 | A 437 | } 438 | 439 | -------------------------------------------------------------------------------- /R/hedge.R: -------------------------------------------------------------------------------- 1 | # hedge.R -- functions for finding the optimal hedging portfolio 2 | # for a given security, using the partially AR(1) and partially cointegrated models. 3 | 4 | # Copyright (C) 2016 Matthew Clegg 5 | 6 | # This program is free software; you can redistribute it and/or modify 7 | # it under the terms of the GNU General Public License as published by 8 | # the Free Software Foundation; either version 2 of the License, or 9 | # (at your option) any later version. 10 | # 11 | # This program is distributed in the hope that it will be useful, 12 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 | # GNU General Public License for more details. 
15 | # 16 | # A copy of the GNU General Public License is available at 17 | # http://www.r-project.org/Licenses/ 18 | 19 | 20 | LARGECAPS <- c("TLT", "AAPL", "GOOGL", "GOOG", "MSFT", "BRK-B", "XOM", "AMZN", 21 | "GE", "FB", "JNJ", "WFC", "JPM", "PG", "T", "PFE", "WMT", "DIS", 22 | "KO", "VZ", "BAC", "V", "HD", "CVX", "ORCL", "INTC", "C", "GILD", 23 | "MRK", "CMCSK", "CMCSA", "PEP", "CSCO", "PM", "IBM", "AGN", "AMGN", 24 | "BMY", "MO", "NKE", "UNH", "MA", "MDT", "MCD", "CVS", "BA", "MMM", 25 | "LLY", "SLB", "ABBV", "WBA", "UPS", "SBUX", "KHC", "CELG", "UTX", 26 | "GS", "HON", "AIG", "QCOM", "USB", "COST", "MDLZ", "LOW", "AXP", 27 | "MS", "ACN", "ABT", "LMT", "UNP", "RAI", "DHR", "PCLN", "BIIB", 28 | "COP", "DOW", "TXN", "CL", "DD", "BLK", "FOX", "ESRX", "REGN", 29 | "SPG", "FOXA", "GM", "MET", "TWX", "F", "NFLX", "OXY", "TMO", 30 | "CRM", "TWC", "EMC", "PNC", "BK", "PSX", "TJX", "ADBE", "DUK", 31 | "TGT", "GD", "SCHW", "NEE", "EOG", "KMB", "FDX", "MCK", "PYPL", 32 | "LYB", "MON", "COF", "PSA", "AMT", "SO", "CAT", "AVGO", "ADP", 33 | "D", "ALXN", "KR", "DAL", "PRU", "CTSH", "ACE", "KMI", "RTN", 34 | "ABC", "AET", "CI", "SYK", "EBAY", "ECL", "BRCM", "GIS", "TRV", 35 | "ANTM", "VLO", "ITW", "NOC", "CME", "YHOO", "YUM", "BDX", "HAL", 36 | "EMR", "LUV", "PCP", "MNST", "EL", "PX", "VRTX", "BBT", "CB", 37 | "CCL", "APD", "STT", "MMC", "MPC", "CAH", "EQR", "ICE", "LB", 38 | "APC", "PPG", "ATVI", "STZ", "CCI", "AAL", "JCI", "NSC", "VFC", 39 | "HCA", "AEP", "HPE", "AFL", "SYF", "MHFI", "AON", "CSX", "ILMN", 40 | "DE", "PCG", "INTU", "ETN", "TEL", "ORLY", "SHW", "HUM", "MYL", 41 | "ALL", "AVB", "BSX", "K", "BEN", "DLPH", "EXC", "BXLT", "SRE", 42 | "DFS", "CBS", "WM", "WMB", "GGP", "SYY", "AZO", "ZTS", "BHI", 43 | "AMAT", "PPL", "STI", "HCN", "PLD", "HPQ", "ROST", "UAL", "EA", 44 | "PXD", "FISV", "PRGO", "BF-B", "ADM", "GLW", "FIS", "RCL", "CERN", 45 | "MCO", "BAX", "HRL", "ZBH", "DG", "AMP", "EIX", "PEG", "ISRG", 46 | "PAYX", "BXP", "HIG", "ROP", "TROW", "TSN", "UA", "HSY", "ADI", 47 | "LVLT", "VNO", "VIAB", "PGR", "MAR", "OMC", "ED", "NVDA", "DLTR", 48 | "EQIX", "CAG", "XEL", "VTR", "STJ", "PCAR", "TAP", "CMG", "EW", 49 | "NTRS", "DPS", "ADS", "APA", "NLSN", "APH", "MU", "SWKS", "MTB", 50 | "FITB", "IP", "DVN", "HCP", "CLX", "SWK", "CPB", "CMI", "WY", 51 | "EXPE", "ALTR", "ES", "SE", "MJN", "HES", "WEC", "ESS", "RSG", 52 | "IR", "DVA", "SJM", "SNDK", "RHT", "WDC", "ADSK", "CTL", "NBL", 53 | "DTE", "PFG", "TYC", "IVZ", "ENDP", "ROK", "MHK", "PH", "FE", 54 | "BCR", "LNC", "NOV", "A", "L", "SYMC", "TSO", "GPC", "RF", "VMC", 55 | "EFX", "DISCA", "AME", "NUE", "HSIC", "WRK", "VRSK", "MSI", "CAM", 56 | "DISCK", "LH", "GWW", "XLNX", "WHR", "MAC", "O", "DHI", "M", 57 | "LRCX", "HST", "CBG", "AA", "HBI", "TRIP", "CA", "COL", "KMX", 58 | "NWL", "HOT", "TSCO", "UHS", "ETR", "SLG", "CTXS", "XL", "FAST", 59 | "TXT", "JNPR", "AAP", "CCE", "KEY", "MKC", "LLTC", "MRO", "EMN", 60 | "MOS", "NEM", "LEN", "XEC", "WAT", "BBY", "XRX", "GPS", "KIM", 61 | "KLAC", "AEE", "JWN", "HRS", "VRSN", "CF", "SIG", "TSS", "STX", 62 | "MLM", "HRB", "RL", "MAS", "SNA", "CINF", "WFM", "AKAM", "PNR", 63 | "DOV", "CTAS", "ARG", "SRCL", "TIF", "DGX", "IFF", "LLL", "CMS", 64 | "BWA", "NDAQ", "MCHP", "WU", "IPG", "BLL", "AMG", "GT", "HBAN", 65 | "KSS", "NTAP", "HOG", "BBBY", "SEE", "FCX", "CHRW", "ETFC", "UNM", 66 | "KSU", "EXPD", "JBHT", "WYN", "XRAY", "CVC", "PCL", "MAT", "MNK", 67 | "HAS", "COH", "SCG", "QRVO", "EQT", "NWS", "CMA", "SPLS", "KORS", 68 | "VAR", "GMCR", "SNI", "GAS", "TMK", "FTI", "FFIV", "DRI", "GRMN", 69 | "COG", "CNP", 
"HAR", "AN", "PNW", "URI", "FLR", "PHM", "PVH", 70 | "XYL", "RHI", "POM", "WYNN", "LUK", "ALLE", "LEG", "AES", "TGNA", 71 | "TE", "ZION", "NI", "AIV", "FSLR", "CPGX", "AVY", "FTR", "NFX", 72 | "FLS", "PKI", "IRM", "ADT", "HP", "AIZ", "FMC", "JEC", "NWSA", 73 | "PBCT", "OKE", "RIG") 74 | 75 | SECTORETFS <- c("XLB", # Materials Select Sector SPDR 76 | "XLE", # Energy Select Sector SPDR 77 | "XLF", # Financial Select Sector SPDR 78 | "XLI", # Industrial Select Sector SPDR 79 | "XLK", # Technology Select Sector SPDR 80 | "XLP", # Consumer Staples Select Sector SPDR 81 | "XLU", # Utilities Select Sector SPDR 82 | "XLV", # Health Care Select Sector SPDR 83 | "XLY" # Consumer Discretionary SPDR 84 | ) 85 | 86 | hedge.pci <- function (Y, X, 87 | maxfact=10, # Maximum number of factors to include in hedge 88 | lambda=0, # Penalty factor to be applied to random walk portion of fit 89 | use.multicore=TRUE, # TRUE if parallel processing should be used 90 | minimum.stepsize=0, # Required amount by which the score of the fit should be improved 91 | verbose = TRUE, # TRUE if information should be displayed as fitting progresses 92 | exclude.cols=c(), # A list of columns from X that should be excluded from consideration 93 | search_type=c("lasso","full", "limited"), # The type of search that is to be performed 94 | pci_opt_method=c("jp", "twostep"), # The method used to perform PCI fitting 95 | ... 96 | ) { 97 | # Searches for an optimal hedge of Y using the securities given in X. 98 | # 99 | # Input values 100 | # Y: An N x 1 column vector or data data.frame, 101 | # representing the series that is to be hedged. 102 | # X: An N x L data.frame, where each column represents a possible factor to 103 | # be used in an partially cointegrated fit. 104 | # maxfact: The maximum number of factors to be used in modeling Y. 105 | # lambda: This specifies a penalty to be applied to the random walk 106 | # portion of the AR(1) model, driving the solution towards one with a minimal value 107 | # of sigma_RW 108 | # use.multicore: If TRUE, parallel processing will be used to improve performance. 109 | # minimum.stepsize: If this is non-NA, then the search stops if an improvement 110 | # cannot be found of at least this much. 111 | # verbose: If TRUE, information is printed about the search as it progresses. 112 | # exclude.cols: Specifies the indices of columns from X which should be excluded 113 | # from consideration as part of the hedge. Alternative, the list of excluded 114 | # columns may be given as a set of strings, in which case they are interpreted 115 | # as column names. 116 | # search_type: If "lasso", then the lasso algorithm is used to find the factors 117 | # that give the best linear fit. If "full", then a greedy algorithm is 118 | # used to search for factors to be used in the hedge. At each step, 119 | # all possible additions to the portfolio are considered, and the best one 120 | # is chosen for inclusion. If "limited", then at each iteration, a preliminary 121 | # screening step is performed to identify the securities with the highest 122 | # correlations to the residuals of the currently selected portfolio. The 123 | # top securities from this list are then checked for whether they would 124 | # improve the portfolio, and the best one is included. 125 | # 126 | # Fits the model 127 | # 128 | # Y[t] = alpha + beta[1] X_i1[t,1] + beta[2] X_i2[t,2] + ... 
+ beta [m] X_iM[t,k] 129 | # + m[t] + r[t] 130 | # 131 | # where 132 | # 133 | # X_ij is a column from X, 1 <= ij <= M 134 | # m[t] = rho * m[t-1] + eps_M[t] 135 | # r[t] = r[t-1] + eps_R[t] 136 | # eps_M,t ~ N(0,sigma_M^2) 137 | # eps_R,t ~ N(0,sigma_R^2) 138 | # r[0] = 0 139 | # 140 | # Searches for up to maxfact factors from X which provided the best 141 | # fit of an partially cointegrated model to Y. 142 | # 143 | # Returns an S3 object of class pci.hedge which describes the hedge that was found. 144 | 145 | pci_opt_method <- match.arg(pci_opt_method) 146 | if (length(exclude.cols) > 0 && is(exclude.cols, "character")) { 147 | if (!all(exclude.cols %in% colnames(X))) { 148 | missing.cols <- exclude.cols[!(exclude.cols %in% colnames(X))] 149 | stop("exclude.cols contains column names not found in X: ", missing.cols) 150 | } 151 | ch.exclude.cols <- exclude.cols 152 | exclude.cols <- match(ch.exclude.cols, colnames(X)) 153 | } 154 | 155 | target_in_source <- which(sapply(1:ncol(X), function(k) all(Y == X[,k]))) 156 | if (length(target_in_source) > 0) { 157 | # cat("Found target at column ", target_in_source, "\n\n") 158 | exclude.cols <- union(exclude.cols, target_in_source) 159 | } 160 | 161 | Imap <- setdiff(1:ncol(X), exclude.cols) 162 | Xexcl <- X[,Imap] 163 | 164 | fit <- switch(match.arg(search_type), 165 | lasso = multihedge.pci.lasso (Y, Xexcl, maxfact=maxfact, 166 | lambda=lambda, use.multicore=use.multicore, 167 | minimum.stepsize=minimum.stepsize, 168 | verbose=verbose, 169 | pci_opt_method=pci_opt_method, ...), 170 | full = multihedge.pci.greedy (Y, Xexcl, maxfact=maxfact, 171 | lambda=lambda, use.multicore=use.multicore, 172 | minimum.stepsize=minimum.stepsize, 173 | verbose=verbose, 174 | pci_opt_method=pci_opt_method, ...), 175 | limited = multihedge.pci.branching (Y, Xexcl, maxfact=maxfact, 176 | lambda=lambda, use.multicore=use.multicore, 177 | minimum.stepsize=minimum.stepsize, 178 | verbose=verbose, 179 | pci_opt_method=pci_opt_method, ...) 180 | ) 181 | 182 | fit$indexes <- Imap[fit$indexes] 183 | fit$target <- Y 184 | fit$factors <- X 185 | fit 186 | } 187 | 188 | print.pci.hedge <- function (x, ...) { 189 | 190 | print.internal.pci.hedge(x) 191 | } 192 | 193 | print.internal.pci.hedge <- function (AH) { 194 | # Prints summary information for an partially cointegrated hedge model 195 | print(AH$pci) 196 | } 197 | 198 | plot.pci.hedge <- function (x, ...) { 199 | 200 | 201 | plot.internal.pci.hedge(x) 202 | } 203 | 204 | 205 | plot.internal.pci.hedge <- function (AH) { 206 | plot(AH$pci) 207 | } 208 | 209 | multihedge.pci.general <- function ( 210 | Y, # Target that is to be hedged 211 | X, # A matrix of factors to be considered for use in hedging 212 | find_branches, # A function which returns a list of branches to be 213 | # examined from the current node. 214 | maxfact=10, # Maximum number of factors to include in hedge 215 | use.multicore=TRUE, # TRUE if multiple CPU's should be used 216 | minimum.stepsize=0.5, # Cutoff score used to terminate early 217 | verbose = TRUE, # TRUE if information should be printed about execution 218 | lambda=0, # Penalty factor to be applied to random walk portion of fit 219 | ... # Additional parameters to be passed to PCI routines 220 | ) { 221 | # Constructs a sequence of hedges of Y in terms of securities chosen from X. 222 | # Uses a modified greedy algorithm to search for the best hedge. 
Given a
223 | # current node specified as a collection I of columns from X, the function
224 | # find_branches(I) returns a list of nodes that should be explored next.
225 | # The best of these nodes is added to I and then the search continues.
226 |
227 |
228 | factor_score <- function (I) {
229 | # On input, I is a collection of indexes. Computes the hedge score
230 | # for Y hedged against X[,I]. Lower scores are better.
231 | # tf <- test.pci(Y, X[,I,drop=FALSE], null_hyp="rw")
232 | # tf$statistic
233 | score <- likelihood_ratio.pci(Y, X[,I,drop=FALSE], null_model="rw", ...)
234 | score
235 | }
236 |
237 | find_factor <- function (I) {
238 | # On input, I is a collection of indexes. For each index j not in I,
239 | # computes the hedge score of that index. Returns I \cup {j*}, where j*
240 | # is the index of the factor that most improves the factor score.
241 | J <- find_branches(I)
242 | if (length(J) == 0) return(I)
243 | if (use.multicore) {
244 | jscores <- mclapply(J, function(js) factor_score(c(I,js)))
245 | } else {
246 | # jscores <- sapply(J, function (js) factor_score(c(I, js)))
247 | jscores <- lapply(J, function (js) factor_score(c(I, js)))
248 | }
249 | jscores <- c(jscores, recursive=TRUE)
250 | jstar <- J[which.min(jscores)]
251 | c(I, jstar)
252 | }
253 |
254 | if (verbose && !is.null(names(Y))) {
255 | cat(sprintf("Hedges computed for %s\n\n", names(Y)[1]))
256 | }
257 |
258 | if (verbose) cat(sprintf("%8s %8s %8s %8s %8s %8s %8s | Factor coefficients\n", "-LL", "LR[rw]", "p[rw]", "p[mr]", "rho", "R^2[MR]", "Factor"))
259 |
260 | I <- c()
261 | # debug (find_factor)
262 | score <- 0
263 | pci.fit <- NULL
264 | while (length(I) < maxfact && (length(I) < ncol(X))) {
265 | Inew <- find_factor(I)
266 | if (length(Inew) == length(I)) break
267 | pci.fit <- fit.pci(Y, X[,Inew,drop=FALSE], lambda=lambda, ...)
268 | tr <- test.pci(Y, X[,Inew,drop=FALSE], null_hyp="rw", ...)
269 | tm <- test.pci(Y, X[,Inew,drop=FALSE], null_hyp="ar1", ...)
270 | score.new <- factor_score(Inew)
271 | if ((length(I) > 0) && !is.na(minimum.stepsize) &&
272 | (score.new + minimum.stepsize > score)) break;
273 | if (verbose) {
274 | cat(sprintf("%8.2f %8.4f %8.4f %8.4f %8.4f %8.4f %8s | ", pci.fit$negloglik, score.new,
275 | tr$p.value, tm$p.value,
276 | pci.fit$rho, pci.fit$pvmr, colnames(X)[Inew[length(Inew)]]))
277 | for (j in 1:length(Inew)) {
278 | betaj_str <- paste("beta_", colnames(X)[Inew[j]], sep="")
279 | betaj <- pci.fit$beta[[betaj_str]]
280 | cat(sprintf("%8.4f ", betaj))
281 | }
282 | cat("\n")
283 | }
284 | score <- score.new
285 | I <- Inew
286 | }
287 |
288 | pci.fit <- fit.pci(Y, X[,I,drop=FALSE], lambda=lambda, ...)
289 |
290 | fit <- structure(list(pci = pci.fit,
291 | indexes = I,
292 | index_names = colnames(X)[I]
293 | ),
294 | class="pci.hedge")
295 |
296 | if (verbose) printf("\n")
297 | fit
298 | }
299 |
300 | multihedge.pci.lasso <- function (
301 | Y,
302 | X,
303 | ...
304 | ) {
305 | # Constructs a sequence of hedges of Y in terms of securities chosen from X
306 | # Uses the lasso to search for a hedge of Y in terms of securities
307 | # from X that is significant as a PCI model.
308 |
309 | lasso <- glmnet(X, Y)
310 |
311 | find_branches <- function (I) {
312 | # On input, I is the set of indexes already included in the hedge. Steps
313 | # through the lasso solution path until a variable that is not in I becomes
314 | # active, and returns the index of the first such variable.
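# (How it works: each column of lasso$beta holds the fitted coefficients for one
# value of lambda, ordered from the largest penalty to the smallest, so advancing
# ix below walks down the regularization path until a variable outside I becomes
# active.)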
315 | J <- c()
316 | ix <- 1
317 | while (all(J %in% I)) {
318 | ix <- ix + 1
319 | J <- which(lasso$beta[,ix] != 0)
320 | }
321 | return(setdiff(J, I)[1])
322 | }
323 |
324 | multihedge.pci.general(Y, X, find_branches, ...)
325 | }
326 |
327 | multihedge.pci.branching <- function (
328 | Y,
329 | X,
330 | max.branch = 10,
331 | lambda=0, # Penalty factor to be applied to random walk portion of fit
332 | pci_opt_method=c("jp", "twostep"), # The method used to perform PCI fitting
333 | robust=FALSE, # TRUE if robust fitting should be used
334 | ...
335 | ) {
336 | # Constructs a sequence of hedges of Y in terms of securities chosen from X
337 | # Uses a modified greedy algorithm to search for a hedge of Y in terms of securities
338 | # from X that is significant as a PCI model.
339 |
340 | pci_opt_method <- match.arg(pci_opt_method)
341 | DX <- diff(X)
342 |
343 | find_branches <- function (I) {
344 | # On input, I is a collection of indexes. For each index j not in I,
345 | # calculates a ranking for that index. Returns the top max.branch
346 | # indexes.
347 | J <- setdiff(1:ncol(X), I)
348 | if (length(J) <= max.branch) return(J)
349 |
350 | if (length(I) == 0) {
351 | Z <- Y
352 | } else {
353 | pci.fit <- fit.pci(Y, X[,I], lambda=lambda,
354 | pci_opt_method=pci_opt_method, robust=robust)
355 | Z <- pci.fit$residuals
356 | }
357 |
358 | DZ <- diff(Z)
359 | scores <- cor(DZ, DX[,J])
360 | best.scores <- J[order(scores, decreasing=TRUE)][1:max.branch]
361 | best.scores
362 | }
363 |
364 | multihedge.pci.general(Y, X, find_branches, lambda=lambda,
365 | pci_opt_method=pci_opt_method, robust=robust, ...)
366 | }
367 |
368 | multihedge.pci.greedy <- function (
369 | Y,
370 | X,
371 | ...
372 | ) {
373 |
374 | # Constructs a sequence of hedges of Y in terms of securities chosen from X
375 | # Uses a greedy algorithm to search for a hedge of Y in terms of securities
376 | # from X that is significant as a PCI model.
377 |
378 | find_branches <- function (I) {
379 | setdiff(1:ncol(X), I)
380 | }
381 |
382 | multihedge.pci.general(Y, X, find_branches, ...)
383 | }
384 |
385 |
-------------------------------------------------------------------------------- /tests/tests.R: --------------------------------------------------------------------------------
1 | all.tests.pass <- TRUE
2 | all.tests.error.count <- 0
3 |
4 | library(zoo)
5 | library(xts)
6 |
7 | test <- function(expr, out="", val=eval.parent(parse(text=expr), 1), tol=1e-4) {
8 | # expr is a string representing an R expression, and
9 | # out is the output that is expected. Prints and evaluates
10 | # expr. If out is given and it matches the output of
11 | # evaluating expr, returns TRUE. Otherwise, returns FALSE.
12 |
13 | cat(expr, "-> ")
14 |
15 | p <- function (v) {
16 | if (length(v) < 5) {
17 | cat(v)
18 | } else {
19 | cat(class(v), "(", length(v), ")")
20 | }
21 | }
22 | p(val)
23 |
24 | result <- all.equal(val, out, tolerance=tol)
25 | if (!isTRUE(result)) {
26 | if (!missing(out)) {
27 | cat(" (Expecting ")
28 | p(out)
29 | cat(")")
30 | }
31 | cat("\nERROR: ", result, "\n")
32 | all.tests.pass <<- FALSE
33 | all.tests.error.count <<- all.tests.error.count + 1
34 | } else {
35 | cat(" OK\n")
36 | }
37 |
38 | isTRUE(result)
39 | }
40 |
41 | assert <- function (expr, out) {
42 | # expr is a string representing an R expression,
43 | # and out is the output that is expected. Prints
44 | # and evaluates expr. If out matches the output of
45 | # evaluating expr, returns TRUE.
Otherwise, stops 46 | # the execution with an error message. 47 | if (!test(expr, out)) { 48 | stop("Expression ", deparse(substitute(expr)), 49 | " does not evaluate to its expected value\n") 50 | } 51 | } 52 | 53 | test_support <- function () { 54 | test("partialCI:::ctext(\"\", 0)", "") 55 | test("partialCI:::ctext(\"\", 1)", " ") 56 | test("partialCI:::ctext(\"hello\", 0)", "") 57 | test("partialCI:::ctext(\"hello\", 1)", "h") 58 | test("partialCI:::ctext(\"hello\", 5)", "hello") 59 | test("partialCI:::ctext(\"hello\", 6)", "hello ") 60 | test("partialCI:::ctext(\"hello\", 7)", " hello ") 61 | test("partialCI:::ctext(\"hello\", 9)", " hello ") 62 | } 63 | 64 | OIL <- structure(c(95.14, 93.66, 93.12, 93.31, 91.9, 91.36, 92.39, 91.45, 65 | 92.15, 93.78, 93.54, 93.96, 94.51, 96.35, 97.23, 96.66, 95.82, 66 | 97.49, 97.34, 98.25, 97.55, 96.44, 97.24, 97.4, 97.84, 99.98, 67 | 100.12, 99.96, 100.38, 100.27, 100.31, 102.54, 103.46, 103.2, 68 | 102.53, 103.17, 102.2, 102.93, 102.68, 102.88, 105.34, 103.64, 69 | 101.75, 101.82, 102.82, 101.39, 100.29, 98.29, 98.57, 99.23, 70 | 98.43, 100.08, 100.71, 99.68, 99.97, 100.05, 99.66, 100.61, 101.25, 71 | 101.73, 101.57, 99.69, 99.6, 100.29, 101.16, 100.43, 102.57, 72 | 103.55, 103.37, 103.68, 104.05, 103.7, 103.71, 104.33, 104.35, 73 | 101.69, 101.47, 102.2, 100.85, 101.13, 101.56, 100.07, 99.69, 74 | 100.09, 99.74, 99.81, 101.06, 100.52, 100.32, 100.89, 102.01, 75 | 102.63, 101.74, 102.31, 102.95, 102.8, 104.31, 104.03, 105.01, 76 | 104.78, 103.37, 104.26, 103.4, 103.07, 103.34, 103.27, 103.17, 77 | 103.32, 105.09, 105.02, 105.04, 107.2, 107.49, 107.52, 106.95, 78 | 106.64, 107.08, 107.95, 106.83, 106.64, 107.04, 106.49, 106.46, 79 | 106.07, 106.06, 105.18, 104.76, 104.19, 104.06, 102.93, 103.61, 80 | 101.48, 101.73, 100.56, 101.88, 103.84, 103.83, 105.34, 104.59, 81 | 103.81, 102.76, 105.23, 105.68, 104.91, 104.29, 98.23, 97.86, 82 | 98.26, 97.34, 96.93, 97.34, 97.61, 98.09, 97.36, 97.57, 95.54, 83 | 97.3, 96.44, 94.35, 96.4, 93.97, 93.61, 95.39, 95.78, 95.82, 84 | 96.44, 97.86, 92.92, 95.5, 94.51, 93.32, 92.64, 92.73, 91.71, 85 | 92.89, 92.18, 92.86, 94.91, 94.33, 93.07, 92.43, 91.46, 91.55, 86 | 93.6, 93.59, 95.55, 94.53, 91.17, 90.74, 91.02, 89.76, 90.33, 87 | 88.89, 87.29, 85.76, 85.87, 85.73, 81.72, 81.82, 82.33, 82.8, 88 | 82.76, 83.25, 80.52, 82.81, 81.27, 81.26, 81.36, 82.25, 81.06, 89 | 80.53, 78.77, 77.15, 78.71, 77.87, 78.71, 77.43, 77.85, 77.16, 90 | 74.13, 75.91, 75.64, 74.55, 74.55, 75.63, 76.52, 75.74, 74.04, 91 | 73.7, 65.94, 68.98, 66.99, 67.3, 66.73, 65.89, 63.13, 63.74, 92 | 60.99, 60.01, 57.81, 55.96, 55.97, 56.43, 54.18, 56.91, 55.25, 93 | 56.78, 55.7, 54.59, 53.46, 54.14, 53.45, 52.72, 50.05, 47.98, 94 | 48.69, 48.8, 48.35, 46.06, 45.92, 48.49, 46.37, 48.49, 46.79, 95 | 47.85, 45.93, 45.26, 44.8, 45.84, 44.08, 44.12, 47.79, 49.25, 96 | 53.04, 48.45, 50.48, 51.66, 52.99, 107.94, 106.57, 106.71, 107.01, 97 | 107.42, 107.49, 106.44, 108.02, 107.12, 108.09, 107.46, 108.45, 98 | 109.17, 109.69, 109.69, 109.14, 108.72, 109.1, 108.83, 109.36, 99 | 108.16, 106.55, 107.04, 106.81, 108.15, 110.12, 110.18, 109.21, 100 | 108.62, 108.98, 108.63, 110.14, 110.37, 109.42, 109.03, 109.76, 101 | 109.19, 109.39, 108.54, 108.98, 111.26, 109.17, 108.15, 107.99, 102 | 109.14, 108.27, 108.35, 107.88, 107.48, 108.08, 106.99, 106.79, 103 | 105.95, 105.73, 107.2, 106.59, 107.01, 105.9, 106.58, 106.64, 104 | 105.95, 105.7, 103.37, 104.88, 106.41, 104.89, 105.83, 107.39, 105 | 107.1, 107.34, 107.68, 109.1, 109.71, 109.79, 109.69, 108.54, 106 | 108.48, 
109.79, 109.53, 109.12, 109.89, 108.63, 108.63, 109.48, 107 | 109.48, 108.3, 108.17, 108.19, 108.26, 108.37, 108.78, 109.87, 108 | 109.74, 110.9, 110.84, 110.35, 111.32, 110.89, 110.19, 109.81, 109 | 109.09, 109.98, 109.21, 109.34, 108.87, 109.07, 108.43, 109.21, 110 | 110.55, 109.18, 109.83, 112.18, 113.15, 113.42, 114.02, 114.25, 111 | 115.19, 114.55, 113.62, 113.74, 112.84, 112.61, 112.62, 111.03, 112 | 110.84, 110.18, 108.98, 108.7, 107.65, 106.84, 106.2, 105.77, 113 | 104.73, 104.73, 105.41, 106.04, 106.03, 105.71, 106.48, 106.85, 114 | 105.78, 106.89, 106.7, 106.98, 106.47, 104.94, 103.45, 103.63, 115 | 102.82, 104.17, 104.02, 103.36, 103.47, 101.68, 102.27, 101.15, 116 | 101.13, 99.37, 99.74, 99.92, 100.28, 100.09, 100.49, 100.5, 100.4, 117 | 100.71, 101.12, 100.21, 100.88, 101.21, 99.51, 99.53, 98.08, 118 | 96.26, 96.42, 96.31, 96.43, 97.39, 97.7, 96.82, 96.75, 95.37, 119 | 94.87, 94.53, 95.2, 95.08, 95.7, 94.67, 94.57, 91.29, 90.8, 90.65, 120 | 90.9, 90.25, 90.47, 88.66, 87.82, 86.36, 84.02, 84.02, 85.27, 121 | 84.42, 85.17, 86.38, 85.94, 86, 85.64, 85.57, 86.91, 85.5, 84.17, 122 | 84.9, 82.12, 82.88, 82.08, 83.2, 82.9, 80.94, 80.42, 77.74, 77.51, 123 | 76.86, 77.23, 77.21, 77.61, 79.2, 79.62, 77.62, 77.39, 71.89, 124 | 70.87, 71.13, 70.13, 68.48, 68, 65.64, 66.11, 63.32, 63.65, 61.67, 125 | 61.09, 60.26, 59.84, 58.81, 58.87, 58.31, 59.07, 58.67, 58.72, 126 | 57.86, 55.6, 55.27, 55.38, 51.08, 50.12, 49.06, 49.43, 47.64, 127 | 46.9, 45.13, 45.82, 47.66, 47.38, 46.49, 46.5, 46.09, 46.69, 128 | 46.07, 46.55, 47.07, 46.61, 47.52, 51.74, 54.41, 55.07, 55.98, 129 | 55.88, 57, 26.95, 26.63, 26.52, 26.45, 26.14, 26.14, 26.27, 25.94, 130 | 26.07, 26.52, 26.44, 26.47, 26.74, 27.08, 26.95, 26.9, 26.71, 131 | 27.06, 27.05, 27.11, 26.88, 26.71, 26.92, 26.98, 27.31, 27.78, 132 | 27.73, 27.8, 27.94, 28.02, 28.07, 28.48, 28.6, 28.69, 28.56, 133 | 28.68, 28.5, 28.59, 28.54, 28.65, 29.2, 28.85, 28.31, 28.57, 134 | 28.73, 28.36, 28.04, 27.74, 27.72, 27.91, 27.55, 27.81, 27.78, 135 | 27.79, 28.06, 28.01, 28.02, 28.23, 28.56, 28.64, 28.62, 28.05, 136 | 28.01, 28.34, 28.57, 28.44, 28.9, 29.11, 29.06, 29.02, 29.24, 137 | 29.24, 29.32, 29.47, 29.43, 29, 28.92, 29.08, 28.7, 28.76, 28.76, 138 | 28.46, 28.31, 28.4, 28.28, 28.3, 28.65, 28.53, 28.48, 28.67, 139 | 29.01, 29.13, 28.98, 29.14, 29.3, 29.44, 29.75, 29.79, 29.92, 140 | 29.86, 29.49, 29.68, 29.47, 29.36, 29.48, 29.38, 29.41, 29.45, 141 | 29.79, 29.85, 29.88, 30.43, 30.35, 30.53, 30.45, 30.6, 30.69, 142 | 30.82, 30.78, 30.82, 30.94, 30.75, 30.79, 30.76, 30.75, 30.43, 143 | 30.35, 30.28, 30.21, 29.91, 30.15, 29.61, 29.82, 29.44, 29.63, 144 | 29.8, 29.68, 29.97, 29.88, 29.96, 29.83, 29.86, 29.67, 29.52, 145 | 29.22, 28.9, 28.77, 28.99, 28.85, 28.8, 28.99, 28.89, 28.93, 146 | 28.74, 28.84, 28.13, 28.33, 27.98, 27.83, 27.89, 28.07, 28, 27.99, 147 | 28.09, 28.05, 28.16, 28.4, 27.76, 28.3, 28.16, 27.92, 27.67, 148 | 27.55, 27.27, 27.58, 27.29, 27.4, 27.91, 27.74, 27.46, 27.43, 149 | 27.13, 27.25, 27.65, 27.53, 27.74, 27.94, 27.13, 26.87, 26.91, 150 | 26.47, 26.73, 26.31, 26.01, 25.28, 25.39, 25.16, 24.22, 24.07, 151 | 24.47, 24.63, 24.46, 24.6, 24, 24.46, 24.34, 24.13, 24.28, 24.64, 152 | 24.23, 24.18, 23.49, 23.09, 23.63, 23.35, 23.48, 23.11, 23.18, 153 | 22.97, 22.28, 22.71, 22.57, 22.2, 22.22, 22.62, 22.92, 22.64, 154 | 22.04, 21.94, 20.03, 20.6, 20.04, 20.04, 19.82, 19.51, 18.65, 155 | 18.86, 18.13, 17.61, 17.03, 16.24, 16.49, 16.52, 16.05, 17.03, 156 | 16.16, 16.68, 16.28, 16.06, 15.66, 15.65, 15.7, 15.28, 14.43, 157 | 13.82, 14.05, 14.2, 14, 13.27, 
13.43, 13.99, 13.3, 14.04, 13.32, 158 | 13.57, 13.27, 12.94, 12.82, 13.11, 12.57, 12.65, 13.62, 14.27, 159 | 15.09, 14.09, 14.56, 14.76, 14.98), 160 | .indexTZ = "UTC", .indexCLASS = "Date", tclass = "Date", tzone = "UTC", src = "FRED", 161 | updated = structure(1423948051.10322, class = c("POSIXct", 162 | "POSIXt")), 163 | index = structure(c(1388620800, 1388707200, 1388966400, 164 | 1389052800, 1389139200, 1389225600, 1389312000, 1389571200, 1389657600, 165 | 1389744000, 1389830400, 1389916800, 1390262400, 1390348800, 1390435200, 166 | 1390521600, 1390780800, 1390867200, 1390953600, 1391040000, 1391126400, 167 | 1391385600, 1391472000, 1391558400, 1391644800, 1391731200, 1391990400, 168 | 1392076800, 1392163200, 1392249600, 1392336000, 1392681600, 1392768000, 169 | 1392854400, 1392940800, 1393200000, 1393286400, 1393372800, 1393459200, 170 | 1393545600, 1393804800, 1393891200, 1393977600, 1394064000, 1394150400, 171 | 1394409600, 1394496000, 1394582400, 1394668800, 1394755200, 1395014400, 172 | 1395100800, 1395187200, 1395273600, 1395360000, 1395619200, 1395705600, 173 | 1395792000, 1395878400, 1395964800, 1396224000, 1396310400, 1396396800, 174 | 1396483200, 1396569600, 1396828800, 1396915200, 1397001600, 1397088000, 175 | 1397174400, 1397433600, 1397520000, 1397606400, 1397692800, 1398038400, 176 | 1398124800, 1398211200, 1398297600, 1398384000, 1398643200, 1398729600, 177 | 1398816000, 1398902400, 1398988800, 1399248000, 1399334400, 1399420800, 178 | 1399507200, 1399593600, 1399852800, 1399939200, 1400025600, 1400112000, 179 | 1400198400, 1400457600, 1400544000, 1400630400, 1400716800, 1400803200, 180 | 1401148800, 1401235200, 1401321600, 1401408000, 1401667200, 1401753600, 181 | 1401840000, 1401926400, 1402012800, 1402272000, 1402358400, 1402444800, 182 | 1402531200, 1402617600, 1402876800, 1402963200, 1403049600, 1403136000, 183 | 1403222400, 1403481600, 1403568000, 1403654400, 1403740800, 1403827200, 184 | 1404086400, 1404172800, 1404259200, 1404345600, 1404691200, 1404777600, 185 | 1404864000, 1404950400, 1405036800, 1405296000, 1405382400, 1405468800, 186 | 1405555200, 1405641600, 1405900800, 1405987200, 1406073600, 1406160000, 187 | 1406246400, 1406505600, 1406592000, 1406678400, 1406764800, 1406851200, 188 | 1407110400, 1407196800, 1407283200, 1407369600, 1407456000, 1407715200, 189 | 1407801600, 1407888000, 1407974400, 1408060800, 1408320000, 1408406400, 190 | 1408492800, 1408579200, 1408665600, 1408924800, 1409011200, 1409097600, 191 | 1409184000, 1409270400, 1409616000, 1409702400, 1409788800, 1409875200, 192 | 1410134400, 1410220800, 1410307200, 1410393600, 1410480000, 1410739200, 193 | 1410825600, 1410912000, 1410998400, 1411084800, 1411344000, 1411430400, 194 | 1411516800, 1411603200, 1411689600, 1411948800, 1412035200, 1412121600, 195 | 1412208000, 1412294400, 1412553600, 1412640000, 1412726400, 1412812800, 196 | 1412899200, 1413158400, 1413244800, 1413331200, 1413417600, 1413504000, 197 | 1413763200, 1413849600, 1413936000, 1414022400, 1414108800, 1414368000, 198 | 1414454400, 1414540800, 1414627200, 1414713600, 1414972800, 1415059200, 199 | 1415145600, 1415232000, 1415318400, 1415577600, 1415664000, 1415750400, 200 | 1415836800, 1415923200, 1416182400, 1416268800, 1416355200, 1416441600, 201 | 1416528000, 1416787200, 1416873600, 1416960000, 1417132800, 1417392000, 202 | 1417478400, 1417564800, 1417651200, 1417737600, 1417996800, 1418083200, 203 | 1418169600, 1418256000, 1418342400, 1418601600, 1418688000, 1418774400, 204 | 1418860800, 1418947200, 1419206400, 
1419292800, 1419379200, 1419552000, 205 | 1419811200, 1419897600, 1419984000, 1420156800, 1420416000, 1420502400, 206 | 1420588800, 1420675200, 1420761600, 1421020800, 1421107200, 1421193600, 207 | 1421280000, 1421366400, 1421712000, 1421798400, 1421884800, 1421971200, 208 | 1422230400, 1422316800, 1422403200, 1422489600, 1422576000, 1422835200, 209 | 1422921600, 1423008000, 1423094400, 1423180800, 1423440000), 210 | tzone = "UTC", tclass = "Date"), .Dim = c(278L, 3L), 211 | .Dimnames = list(NULL, c("WTI", "BRENT", "DBO")), class = c("xts", "zoo")) 212 | 213 | test_lr <- function (fast_only=FALSE) { 214 | test("partialCI:::loglik.pci.fkf(OIL$WTI, OIL$BRENT, 0, 1, -0.0195, 0.8483, 0.8364)", 447.8704) 215 | test("partialCI:::loglik.pci.ss(OIL$WTI, OIL$BRENT, 0, 1, -0.0195, 0.8483, 0.8364)", 443.1509) 216 | test("partialCI:::loglik.pci.css(OIL$WTI, OIL$BRENT, 0, 1, -0.0195, 0.8483, 0.8364)", 443.1509) 217 | test("partialCI:::loglik.pci.sst(OIL$WTI, OIL$BRENT, 0, 1, -0.0195, 0.8483, 0.8364)", 444.8827) 218 | test("partialCI:::loglik.pci.csst(OIL$WTI, OIL$BRENT, 0, 1, -0.0195, 0.8483, 0.8364)", 444.8827) 219 | 220 | } 221 | 222 | test.likelihood_ratio.pci <- function (fast_only=FALSE) { 223 | test("partialCI:::likelihood_ratio.pci(OIL$WTI, OIL$BRENT)", c(negloglik=-19.88135), tol=0.1) 224 | test("partialCI:::likelihood_ratio.pci(OIL$WTI, OIL$BRENT, null_model=\"ar1\")", c(negloglik=-18.71606), tol=0.1) 225 | test("partialCI:::likelihood_ratio.pci(OIL$WTI, OIL$BRENT, pci_opt_method=\"twostep\")", c(negloglik=-21.24474)) 226 | test("partialCI:::likelihood_ratio.pci(OIL$WTI, OIL$BRENT, null_model=\"ar1\", pci_opt_method=\"twostep\")", c(negloglik=-19.97211)) 227 | test("partialCI:::likelihood_ratio.pci(OIL$WTI, OIL$BRENT, robust=TRUE)", c(negloglik=-16.94564), tol=0.1) 228 | test("partialCI:::likelihood_ratio.pci(OIL$WTI, OIL$BRENT, robust=TRUE, null_model=\"ar1\")", c(negloglik=-16.71372), tol=0.1) 229 | test("partialCI:::likelihood_ratio.pci(OIL$WTI, OIL$BRENT, robust=TRUE, pci_opt_method=\"twostep\")", c(negloglik=-18.84097)) 230 | test("partialCI:::likelihood_ratio.pci(OIL$WTI, OIL$BRENT, robust=TRUE, null_model=\"ar1\", pci_opt_method=\"twostep\")", c(negloglik=-16.87271)) 231 | } 232 | 233 | test_lr2 <- function(fast_only=FALSE) { 234 | # test.likelihood_ratio.par(fast_only) 235 | 236 | test("partialAR:::par.rw.pvalue(-3.5,400) < 0.05", TRUE) 237 | test("partialAR:::par.rw.pvalue(-1,500) > 0.10", TRUE) 238 | test("partialAR:::par.mr.pvalue(-1,600) < 0.05", TRUE) 239 | test("partialAR:::par.mr.pvalue(-0.1, 700) > 0.05", TRUE) 240 | test("partialAR:::par.rw.pvalue(-3.5,400, robust=TRUE) < 0.05", TRUE) 241 | test("partialAR:::par.rw.pvalue(-1,500, robust=TRUE) > 0.10", TRUE) 242 | test("partialAR:::par.mr.pvalue(-1,600, robust=TRUE) < 0.05", TRUE) 243 | test("partialAR:::par.mr.pvalue(-0.1, 700, robust=TRUE) > 0.05", TRUE) 244 | 245 | test("partialAR:::par.mr.pvalue(-2,400,ar1test='kpss') < 0.05", TRUE) 246 | test("partialAR:::par.mr.pvalue(-0.5, 500,ar1test='kpss') > 0.05", TRUE) 247 | test("partialAR:::par.mr.pvalue(-2,600, robust=TRUE,ar1test='kpss') < 0.05", TRUE) 248 | test("partialAR:::par.mr.pvalue(-0.5, 700, robust=TRUE,ar1test='kpss') > 0.05", TRUE) 249 | 250 | test("partialAR:::par.joint.pvalue(-4,-0.5,500) < 0.05", TRUE) 251 | test("partialAR:::par.joint.pvalue(-1,-0.25,500) > 0.05", TRUE) 252 | test("partialAR:::par.joint.pvalue(-5,-0.8,500, robust=TRUE) < 0.05", TRUE) 253 | test("partialAR:::par.joint.pvalue(-3,-0.1,500, robust=TRUE) > 0.05", TRUE) 254 | 
test("partialAR:::par.joint.pvalue(-5,-2,500, ar1test='kpss') < 0.05", TRUE) 255 | test("partialAR:::par.joint.pvalue(-3,-1,500, ar1test='kpss') > 0.05", TRUE) 256 | test("partialAR:::par.joint.pvalue(-4,-0.5,50000)", 0.03) 257 | test("partialAR:::par.joint.pvalue(-4,-0.5,50)", 0.10) 258 | test("partialAR:::par.joint.pvalue(4,-0.5,50)", 1) 259 | test("partialAR:::par.joint.pvalue(-4,-0.5,49)", 1) 260 | 261 | test("partialAR:::test.par.nullrw(data.L)$p.value < 0.05", TRUE) 262 | test("partialAR:::test.par.nullrw(data.IBM)$p.value > 0.05", TRUE) 263 | test("partialAR:::test.par.nullrw(data.L, robust=TRUE)$p.value < 0.10", TRUE) 264 | test("partialAR:::test.par.nullrw(data.IBM, robust=TRUE)$p.value > 0.10", TRUE) 265 | 266 | test("partialAR:::test.par.nullmr(data.L)$p.value <= 0.01", TRUE) 267 | test("partialAR:::test.par.nullmr(data.L, robust=TRUE)$p.value <= 0.01", TRUE) 268 | test("partialAR:::test.par.nullmr(data.L, ar1test='kpss')$p.value <= 0.01", TRUE) 269 | test("partialAR:::test.par.nullmr(data.L, robust=TRUE, ar1test='kpss')$p.value <= 0.01", TRUE) 270 | 271 | test("partialAR:::test.par.nullmr(data.IBM)$p.value < 0.05", TRUE) 272 | test("partialAR:::test.par.nullmr(data.IBM, robust=TRUE)$p.value < 0.10", TRUE) 273 | test("partialAR:::test.par.nullmr(data.IBM, ar1test='kpss')$p.value > 0.10", TRUE) 274 | test("partialAR:::test.par.nullmr(data.IBM, ar1test='kpss', robust=TRUE)$p.value > 0.10", TRUE) 275 | 276 | test("partialAR:::test.par(data.L, null_hyp='rw')$p.value == partialAR:::test.par.nullrw(data.L)$p.value", TRUE) 277 | test("partialAR:::test.par(data.IBM, null_hyp='rw')$p.value == partialAR:::test.par.nullrw(data.IBM)$p.value", TRUE) 278 | test("partialAR:::test.par(data.L, null_hyp='mr')$p.value == partialAR:::test.par.nullmr(data.L)$p.value", TRUE) 279 | test("partialAR:::test.par(data.IBM, null_hyp='mr')$p.value == partialAR:::test.par.nullmr(data.IBM)$p.value", TRUE) 280 | 281 | test("partialAR:::test.par(data.L)$p.value['PAR'] <= 0.01", c(PAR=TRUE)) 282 | test("partialAR:::test.par(data.L, robust=TRUE)$p.value['PAR'] <= 0.10", c(PAR=TRUE)) 283 | test("partialAR:::test.par(data.IBM)$p.value['PAR'] > 0.10", c(PAR=TRUE)) 284 | test("partialAR:::test.par(data.IBM, robust=TRUE)$p.value['PAR'] > 0.10", c(PAR=TRUE)) 285 | test("partialAR:::test.par(data.L, ar1test='kpss')$p.value['PAR'] <= 0.01", c(PAR=TRUE)) 286 | test("partialAR:::test.par(data.L, ar1test='kpss',robust=TRUE)$p.value['PAR'] <= 0.10", c(PAR=TRUE)) 287 | test("partialAR:::test.par(data.IBM, ar1test='kpss')$p.value['PAR'] > 0.10", c(PAR=TRUE)) 288 | 289 | print(partialAR:::test.par(data.L)) 290 | print(partialAR:::test.par(data.L, robust=TRUE)) 291 | 292 | test("partialAR:::which.hypothesis.partest(partialAR:::test.par(data.L))", "PAR") 293 | test("partialAR:::which.hypothesis.partest(partialAR:::test.par(data.L, robust=TRUE))", "RRW") 294 | test("partialAR:::which.hypothesis.partest(partialAR:::test.par(data.IBM))", "RW") 295 | 296 | partialAR:::print.par.lrt(); cat("\n\n") 297 | partialAR:::print.par.lrt(robust=TRUE); cat("\n\n") 298 | partialAR:::print.par.lrt(latex=TRUE); cat("\n\n") 299 | 300 | # partialAR:::print.par.lrt.mr(); cat("\n\n") 301 | # partialAR:::print.par.lrt.mr(robust=TRUE); cat("\n\n") 302 | # partialAR:::print.par.lrt.mr(latex=TRUE); cat("\n\n") 303 | 304 | partialAR:::print.par.lrt.rw(); cat("\n\n") 305 | partialAR:::print.par.lrt.rw(robust=TRUE); cat("\n\n") 306 | partialAR:::print.par.lrt.rw(latex=TRUE); cat("\n\n") 307 | 308 | } 309 | 310 | test_fit_twostep <- function (fast_only = 
FALSE) { 311 | F <- partialCI:::fit.pci.twostep(OIL$WTI, OIL$BRENT, include_alpha=TRUE) 312 | test("F$alpha", c(alpha=3.988743)) 313 | test("F$beta", c(beta_BRENT=0.9014), tol=0.01) 314 | test("F$rho", c(rho=-0.002161227)) 315 | test("F$sigma_M", c(sigma_M=0.8509465)) 316 | test("F$sigma_R", c(sigma_R=0.7937507)) 317 | test("F$alpha.se", c(alpha.se=0.7837437)) 318 | test("F$rho.se", c(rho.se=0.1696716)) 319 | test("F$sigma_M.se", c(sigma_M.se=0.1056687)) 320 | test("F$sigma_R.se", c(sigma_R.se=0.111671)) 321 | test("F$negloglik", c(negloglik=436.6048)) 322 | test("F$pvmr", c(pvmr=0.6972993)) 323 | 324 | F0 <- partialCI:::fit.pci.twostep(OIL$WTI, OIL$BRENT) 325 | test("F0$beta", c(beta_BRENT=0.9014), tol=0.01) 326 | test("F0$rho", c(rho=-0.002161227)) 327 | test("F0$sigma_M", c(sigma_M=0.8509465)) 328 | test("F0$sigma_R", c(sigma_R=0.7937507)) 329 | test("F0$rho.se", c(rho.se=0.1696716)) 330 | test("F0$sigma_M.se", c(sigma_M.se=0.1056687)) 331 | test("F0$sigma_R.se", c(sigma_R.se=0.111671)) 332 | test("F0$negloglik", c(negloglik=436.6048)) 333 | test("F0$pvmr", c(pvmr=0.6972993)) 334 | 335 | FR0 <- partialCI:::fit.pci.twostep(OIL$WTI, OIL$BRENT, robust=TRUE) 336 | test("FR0$beta", c(beta_BRENT=0.911297), tol=0.01) 337 | test("FR0$rho", c(rho=-0.006326975)) 338 | test("FR0$sigma_M", c(sigma_M=0.6299893)) 339 | test("FR0$sigma_R", c(sigma_R=0.6845977)) 340 | test("FR0$rho.se", c(rho.se=0.180226)) 341 | test("FR0$sigma_M.se", c(sigma_M.se=0.09403069)) 342 | test("FR0$sigma_R.se", c(sigma_R.se=0.08726811)) 343 | test("FR0$negloglik", c(negloglik=429.9065)) 344 | test("FR0$pvmr", c(pvmr=0.6302378)) 345 | 346 | FR <- partialCI:::fit.pci.twostep(OIL$WTI, OIL$BRENT, robust=TRUE, include_alpha=TRUE) 347 | test("FR$alpha", c(alpha=3.344333)) 348 | test("FR$beta", c(beta_BRENT=0.911297), tol=0.01) 349 | test("FR$rho", c(rho=-0.006326975)) 350 | test("FR$sigma_M", c(sigma_M=0.6299893)) 351 | test("FR$sigma_R", c(sigma_R=0.6845977)) 352 | test("FR$alpha.se", c(alpha.se=0.6301423)) 353 | test("FR$rho.se", c(rho.se=0.180226)) 354 | test("FR$sigma_M.se", c(sigma_M.se=0.09403069)) 355 | test("FR$sigma_R.se", c(sigma_R.se=0.08726811)) 356 | test("FR$negloglik", c(negloglik=429.9065)) 357 | test("FR$pvmr", c(pvmr=0.6302378)) 358 | 359 | } 360 | 361 | test_fit_jointpenalty <- function (fast_only = FALSE) { 362 | guess <- c(alpha = 22.415066, beta_BRENT = 0.67375332, rho=-0.04976431, 363 | sigma_M=0.78760586, sigma_R=0.83733729, M0=0, R0=0) 364 | test("partialCI:::pci.jointpenalty.guess(as.zoo(OIL$WTI), as.zoo(OIL$BRENT), include_alpha=TRUE)", guess) 365 | 366 | guess0 <- structure(c(0.673753323158499, -0.0499696397350701, 0.787500882009433, 367 | 0.837430798633094, 0, 22.4150662982716), .Names = c("beta_BRENT", 368 | "rho", "sigma_M", "sigma_R", "M0", "R0")) 369 | test("partialCI:::pci.jointpenalty.guess(as.zoo(OIL$WTI), as.zoo(OIL$BRENT))", guess0) 370 | 371 | rw.par <- structure(c(22.4150662982132, 0.673753316670566, 0, 0, 1.23155657754959, 0, 0), 372 | .Names = c("alpha", "beta_BRENT", "rho", "sigma_M", "sigma_R", "M0", "R0")) 373 | test("partialCI:::fit.pci.jointpenalty.rw(as.zoo(OIL$WTI), as.zoo(OIL$BRENT), include_alpha=TRUE)$par", rw.par) 374 | 375 | rw.par0 <- structure(c(0.673753316670566, 0, 0, 1.23155657754959, 0, 22.415066298274), 376 | .Names = c("beta_BRENT", "rho", "sigma_M", "sigma_R", "M0", "R0")) 377 | test("partialCI:::fit.pci.jointpenalty.rw(as.zoo(OIL$WTI), as.zoo(OIL$BRENT))$par", rw.par0) 378 | 379 | mr.par <- structure(c(19.0448876678824, 0.707408539377214, 0.979236271148003, 380 | 
1.22640513769963, 0, 0, 0), 381 | .Names = c("alpha", "beta_BRENT", 382 | "rho", "sigma_M", "sigma_R", "M0", "R0")) 383 | test("partialCI:::fit.pci.jointpenalty.mr(as.zoo(OIL$WTI), as.zoo(OIL$BRENT),include_alpha=TRUE)$par", mr.par) 384 | 385 | mr.par0 <- structure(c(0.704082607377607, 0.98035806173428, 1.22650034762294, 386 | 0, 0, 19.1413233596611), .Names = c("beta_BRENT", "rho", "sigma_M", 387 | "sigma_R", "M0", "R0")) 388 | test("partialCI:::fit.pci.jointpenalty.mr(as.zoo(OIL$WTI), as.zoo(OIL$BRENT))$par", mr.par0) 389 | 390 | both.par <- structure(c(14.5845792002844, 0.743252984476208, 0, 0.826758135981716, 391 | 0.794395597894045, 0, 0), 392 | .Names = c("alpha", "beta_BRENT", "rho", 393 | "sigma_M", "sigma_R", "M0", "R0")) 394 | 395 | test("partialCI:::fit.pci.jointpenalty.both(as.zoo(OIL$WTI), as.zoo(OIL$BRENT),include_alpha=TRUE)$par", both.par, tol=0.05) 396 | 397 | both.par0 <- structure(c(0.743252984476208, 0, 0.826758135981716, 398 | 0.794395597894045, 0, 14.5845792002844), 399 | .Names = c("beta_BRENT", "rho", 400 | "sigma_M", "sigma_R", "M0", "R0")) 401 | 402 | test("partialCI:::fit.pci.jointpenalty.both(as.zoo(OIL$WTI), as.zoo(OIL$BRENT))$par", both.par0, tol=0.05) 403 | 404 | rwrob.par <- structure(c(28.1949715154527, 0.620210939294621, 0, 0, 0.948726114365134, 405 | 0, 0), .Names = c("alpha", "beta_BRENT", "rho", "sigma_M", "sigma_R", 406 | "M0", "R0")) 407 | test("partialCI:::fit.pci.jointpenalty.rw(as.zoo(OIL$WTI), as.zoo(OIL$BRENT), robust=TRUE, include_alpha=TRUE)$par", rwrob.par) 408 | 409 | rwrob.par0 <- structure(c(0.620210939294621, 0, 0, 0.948726114365134, 410 | 0, 28.1949715154527), .Names = c("beta_BRENT", "rho", "sigma_M", "sigma_R", 411 | "M0", "R0")) 412 | test("partialCI:::fit.pci.jointpenalty.rw(as.zoo(OIL$WTI), as.zoo(OIL$BRENT), robust=TRUE)$par", rwrob.par0) 413 | 414 | mrrob.par <- structure(c(28.1948554986119, 0.62081912655694, 0.993012863223067, 415 | 0.948691236006445, 0, 0, 0), .Names = c("alpha", "beta_BRENT", 416 | "rho", "sigma_M", "sigma_R", "M0", "R0")) 417 | test("partialCI:::fit.pci.jointpenalty.mr(as.zoo(OIL$WTI), as.zoo(OIL$BRENT), robust=TRUE, include_alpha=TRUE)$par", mrrob.par) 418 | 419 | mrrob.par0 <- structure(c(0.634017098277415, 0.992411921549702, 0.948914138555383, 420 | 0, 0, 26.7041944119358), .Names = c("beta_BRENT", "rho", "sigma_M", 421 | "sigma_R", "M0", "R0")) 422 | test("partialCI:::fit.pci.jointpenalty.mr(as.zoo(OIL$WTI), as.zoo(OIL$BRENT), robust=TRUE)$par", mrrob.par0) 423 | 424 | bothrob.par <- structure(c(20.098585528449, 0.692830550628575, 0, 0.599736912154181, 425 | 0.67441954256228, 0, 0), .Names = c("alpha", "beta_BRENT", "rho", 426 | "sigma_M", "sigma_R", "M0", "R0")) 427 | test("partialCI:::fit.pci.jointpenalty.both(as.zoo(OIL$WTI), as.zoo(OIL$BRENT), robust=TRUE, include_alpha=TRUE)$par", bothrob.par, tol=0.05) 428 | 429 | bothrob.par0 <- structure(c(0.692830550628575, 0, 0.599736912154181, 430 | 0.67441954256228, 0, 20.098585528449), .Names = c("beta_BRENT", "rho", 431 | "sigma_M", "sigma_R", "M0", "R0")) 432 | test("partialCI:::fit.pci.jointpenalty.both(as.zoo(OIL$WTI), as.zoo(OIL$BRENT), robust=TRUE)$par", bothrob.par0, tol=0.05) 433 | 434 | } 435 | 436 | test_fit <- function (fast_only = FALSE) { 437 | test_fit_twostep(fast_only) 438 | test_fit_jointpenalty(fast_only) 439 | 440 | test("partialCI:::fit.pci(as.zoo(OIL$WTI), as.zoo(OIL$BRENT))$negloglik", c(negloglik=432.4849)) 441 | test("partialCI:::fit.pci(as.zoo(OIL$WTI), as.zoo(OIL$BRENT), robust=TRUE)$negloglik", c(negloglik=422.9159)) 442 | 
test("partialCI:::fit.pci(as.zoo(OIL$WTI), as.zoo(OIL$BRENT), \"twostep\")$negloglik", c(negloglik=436.6048)) 443 | test("partialCI:::fit.pci(as.zoo(OIL$WTI), as.zoo(OIL$BRENT), \"twostep\", robust=TRUE)$negloglik", c(negloglik=429.9065)) 444 | test("partialCI:::fit.pci(as.zoo(OIL$WTI), as.zoo(OIL$BRENT), par_model=\"ar1\")$negloglik", c(negloglik=451.201)) 445 | test("partialCI:::fit.pci(as.zoo(OIL$WTI), as.zoo(OIL$BRENT), par_model=\"rw\")$negloglik", c(negloglik=452.3663)) 446 | test("partialCI:::fit.pci(as.zoo(OIL$WTI), as.zoo(OIL$BRENT), par_model=\"ar1\", robust=TRUE)$negloglik", c(negloglik=439.6296)) 447 | test("partialCI:::fit.pci(as.zoo(OIL$WTI), as.zoo(OIL$BRENT), par_model=\"rw\", robust=TRUE)$negloglik", c(negloglik=439.8615)) 448 | 449 | f <- partialCI:::fit.pci(as.zoo(OIL$WTI), as.zoo(OIL$BRENT), include_alpha=TRUE) 450 | last_state <- structure(list(Y = 52.99, Yhat = 57.0022715118106, Z = -4.01227151181061, 451 | M = 0.359169355426614, R = -4.37144086723722, eps_M = 0.369402426645854, 452 | eps_R = 0.579553335373364), .Names = c("Y", "Yhat", "Z", 453 | "M", "R", "eps_M", "eps_R"), row.names = "2015-02-09", class = "data.frame") 454 | test("tail(partialCI:::statehistory.pci(partialCI:::fit.pci(as.zoo(OIL$WTI), as.zoo(OIL$BRENT), include_alpha=TRUE)),1)", last_state, tol=0.05) 455 | 456 | f0 <- partialCI:::fit.pci(as.zoo(OIL$WTI), as.zoo(OIL$BRENT)) 457 | last_state0 <- structure(list(Y = 52.99, Yhat = 42.3231449335717, Z = 10.6668550664283, 458 | M = 0.355883860455669, R = 10.3109712059726, eps_M = 0.368030242920157, 459 | eps_R = 0.58348828109853), .Names = c("Y", "Yhat", "Z", "M", 460 | "R", "eps_M", "eps_R"), row.names = "2015-02-09", class = "data.frame") 461 | 462 | test("tail(partialCI:::statehistory.pci(partialCI:::fit.pci(as.zoo(OIL$WTI), as.zoo(OIL$BRENT),)),1)", last_state0, tol=0.05) 463 | 464 | 465 | 466 | df <- structure(list(alpha = 14.6829401164186, beta = 0.742455481013507, 467 | rho = -0.0232576651129888, sigma_M = 0.816415189337928, sigma_R = 0.804964566979587, 468 | M0 = 0, R0 = 0, alpha.se = 6.1306344674343, beta.se = 0.0559775667569459, 469 | rho.se = 0.172275543552826, sigma_M.se = 0.108139711899864, 470 | sigma_R.se = 0.109367951001435, M0.se = NA_real_, R0.se = NA_real_, 471 | negloglik = 432.475807192823, pvmr = 0.67807277837379), .Names = c("alpha", 472 | "beta", "rho", "sigma_M", "sigma_R", "M0", "R0", "alpha.se", 473 | "beta.se", "rho.se", "sigma_M.se", "sigma_R.se", "M0.se", "R0.se", 474 | "negloglik", "pvmr"), row.names = "beta_BRENT", class = "data.frame") 475 | 476 | test("as.data.frame(partialCI:::fit.pci(as.zoo(OIL$WTI), as.zoo(OIL$BRENT), include_alpha=TRUE))", df, tol=0.05) 477 | 478 | df0 <- structure(list(beta = 0.742511314624065, rho = -0.0275437658638479, 479 | sigma_M = 0.813095055217725, sigma_R = 0.808578327606414, 480 | M0 = 0, R0 = 14.9933286994785, beta.se = 0.0560546241014983, 481 | rho.se = 0.17155905646127, sigma_M.se = 0.107698809017417, 482 | sigma_R.se = 0.108177642504822, M0.se = NA_real_, R0.se = NA_real_, 483 | negloglik = 432.520303570585, pvmr = 0.675291879436992), .Names = c("beta", 484 | "rho", "sigma_M", "sigma_R", "M0", "R0", "beta.se", "rho.se", 485 | "sigma_M.se", "sigma_R.se", "M0.se", "R0.se", "negloglik", "pvmr" 486 | ), row.names = "beta_BRENT", class = "data.frame") 487 | test("as.data.frame(partialCI:::fit.pci(as.zoo(OIL$WTI), as.zoo(OIL$BRENT)))", df0, tol=0.05) 488 | } 489 | 490 | test_pci <- function (fast_only=FALSE) { 491 | # Comprehensive unit testing for PAR package 492 | 493 | options(warn=1) 494 | 
library(zoo) 495 | 496 | test_lr(fast_only) 497 | test.likelihood_ratio.pci(fast_only) 498 | test_fit(fast_only) 499 | # test_lr2(fast_only) 500 | 501 | if (all.tests.pass) { 502 | cat("SUCCESS! All tests passed.\n") 503 | } else { 504 | stop("ERRORS! ", all.tests.error.count," tests failed\n") 505 | } 506 | } 507 | 508 | test_pci(TRUE) 509 | --------------------------------------------------------------------------------
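
Usage sketch (illustrative, not part of the package sources): the snippet below shows one way hedge.pci(), defined above, might be called on the OIL xts object (columns WTI, BRENT, DBO) from tests/tests.R. It assumes partialCI is installed and that hedge.pci is exported from the package namespace (otherwise call partialCI:::hedge.pci); as in the tests, the series are converted with as.zoo before fitting, and the argument names follow the hedge.pci signature.

library(partialCI)
library(xts)                         # OIL is an xts object; xts also provides as.zoo

OILz <- as.zoo(OIL)                  # mirror the tests, which pass zoo series to fit.pci

# Hedge WTI against the two candidate factors using the greedy ("full") search
# and the joint-penalty ("jp") fitting method; use.multicore = FALSE avoids
# mclapply, which is not available on Windows.
h <- hedge.pci(OILz[, "WTI"], OILz[, c("BRENT", "DBO")],
               maxfact = 2, search_type = "full", pci_opt_method = "jp",
               use.multicore = FALSE)

h$index_names                        # columns of X chosen for the hedge
print(h)                             # print.pci.hedge: prints the underlying PCI fit (h$pci)
plot(h)                              # plot.pci.hedge: plots the underlying PCI fit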