├── .Rbuildignore
├── .gitignore
├── src
│   ├── cfit.o
│   ├── RcppExports.o
│   ├── partialCI.dll
│   ├── partialCI_init.o
│   ├── partialCI_init.c
│   ├── RcppExports.cpp
│   └── cfit.cc
├── vignettes
│   ├── figures
│   │   ├── OLSRES_RSARSB_2006_2016.png
│   │   └── MCSDHIST_RSARSB_2006_2016.png
│   └── pci_vignette.Rmd
├── partialCI.Rproj
├── cran-comments.md
├── R
│   ├── RcppExports.R
│   ├── rand.R
│   ├── support.R
│   ├── extras.R
│   ├── lrtables.R
│   ├── fit_pci.R
│   ├── fit_pci_wo_alpha.R
│   ├── fit_pci_with_alpha.R
│   └── hedge.R
├── man
│   ├── multigetYahooPrices.Rd
│   ├── yfit.pci.Rd
│   ├── yhedge.pci.Rd
│   ├── statehistory.pci.Rd
│   ├── which.hypothesis.pcitest.Rd
│   ├── likelihood_ratio.pci.Rd
│   ├── rpci.Rd
│   ├── loglik.pci.Rd
│   ├── test.pci.Rd
│   ├── partialCI-package.Rd
│   ├── fit.pci.Rd
│   └── hedge.pci.Rd
├── DESCRIPTION
├── NAMESPACE
├── README.md
└── tests
    └── tests.R

/.Rbuildignore:
--------------------------------------------------------------------------------
 1 | ^.*\.Rproj$
 2 | ^\.Rproj\.user$
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | .Rproj.user
 2 | .Rhistory
 3 | .RData
 4 | .Ruserdata
--------------------------------------------------------------------------------
/src/cfit.o:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/matthewclegg/partialCI/HEAD/src/cfit.o
--------------------------------------------------------------------------------
/src/RcppExports.o:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/matthewclegg/partialCI/HEAD/src/RcppExports.o
--------------------------------------------------------------------------------
/src/partialCI.dll:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/matthewclegg/partialCI/HEAD/src/partialCI.dll
--------------------------------------------------------------------------------
/src/partialCI_init.o:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/matthewclegg/partialCI/HEAD/src/partialCI_init.o
--------------------------------------------------------------------------------
/vignettes/figures/OLSRES_RSARSB_2006_2016.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/matthewclegg/partialCI/HEAD/vignettes/figures/OLSRES_RSARSB_2006_2016.png
--------------------------------------------------------------------------------
/vignettes/figures/MCSDHIST_RSARSB_2006_2016.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/matthewclegg/partialCI/HEAD/vignettes/figures/MCSDHIST_RSARSB_2006_2016.png
--------------------------------------------------------------------------------
/partialCI.Rproj:
--------------------------------------------------------------------------------
 1 | Version: 1.0
 2 | 
 3 | RestoreWorkspace: Default
 4 | SaveWorkspace: Default
 5 | AlwaysSaveHistory: Default
 6 | 
 7 | EnableCodeIndexing: Yes
 8 | UseSpacesForTab: Yes
 9 | NumSpacesForTab: 2
10 | Encoding: UTF-8
11 | 
12 | RnwWeave: Sweave
13 | LaTeX: pdfLaTeX
14 | 
15 | BuildType: Package
16 | PackageUseDevtools: Yes
17 | PackageInstallArgs: --no-multiarch --with-keep.source
--------------------------------------------------------------------------------
/cran-comments.md:
-------------------------------------------------------------------------------- 1 | ## Test environments 2 | * local Windows 10 install, R 3.3.2 and R-devel 3 | * local Windows 7 install, R 3.3.2 and R-devel 4 | * local ubuntu 16.04, R 3.3.2 5 | * win-builder (devel and release) 6 | 7 | ## R CMD check results 8 | 9 | 0 errors | 0 warnings | 1 note 10 | 11 | * This is a new release. 12 | 13 | * checking CRAN incoming feasibility ... NOTE 14 | Maintainer: 'Jonas Rende ' 15 | 16 | ## Reverse dependencies 17 | 18 | This is a new release, so there are no reverse dependencies. 19 | 20 | --- 21 | 22 | * I have run R CMD check on downstream dependencies of partialCI. 23 | All packages that I could install passed. 24 | 25 | 26 | * FAILURE SUMMARY 27 | 28 | ## Check results 29 | 0 packages with problems 30 | -------------------------------------------------------------------------------- /R/RcppExports.R: -------------------------------------------------------------------------------- 1 | # Generated by using Rcpp::compileAttributes() -> do not edit by hand 2 | # Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 3 | 4 | lagged_variance_c <- function(X, k, n) { 5 | .Call('partialCI_lagged_variance_c', PACKAGE = 'partialCI', X, k, n) 6 | } 7 | 8 | estimate_rho_par_c <- function(X) { 9 | .Call('partialCI_estimate_rho_par_c', PACKAGE = 'partialCI', X) 10 | } 11 | 12 | estimate_par_c <- function(X, rho_max = 1.0) { 13 | .Call('partialCI_estimate_par_c', PACKAGE = 'partialCI', X, rho_max) 14 | } 15 | 16 | pvmr_par_c <- function(rho, sigma_M, sigma_R) { 17 | .Call('partialCI_pvmr_par_c', PACKAGE = 'partialCI', rho, sigma_M, sigma_R) 18 | } 19 | 20 | kalman_gain_par_mr <- function(rho, sigma_M, sigma_R) { 21 | .Call('partialCI_kalman_gain_par_mr', PACKAGE = 'partialCI', rho, sigma_M, sigma_R) 22 | } 23 | 24 | loglik_par_c <- function(Y, rho, sigma_M, sigma_R, M0, R0) { 25 | .Call('partialCI_loglik_par_c', PACKAGE = 'partialCI', Y, rho, sigma_M, sigma_R, M0, R0) 26 | } 27 | 28 | loglik_par_t_c <- function(Y, rho, sigma_M, sigma_R, M0, R0, nu = 5.0) { 29 | .Call('partialCI_loglik_par_t_c', PACKAGE = 'partialCI', Y, rho, sigma_M, sigma_R, M0, R0, nu) 30 | } 31 | 32 | -------------------------------------------------------------------------------- /src/partialCI_init.c: -------------------------------------------------------------------------------- 1 | // This file was automatically generated by 'Kmisc::registerFunctions()' 2 | 3 | #include 4 | #include 5 | 6 | #include 7 | 8 | SEXP partialCI_lagged_variance_c(SEXP XSEXP, SEXP kSEXP, SEXP nSEXP); 9 | SEXP partialCI_estimate_rho_par_c(SEXP XSEXP); 10 | SEXP partialCI_estimate_par_c(SEXP XSEXP, SEXP rho_maxSEXP); 11 | SEXP partialCI_pvmr_par_c(SEXP rhoSEXP, SEXP sigma_MSEXP, SEXP sigma_RSEXP); 12 | SEXP partialCI_kalman_gain_par_mr(SEXP rhoSEXP, SEXP sigma_MSEXP, SEXP sigma_RSEXP); 13 | SEXP partialCI_loglik_par_c(SEXP YSEXP, SEXP rhoSEXP, SEXP sigma_MSEXP, SEXP sigma_RSEXP, SEXP M0SEXP, SEXP R0SEXP); 14 | SEXP partialCI_loglik_par_t_c(SEXP YSEXP, SEXP rhoSEXP, SEXP sigma_MSEXP, SEXP sigma_RSEXP, SEXP M0SEXP, SEXP R0SEXP, SEXP nuSEXP); 15 | 16 | R_CallMethodDef callMethods[] = { 17 | {"partialCI_lagged_variance_c", (DL_FUNC) &partialCI_lagged_variance_c, 3}, 18 | {"partialCI_estimate_rho_par_c", (DL_FUNC) &partialCI_estimate_rho_par_c, 1}, 19 | {"partialCI_estimate_par_c", (DL_FUNC) &partialCI_estimate_par_c, 2}, 20 | {"partialCI_pvmr_par_c", (DL_FUNC) &partialCI_pvmr_par_c, 3}, 21 | {"partialCI_kalman_gain_par_mr", (DL_FUNC) &partialCI_kalman_gain_par_mr, 
3}, 22 | {"partialCI_loglik_par_c", (DL_FUNC) &partialCI_loglik_par_c, 6}, 23 | {"partialCI_loglik_par_t_c", (DL_FUNC) &partialCI_loglik_par_t_c, 7}, 24 | {NULL, NULL, 0} 25 | }; 26 | 27 | void R_init_partialCI(DllInfo *info) { 28 | R_registerRoutines(info, NULL, callMethods, NULL, NULL); 29 | R_useDynamicSymbols(info, FALSE); 30 | } 31 | 32 | -------------------------------------------------------------------------------- /man/multigetYahooPrices.Rd: -------------------------------------------------------------------------------- 1 | \name{multigetYahooPrices} 2 | \alias{multigetYahooPrices} 3 | \title{ 4 | Fetches closing prices of multiple stock tickers 5 | } 6 | \description{ 7 | Fetches a \code{zoo} \code{data.frame} of daily closing prices of multiple stock tickers. 8 | } 9 | \usage{ 10 | multigetYahooPrices(components, start, end, quiet = FALSE, adjust = TRUE) 11 | } 12 | \arguments{ 13 | \item{components}{Character vector of Yahoo ticker symbols} 14 | \item{start}{First date of desired data in YYYYMMDD format. Default is 15 | earliest date of all series} 16 | \item{end}{Last date of desired data in YYYYMMDD format. Default is the 17 | last date for which data is available} 18 | \item{quiet}{If \code{FALSE}, then information is printed about the 19 | progress of the fetch operation} 20 | \item{adjust}{If \code{TRUE}, then adjusted closing prices are returned. 21 | Otherwise, unadjusted prices are returned.} 22 | } 23 | % \details{ 24 | %% ~~ If necessary, more details than the description above ~~ 25 | % } 26 | \value{ 27 | Returns a \code{\link{zoo}} \code{\link{data.frame}} containing the 28 | closing prices of the series listed in the \code{components} parameter, 29 | one column per price series. 30 | } 31 | \author{ 32 | Matthew Clegg \email{matthewcleggphd@gmail.com} 33 | 34 | Christopher Krauss \email{christopher.krauss@fau.de} 35 | 36 | Jonas Rende \email{jonas.rende@fau.de} 37 | } 38 | 39 | \seealso{\code{\link[TTR]{getYahooData}} 40 | } 41 | \examples{ 42 | \dontrun{ 43 | ##---- Should be DIRECTLY executable !! ---- 44 | ##-- ==> Define data, use random, 45 | ##-- or do help(data=index) for the standard data sets. 46 | 47 | ### Note: you must have a working internet 48 | ### connection for these examples to work! 49 | spy.voo <- multigetYahooPrices(c("SPY","VOO")) 50 | } 51 | } -------------------------------------------------------------------------------- /man/yfit.pci.Rd: -------------------------------------------------------------------------------- 1 | \name{yfit.pci} 2 | \alias{yfit.pci} 3 | \title{ 4 | Fetch series from Yahoo and perform a partial cointegration fit. 5 | } 6 | \description{ 7 | Fetch series from Yahoo and perform a partial cointegration fit. 8 | } 9 | \usage{ 10 | yfit.pci(target, factors, start, end, na.rm=FALSE, ...) 11 | } 12 | \arguments{ 13 | \item{target}{The ticker symbol of the stock price series that is to be modeled. } 14 | \item{factors}{A list of ticker symbols of stock price series to be used in modeling 15 | \code{target}} 16 | \item{start}{The starting date for which data is to be fetched, given in the 17 | format YYYYMMDD. Default: 2 years ago today.} 18 | \item{end}{The ending date for which data is to be fetched, given in the 19 | format YYYYMMDD. Default: today.} 20 | \item{na.rm}{If TRUE, then \code{NA}'s will be removed from the \code{data.frame} 21 | of fetched prices. 
A heuristic approach is used to decide between deleting 22 | securities versus deleting days.} 23 | \item{...}{Additional optional parameters to be passed to \code{\link{fit.pci}}} 24 | } 25 | 26 | %\details{ 27 | %% ~~ If necessary, more details than the description above ~~ 28 | %} 29 | \value{ 30 | An \code{S3} object of class \code{pci.fit} representing the best fit that 31 | was found. 32 | } 33 | \author{ 34 | Matthew Clegg \email{matthewcleggphd@gmail.com} 35 | 36 | Christopher Krauss \email{christopher.krauss@fau.de} 37 | 38 | Jonas Rende \email{jonas.rende@fau.de} 39 | } 40 | 41 | \seealso{ 42 | \code{\link{fit.pci}} 43 | } 44 | 45 | \examples{ 46 | # Compare a cointegration fit Coca-Cola and Pepsi to a partial cointegration fit. 47 | # Note that yegcm(X, Y) has a different parameter ordering than yfit.pci(Y, X) 48 | # yegcm("PEP", "KO", start=as.numeric(format(Sys.Date() - 365*2, "%Y%m%d"))) 49 | # yfit.pci("KO", "PEP") 50 | } 51 | \keyword{ ts } 52 | \keyword{ models } 53 | 54 | -------------------------------------------------------------------------------- /man/yhedge.pci.Rd: -------------------------------------------------------------------------------- 1 | \name{yhedge.pci} 2 | \alias{yhedge.pci} 3 | \title{ 4 | Hedge portfolio for a stock price series 5 | } 6 | \description{ 7 | Computes the hedge of a stock price series fetched form Yahoo! 8 | using one or more other stock price series also fetched form Yahoo! 9 | } 10 | \usage{ 11 | yhedge.pci(target, factors, start, end, na.rm=FALSE, ...) 12 | } 13 | \arguments{ 14 | \item{target}{The ticker symbol of the stock price series that is to be modeled. } 15 | \item{factors}{A list of ticker symbols of stock price series to be used in modeling 16 | \code{target}} 17 | \item{start}{The starting date for which data is to be fetched, given in the 18 | format YYYYMMDD. Default: 2 years ago today.} 19 | \item{end}{The ending date for which data is to be fetched, given in the 20 | format YYYYMMDD. Default: today.} 21 | \item{na.rm}{If TRUE, then \code{NA}'s will be removed from the \code{data.frame} 22 | of fetched prices. A heuristic approach is used to decide between deleting 23 | securities versus deleting days.} 24 | \item{...}{Additional optional parameters to be passed to \code{\link{fit.pci}}} 25 | } 26 | 27 | %\details{ 28 | %% ~~ If necessary, more details than the description above ~~ 29 | %} 30 | \value{ 31 | An \code{S3} object of class \code{pci.hedge} representing the best fit that 32 | was found. 33 | } 34 | \author{ 35 | Matthew Clegg \email{matthewcleggphd@gmail.com} 36 | 37 | Christopher Krauss \email{christopher.krauss@fau.de} 38 | 39 | Jonas Rende \email{jonas.rende@fau.de} 40 | } 41 | 42 | \seealso{ 43 | \code{\link{fit.pci}} 44 | } 45 | 46 | \examples{ 47 | # Compute the best hedge of Coca-Cola using sector ETFS. 
48 | # sectorETFS <- c("XLB","XLE","XLF","XLI","XLK","XLP","XLU","XLV","XLY") 49 | # hedge <- yhedge.pci("KO", sectorETFS) 50 | # hedge 51 | # test.pci(hedge$pci) 52 | # plot(hedge) 53 | } 54 | \keyword{ ts } 55 | \keyword{ models } 56 | 57 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: partialCI 2 | Type: Package 3 | Title: Partial Cointegration 4 | Version: 1.1.1 5 | Date: 2017-04-21 6 | Authors@R: c(person("Matthew", "Clegg", role=c("aut"), 7 | email="matthewcleggphd@gmail.com"), 8 | person("Christopher", "Krauss", role=c("aut"), 9 | email="christopher.krauss@fau.de"), 10 | person("Jonas", "Rende", role=c("cre","aut"), 11 | email="jonas.rende@fau.de")) 12 | Author: Matthew Clegg [aut], 13 | Christopher Krauss [aut], 14 | Jonas Rende [cre, aut] 15 | Maintainer: Jonas Rende 16 | Description: A collection of time series is partially cointegrated if a linear combination of these time series can be found so that the residual spread is partially autoregressive - meaning that it can be represented as a sum of an autoregressive series and a random walk. This concept is useful in modeling certain sets of financial time series and beyond, as it allows for the spread to contain transient and permanent components alike. Partial cointegration has been introduced by Clegg and Krauss (2016) , along with a large-scale empirical application to financial market data. The partialCI package comprises estimation, testing, and simulation routines for partial cointegration models in state space. Clegg et al. (2017) provide an in in-depth discussion of the package functionality as well as illustrating examples in the fields of finance and macroeconomics. 
17 | License: GPL-2 | GPL-3 18 | Depends: 19 | partialAR 20 | Imports: 21 | zoo, 22 | parallel, 23 | ggplot2, 24 | grid, 25 | MASS, 26 | TTR, 27 | data.table, 28 | glmnet, 29 | methods, 30 | Rcpp, 31 | FKF, 32 | xts 33 | Suggests: 34 | egcm, 35 | knitr, 36 | rmarkdown 37 | RoxygenNote: 6.0.1 38 | VignetteBuilder: knitr 39 | LinkingTo: Rcpp 40 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | S3method(as.data.frame,pci.fit) 2 | S3method(plot,pci.fit) 3 | S3method(plot,pci.hedge) 4 | S3method(print,pci.fit) 5 | S3method(print,pci.hedge) 6 | S3method(print,pcitest) 7 | export(fit.pci) 8 | export(hedge.pci) 9 | export(likelihood_ratio.pci) 10 | export(loglik.pci) 11 | export(multigetYahooPrices) 12 | export(rpci) 13 | export(statehistory.pci) 14 | export(test.pci) 15 | export(which.hypothesis.pcitest) 16 | export(yfit.pci) 17 | export(yhedge.pci) 18 | import(glmnet) 19 | importFrom(FKF,fkf) 20 | importFrom(MASS,rlm) 21 | importFrom(Rcpp,sourceCpp) 22 | importFrom(TTR,getYahooData) 23 | importFrom(data.table,data.table) 24 | importFrom(data.table,setkey) 25 | importFrom(data.table,setnames) 26 | importFrom(ggplot2,Facet) 27 | importFrom(ggplot2,aes) 28 | importFrom(ggplot2,geom_hline) 29 | importFrom(ggplot2,geom_line) 30 | importFrom(ggplot2,ggplot) 31 | importFrom(ggplot2,ggtitle) 32 | importFrom(ggplot2,scale_colour_manual) 33 | importFrom(ggplot2,scale_linetype_manual) 34 | importFrom(ggplot2,scale_size_manual) 35 | importFrom(ggplot2,theme) 36 | importFrom(ggplot2,xlab) 37 | importFrom(ggplot2,ylab) 38 | importFrom(graphics,plot) 39 | importFrom(grid,grid.layout) 40 | importFrom(grid,grid.newpage) 41 | importFrom(grid,pushViewport) 42 | importFrom(grid,viewport) 43 | importFrom(methods,is) 44 | importFrom(parallel,mclapply) 45 | importFrom(partialAR,fit.par) 46 | importFrom(partialAR,kalman.gain.par) 47 | importFrom(partialAR,loglik.par) 48 | importFrom(partialAR,pvmr.par) 49 | importFrom(partialAR,rpar) 50 | importFrom(stats,approx) 51 | importFrom(stats,coef) 52 | importFrom(stats,cor) 53 | importFrom(stats,lm) 54 | importFrom(stats,optim) 55 | importFrom(stats,quantile) 56 | importFrom(stats,rnorm) 57 | importFrom(stats,rt) 58 | importFrom(stats,sd) 59 | importFrom(utils,read.table) 60 | importFrom(utils,write.table) 61 | importFrom(zoo,coredata) 62 | importFrom(zoo,index) 63 | importFrom(zoo,is.zoo) 64 | importFrom(zoo,zoo) 65 | import(xts) 66 | useDynLib(partialCI, .registration=TRUE) 67 | -------------------------------------------------------------------------------- /man/statehistory.pci.Rd: -------------------------------------------------------------------------------- 1 | \name{statehistory.pci} 2 | \alias{statehistory.pci} 3 | \title{ 4 | Generates the sequence of inferred states of a partial cointegration model 5 | } 6 | \description{Generates the sequence of inferred states of a partial cointegration model} 7 | \usage{ 8 | statehistory.pci(A, data = A$data, basis = A$basis) 9 | } 10 | %- maybe also 'usage' for other objects documented here. 11 | \arguments{ 12 | \item{A}{ 13 | An object returned by \code{\link{fit.pci}} representing a partial cointegration fit. 14 | } 15 | \item{data}{ 16 | The data history for which the inferred states are to be computed. This should 17 | be a \code{(k+1) x n} matrix, where \code{k} is the number of independent variables 18 | and \code{n} is the number of observations. 
If this is omitted, then uses the 19 | data history that was used in fitting the model \code{A}. 20 | } 21 | \item{basis}{ 22 | The coefficients of the independent variables. This is a vector 23 | of length \code{k}. If this is omitted, 24 | then uses the coefficients that were computed in fitting the model \code{A}. 25 | } 26 | } 27 | \details{ 28 | Computes the expected internal states of the model over the course 29 | of the data history. 30 | } 31 | \value{ 32 | Returns a \code{data.frame} with the following columns: 33 | \item{Y}{The variable being modeled} 34 | \item{X1,...,X_N}{The independent variables} 35 | \item{Z}{The residual series \code{Y - beta \%*\% X}} 36 | \item{M}{The estimated state of the mean reverting component} 37 | \item{R}{The estimated state of the random walk component} 38 | \item{eps_M}{The innovation to the mean reverting component} 39 | \item{eps_R}{The innovation to the random walk component} 40 | } 41 | \author{ 42 | Matthew Clegg \email{matthewcleggphd@gmail.com} 43 | 44 | Christopher Krauss \email{christopher.krauss@fau.de} 45 | 46 | Jonas Rende \email{jonas.rende@fau.de} 47 | } 48 | \seealso{ 49 | \code{\link[egcm:egcm]{egcm}} Engle-Granger cointegration model 50 | 51 | \code{\link[partialAR:partialAR]{partialAR}} Partially autoregressive models 52 | } 53 | \examples{ 54 | ##---- Should be DIRECTLY executable !! ---- 55 | ##-- ==> Define data, use random, 56 | ##-- or do help(data=index) for the standard data sets. 57 | 58 | YX <- rpci(n=1000, alpha=1, beta=c(2,3), sigma_C=c(0.1,0.1), rho=0.9, sigma_M=1, sigma_R=2) 59 | f <- fit.pci(YX[,1], YX[,2:ncol(YX)]) 60 | statehistory.pci(f) 61 | 62 | } 63 | \keyword{ ts } 64 | \keyword{ models } 65 | -------------------------------------------------------------------------------- /man/which.hypothesis.pcitest.Rd: -------------------------------------------------------------------------------- 1 | \name{which.hypothesis.pcitest} 2 | \alias{which.hypothesis.pcitest} 3 | \title{ 4 | Returns the preferred hypothesis when testing for partial cointegration 5 | } 6 | \description{Returns the preferred hypothesis when testing for partial cointegration} 7 | \usage{ 8 | which.hypothesis.pcitest(AT) 9 | } 10 | %- maybe also 'usage' for other objects documented here. 11 | \arguments{ 12 | \item{AT}{ 13 | An object of class \code{"pcitest"} that has been returned by a 14 | previous call to \code{\link{test.pci}}. 15 | } 16 | } 17 | \details{ 18 | Based upon the critical value \code{alpha} that was given in the 19 | call to \code{test.pci} and the p-value that was computed, 20 | determines which hypothesis best fits the data. 21 | } 22 | \value{ 23 | If a non-robust fit was used, then one of the following values is returned: 24 | \item{\code{"PCI"}}{Partially cointegrated. Both the random walk hypothesis 25 | and the AR(1) hypothesis were rejected.} 26 | \item{\code{"RW"}}{Random walk.} 27 | \item{\code{"AR1"}}{Autoregressive of order one.} 28 | 29 | If a robust fit was used, then one of the following values is returned: 30 | \item{\code{"RPCI"}}{Partially cointegrated. 
Both the random walk hypothesis 31 | and the AR(1) hypothesis were rejected.} 32 | \item{\code{"RRW"}}{Random walk.} 33 | \item{\code{"RAR1"}}{Autoregressive of order one.} 34 | 35 | } 36 | \author{ 37 | Matthew Clegg \email{matthewcleggphd@gmail.com} 38 | 39 | Christopher Krauss \email{christopher.krauss@fau.de} 40 | 41 | Jonas Rende \email{jonas.rende@fau.de} 42 | } 43 | \seealso{ 44 | \code{\link{fit.pci}} Fits a partially cointegrated model 45 | 46 | \code{\link{likelihood_ratio.pci}} Computes the likelihood ratio of a PCI model 47 | versus a null model 48 | } 49 | \examples{ 50 | \dontrun{ 51 | # The following should usually return "PCI" 52 | YX <- rpci(n=1000, alpha=1, beta=c(2,3), sigma_C=c(0.1,0.1), rho=0.8, sigma_M=1, sigma_R=1) 53 | which.hypothesis.pcitest(test.pci(YX[,1], YX[,2:ncol(YX)])) 54 | 55 | # The following should usally return "RW" 56 | YX.rw <- rpci(n=1000, alpha=1, beta=c(2,3), sigma_C=c(0.1,0.1), rho=0.8, sigma_M=0, sigma_R=1) 57 | which.hypothesis.pcitest(test.pci(YX.rw[,1], YX.rw[,2:ncol(YX.rw)])) 58 | 59 | # The following should usually return "AR1" 60 | YX.mr <- rpci(n=1000, alpha=1, beta=c(2,3), sigma_C=c(0.1,0.1), rho=0.8, sigma_M=1, sigma_R=0) 61 | which.hypothesis.pcitest(test.pci(YX.mr[,1], YX.mr[,2:ncol(YX.mr)])) 62 | } 63 | } 64 | \keyword{ ts } 65 | \keyword{ models } 66 | 67 | -------------------------------------------------------------------------------- /R/rand.R: -------------------------------------------------------------------------------- 1 | # rand.R -- functions for generating random variates 2 | 3 | # Copyright (C) 2016 Matthew Clegg 4 | 5 | # This program is free software; you can redistribute it and/or modify 6 | # it under the terms of the GNU General Public License as published by 7 | # the Free Software Foundation; either version 2 of the License, or 8 | # (at your option) any later version. 9 | # 10 | # This program is distributed in the hope that it will be useful, 11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | # GNU General Public License for more details. 14 | # 15 | # A copy of the GNU General Public License is available at 16 | # http://www.r-project.org/Licenses/ 17 | 18 | 19 | rpci <- function (n, alpha, beta, sigma_C, rho, sigma_M, sigma_R, 20 | include.state = FALSE, robust=FALSE, nu=5) { 21 | # Generates a random partially cointegrated sequence. On input, n is the 22 | # length of the sequence to be generated. beta is a vector of length k 23 | # representing the coefficients of the factor loadings, and sigma_C is a 24 | # vector of length k representing the standard deviations of the increments 25 | # of the factor loadings. 26 | # 27 | # Generates a random realization of the sequence 28 | # y_t = alpha + beta_1 F_{1,t} + beta_2 F_{2,t} + ... + beta_k F_{k,t} + m_t + r_t 29 | # F_{i,j} = F_{i,j-1} + delta_{i,j} 30 | # m_t = rho m_{t-1} + eps_{M,t} 31 | # r_t = r_{t-1} + eps_{R,t} 32 | # delta_{i,j} ~ N(0, sigma_C^2) 33 | # eps_{M,t} ~ N(0, sigma_M^2) 34 | # eps_{R,t} ~ N(0, sigma_R^2) 35 | # 36 | # If include.state is FALSE, returns an n x (k+1) matrix whose columns 37 | # are y, F_1, F_2, ..., F_k. 
If include.state is TRUE, returns an 38 | # n x (2k + 6) matrix whose columns are 39 | # y, F_1, F_2, ..., F_k, x, m, r, delta_1, delta_2, ..., delta_k, 40 | # eps_M, eps_R 41 | 42 | if (!robust) { 43 | D <- do.call("cbind", lapply(1:length(beta), function(i) rnorm(n,0,sigma_C[i]))) 44 | } else { 45 | D <- do.call("cbind", lapply(1:length(beta), function(i) rt(n,nu) * sigma_C[i])) 46 | } 47 | F <- apply(D, 2, cumsum) 48 | colnames(F) <- paste ("F", 1:ncol(F), sep="_") 49 | PAR <- rpar (n, rho, sigma_M, sigma_R, include.state=TRUE, robust=robust, nu=nu) 50 | Y <- F %*% beta + PAR$X + alpha 51 | DF <- cbind(Y, F) 52 | colnames(DF)[1] <- "Y" 53 | if (include.state) { 54 | colnames(D) <- paste("delta", 1:ncol(D), sep="_") 55 | DF <- cbind(DF, X=PAR$X, M=PAR$M, R=PAR$R, D, eps_M=PAR$eps_M, eps_R=PAR$eps_R) 56 | } 57 | DF 58 | } 59 | -------------------------------------------------------------------------------- /R/support.R: -------------------------------------------------------------------------------- 1 | # support.R -- a miscellaneous collection of supporting functions 2 | 3 | # Copyright (C) 2016 Matthew Clegg 4 | 5 | # This program is free software; you can redistribute it and/or modify 6 | # it under the terms of the GNU General Public License as published by 7 | # the Free Software Foundation; either version 2 of the License, or 8 | # (at your option) any later version. 9 | # 10 | # This program is distributed in the hope that it will be useful, 11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | # GNU General Public License for more details. 14 | # 15 | # A copy of the GNU General Public License is available at 16 | # http://www.r-project.org/Licenses/ 17 | 18 | ctext <- function (str, n) { 19 | # Centers the string str in a field of n blanks 20 | if (nchar(str) == n) return(str) 21 | if (nchar(str) > n) return(substr(str, 1, n)) 22 | 23 | nleft <- floor((n - nchar(str))/2) 24 | nright <- n - (nchar(str) + nleft) 25 | left_pad <- paste(rep(" ", nleft), collapse="") 26 | right_pad <- paste(rep(" ", nright), collapse="") 27 | pstr <- paste(left_pad, str, right_pad, sep="") 28 | pstr 29 | } 30 | 31 | printf <- function (...) { cat(sprintf(...)); } 32 | println <- function (...) { cat(sprintf(...)); cat("\n"); } 33 | 34 | quantile.table.from.samples <- function (colname, samples, 35 | quantiles = seq(0.01, 0.99, by=0.01)) { 36 | # Builds a table of quantile values from the data.frame samples 37 | 38 | sample_sizes <- sort(unique(samples$n)) 39 | 40 | nq <- length(quantiles) 41 | ns <- length(sample_sizes) 42 | 43 | qtab <- matrix(NA, ncol=ns+1, nrow=nq+1) 44 | qtab[2:(nq+1), 1] <- quantiles 45 | qtab[1, 2:(ns+1)] <- sample_sizes 46 | 47 | for (i in 2:(ns+1)) { 48 | n <- sample_sizes[i-1] 49 | qtab[2:(nq+1), i] <- quantile(samples[samples$n == n, colname], quantiles) 50 | } 51 | 52 | qtab 53 | } 54 | 55 | quantile_table_interpolate <- function (qtab, sample_size, stat, stop.on.na=FALSE) { 56 | # On input, qtab is a dataframe of quantiles. Each column corresponds to 57 | # a sample size, and each row corresponds to a quantile value. The sample 58 | # sizes are given in the first row, and the quantiles are given in the 59 | # first column. 
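  # In the lookup below, findInterval() locates the largest tabulated sample
  # size that does not exceed sample_size, approx() converts the observed
  # statistic into a quantile within that column (and within the next column,
  # when one exists), and the two quantiles are then combined by linear
  # interpolation in sample_size. Sample sizes smaller than the smallest
  # tabulated value yield NA (or an error when stop.on.na=TRUE).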
60 | n <- nrow(qtab) 61 | i <- findInterval(sample_size, qtab[1,2:ncol(qtab)])+1 62 | if (i == 1) { 63 | parent_name <- as.character(sys.call(-1)[[1]]) 64 | if (stop.on.na) { 65 | stop (parent_name," requires a minimum of ", qtab[1,2], " observations.") 66 | } else { 67 | warning (parent_name, " requires a minimum of ", qtab[1,2], " observations.") 68 | return(NA) 69 | } 70 | } 71 | y1 <- approx(qtab[2:n, i], qtab[2:n, 1], stat, rule=2)$y 72 | if (i < ncol(qtab)) { 73 | y2 <- approx(qtab[2:n, i+1], qtab[2:n, 1], stat, rule=2)$y 74 | n1 <- qtab[1,i] 75 | n2 <- qtab[1,i+1] 76 | y <- y1 * (n2 - sample_size) / (n2 - n1) + y2 * (sample_size - n1)/(n2 - n1) 77 | } else { 78 | y <- y1 79 | } 80 | y 81 | } 82 | 83 | 84 | -------------------------------------------------------------------------------- /man/likelihood_ratio.pci.Rd: -------------------------------------------------------------------------------- 1 | \name{likelihood_ratio.pci} 2 | \alias{likelihood_ratio.pci} 3 | \title{ 4 | Computes the likelihood ratio of the partially cointegrated model vs the null model 5 | } 6 | \description{ 7 | Computes the likelihood ratio of the partially cointegrated model vs the null model 8 | } 9 | \usage{ 10 | likelihood_ratio.pci(Y, X, 11 | robust = FALSE, 12 | null_model = c("rw", "ar1"), 13 | pci_opt_method = c("jp", "twostep"), 14 | nu = 5) 15 | } 16 | %- maybe also 'usage' for other objects documented here. 17 | \arguments{ 18 | \item{Y}{ 19 | The time series that is to be modeled. A plain or \code{\link{zoo}} vector of length \code{n}. 20 | } 21 | \item{X}{ 22 | A (possibly \code{\link{zoo}}) matrix of dimensions \code{n} x \code{k}. 23 | If \code{k=1}, then this may be a plain or \code{\link{zoo}} vector. 24 | } 25 | \item{robust}{ 26 | If \code{TRUE}, then the residuals are assumed to follow a t-distribution with 27 | \code{nu} degrees of freedom. Default: \code{FALSE}. 28 | } 29 | 30 | \item{null_model}{ 31 | This specifies the model that is assumed under the null hypothesis. 32 | \itemize{ 33 | \item{rw}{Random walk. Assumes sigma_M = rho = 0. Default.} 34 | \item{ar1}{Autoregressive of order one. Assumes sigma_R=0.} 35 | } 36 | } 37 | \item{pci_opt_method}{ 38 | Method to be used for fitting Y to X. 39 | \itemize{ 40 | \item{jp}{The coefficients of Y are jointly optimized 41 | with the parameters of the AAR fit of the residuals. Default.} 42 | \item{twostep}{A modified Engle-Granger procedure is used, where 43 | the coefficients of Y are first estimated, and then an AAR 44 | model is fit to the residuals.} 45 | }} 46 | \item{nu}{ 47 | If \code{robust} is \code{TRUE}, then this is the degrees of freedom 48 | parameter used in fitting the t-distribution. Default: 5. 49 | } 50 | } 51 | \details{ 52 | First searches for the optimal fit under the null model, and computes 53 | the log of the likelihood score of this fit. Then, searches for the optimal 54 | fit under the full model, and computes the log of the likelihood score of 55 | this fit. Returns the difference of the two likelihood scores. Since the 56 | null model is nested in the full model, the log likelihood ratio score 57 | is guaranteed to be negative. 58 | } 59 | \value{The log of the ratio of the likelihoods of the two models. 60 | } 61 | \references{ 62 | Clegg, Matthew, 2015. 63 | Modeling Time Series with Both Permanent and Transient Components 64 | using the Partially Autoregressive Model. 
65 | \emph{Available at SSRN: http://ssrn.com/abstract=2556957} 66 | } 67 | \author{ 68 | Matthew Clegg \email{matthewcleggphd@gmail.com} 69 | 70 | Christopher Krauss \email{christopher.krauss@fau.de} 71 | 72 | Jonas Rende \email{jonas.rende@fau.de} 73 | } 74 | \seealso{ 75 | \code{\link{fit.pci}} Fitting partially cointegrated models 76 | } 77 | \examples{ 78 | YX <- rpci(n=1000, alpha=1, beta=c(2,3,4), sigma_C=c(1,1,1), rho=0.9, sigma_M=0.1, sigma_R=0.2) 79 | likelihood_ratio.pci(YX[,1], YX[,2:ncol(YX)]) 80 | } 81 | % Add one or more standard keywords, see file 'KEYWORDS' in the 82 | % R documentation directory. 83 | \keyword{ ts } 84 | \keyword{ models }% __ONLY ONE__ keyword per line 85 | -------------------------------------------------------------------------------- /man/rpci.Rd: -------------------------------------------------------------------------------- 1 | \name{rpci} 2 | \alias{rpci} 3 | \title{ 4 | Generates a random instance of a partial cointegration model 5 | } 6 | \description{Generates a random instance of a partial cointegration model} 7 | \usage{ 8 | rpci(n, alpha, beta, sigma_C, rho, sigma_M, sigma_R, 9 | include.state = FALSE, robust = FALSE, nu = 5) 10 | } 11 | \arguments{ 12 | \item{n}{ 13 | Number of observations to generate 14 | } 15 | \item{alpha}{ 16 | Constant term of the model 17 | } 18 | \item{beta}{ 19 | A vector of factor loadings 20 | } 21 | \item{sigma_C}{ 22 | A vector of standard deviations 23 | } 24 | \item{rho}{ 25 | The coefficient of mean reversion 26 | } 27 | \item{sigma_R}{ 28 | The standard deviation of the innovations of the random walk portion of the residual series 29 | } 30 | \item{sigma_M}{ 31 | The standard deviation of the innovations of the mean-reverting portion of the residual series 32 | } 33 | \item{include.state}{ 34 | If TRUE, then the output data.frame contains the innovations to the factors and residual 35 | series, as well as the state of the residual series. Default: FALSE 36 | } 37 | \item{robust}{ 38 | If TRUE, then a t-distribution is used to generate the innovations. 39 | Otherwise, the innovations are normally distributed. Default: FALSE. 40 | } 41 | \item{nu}{ 42 | The degrees of freedom parameter used for t-distributed innovations. Default: 5. 43 | } 44 | } 45 | \details{ 46 | Generates a random set of partially cointegrated vectors. On input, \code{n} is the 47 | length of the sequence to be generated. \code{beta} is a vector of length k 48 | representing the coefficients of the factor loadings, and \code{sigma_C} is a 49 | vector of length k representing the standard deviations of the increments 50 | of the factor loadings. 51 | 52 | Generates a random realization of the sequence 53 | \deqn{Y_t = \alpha + \beta_1 F_{1,t} + \beta_2 F_{2,t} + ... + \beta_k F_{k,t} + M_t + R_t} 54 | \deqn{F_{i,j} = F_{i,j-1} + \delta_{i,j}} 55 | \deqn{M_t = \rho m_{t-1} + \epsilon_{M,t}} 56 | \deqn{R_t = r_{t-1} + \epsilon_{R,t}} 57 | \deqn{\delta_{i,j} ~ N(0, \sigma_{C,i}^2)} 58 | \deqn{\epsilon_{M,t} ~ N(0, \sigma_M^2)} 59 | \deqn{\epsilon_{R,t} ~ N(0, \sigma_R^2)} 60 | 61 | } 62 | \value{ 63 | A \code{data.frame} of \code{n} rows representing the realization of the partially 64 | cointegrated sequence. 65 | 66 | If \code{include.state} is \code{FALSE}, returns an \code{n x (k+1)} matrix whose columns 67 | are \code{y, F_1, F_2, ..., F_k}. If include.state is \code{TRUE}, returns an 68 | \code{n x (2k + 6)} matrix whose columns are 69 | \code{y, F_1, F_2, ..., F_k, x, M, R, delta_1, delta_2, ..., delta_k, epsilon_M, epsilon_R}. 
70 | } 71 | \author{ 72 | Matthew Clegg \email{matthewcleggphd@gmail.com} 73 | 74 | Christopher Krauss \email{christopher.krauss@fau.de} 75 | 76 | Jonas Rende \email{jonas.rende@fau.de} 77 | } 78 | 79 | \seealso{ 80 | \code{\link{fit.pci}} 81 | } 82 | \examples{ 83 | rpci(10, alpha =1, beta=1, sigma_C=1, rho=0.9, sigma_R=1, sigma_M=1) 84 | } 85 | % Add one or more standard keywords, see file 'KEYWORDS' in the 86 | % R documentation directory. 87 | \keyword{ ts } 88 | \keyword{ models }% __ONLY ONE__ keyword per line 89 | -------------------------------------------------------------------------------- /man/loglik.pci.Rd: -------------------------------------------------------------------------------- 1 | \name{loglik.pci} 2 | \alias{loglik.pci} 3 | \title{ 4 | Computes the log likelihood of a partially cointegrated model 5 | } 6 | \description{ 7 | Computes the log likelihood of a partially cointegrated model 8 | } 9 | \usage{ 10 | loglik.pci(Y, X, alpha, beta, rho, sigma_M, sigma_R, 11 | M0 = 0, R0 = 0, 12 | calc_method = c("css", "fkf", "ss", "sst", "csst"), 13 | nu = pci.nu.default()) 14 | } 15 | %- maybe also 'usage' for other objects documented here. 16 | \arguments{ 17 | \item{Y}{ 18 | The time series that is to be modeled. A plain or \code{\link{zoo}} vector of length \code{n}. 19 | } 20 | \item{X}{ 21 | A (possibly \code{\link{zoo}}) matrix of dimensions \code{n} x \code{k}. If \code{k=1}, then this may be a plain or \code{\link{zoo}} vector. 22 | } 23 | \item{alpha}{ 24 | The constant term to be used in the fit. 25 | } 26 | \item{beta}{ 27 | A vector of length \code{k} representing the weightings to be given to the components of \code{X}. 28 | } 29 | \item{rho}{ 30 | The coefficient of mean reversion. 31 | } 32 | \item{sigma_M}{ 33 | The standard deviation of the innovations of the mean-reverting component of the model. 34 | } 35 | \item{sigma_R}{ 36 | The standard deviation of the innovations of the random walk component of the model. 37 | } 38 | \item{M0}{ 39 | The initial value of the mean-reverting component. Default = 0. 40 | } 41 | \item{R0}{ 42 | The initial value of the random walk component. Default = 0. 43 | } 44 | \item{calc_method}{ 45 | Specifies the Kalman filter implementation that will be used for computing 46 | the likelihood score: 47 | \itemize{ 48 | \item "ss" Steady-state Kalman filter 49 | \item "css" C++ implementation of steady-state Kalman filter 50 | \item "fkf" Kalman filter implementation of the FKF package 51 | \item "sst" Steady-state Kalman filter using t-distributed innovations 52 | \item "csst" C++ implementation of steady-state Kalman filter using t-distributed innovations 53 | } 54 | Default: \code{css} 55 | } 56 | 57 | \item{nu}{ 58 | The degrees-of-freedom parameter to be used if \code{calc_method} is "sst" or "csst". 59 | } 60 | } 61 | \details{ 62 | The partial cointegration model is given by the equations: 63 | 64 | \deqn{ 65 | Y_t = \alpha + \beta_1 * X_{t,1} + beta_2 * X_{t,2} + ... + beta_k * X_{t,k} + M_t + R_t}{ 66 | Y[t] = alpha + beta[1] * X[t,1] + beta[2] * X[t,2] + ... 
+ beta[k] * X[t,k] + M[t] + R[t] 67 | } 68 | \deqn{M_t = \rho M_{t-1} + \epsilon_{M,t}}{M[t] = rho * M[t-1] + epsilon_M[t]} 69 | \deqn{R_t = R_{t-1} + \epsilon_{R,t}}{R[t] = R[t-1] + epsilon_R[t]} 70 | \deqn{-1 < \rho < 1}{-1 < rho < 1} 71 | \deqn{\epsilon_{M,t} \sim N(0,\sigma_M^2)}{epsilon_M[t] ~ N(0, sigma_M^2)} 72 | \deqn{\epsilon_{R,t} \sim N(0,\sigma_R^2)}{epsilon_R[t] ~ N(0, sigma_R^2)} 73 | 74 | Given the input series 75 | \code{Y} and \code{X}, 76 | and given the parameter values 77 | \code{alpha}, \code{beta}, \code{rho}, \code{M0} and \code{R0}, 78 | the innovations \code{epsilon_M[t]} and \code{epsilon_R[t]} are calculated 79 | using a Kalman filter. Based upon these values, the log-likelihood score 80 | is then computed and returned. 81 | } 82 | \value{ 83 | The log of the likelihood score of the Kalman filter 84 | } 85 | \references{ 86 | Clegg, Matthew, 2015. 87 | Modeling Time Series with Both Permanent and Transient Components 88 | using the Partially Autoregressive Model. 89 | \emph{Available at SSRN: http://ssrn.com/abstract=2556957} 90 | } 91 | \author{ 92 | Matthew Clegg \email{matthewcleggphd@gmail.com} 93 | 94 | Christopher Krauss \email{christopher.krauss@fau.de} 95 | 96 | Jonas Rende \email{jonas.rende@fau.de} 97 | } 98 | \seealso{ 99 | \code{\link[egcm:egcm]{egcm}} Engle-Granger cointegration model 100 | 101 | \code{\link[partialAR:partialAR]{partialAR}} Partially autoregressive models 102 | } 103 | \examples{ 104 | ##---- Should be DIRECTLY executable !! ---- 105 | ##-- ==> Define data, use random, 106 | ##-- or do help(data=index) for the standard data sets. 107 | 108 | set.seed(1) 109 | YX <- rpci(n=500, alpha=1, beta=c(2,3,4), sigma_C=c(1,1,1), rho=0.9, sigma_M=0.1, sigma_R=0.2) 110 | loglik.pci(YX[,1], YX[,2:ncol(YX)], alpha=1, beta=c(2,3,4), rho=0.9, sigma_M=0.1, sigma_R=0.2) 111 | 112 | } 113 | % Add one or more standard keywords, see file 'KEYWORDS' in the 114 | % R documentation directory. 115 | \keyword{ ts } 116 | \keyword{ models } 117 | -------------------------------------------------------------------------------- /man/test.pci.Rd: -------------------------------------------------------------------------------- 1 | \name{test.pci} 2 | \alias{test.pci} 3 | \title{Tests the goodness of fit of a partial cointegration model} 4 | \description{ 5 | Tests the goodness of fit of a partial cointegration model 6 | } 7 | \usage{ 8 | test.pci(Y, X, alpha = 0.05, 9 | null_hyp = c("rw", "ar1"), 10 | robust = FALSE, 11 | pci_opt_method = c("jp", "twostep")) 12 | } 13 | %- maybe also 'usage' for other objects documented here. 14 | \arguments{ 15 | \item{Y}{ 16 | The time series that is to be modeled. A plain or \code{\link{zoo}} vector of length \code{n}. 17 | } 18 | \item{X}{ 19 | A (possibly \code{\link{zoo}}) matrix of dimensions \code{n} x \code{k}. 20 | If \code{k=1}, then this may be a plain or \code{\link{zoo}} vector. 21 | } 22 | \item{alpha}{ 23 | The cutoff value to be used for determining whether or not to accept the 24 | null hypothesis. If the the \code{p-value} computed through the likelihood 25 | ratio test is below this value, then the null hypothesis is rejected. 26 | Default: 0.05. 27 | } 28 | \item{null_hyp}{ 29 | This specifies the null hypothesis. 30 | This can be either \code{"rw"}, \code{"ar1"} or \code{c("rw","ar1")}. 31 | If \code{"rw"}, then the null hypothesis is a random walk. 32 | If \code{"ar1"}, then the null hypothesis is an autoregressive process 33 | of order 1. 
(In this case, the null hypothesis calls for Y and X to be 34 | cointegrated.) If \code{(c("rw","ar1")}, then the null hypothesis is 35 | either a random walk or AR(1) process. Default: both. 36 | } 37 | 38 | \item{robust}{ 39 | If \code{TRUE}, then the residuals are assumed to follow a t-distribution. Default: \code{FALSE}. 40 | } 41 | \item{pci_opt_method}{ 42 | The method that will be used for fitting a partially cointegrated 43 | model to \code{X} and \code{Y}. This can be either \code{"jp"} 44 | (joint penalty) or \code{"twostep"} (Engle-Granger two-step). 45 | See \code{\link{fit.pci}} for a complete explanation. 46 | Default: \code{"jp"}. 47 | } 48 | } 49 | \details{ 50 | The likelihood ratio test is used to determine whether 51 | the null hypothesis should be accepted or whether the alternative 52 | of partial cointegration should be accepted. That is to say, 53 | a search is performed for the best fitting model under the null 54 | hypothesis, and the log likelihood score of this model is computed. 55 | Then a search is performed for the best fitting model under the 56 | alternative hypothesis of partial cointegration, and the log 57 | likelihood score of this model is computed. The associated 58 | p-values have been computed through simulation. 59 | 60 | If the null hypothesis is the union \code{c("rw", "ar1")}, then 61 | a table of p-values is used that has been pre-computed, however 62 | this table is not unique. 63 | } 64 | \value{An object of class \code{"pcitest"} containing the 65 | results of the hypothesis test. 66 | 67 | } 68 | 69 | \references{ 70 | Clegg, Matthew, 2015. 71 | Modeling Time Series with Both Permanent and Transient Components 72 | using the Partially Autoregressive Model. 73 | \emph{Available at SSRN: http://ssrn.com/abstract=2556957} 74 | 75 | Clegg, Matthew and Krauss, Christopher, 2016. Pairs trading with partial cointegration. FAU Discussion Papers in Economics, University of Erlangen-Nuernberg. 76 | \emph{Available at https://www.iwf.rw.fau.de/files/2016/05/05-2016.pdf} 77 | } 78 | 79 | \author{ 80 | Matthew Clegg \email{matthewcleggphd@gmail.com} 81 | 82 | Christopher Krauss \email{christopher.krauss@fau.de} 83 | 84 | Jonas Rende \email{jonas.rende@fau.de} 85 | } 86 | \seealso{ 87 | \code{\link{fit.pci}} Fits a partially cointegrated model 88 | 89 | \code{\link{likelihood_ratio.pci}} Computes the likelihood ratio of a PCI model 90 | versus a null model 91 | } 92 | \examples{ 93 | # The following should reject both the random walk and AR(1) models 94 | 95 | \dontrun{ 96 | YX <- rpci(n=1000, alpha=1, beta=c(2,3), sigma_C=c(0.1,0.1), rho=0.8, sigma_M=1, sigma_R=1) 97 | test.pci(YX[,1], YX[,2:ncol(YX)]) 98 | 99 | # The following should accept the random walk model and reject the AR(1) model 100 | YX.rw <- rpci(n=1000, alpha=1, beta=c(2,3), sigma_C=c(0.1,0.1), rho=0.8, sigma_M=0, sigma_R=1) 101 | test.pci(YX.rw[,1], YX.rw[,2:ncol(YX.rw)]) 102 | 103 | # The following should reject the random walk model and accept the AR(1) model 104 | YX.mr <- rpci(n=1000, alpha=1, beta=c(2,3), sigma_C=c(0.1,0.1), rho=0.8, sigma_M=1, sigma_R=0) 105 | test.pci(YX.mr[,1], YX.mr[,2:ncol(YX.mr)]) 106 | } 107 | } 108 | % Add one or more standard keywords, see file 'KEYWORDS' in the 109 | % R documentation directory. 
110 | \keyword{ ts } 111 | \keyword{ models } 112 | -------------------------------------------------------------------------------- /src/RcppExports.cpp: -------------------------------------------------------------------------------- 1 | // Generated by using Rcpp::compileAttributes() -> do not edit by hand 2 | // Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 3 | 4 | #include 5 | 6 | using namespace Rcpp; 7 | 8 | // lagged_variance_c 9 | double lagged_variance_c(NumericVector X, int k, int n); 10 | RcppExport SEXP partialCI_lagged_variance_c(SEXP XSEXP, SEXP kSEXP, SEXP nSEXP) { 11 | BEGIN_RCPP 12 | Rcpp::RObject rcpp_result_gen; 13 | Rcpp::RNGScope rcpp_rngScope_gen; 14 | Rcpp::traits::input_parameter< NumericVector >::type X(XSEXP); 15 | Rcpp::traits::input_parameter< int >::type k(kSEXP); 16 | Rcpp::traits::input_parameter< int >::type n(nSEXP); 17 | rcpp_result_gen = Rcpp::wrap(lagged_variance_c(X, k, n)); 18 | return rcpp_result_gen; 19 | END_RCPP 20 | } 21 | // estimate_rho_par_c 22 | double estimate_rho_par_c(NumericVector X); 23 | RcppExport SEXP partialCI_estimate_rho_par_c(SEXP XSEXP) { 24 | BEGIN_RCPP 25 | Rcpp::RObject rcpp_result_gen; 26 | Rcpp::RNGScope rcpp_rngScope_gen; 27 | Rcpp::traits::input_parameter< NumericVector >::type X(XSEXP); 28 | rcpp_result_gen = Rcpp::wrap(estimate_rho_par_c(X)); 29 | return rcpp_result_gen; 30 | END_RCPP 31 | } 32 | // estimate_par_c 33 | NumericVector estimate_par_c(NumericVector X, double rho_max); 34 | RcppExport SEXP partialCI_estimate_par_c(SEXP XSEXP, SEXP rho_maxSEXP) { 35 | BEGIN_RCPP 36 | Rcpp::RObject rcpp_result_gen; 37 | Rcpp::RNGScope rcpp_rngScope_gen; 38 | Rcpp::traits::input_parameter< NumericVector >::type X(XSEXP); 39 | Rcpp::traits::input_parameter< double >::type rho_max(rho_maxSEXP); 40 | rcpp_result_gen = Rcpp::wrap(estimate_par_c(X, rho_max)); 41 | return rcpp_result_gen; 42 | END_RCPP 43 | } 44 | // pvmr_par_c 45 | double pvmr_par_c(double rho, double sigma_M, double sigma_R); 46 | RcppExport SEXP partialCI_pvmr_par_c(SEXP rhoSEXP, SEXP sigma_MSEXP, SEXP sigma_RSEXP) { 47 | BEGIN_RCPP 48 | Rcpp::RObject rcpp_result_gen; 49 | Rcpp::RNGScope rcpp_rngScope_gen; 50 | Rcpp::traits::input_parameter< double >::type rho(rhoSEXP); 51 | Rcpp::traits::input_parameter< double >::type sigma_M(sigma_MSEXP); 52 | Rcpp::traits::input_parameter< double >::type sigma_R(sigma_RSEXP); 53 | rcpp_result_gen = Rcpp::wrap(pvmr_par_c(rho, sigma_M, sigma_R)); 54 | return rcpp_result_gen; 55 | END_RCPP 56 | } 57 | // kalman_gain_par_mr 58 | double kalman_gain_par_mr(double rho, double sigma_M, double sigma_R); 59 | RcppExport SEXP partialCI_kalman_gain_par_mr(SEXP rhoSEXP, SEXP sigma_MSEXP, SEXP sigma_RSEXP) { 60 | BEGIN_RCPP 61 | Rcpp::RObject rcpp_result_gen; 62 | Rcpp::RNGScope rcpp_rngScope_gen; 63 | Rcpp::traits::input_parameter< double >::type rho(rhoSEXP); 64 | Rcpp::traits::input_parameter< double >::type sigma_M(sigma_MSEXP); 65 | Rcpp::traits::input_parameter< double >::type sigma_R(sigma_RSEXP); 66 | rcpp_result_gen = Rcpp::wrap(kalman_gain_par_mr(rho, sigma_M, sigma_R)); 67 | return rcpp_result_gen; 68 | END_RCPP 69 | } 70 | // loglik_par_c 71 | double loglik_par_c(NumericVector Y, double rho, double sigma_M, double sigma_R, double M0, double R0); 72 | RcppExport SEXP partialCI_loglik_par_c(SEXP YSEXP, SEXP rhoSEXP, SEXP sigma_MSEXP, SEXP sigma_RSEXP, SEXP M0SEXP, SEXP R0SEXP) { 73 | BEGIN_RCPP 74 | Rcpp::RObject rcpp_result_gen; 75 | Rcpp::RNGScope rcpp_rngScope_gen; 76 | Rcpp::traits::input_parameter< NumericVector 
>::type Y(YSEXP); 77 | Rcpp::traits::input_parameter< double >::type rho(rhoSEXP); 78 | Rcpp::traits::input_parameter< double >::type sigma_M(sigma_MSEXP); 79 | Rcpp::traits::input_parameter< double >::type sigma_R(sigma_RSEXP); 80 | Rcpp::traits::input_parameter< double >::type M0(M0SEXP); 81 | Rcpp::traits::input_parameter< double >::type R0(R0SEXP); 82 | rcpp_result_gen = Rcpp::wrap(loglik_par_c(Y, rho, sigma_M, sigma_R, M0, R0)); 83 | return rcpp_result_gen; 84 | END_RCPP 85 | } 86 | // loglik_par_t_c 87 | double loglik_par_t_c(NumericVector Y, double rho, double sigma_M, double sigma_R, double M0, double R0, double nu); 88 | RcppExport SEXP partialCI_loglik_par_t_c(SEXP YSEXP, SEXP rhoSEXP, SEXP sigma_MSEXP, SEXP sigma_RSEXP, SEXP M0SEXP, SEXP R0SEXP, SEXP nuSEXP) { 89 | BEGIN_RCPP 90 | Rcpp::RObject rcpp_result_gen; 91 | Rcpp::RNGScope rcpp_rngScope_gen; 92 | Rcpp::traits::input_parameter< NumericVector >::type Y(YSEXP); 93 | Rcpp::traits::input_parameter< double >::type rho(rhoSEXP); 94 | Rcpp::traits::input_parameter< double >::type sigma_M(sigma_MSEXP); 95 | Rcpp::traits::input_parameter< double >::type sigma_R(sigma_RSEXP); 96 | Rcpp::traits::input_parameter< double >::type M0(M0SEXP); 97 | Rcpp::traits::input_parameter< double >::type R0(R0SEXP); 98 | Rcpp::traits::input_parameter< double >::type nu(nuSEXP); 99 | rcpp_result_gen = Rcpp::wrap(loglik_par_t_c(Y, rho, sigma_M, sigma_R, M0, R0, nu)); 100 | return rcpp_result_gen; 101 | END_RCPP 102 | } 103 | -------------------------------------------------------------------------------- /R/extras.R: -------------------------------------------------------------------------------- 1 | # extras.R 2 | # Copyright (C) 2016 by Matthew Clegg 3 | 4 | # This program is free software; you can redistribute it and/or modify 5 | # it under the terms of the GNU General Public License as published by 6 | # the Free Software Foundation; either version 2 of the License, or 7 | # (at your option) any later version. 8 | # 9 | # This program is distributed in the hope that it will be useful, 10 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | # GNU General Public License for more details. 13 | # 14 | # A copy of the GNU General Public License is available at 15 | # http://www.r-project.org/Licenses/ 16 | 17 | 18 | # A few extra helper routines for the egcm module. 19 | 20 | # This program is free software; you can redistribute it and/or modify 21 | # it under the terms of the GNU General Public License as published by 22 | # the Free Software Foundation; either version 2 of the License, or 23 | # (at your option) any later version. 24 | # 25 | # This program is distributed in the hope that it will be useful, 26 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 27 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 28 | # GNU General Public License for more details. 29 | # 30 | # A copy of the GNU General Public License is available at 31 | # http://www.r-project.org/Licenses/ 32 | 33 | price_matrix.rm.na <- function (DF, target=1) { 34 | # Heuristically removes NA's from a data.frame representing a matrix of 35 | # prices. 
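  # The heuristic referred to in the yfit.pci/yhedge.pci documentation:
  # rows in which the target series is NA are always dropped; next, rows and
  # then columns containing two or more NAs are removed; finally, any
  # remaining rows with an NA are deleted. A security (column) with several
  # gaps is thus sacrificed in preference to deleting a large number of
  # days (rows).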
36 | 37 | # Remove all rows where the target has an NA 38 | DF <- DF[!is.na(DF[,target]),] 39 | 40 | # Remove all rows where there are two or more NA's 41 | DF <- DF[apply(DF, 1, function(x) sum(is.na(x)) <= 1),] 42 | 43 | # Remove all columns containing two or more NA's 44 | DF <- DF[,apply(DF, 2, function(x) sum(is.na(x)) <= 1),] 45 | 46 | # Remove all rows containing an NA 47 | DF <- DF[apply(DF, 1, function(x) all(!is.na(x))), ] 48 | 49 | DF 50 | } 51 | 52 | multigetYahooPrices <- function ( 53 | components, # Character vector of Yahoo ticker symbols 54 | start, # First date of desired data in YYYYMMDD format. 55 | end, # Last date of desired data in YYYYMMDD format. 56 | quiet=FALSE, # If TRUE, prints information as symbols are fetched 57 | adjust=TRUE # If TRUE, adjusted closing prices are returned. 58 | ) { 59 | # Given a list of ticker symbols, fetches the 60 | # adjusted closing prices of each of the symbols in the 61 | # list from Yahoo!, and creates a zoo data.frame. 62 | 63 | yahooParams <- list(symbol="NA", adjust=adjust, quiet=quiet) 64 | if (!missing(start)) yahooParams["start"] <- start 65 | if (!missing(end)) yahooParams["end"] <- end 66 | 67 | get_ticker <- function (t) { 68 | if (!quiet) cat(t, " ") 69 | p <- NULL 70 | # try (p <- getYahooData (t, startdate, enddate, adjust=adjust) ) 71 | yahooParams["symbol"] <- t 72 | try (p <- do.call("getYahooData", yahooParams)) 73 | if (is.null(dim(p))) return(NULL) 74 | zdf <- zoo(data.frame(p$Close), index(p)) 75 | colnames(zdf) <- t 76 | zdf 77 | } 78 | 79 | all_prices <- lapply(components, get_ticker) 80 | if (!quiet) cat("\n") 81 | not_null <- !c(lapply(all_prices, is.null), recursive=TRUE) 82 | all_prices <- do.call("cbind", all_prices[not_null]) 83 | all_prices 84 | } 85 | 86 | yfit.pci <- function(target, factors, 87 | start=as.numeric(format(Sys.Date()-365*2, "%Y%m%d")), # Starting date 88 | end=as.numeric(format(Sys.Date(), "%Y%m%d")), # Ending date 89 | na.rm=FALSE, 90 | ...) { # Additional parameters to be passed to fit.pci 91 | # Fetches the price series of a target security and set of potential 92 | # factor securities from Yahoo and constructs a partial cointegration 93 | # model from them. 94 | 95 | # require(TTR) 96 | prices <- multigetYahooPrices(c(target, factors), start, end, quiet=TRUE) 97 | if (any(is.na(prices))) { 98 | if (!na.rm && any(is.na(prices))) { 99 | has.any <- colnames(prices)[apply(prices,2,function(x) any(is.na(x)))] 100 | stop("The following tickers have NAs: ", has.any, "\nTry re-running with na.rm=TRUE") 101 | } 102 | } else { 103 | prices <- price_matrix.rm.na(prices) 104 | } 105 | 106 | e <- fit.pci(prices[,1,drop=FALSE], prices[,2:ncol(prices),drop=FALSE], ...) 107 | e 108 | } 109 | 110 | yhedge.pci <- function(target, factors, 111 | start=as.numeric(format(Sys.Date()-365*2, "%Y%m%d")), # Starting date 112 | end=as.numeric(format(Sys.Date(), "%Y%m%d")), # Ending date 113 | na.rm=FALSE, 114 | ...) { # Additional parameters to be passed to hedge.pci 115 | # Fetches the price series of a target security and set of potential 116 | # factor securities from Yahoo and constructs a partial cointegration 117 | # model from them. 
118 | 119 | # require(TTR) 120 | prices <- multigetYahooPrices(c(target, factors), start, end, quiet=TRUE) 121 | if (any(is.na(prices))) { 122 | if (!na.rm && any(is.na(prices))) { 123 | has.any <- colnames(prices)[apply(prices,2,function(x) any(is.na(x)))] 124 | stop("The following tickers have NAs: ", has.any, "\nTry re-running with na.rm=TRUE") 125 | } 126 | } else { 127 | prices <- price_matrix.rm.na(prices) 128 | } 129 | 130 | e <- hedge.pci(prices[,1,drop=FALSE], prices[,2:ncol(prices),drop=FALSE], ...) 131 | e 132 | } 133 | 134 | -------------------------------------------------------------------------------- /man/partialCI-package.Rd: -------------------------------------------------------------------------------- 1 | \name{partialCI-package} 2 | \alias{partialCI-package} 3 | \alias{partialCI} 4 | \docType{package} 5 | \title{ 6 | Partial Cointegration 7 | } 8 | \description{ 9 | A collection of time series is said to be partially cointegrated if they have a linear 10 | combination that is partially autoregressive, e.g., that can be represented as a sum of an 11 | autoregressive series and a random walk. This may be useful in modeling 12 | certain sets of financial time series. 13 | 14 | To find the partially cointegrated model that best fits two series X and Y, use: 15 | 16 | \preformatted{ 17 | > fit.pci(Y, X) 18 | } 19 | 20 | An interface to Yahoo! Finance permits you to find the best fits for two particular 21 | stocks of interest: 22 | 23 | \preformatted{ 24 | > yfit.pci("RDS-B", "RDS-A") 25 | Fitted values for PCI model 26 | Y[t] = alpha + X[t] %*% beta + M[t] + R[t] 27 | M[t] = rho * M[t-1] + eps_M [t], eps_M[t] ~ N(0, sigma_M^2) 28 | R[t] = R[t-1] + eps_R [t], eps_R[t] ~ N(0, sigma_R^2) 29 | 30 | Estimate Std. Err 31 | alpha 0.2063 0.8804 32 | beta_RDS-A 1.0531 0.0133 33 | rho 0.9055 0.0355 34 | sigma_M 0.2431 0.0162 35 | sigma_R 0.0993 0.0350 36 | 37 | -LL = 41.30, R^2[MR] = 0.863 38 | } 39 | 40 | This example was run on 1/7/2016. RDS-A and RDS-B are two classes of shares offered by 41 | Royal Dutch Shell that differ slightly in aspects of their tax treatment. The above fit 42 | shows that the spread between the two shares is mostly mean-reverting but that it contains 43 | a small random walk component. The mean-reverting component accounts for 86.3\% of 44 | the variance of the daily returns. The value of 0.9055 for rho corresponds to a half-life 45 | of mean reversion of about 7 trading days. 46 | 47 | To test the goodness of fit, the \code{test.pci} function can be used: 48 | 49 | \preformatted{ 50 | > h <- yfit.pci("RDS-B", "RDS-A") 51 | > test.pci(h) 52 | 53 | Likelihood ratio test of [Random Walk or CI(1)] vs Almost PCI(1) (joint penalty method) 54 | 55 | data: h 56 | 57 | Hypothesis Statistic p-value 58 | Random Walk -4.94 0.010 59 | AR(1) -4.08 0.010 60 | Combined 0.010 61 | } 62 | 63 | The \code{test.pci} function tests each of two different null hypotheses: 64 | (a) the residual series is purely a random walk, and (b) the residual series is 65 | purely autoregressive. In addition, the union of these hypothesis is 66 | also tested. For practical applications, one is usually most interested in 67 | rejecting the first of these null hypotheses, e.g., that the residual series 68 | is purely a random walk. 69 | 70 | The partialCI package also contains a function for searching for hedging portfolios. 71 | Given a particular stock (or time series), a search can be conducted to find the set 72 | of stocks that best replicate the target stock. 
In the following example, a hedge is 73 | sought for SPY using sector ETF's. 74 | 75 | \preformatted{ 76 | > sectorETFS <- c("XLB", "XLE", "XLF", "XLI", "XLK", "XLP", "XLU", "XLV", "XLY") 77 | > prices <- multigetYahooPrices(c("SPY", sectorETFS), start=20140101) 78 | > hedge.pci(prices[,"SPY"], prices) 79 | -LL LR[rw] p[rw] p[mr] rho R^2[MR] Factor | Factor coefficients 80 | 490.67 -1.7771 0.1782 0.0100 0.9587 0.8246 XLF | 6.8351 81 | 283.26 -4.3988 0.0137 0.0786 0.9642 1.0000 XLK | 3.6209 2.2396 82 | 168.86 -6.4339 0.0100 0.0100 0.7328 0.6619 XLI | 2.3191 1.6542 1.1391 83 | 84 | Fitted values for PCI model 85 | Y[t] = alpha + X[t] %*% beta + M[t] + R[t] 86 | M[t] = rho * M[t-1] + eps_M [t], eps_M[t] ~ N(0, sigma_M^2) 87 | R[t] = R[t-1] + eps_R [t], eps_R[t] ~ N(0, sigma_R^2) 88 | 89 | Estimate Std. Err 90 | alpha 14.2892 1.5598 91 | beta_XLF 2.3191 0.1439 92 | beta_XLK 1.6542 0.0804 93 | beta_XLI 1.1391 0.0662 94 | rho 0.7328 0.1047 95 | sigma_M 0.2678 0.0315 96 | sigma_R 0.2056 0.0401 97 | 98 | -LL = 168.86, R^2[MR] = 0.662 99 | } 100 | 101 | The top table displays the quality of the fit that is found as each new factor is 102 | added to the fit. The best fit consisting of only one factor is found by using XLF 103 | (the financials sector). The negative log likelihod score for this model is 490.67. 104 | However, the random walk hypothesis (p[rw]) cannot be rejected at the 5\% level. 105 | When adding XLK (the technology sector), the negative log likelihood drops to 283.26 106 | and the random walk hypothesis for the spread can now be rejected. This means that SPY 107 | is at least partially cointegrated and possibly fully cointegrated with a portfolio 108 | consisting of XLF and XLK in the right proportions. The best overall fit is obtained 109 | by also adding XLI (industrials) to the hedging portfolio. The final fit is 110 | 111 | \preformatted{ 112 | SPY = $14.29 + 2.32 XLF + 1.65 XLK + 1.14 XLI 113 | } 114 | 115 | For this fit, the proportion of variance attributable to the mean reverting 116 | component is 66.2\%, and the half life of mean reversion is about 2.2 days. 117 | 118 | Please feel free to contact us if you have questions or suggestions. 119 | 120 | Matthew Clegg, Christopher Krauss and Jonas Rende 121 | 122 | Feb 13, 2017 123 | } 124 | \author{ 125 | Matthew Clegg \email{matthewcleggphd@gmail.com} 126 | 127 | Christopher Krauss \email{christopher.krauss@fau.de} 128 | 129 | Jonas Rende \email{jonas.rende@fau.de} 130 | } 131 | 132 | \seealso{ 133 | \code{\link{fit.pci}} 134 | \code{\link{yfit.pci}} 135 | \code{\link{test.pci}} 136 | \code{\link{hedge.pci}} 137 | \code{\link{yhedge.pci}} 138 | } 139 | 140 | \keyword{ ts } 141 | \keyword{ models }% __ONLY ONE__ keyword per line 142 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # partialCI 2 | R package for fitting the partially cointegrated model 3 | 4 | A collection of time series is partially cointegrated if a linear combination of these time series can be found so that the residual spread is partially autoregressive - meaning that it can be represented as a sum of an autoregressive series and a random walk. This concept is useful in modeling certain sets of financial time series and beyond, as it allows for the spread to contain transient and permanent components alike. 
Partial cointegration was introduced by Clegg and Krauss (2016), along with a large-scale empirical application to financial market data. The partialCI package comprises estimation, testing, and simulation routines for partial cointegration models in state space. Clegg et al. (2017) provide an in-depth discussion of the package functionality as well as illustrative examples in the fields of finance and macroeconomics.
5 | If a collection of time series is partially cointegrated, then the spread between
6 | them can be interpreted as a mean-reverting series that has possibly been
7 | contaminated with a (hopefully small) random walk.
8 |
9 | The development version can be found on GitHub. To use the development version of the partialCI package, you will need to start by installing it,
10 | which can be done using devtools:
11 |
12 | ```
13 | > install.packages("devtools") # if devtools is not already installed
14 | > devtools::install_github("matthewclegg/partialCI")
15 | ```
16 |
17 | To find the partially cointegrated model that best fits two series
18 | X and Y, use:
19 |
20 | ```
21 | > fit.pci(Y, X)
22 | ```
23 |
24 | An interface to Yahoo! Finance permits you to find the best fits for
25 | two particular stocks of interest:
26 |
27 | ```
28 | > yfit.pci("RDS-B", "RDS-A")
29 | Fitted values for PCI model
30 | Y[t] = alpha + X[t] %*% beta + M[t] + R[t]
31 | M[t] = rho * M[t-1] + eps_M [t], eps_M[t] ~ N(0, sigma_M^2)
32 | R[t] = R[t-1] + eps_R [t], eps_R[t] ~ N(0, sigma_R^2)
33 |
34 | Estimate Std. Err
35 | alpha 0.2063 0.8804
36 | beta_RDS-A 1.0531 0.0133
37 | rho 0.9055 0.0355
38 | sigma_M 0.2431 0.0162
39 | sigma_R 0.0993 0.0350
40 |
41 | -LL = 41.30, R^2[MR] = 0.863
42 | ```
43 |
44 | This example was run on 1/7/2016. RDS-A and RDS-B are two
45 | classes of shares offered by Royal Dutch Shell that differ slightly
46 | in aspects of their tax treatment. The above fit shows that
47 | the spread between the two shares is mostly mean-reverting but that
48 | it contains a small random walk component. The mean-reverting
49 | component accounts for 86.3% of the variance of the daily returns.
50 | The value of 0.9055 for rho corresponds to a half-life of mean
51 | reversion of about 7 trading days.
52 |
53 | To test the goodness of fit, the test.pci function can be used:
54 |
55 | ```
56 | > h <- yfit.pci("RDS-B", "RDS-A")
57 | > test.pci(h)
58 |
59 | Likelihood ratio test of [Random Walk or CI(1)] vs Almost PCI(1) (joint penalty method)
60 |
61 | data: h
62 |
63 | Hypothesis Statistic p-value
64 | Random Walk -4.94 0.010
65 | AR(1) -4.08 0.010
66 | Combined 0.010
67 |
68 | ```
69 |
70 | The test.pci function tests each of two different null hypotheses:
71 | (a) the residual series is purely a random walk, and (b) the residual series is
72 | purely autoregressive. In addition, the union of these hypotheses is
73 | also tested. For practical applications, one is usually most interested in
74 | rejecting the first of these null hypotheses, i.e., that the residual series
75 | is purely a random walk.
76 |
77 | The partialCI package also contains a function for searching for
78 | hedging portfolios. Given a particular stock (or time series),
79 | a search can be conducted to find the set of stocks that best
80 | replicate the target stock. In the following example, a hedge
81 | is sought for SPY using sector ETFs.
82 | 83 | ``` 84 | > sectorETFS <- c("XLB", "XLE", "XLF", "XLI", "XLK", "XLP", "XLU", "XLV", "XLY") 85 | > prices <- multigetYahooPrices(c("SPY", sectorETFS), start=20140101) 86 | > hedge.pci(prices[,"SPY"], prices) 87 | -LL LR[rw] p[rw] p[mr] rho R^2[MR] Factor | Factor coefficients 88 | 490.67 -1.7771 0.1782 0.0100 0.9587 0.8246 XLF | 6.8351 89 | 283.26 -4.3988 0.0137 0.0786 0.9642 1.0000 XLK | 3.6209 2.2396 90 | 168.86 -6.4339 0.0100 0.0100 0.7328 0.6619 XLI | 2.3191 1.6542 1.1391 91 | 92 | Fitted values for PCI model 93 | Y[t] = alpha + X[t] %*% beta + M[t] + R[t] 94 | M[t] = rho * M[t-1] + eps_M [t], eps_M[t] ~ N(0, sigma_M^2) 95 | R[t] = R[t-1] + eps_R [t], eps_R[t] ~ N(0, sigma_R^2) 96 | 97 | Estimate Std. Err 98 | alpha 14.2892 1.5598 99 | beta_XLF 2.3191 0.1439 100 | beta_XLK 1.6542 0.0804 101 | beta_XLI 1.1391 0.0662 102 | rho 0.7328 0.1047 103 | sigma_M 0.2678 0.0315 104 | sigma_R 0.2056 0.0401 105 | 106 | -LL = 168.86, R^2[MR] = 0.662 107 | ``` 108 | 109 | The top table displays the quality of the fit that is found as each new 110 | factor is added to the fit. The best fit consisting of only one factor 111 | is found by using XLF (the financials sector). The negative log likelihod 112 | score for this model is 490.67. However, the random walk 113 | hypothesis (p[rw]) cannot be rejected at the 5% level. When adding 114 | XLK (the technology sector), the negative log likelihood drops to 283.26 115 | and the random walk hypothesis for the spread can now be rejected. This means 116 | that SPY is at least partially cointegrated and possibly fully cointegrated 117 | with a portfolio consisting of XLF and XLK in the right proportions. The 118 | best overall fit is obtained by also adding XLI (industrials) to the hedging 119 | portfolio. The final fit is 120 | 121 | ``` 122 | SPY = $14.29 + 2.32 XLF + 1.65 XLK + 1.14 XLI 123 | ``` 124 | 125 | For this fit, the proportion of variance attributable to the mean reverting 126 | component is 66.2%, and the half life of mean reversion is about 2.2 days. 127 | 128 | Please feel free to write to us if you have questions or suggestions. 129 | 130 | Matthew Clegg 131 | 132 | matthewcleggphd@gmail.com 133 | 134 | Christopher Krauss 135 | 136 | christopher.krauss@fau.de 137 | 138 | Jonas Rende 139 | 140 | jonas.rende@fau.de 141 | 142 | April 21, 2017 143 | 144 | -------------------------------------------------------------------------------- /R/lrtables.R: -------------------------------------------------------------------------------- 1 | # lrtables.R -- functions for building up tables of likelihood ratios 2 | # Copyright (C) 2016 Matthew Clegg 3 | 4 | # This program is free software; you can redistribute it and/or modify 5 | # it under the terms of the GNU General Public License as published by 6 | # the Free Software Foundation; either version 2 of the License, or 7 | # (at your option) any later version. 8 | # 9 | # This program is distributed in the hope that it will be useful, 10 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | # GNU General Public License for more details. 13 | # 14 | # A copy of the GNU General Public License is available at 15 | # http://www.r-project.org/Licenses/ 16 | 17 | # 1. Start R 18 | # 2. Load the partialCI package 19 | # 3. Create the list of samples and likelihood tables by entering the 20 | # following command: 21 | # partialCI:::build_lrtables() 22 | # On my Macbook Pro, this takes about a week to run. 23 | # 4. 
Reload the set of likelihood tables: 24 | # partialCI:::load_lrtables() 25 | # 5. Regenerate the lrdata.R file 26 | # partialCI:::dump_lrdata() 27 | # 6. Locate the new lrdata.R file on disk and move it to the partialCI/R 28 | # source code directory. 29 | # 7. Rebuild the partialCI package 30 | # R CMD build partialCI 31 | # 8. Reinstall the partialCI package 32 | # R CMD install partialCI.tar.gz 33 | # 9. Delete the temporary directories samples/ and tables/ 34 | 35 | # if(getRversion() >= "2.15.1") utils::globalVariables(c("PAR.SAMPLES", 36 | # "PAR.JOINT.CRITICAL.VALUES.DT", 37 | # "PAR.JOINT.CRITICAL.VALUES.KPSS.DT", 38 | # "PAR.SAMPLES.DT")) 39 | 40 | # if (!exists("PAR.SAMPLES")) PAR.SAMPLES <- NULL 41 | # if (!exists("PAR.SAMPLES.DT")) PAR.SAMPLES.DT <- NULL 42 | # if (!exists("PAR.POWER.SAMPLES")) PAR.POWER.SAMPLES <- NULL 43 | # if (!exists("PAR.POWER.PVMR.SAMPLES")) PAR.POWER.PVMR.SAMPLES <- NULL 44 | # if (!exists("PAR.JOINT.CRITICAL.VALUES.DT")) PAR.JOINT.CRITICAL.VALUES.DT <- NULL 45 | # if (!exists("PAR.JOINT.CRITICAL.VALUES.KPSS.DT")) PAR.JOINT.CRITICAL.VALUES.KPSS.DT <- NULL 46 | 47 | utils::globalVariables(c("PCI.SAMPLES")) 48 | 49 | LR_TABLES_LIST <- c("PCI.RWNULL.JP.LRQT", 50 | "PCI.MRNULL.JP.LRQT", 51 | "PCI.RWNULL.ROB.JP.LRQT", 52 | "PCI.MRNULL.ROB.JP.LRQT", 53 | "PCI.RWNULL.TWOSTEP.LRQT", 54 | "PCI.MRNULL.TWOSTEP.LRQT", 55 | "PCI.RWNULL.ROB.TWOSTEP.LRQT", 56 | "PCI.MRNULL.ROB.TWOSTEP.LRQT", 57 | "PCI.JOINT.CRITICAL.VALUES") 58 | 59 | for (t in LR_TABLES_LIST) { 60 | if (!exists(t)) assign(t, NULL) 61 | } 62 | 63 | build_lrtables <- function (dir="tables", debug=FALSE, nrep=10000, rebuild_samples=TRUE) { 64 | # Rebuilds all of the tables that are contained in this file. 65 | # This function takes several days to a week to run to completion. 66 | # The tables are written to files in the specified directory, 67 | # where they can then be loaded back into R and used to update 68 | # this file. 
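  #
  # Typical invocation (several days to a week of runtime; the arguments shown
  # below are simply the defaults of this function):
  #   partialCI:::build_lrtables(dir="tables", debug=FALSE, nrep=10000, rebuild_samples=TRUE)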
69 | 70 | # Initial definition for PCI.Samples 71 | PCI.SAMPLES<-NULL 72 | 73 | if (debug) { 74 | nr <- 1 75 | } else { 76 | nr <- nrep 77 | } 78 | 79 | if (rebuild_samples) { 80 | cat("Rebuilding PCI likelihood ratio samples ...\n") 81 | pci.generate.likelihood_ratio.samples(nrep=nrep) 82 | } 83 | 84 | pci.load.likelihood_ratio.samples() 85 | SAMPLES <- PCI.SAMPLES 86 | 87 | dir.create(dir, recursive=TRUE, showWarnings=FALSE) 88 | 89 | pci.rwnull.jp.lrqt <- quantile.table.from.samples( 90 | "rw_lrt", 91 | SAMPLES[SAMPLES$sigma_M==0.0 & SAMPLES$robust == FALSE & SAMPLES$pci_opt == "jp",]) 92 | pci.rwnull.twostep.lrqt <- quantile.table.from.samples( 93 | "rw_lrt", 94 | SAMPLES[SAMPLES$sigma_M==0.0 & SAMPLES$robust == FALSE & SAMPLES$pci_opt == "twostep",]) 95 | pci.rwnull.rob.jp.lrqt <- quantile.table.from.samples( 96 | "rw_lrt", 97 | SAMPLES[SAMPLES$sigma_M==0.0 & SAMPLES$robust == TRUE & SAMPLES$pci_opt == "jp",]) 98 | pci.rwnull.rob.twostep.lrqt <- quantile.table.from.samples( 99 | "rw_lrt", 100 | SAMPLES[SAMPLES$sigma_M==0.0 & SAMPLES$robust == TRUE & SAMPLES$pci_opt == "twostep",]) 101 | 102 | pci.mrnull.jp.lrqt <- quantile.table.from.samples( 103 | "mr_lrt", 104 | SAMPLES[SAMPLES$sigma_R==0.0 & SAMPLES$robust == FALSE & SAMPLES$pci_opt == "jp",]) 105 | pci.mrnull.twostep.lrqt <- quantile.table.from.samples( 106 | "mr_lrt", 107 | SAMPLES[SAMPLES$sigma_R==0.0 & SAMPLES$robust == FALSE & SAMPLES$pci_opt == "twostep",]) 108 | pci.mrnull.rob.jp.lrqt <- quantile.table.from.samples( 109 | "mr_lrt", 110 | SAMPLES[SAMPLES$sigma_R==0.0 & SAMPLES$robust == TRUE & SAMPLES$pci_opt == "jp",]) 111 | pci.mrnull.rob.twostep.lrqt <- quantile.table.from.samples( 112 | "mr_lrt", 113 | SAMPLES[SAMPLES$sigma_R==0.0 & SAMPLES$robust == TRUE & SAMPLES$pci_opt == "twostep",]) 114 | 115 | dput (pci.rwnull.jp.lrqt, sprintf("%s/%s", dir, "PCI.RWNULL.JP.LRQT")) 116 | dput (pci.rwnull.twostep.lrqt, sprintf("%s/%s", dir, "PCI.RWNULL.TWOSTEP.LRQT")) 117 | dput (pci.rwnull.rob.jp.lrqt, sprintf("%s/%s", dir, "PCI.RWNULL.ROB.JP.LRQT")) 118 | dput (pci.rwnull.rob.twostep.lrqt, sprintf("%s/%s", dir, "PCI.RWNULL.ROB.TWOSTEP.LRQT")) 119 | 120 | dput (pci.mrnull.jp.lrqt, sprintf("%s/%s", dir, "PCI.MRNULL.JP.LRQT")) 121 | dput (pci.mrnull.twostep.lrqt, sprintf("%s/%s", dir, "PCI.MRNULL.TWOSTEP.LRQT")) 122 | dput (pci.mrnull.rob.jp.lrqt, sprintf("%s/%s", dir, "PCI.MRNULL.ROB.JP.LRQT")) 123 | dput (pci.mrnull.rob.twostep.lrqt, sprintf("%s/%s", dir, "PCI.MRNULL.ROB.TWOSTEP.LRQT")) 124 | 125 | pci.joint.critical.values <- pci.findall.joint.critical.values() 126 | dput (pci.joint.critical.values, sprintf("%s/%s", dir, "PCI.JOINT.CRITICAL.VALUES")) 127 | 128 | printf ("%s done\n", Sys.time()) 129 | } 130 | 131 | load_table <- function (..., dir="tables") { 132 | # Loads a table and stores it in a global variable 133 | for (table_name in list(...)) { 134 | printf("Loading %s\n", table_name) 135 | tab <- dget(sprintf("%s/%s", dir, table_name)) 136 | if (exists(table_name, envir=asNamespace("partialCI"))) { 137 | unlockBinding(table_name, asNamespace("partialCI")) 138 | } 139 | assign(table_name, tab, envir = asNamespace("partialCI")) 140 | } 141 | } 142 | 143 | load_lrtables <- function () { 144 | sapply(LR_TABLES_LIST, function (t) load_table(t)) 145 | 0 146 | } 147 | 148 | dump_lrdata <- function(filename="lrdata.R") { 149 | 150 | dump(LR_TABLES_LIST, filename) 151 | cat("\n\n", file=filename, append=TRUE) 152 | } 153 | 154 | -------------------------------------------------------------------------------- /man/fit.pci.Rd: 
-------------------------------------------------------------------------------- 1 | \name{fit.pci} 2 | \alias{fit.pci} 3 | \title{ 4 | Fits the partial cointegration model to a collection of time series 5 | } 6 | \description{ 7 | Fits the partial cointegration model to a collection of time series 8 | } 9 | \usage{ 10 | fit.pci(Y, X, 11 | pci_opt_method = c("jp", "twostep"), 12 | par_model = c("par", "ar1", "rw"), 13 | lambda = 0, 14 | robust = FALSE, nu = 5, 15 | include_alpha=FALSE) 16 | } 17 | %- maybe also 'usage' for other objects documented here. 18 | \arguments{ 19 | \item{Y}{ 20 | The time series that is to be modeled. A plain or \code{\link{zoo}} vector of length \code{n}. 21 | } 22 | \item{X}{ 23 | A (possibly \code{\link{zoo}}) matrix of dimensions \code{n} x \code{k}. If \code{k=1}, then this may be a plain or \code{\link{zoo}} vector. 24 | } 25 | \item{pci_opt_method}{ 26 | Specifies the method that will be used for finding the best fitting model. One of the following: 27 | \itemize{ 28 | \item "jp" The joint-penalty method (see below) 29 | \item "twostep" The two-step method (see below) 30 | } 31 | Default: \code{jp} 32 | 33 | } 34 | \item{par_model}{ 35 | The model used for the residual series. One of the following: 36 | \itemize{ 37 | \item "par" The residuals are assumed to follow a partially autoregressive model. 38 | \item "ar1" The residuals are assumed to be autoregressive of order one. 39 | \item "rw" The residuals are assumed to follow a random walk. 40 | } 41 | Default: \code{par} 42 | } 43 | \item{lambda}{ 44 | The penalty parameter to be used in the joint-penalty (\code{jp}) estimation method. 45 | Default: 0. 46 | } 47 | \item{robust}{ 48 | If \code{TRUE}, then the residuals are assumed to follow a t-distribution with 49 | \code{nu} degrees of freedom. Default: \code{FALSE}. 50 | } 51 | \item{nu}{ 52 | The degrees-of-freedom parameter to be used in robust estimation. Default: 5. 53 | } 54 | \item{include_alpha}{If TRUE, then a constant term is estimated with the model. 55 | If FALSE, the constant term is omitted. Default: \code{FALSE}. 56 | } 57 | } 58 | \details{ 59 | The partial cointegration model is given by the equations: 60 | 61 | \deqn{ 62 | Y_t = \alpha + \beta_1 * X_{t,1} + beta_2 * X_{t,2} + ... + beta_k * X_{t,k} + M_t + R_t}{ 63 | Y[t] = alpha + beta[1] * X[t,1] + beta[2] * X[t,2] + ... + beta[k] * X[t,k] + M[t] + R[t] 64 | } 65 | \deqn{M_t = \rho M_{t-1} + \epsilon_{M,t}}{M[t] = rho * M[t-1] + epsilon_M[t]} 66 | \deqn{R_t = R_{t-1} + \epsilon_{R,t}}{R[t] = R[t-1] + epsilon_R[t]} 67 | \deqn{-1 < \rho < 1}{-1 < rho < 1} 68 | \deqn{\epsilon_{M,t} \sim N(0,\sigma_M^2)}{epsilon_M[t] ~ N(0, sigma_M^2)} 69 | \deqn{\epsilon_{R,t} \sim N(0,\sigma_R^2)}{epsilon_R[t] ~ N(0, sigma_R^2)} 70 | 71 | Given the input series 72 | \code{Y} and \code{X}, 73 | this function searches for the parameter values 74 | \code{alpha}, \code{beta}, \code{rho} 75 | that give the best fit of this model when using a Kalman filter. 76 | 77 | If \code{pci_opt_method} is \code{twostep}, then a two-step procedure is used. 78 | In the first step, a linear regression is performed of \code{X} on \code{Y} to determine 79 | the parameters \code{alpha} and \code{beta}. From this regression, a series of residuals 80 | is determined. In the second step, a model is fit to the residual series. If 81 | \code{par_model} is \code{par}, then a partially autoregressive model is fit to 82 | the residual series. 
If \code{par_model} is \code{ar1}, then an autoregressive model 83 | is fit to the residual series. If \code{par_model} is \code{rw} then a random walk 84 | model is fit to the residual series. Note that if \code{pci_opt_method} is \code{twostep} 85 | and \code{par_model} is \code{ar1}, then this reduces to the Engle-Granger two-step 86 | procedure. 87 | 88 | If \code{pci_opt_method} is \code{jp}, then the joint-penalty procedure is used. 89 | In this method, the parameters \code{alpha} and \code{beta} are estimated jointly 90 | with the parameter \code{rho} using a gradient-search optimization function. 91 | In addition, a penalty value of 92 | \eqn{\lambda * \sigma_R^2}{lambda * sigma_R^2} 93 | is added to the Kalman filter likelihood score when searching for the 94 | optimum solution. By choosing a positive value for \code{lambda}, you can drive 95 | the solution towards a value that places greater emphasis on the mean-reverting 96 | component. 97 | 98 | Because the joint-penalty method uses gradient search, the final parameter 99 | values found are dependent upon the starting point. There is no guarantee 100 | that a global optimum will be found. However, the joint-penalty method chooses 101 | several different starting points, so as to increase the chance of finding a 102 | global optimum. One of the chosen starting points consists of the parameters 103 | found through the two-step procedure. Because of this, the joint-penalty method 104 | is guaranteed to find parameter values which give a likelihood score at least 105 | as good as those found using the two-step procedure. Sometimes the improvement 106 | over the two-step procedure is substantial. 107 | 108 | The parameter \code{include_alpha} determines whether or not a constant term 109 | is included with the fit. Note that the model \code{alpha = a, R0 = 0} is 110 | equivalent to the model \code{alpha = 0, R0 = a}. Thus, the constant term can 111 | be interpreted as an estimate of the mean of the system at time \code{t=0}. 112 | As the random walk evolves, the mean will drift correspondingly. 113 | } 114 | \value{ 115 | An object of class \code{pci.fit} containing the fit that was found. The following components 116 | may be of interest 117 | \item{alpha}{The constant term of the fit} 118 | \item{alpha.se}{The estimated standard error of \code{alpha}} 119 | \item{beta}{The vector of weights} 120 | \item{beta.se}{The standard errors of the components of \code{beta}} 121 | \item{rho}{The estimated coefficient of mean reversion} 122 | \item{rho.se}{The standard error of \code{rho}} 123 | \item{negloglik}{The negative of the log likelihood} 124 | \item{pvmr}{The proportion of variance attributable to mean reversion} 125 | %% ~Describe the value returned 126 | %% If it is a LIST, use 127 | %% \item{comp1 }{Description of 'comp1'} 128 | %% \item{comp2 }{Description of 'comp2'} 129 | %% ... 130 | } 131 | \references{ 132 | Clegg, Matthew, 2015. 133 | Modeling Time Series with Both Permanent and Transient Components 134 | using the Partially Autoregressive Model. 135 | \emph{Available at SSRN: http://ssrn.com/abstract=2556957} 136 | 137 | Clegg, Matthew and Krauss, Christopher, 2016. Pairs trading with partial cointegration. FAU Discussion Papers in Economics, University of Erlangen-Nuernberg. 
138 | \emph{Available at https://www.iwf.rw.fau.de/files/2016/05/05-2016.pdf} 139 | } 140 | \author{ 141 | Matthew Clegg \email{matthewcleggphd@gmail.com} 142 | 143 | Christopher Krauss \email{christopher.krauss@fau.de} 144 | 145 | Jonas Rende \email{jonas.rende@fau.de} 146 | } 147 | \seealso{ 148 | \code{\link[egcm:egcm]{egcm}} Engle-Granger cointegration model 149 | 150 | \code{\link[partialAR:partialAR]{partialAR}} Partially autoregressive models 151 | } 152 | \examples{ 153 | ##---- Should be DIRECTLY executable !! ---- 154 | ##-- ==> Define data, use random, 155 | ##-- or do help(data=index) for the standard data sets. 156 | 157 | 158 | YX <- rpci(n=1000, alpha=1, beta=c(2,3,4), sigma_C=c(1,1,1), rho=0.9,sigma_M=0.1, sigma_R=0.2) 159 | fit.pci(YX[,1], YX[,2:ncol(YX)]) 160 | 161 | } 162 | % Add one or more standard keywords, see file 'KEYWORDS' in the 163 | % R documentation directory. 164 | \keyword{ ts } 165 | \keyword{ models } 166 | 167 | -------------------------------------------------------------------------------- /man/hedge.pci.Rd: -------------------------------------------------------------------------------- 1 | \name{hedge.pci} 2 | \alias{hedge.pci} 3 | \title{ 4 | Searches for a partially cointegrated hedge for a given time series 5 | } 6 | \description{ 7 | Given a time series and a collection of possible factors, finds a subset of the 8 | factors that provides the best fit to the given time series using the partially 9 | cointegrated model. 10 | } 11 | \usage{ 12 | hedge.pci(Y, X, 13 | maxfact = 10, 14 | lambda = 0, 15 | use.multicore = TRUE, 16 | minimum.stepsize = 0, 17 | verbose = TRUE, 18 | exclude.cols = c(), 19 | search_type = c("lasso", "full", "limited"), 20 | pci_opt_method=c("jp", "twostep"), 21 | ...) 22 | } 23 | \arguments{ 24 | \item{Y}{ 25 | An \code{N x 1} column vector or data \code{data.frame}, 26 | representing the series that is to be hedged. 27 | } 28 | \item{X}{ 29 | An \code{N x L} \code{data.frame}, where each column represents a possible factor to 30 | be used in a partially cointegrated fit. 31 | } 32 | \item{maxfact}{ 33 | The maximum number of columns from \code{X} that will be selected for modeling \code{Y}. 34 | Default: 10 35 | } 36 | \item{lambda}{ 37 | A penalty to be applied to the random walk 38 | portion of the partialAR model. A positive value for \code{lambda} 39 | will drive the model towards a solution with a smaller random walk component. 40 | Default: 0 41 | } 42 | \item{use.multicore}{ 43 | If \code{TRUE}, parallel processing will be used to improve performance. 44 | See \code{\link[parallel:mclapply]{parallel:mclapply}} 45 | Default: \code{TRUE} 46 | } 47 | \item{minimum.stepsize}{ 48 | If this is non-NA, then the search stops if an improvement 49 | cannot be found of at least this much. 50 | Default: 0 51 | } 52 | \item{verbose}{ 53 | If \code{TRUE}, then detailed information is printed about 54 | the execution. 55 | Default: \code{TRUE} 56 | } 57 | \item{exclude.cols}{ 58 | A list of column indexes specifying columns from \code{X} which 59 | should be excluded from consideration. Alternatively, the list of 60 | excluded columns may be given as a list of strings, in which case 61 | they are interepreted as column names. 62 | Default: \code{c()} 63 | } 64 | \item{search_type}{ 65 | If "lasso", then the lasso algorithm (see \code{\link{glmnet}}) is 66 | used to identify the factors that provide the best linear fit to 67 | the target sequence. 
68 | If "full", then a greedy algorithm is used to search for 69 | factors to be used in the hedge. At each step, all possible additions 70 | to the portfolio are considered, and the best one is chosen for inclusion. 71 | If "limited", then at each iteration, a preliminary screening step is performed 72 | to identify the securities with the highest correlations to the residuals 73 | of the currently selected portfolio. The top securities from this list are 74 | then checked for whether they would improve the portfolio, and the best one 75 | included. 76 | } 77 | \item{pci_opt_method}{ 78 | Specifies the method that will be used for finding the best fitting model. One of the following: 79 | \itemize{ 80 | \item \code{"jp"} The joint-penalty method (see \code{\link{fit.pci}}) 81 | \item \code{"twostep"} The two-step method (see \code{\link{fit.pci}}) 82 | } 83 | Default: \code{jp} 84 | 85 | } 86 | \item{\dots}{ 87 | Other parameters to be passed onto the search function. See the source 88 | code. 89 | } 90 | } 91 | \details{ 92 | The hedge is constructed by searching for column indices \code{i1,i2, ..., iN} 93 | from among the columns of \code{X} which yield the best fit to the partially 94 | cointegrated fit: 95 | 96 | \deqn{ 97 | Y_t = \alpha + \beta_1 * X_{t,i1} + beta_2 * X_{t,i2} + ... + beta_N * X_{t,iN} + M_t + R_t}{ 98 | Y[t] = alpha + beta[1] * X[t,i1] + beta[2] * X[t,i2] + ... + beta[k] * X[t,iN] + M[t] + R[t] 99 | } 100 | \deqn{M_t = \rho M_{t-1} + \epsilon_{M,t}}{M[t] = rho * M[t-1] + epsilon_M[t]} 101 | \deqn{R_t = R_{t-1} + \epsilon_{R,t}}{R[t] = R[t-1] + epsilon_R[t]} 102 | \deqn{-1 < \rho < 1}{-1 < rho < 1} 103 | \deqn{\epsilon_{M,t} \sim N(0,\sigma_M^2)}{epsilon_M[t] ~ N(0, sigma_M^2)} 104 | \deqn{\epsilon_{R,t} \sim N(0,\sigma_R^2)}{epsilon_R[t] ~ N(0, sigma_R^2)} 105 | 106 | if \code{search_type="lasso"} is specified, then the lasso algorithm 107 | (see \code{\link{glmnet}}) is used to search for the factors that give 108 | the best linear fit to the target sequence \code{Y}. Having determined 109 | the list of factors, the cutoff point is determined based successive 110 | improvements to the likelihood score of the fitted model. 111 | 112 | Otherwise, a greedy algorithm (\code{search_type="full"}) or a modified greedy algorithm 113 | (\code{search_type="limited"}) is used. This proceeds by searching through all 114 | columns of \code{X} (except those listed in \code{exclude.cols}) to find the 115 | column that gives the best fit to \code{Y}, as determined by 116 | the likelihood score of the partially cointegrated model. This column becomes the initial 117 | hedging portfolio. Having selected columns \code{i1, i2, ..., iK}, the next 118 | column is found by searching through all remaining columns of \code{X} (except those 119 | listed in \code{exclude.cols}) for the column which gives the best improvement 120 | to the partially cointegrated fit. However, if the best improvement is less than 121 | \code{minimum.stepsize}, or if \code{maxfact} columns have already been added, 122 | then the search terminates. 123 | 124 | In the case of the modified greedy algorithm (\code{search_type="limited"}), a 125 | preprocessing step is used at the beginning of each iteration. In this preprocessing 126 | step, the correlation is computed between each unused column of \code{X} and the 127 | residual series of the currently computed best fit. The top \code{B} choices are then 128 | considered for inclusion in the portfolio, where \code{B} is a branching factor. 
129 | The branching factor can be controlled by setting the value of the optional parameter 130 | \code{max.branch}. Its default value is 10. 131 | 132 | The \code{lasso} algorithm is by far the fastest, followed by the \code{limited} greedy search. 133 | So, the best strategy is probably to start by using the \code{lasso}. If it fails to 134 | produce acceptable results, then move on to the \code{limited} greedy algorithm and finally 135 | the \code{full} search. 136 | } 137 | \value{ 138 | Returns an S3 object of class \code{pci.hedge} containing the following fields 139 | \item{pci}{The best partially cointegrated fit that was found} 140 | \item{indexes}{The indexes of the columns from \code{X} that were selected} 141 | \item{index_names}{The names of the columns from \code{X} that were selected} 142 | } 143 | \author{ 144 | Matthew Clegg \email{matthewcleggphd@gmail.com} 145 | 146 | Christopher Krauss \email{christopher.krauss@fau.de} 147 | 148 | Jonas Rende \email{jonas.rende@fau.de} 149 | } 150 | \seealso{ 151 | \code{\link{fit.pci}} Fitting of partially cointegrated models 152 | 153 | \code{\link[partialAR:partialAR]{partialAR}} Partially autoregressive models 154 | 155 | \code{\link[egcm:egcm]{egcm}} Engle-Granger cointegration model 156 | } 157 | \examples{ 158 | ##---- Should be DIRECTLY executable !! ---- 159 | ##-- ==> Define data, use random, 160 | ##-- or do help(data=index) for the standard data sets. 161 | 162 | \dontrun{YX <- rpci(n=1000, alpha=1, beta=c(2,3,4,5,6), 163 | sigma_C=c(0.1,0.1,0.1,0.1,0.1), rho=0.9, sigma_M=1, sigma_R=1) 164 | YXC <- cbind(YX, matrix(rnorm(5000), ncol=5)) 165 | hedge.pci(YX[,1], YX[,2:ncol(YX)]) 166 | hedge.pci(YXC[,1], YXC[,2:ncol(YXC)])} 167 | 168 | } 169 | % Add one or more standard keywords, see file 'KEYWORDS' in the 170 | % R documentation directory. 171 | \keyword{ ts } 172 | \keyword{ models } 173 | -------------------------------------------------------------------------------- /src/cfit.cc: -------------------------------------------------------------------------------- 1 | /* cfit.C -- high-speed fitting of partially AR(1) models */ 2 | /* Copyright 2015 Matthew Clegg */ 3 | 4 | /* 5 | # This program is free software; you can redistribute it and/or modify 6 | # it under the terms of the GNU General Public License as published by 7 | # the Free Software Foundation; either version 2 of the License, or 8 | # (at your option) any later version. 9 | # 10 | # This program is distributed in the hope that it will be useful, 11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | # GNU General Public License for more details. 
14 | # 15 | # A copy of the GNU General Public License is available at 16 | # http://www.r-project.org/Licenses/ 17 | */ 18 | 19 | #include 20 | using namespace Rcpp; 21 | 22 | const double kPi = 3.14159265358979323846; 23 | 24 | // [[Rcpp::export]] 25 | double lagged_variance_c (NumericVector X, int k, int n) { 26 | // Computes the variance of (1-B^k)X[(k+1)..n] 27 | 28 | if (k < 1 || k >= n-2 || n < 4 || n+k > X.size()) return(NA_REAL); 29 | 30 | double s = 0.0; 31 | double ssq = 0.0; 32 | for (int i = 0; i < n; i++) { 33 | double dx = X[i+k] - X[i]; 34 | s += dx; // This is a telescoping sum, but taking advantage of this 35 | // fact is not going to improve execution time noticeably 36 | ssq += dx*dx; 37 | } 38 | double v = (ssq - s*s/n)/(n-1); 39 | return (v); 40 | } 41 | 42 | // [[Rcpp::export]] 43 | 44 | double estimate_rho_par_c (NumericVector X) { 45 | // Computes an estimate of mean reversion for the mean-reverting 46 | // portion of a PAR process. If v[k] = Var[X[t+k]-X[t]], then 47 | // rho is given by the variance formula: 48 | // rho = - (v[1] - 2 * v[2] + v[3]) / (2 * v[1] - v[2]) 49 | 50 | int n = X.size(); 51 | if (n < 5) return(NA_REAL); 52 | 53 | double xv1 = lagged_variance_c(X, 1, n-3); 54 | double xv2 = lagged_variance_c(X, 2, n-3); 55 | double xv3 = lagged_variance_c(X, 3, n-3); 56 | 57 | double rho = -(xv1 - 2 * xv2 + xv3) / (2 * xv1 - xv2); 58 | if (rho < -0.99) rho = -1.0; 59 | if (rho > 0.99) rho = 1.0; 60 | 61 | return rho; 62 | } 63 | 64 | // [[Rcpp::export]] 65 | 66 | NumericVector estimate_par_c (NumericVector X, double rho_max = 1.0) { 67 | /* Estimates the parameters of an partially AR(1) sequence using the 68 | variances of the differences. On input, X is an array of doubles, 69 | representing the time series of the data. p is an output vector 70 | of length 3. On return, the p[1] will contain rho, p[2] will 71 | contain sigma_M and p[3] will contain sigma_R. 72 | */ 73 | 74 | int n = X.size(); 75 | if (n < 5) return(NumericVector::create(NA_REAL, NA_REAL, NA_REAL)); 76 | 77 | double xv1 = lagged_variance_c(X, 1, n-3); 78 | double xv2 = lagged_variance_c(X, 2, n-3); 79 | double xv3 = lagged_variance_c(X, 3, n-3); 80 | 81 | double rho = -(xv1 - 2 * xv2 + xv3) / (2 * xv1 - xv2); 82 | if (rho > rho_max) rho = rho_max; 83 | if (rho < -0.99) { 84 | return NumericVector::create(-1.0, sqrt(xv1), 0.0); 85 | } else if (rho > 0.99) { 86 | return NumericVector::create(0.0, 0.0, sqrt(xv1)); 87 | } 88 | 89 | double sigma2_M = 0.5 * ((rho + 1.0)/(rho - 1.0)) * (xv2 - 2.0 * xv1); 90 | if (sigma2_M > 0.5 * xv2) sigma2_M = 0.5 * xv2; // Dead code? 
91 | if (sigma2_M < 0.0) sigma2_M = 0.0; 92 | 93 | double sigma2_R = 0.5 * (xv2 - 2.0 * sigma2_M); 94 | 95 | double sigma_M = sqrt(sigma2_M); 96 | double sigma_R = sqrt(sigma2_R); 97 | 98 | return NumericVector::create(rho, sigma_M, sigma_R); 99 | } 100 | 101 | // [[Rcpp::export]] 102 | 103 | double pvmr_par_c(double rho, double sigma_M, double sigma_R) { 104 | // Returns the proportion of variance attributable to mean reversion 105 | // for a PAR process with parameters rho, sigma_M and sigma_R 106 | 107 | if ((rho < -1.0) || (rho > 1.0)) return(NA_REAL); 108 | if (sigma_M < 0.0) return(NA_REAL); 109 | if (sigma_R < 0.0) return(NA_REAL); 110 | if ((sigma_M == 0.0) && (sigma_R == 0.0)) return(NA_REAL); 111 | 112 | double sigma2_M = sigma_M * sigma_M; 113 | double sigma2_R = sigma_R * sigma_R; 114 | double r2 = (2 * sigma2_M) / (2 * sigma2_M + (1 + rho) * sigma2_R); 115 | return r2; 116 | } 117 | 118 | // [[Rcpp::export]] 119 | 120 | double kalman_gain_par_mr (double rho, double sigma_M, double sigma_R) { 121 | // Computes the values of the steady state Kalman gain K_mr 122 | // of the mean-reverting portion of the state equation 123 | // based on the parameters rho, sigma_M and sigma_R given 124 | // as input. The Kalman gain of the random walk portion of the 125 | // state equation is 1 - K_mr. 126 | 127 | if ((rho < -1.0) || (rho > 1.0)) return(NA_REAL); 128 | if (sigma_M < 0.0) return(NA_REAL); 129 | if (sigma_R < 0.0) return(NA_REAL); 130 | if ((sigma_M == 0.0) && (sigma_R == 0.0)) return(NA_REAL); 131 | 132 | if (sigma_M == 0.0) return 0.0; 133 | if (sigma_R == 0.0) return 1.0; 134 | 135 | double sigma2_M = sigma_M * sigma_M; 136 | double sigma2_R = sigma_R * sigma_R; 137 | double rad = sqrt((rho + 1.0)*(rho + 1.0) * sigma2_R + 4.0 * sigma2_M); 138 | 139 | double num = 2.0 * sigma2_M; 140 | double den = sigma_R*(rad + (1.0 + rho) * sigma_R) + 2.0 * sigma2_M; 141 | double K_mr = num / den; 142 | 143 | return K_mr; 144 | } 145 | 146 | // [[Rcpp::export]] 147 | 148 | double loglik_par_c (NumericVector Y, double rho, double sigma_M, double sigma_R, double M0, double R0) { 149 | /* Computes the log likelihood of Y given parameters p using 150 | a steady state Kalman filter. Returns the negative log 151 | likelihood value. 152 | */ 153 | 154 | if ((rho < -1.0) || (rho > 1.0)) return(NA_REAL); 155 | if (sigma_M < 0.0) return(NA_REAL); 156 | if (sigma_R < 0.0) return(NA_REAL); 157 | if ((sigma_M == 0.0) && (sigma_R == 0.0)) return(NA_REAL); 158 | 159 | int n = Y.size(); 160 | if (n < 1) return(NA_REAL); 161 | 162 | double K_mr = kalman_gain_par_mr (rho, sigma_M, sigma_R); 163 | double K_rw = 1.0 - K_mr; 164 | 165 | double esumsq = 0.0; 166 | double M = M0; 167 | double R = R0; 168 | double tvar = sigma_M*sigma_M + sigma_R*sigma_R; 169 | 170 | for (int i=0; i < n; i++) { 171 | double xhat = rho * M + R; 172 | double e = Y[i] - xhat; 173 | esumsq += e*e; 174 | M = rho * M + e * K_mr; 175 | R += e * K_rw; 176 | } 177 | 178 | double nll = (n/2.0)*log(tvar * 2.0*kPi) + esumsq/(2.0*tvar); 179 | return nll; 180 | } 181 | 182 | // [[Rcpp::export]] 183 | 184 | double loglik_par_t_c (NumericVector Y, double rho, double sigma_M, double sigma_R, 185 | double M0, double R0, double nu=5.0) { 186 | /* Computes the log likelihood of Y given parameters p using 187 | a steady state Kalman filter using the assumption that the 188 | error terms will be t-distributed. Returns the negative log 189 | likelihood value. 
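     Each one-step prediction error e is scored with a Student-t density with
     nu degrees of freedom and scale tsd = sqrt(sigma_M^2 + sigma_R^2), i.e.
       log f(e) = lgamma((nu+1)/2) - lgamma(nu/2) - 0.5*log(nu*pi) - log(tsd)
                  - ((nu+1)/2) * log(1 + (e/tsd)^2 / nu),
     and the function returns -sum log f(e) over all observations.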
190 | */ 191 | 192 | if ((rho < -1.0) || (rho > 1.0)) return(NA_REAL); 193 | if (sigma_M < 0.0) return(NA_REAL); 194 | if (sigma_R < 0.0) return(NA_REAL); 195 | if ((sigma_M == 0.0) && (sigma_R == 0.0)) return(NA_REAL); 196 | if (nu < 2) return(NA_REAL); 197 | 198 | int n = Y.size(); 199 | if (n < 1) return(NA_REAL); 200 | 201 | double K_mr = kalman_gain_par_mr (rho, sigma_M, sigma_R); 202 | double K_rw = 1.0 - K_mr; 203 | 204 | double esum = 0.0; 205 | double M = M0; 206 | double R = R0; 207 | // double tvar = (sigma_M*sigma_M + sigma_R*sigma_R) * (nu - 2.0) / nu; 208 | double tvar = (sigma_M*sigma_M + sigma_R*sigma_R); 209 | double tsd = sqrt(tvar); 210 | double const_term = n * (lgamma((nu+1.0)*0.5) - 0.5*log(nu * kPi) - 211 | lgamma(nu*0.5) - log(tsd)); 212 | 213 | for (int i=0; i < n; i++) { 214 | double xhat = rho * M + R; 215 | double e = Y[i] - xhat; 216 | esum += log(1 + (e*e/tvar)/nu); 217 | M = rho * M + e * K_mr; 218 | R += e * K_rw; 219 | } 220 | 221 | double nll = ((nu + 1.0) * 0.5) * esum - const_term; 222 | return nll; 223 | } 224 | -------------------------------------------------------------------------------- /R/fit_pci.R: -------------------------------------------------------------------------------- 1 | # Functions pertaining to fitting partially cointegrated (PCI) series 2 | # Copyright (C) 2016 Matthew Clegg 3 | # This program is free software; you can redistribute it and/or modify 4 | # it under the terms of the GNU General Public License as published by 5 | # the Free Software Foundation; either version 2 of the License, or 6 | # (at your option) any later version. 7 | # 8 | # This program is distributed in the hope that it will be useful, 9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | # GNU General Public License for more details. 12 | # 13 | # A copy of the GNU General Public License is available at 14 | # http://www.r-project.org/Licenses/ 15 | 16 | fit.pci.twostep <- function (Y, X, par_model=c("par", "ar1", "rw"), robust=FALSE, nu=5, 17 | include_alpha=FALSE) { 18 | # On input, Y is an n x 1 column vector, and X is an n x k matrix. 19 | # Fits the model 20 | # 21 | # Y[t] = alpha + beta[1] X[t,1] + beta[2] X[t,2] + ... + beta [k] X[t,k] 22 | # + m[t] + r[t] 23 | # 24 | # where 25 | # 26 | # m[t] = rho * m[t-1] + eps_M[t] 27 | # r[t] = r[t-1] + eps_R[t] 28 | # eps_M,t ~ N(0,1) 29 | # eps_R,t ~ N(0,1) 30 | # m[0] = r[0] = 0 31 | # 32 | # Estimates the values of alpha, beta, rho, sigma_M and sigma_R using 33 | # the two step procedure: 34 | # 1. Perform a linear regression of Y on X to obtain alpha and beta 35 | # 2. Determine rho, sigma_M and sigma_R through fitting a PAR model 36 | # to the residuals of the linear regression performed in step 1. 37 | # 38 | # If include_alpha is FALSE, then the parameter alpha is omitted from 39 | # the model, and we take r[0] = Y[0] - beta * X[0]. 40 | # 41 | # The parameter par_model specifies which parameters of the PAR model are 42 | # to be optimized. 43 | 44 | par_model <- match.arg(par_model) 45 | 46 | if (include_alpha) { 47 | fit.pci.twostep.a (Y, X, par_model, robust, nu) 48 | } else { 49 | fit.pci.twostep.a0 (Y, X, par_model, robust, nu) 50 | } 51 | } 52 | 53 | pci.jointpenalty.guess <- function (Y, X, include_alpha=FALSE) { 54 | # On input, Y is an n x 1 column vector, and X is an n x k matrix. 55 | # Generates a guess for the starting point of optimization for an PCI 56 | # fit as follows: 57 | # 1. 
Estimates beta by fitting the differenced X series to the 58 | # differenced Y series. 59 | # 2. Estimates alpha by a Y[1] - X[1,] %*% beta 60 | # 3. Estimates rho, sigma_M and sigma_R by fitting a PAR series 61 | # to Y - X %*% beta - alpha 62 | # Returns the guess that is found. 63 | # 64 | # If include_alpha is FALSE, then the parameter alpha is omitted 65 | # from the model. 66 | 67 | if (include_alpha) { 68 | pci.jointpenalty.guess.a(Y, X) 69 | } else { 70 | pci.jointpenalty.guess.a0(Y, X) 71 | } 72 | } 73 | 74 | fit.pci.jointpenalty.rw <- function (Y, X, 75 | lambda=0, 76 | p0=pci.jointpenalty.guess(Y,X,include_alpha), 77 | robust=FALSE, 78 | nu=5, 79 | pgtol=1e-8, 80 | include_alpha=FALSE) { 81 | # On input, Y is an n x 1 column vector, and X is an n x k matrix. 82 | # Fits an PCI model to Y,X where the residual series is modeled as 83 | # a random walk. Returns a three-component list: 84 | # par: The parameter estimates 85 | # se: The estimated standard error of the parameter estimates 86 | # nll: The negative log likelihood of the fit 87 | 88 | if (include_alpha) { 89 | fit.pci.jointpenalty.rw.a (Y, X, lambda, p0, robust, nu, pgtol) 90 | } else { 91 | fit.pci.jointpenalty.rw.a0 (Y, X, lambda, p0, robust, nu, pgtol) 92 | } 93 | } 94 | 95 | fit.pci.jointpenalty.mr <- function (Y, X, 96 | lambda=0, 97 | p0=pci.jointpenalty.guess(Y,X,include_alpha), 98 | robust=FALSE, 99 | nu=5, 100 | pgtol=1e-8, 101 | include_alpha=FALSE) { 102 | # On input, Y is an n x 1 column vector, and X is an n x k matrix. 103 | # Fits an PCI model to Y,X where the residual series is modeled as 104 | # a pure AR(1) series. Returns a three-component list: 105 | # par: The parameter estimates 106 | # se: The estimated standard error of the parameter estimates 107 | # nll: The negative log likelihood of the fit 108 | 109 | if (include_alpha) { 110 | fit.pci.jointpenalty.mr.a (Y, X, lambda, p0, robust, nu, pgtol) 111 | } else { 112 | fit.pci.jointpenalty.mr.a0 (Y, X, lambda, p0, robust, nu, pgtol) 113 | } 114 | } 115 | 116 | fit.pci.jointpenalty.both <- function (Y, X, 117 | lambda=0, 118 | p0=pci.jointpenalty.guess(Y,X,include_alpha), 119 | robust=FALSE, 120 | nu=5, 121 | pgtol=1e-8, 122 | include_alpha=FALSE) { 123 | # On input, Y is an n x 1 column vector, and X is an n x k matrix. 124 | # Fits an PCI model to Y,X. Returns a three-component list: 125 | # par: The parameter estimates 126 | # se: The estimated standard error of the parameter estimates 127 | # nll: The negative log likelihood of the fit 128 | 129 | if (include_alpha) { 130 | fit.pci.jointpenalty.both.a (Y, X, lambda, p0, robust, nu, pgtol) 131 | } else { 132 | fit.pci.jointpenalty.both.a0 (Y, X, lambda, p0, robust, nu, pgtol) 133 | } 134 | } 135 | 136 | fit.pci.jointpenalty <- function (Y, X, 137 | par_model=c("par", "ar1", "rw"), 138 | lambda=0, 139 | robust=FALSE, 140 | nu=5, 141 | include_alpha=FALSE) { 142 | # On input, Y is an n x 1 column vector, and X is an n x k matrix. 143 | # lambda is a penalty value that drives the optimization towards a 144 | # solution where sigma_R has the lowest possible value. 145 | # 146 | # Fits the model 147 | # 148 | # Y[t] = alpha + beta[1] X[t,1] + beta[2] X[t,2] + ... 
+ beta [k] X[t,k] 149 | # + m[t] + r[t] 150 | # 151 | # where 152 | # 153 | # m[t] = rho * m[t-1] + eps_M[t] 154 | # r[t] = r[t-1] + eps_R[t] 155 | # eps_M,t ~ N(0,sigma_M^2) 156 | # eps_R,t ~ N(0,sigma_R^2) 157 | # m[0] = r[0] = 0 158 | # 159 | # Estimates the values of alpha, beta, rho, sigma_M and sigma_R using 160 | # the following procedure: 161 | # 1. Initial estimates of beta are obtained by regressing the 162 | # first differences of X[,i] on the first difference of Y. 163 | # 2. Given these estimates for beta, an initial estimate for 164 | # alpha is obtained as alpha = Y[1] - beta X[1,]. 165 | # 3. Initial estimates of rho, sigma_M and sigma_R are obtained by 166 | # fitting a PAR model to the residuals 167 | # 4. Having obtained these initial estimates for all of the parameters, 168 | # the following cost function is maximized to final parameter 169 | # estimates: 170 | # C[rho, sigma_M, sigma_R] = -LL[rho, sigma_M, sigma_R] + lambda * sigma_R^2 171 | # In the above, LL is the log likelihood function for the steady 172 | # state Kalman filter with parameters rho, sigma_M and sigma_R. 173 | # 174 | # The parameter par_model specifies which parameters of the PAR model are 175 | # to be optimized. If par == "par", then rho, sigma_M and sigma_R are each 176 | # estimated. If par == "ar1", then rho and sigma_M are estimated, while 177 | # sigma_R is fixed at 0. If par == "rw", then sigma_R is estimated, while 178 | # rho and sigma_M are fixed at 0. 179 | # 180 | # If include_alpha is FALSE, then the parameter alpha is omitted from 181 | # the model, and we take r[0] = Y[0] - beta * X[0]. 182 | 183 | par_model <- match.arg(par_model) 184 | 185 | if (include_alpha) { 186 | fit.pci.jointpenalty.a (Y, X, lambda, p0, robust, nu) 187 | } else { 188 | fit.pci.jointpenalty.a0 (Y, X, lambda, p0, robust) 189 | } 190 | } 191 | 192 | fit.pci <- function (Y, X, 193 | pci_opt_method=c("jp", "twostep"), 194 | par_model=c("par", "ar1", "rw"), 195 | lambda=0, 196 | robust=FALSE, 197 | nu=5, 198 | include_alpha=FALSE) { 199 | # On input, Y is an n x 1 column vector, and X is an n x k matrix. 200 | # lambda is a penalty value that drives the optimization towards a 201 | # solution where sigma_R has the lowest possible value. 202 | # 203 | # Fits the model 204 | # 205 | # Y[t] = alpha + beta[1] X[t,1] + beta[2] X[t,2] + ... + beta [k] X[t,k] 206 | # + m[t] + r[t] 207 | # 208 | # where 209 | # 210 | # m[t] = rho * m[t-1] + eps_M[t] 211 | # r[t] = r[t-1] + eps_R[t] 212 | # eps_M,t ~ N(0,sigma_M^2) 213 | # eps_R,t ~ N(0,sigma_R^2) 214 | # m[0] = r[0] = 0 215 | # 216 | # Fits the model using either the two step procedure or the joint penalty 217 | # procedure, according to the value of the parameter 'pci_opt_method'. Returns 218 | # an S3 object of class "pci.fit" representing the fit that was obtained. 219 | # 220 | # The parameter par_model specifies which parameters of the PAR model are 221 | # to be optimized. If par == "par", then rho, sigma_M and sigma_R are each 222 | # estimated. If par == "ar1", then rho and sigma_M are estimated, while 223 | # sigma_R is fixed at 0. If par == "rw", then sigma_R is estimated, while 224 | # rho and sigma_M are fixed at 0. 225 | # 226 | # If include_alpha is FALSE, then the parameter alpha is omitted from 227 | # the model, and we take r[0] = Y[0] - beta * X[0]. 
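  #
  # Usage sketch (simulated data; the parameter values mirror the example in
  # man/fit.pci.Rd and are purely illustrative):
  #   YX <- rpci(n=1000, alpha=1, beta=c(2,3,4), sigma_C=c(1,1,1),
  #              rho=0.9, sigma_M=0.1, sigma_R=0.2)
  #   fit.pci(YX[,1], YX[,2:ncol(YX)], pci_opt_method="jp", par_model="par")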
228 | 229 | pci_opt_method <- match.arg(pci_opt_method) 230 | par_model <- match.arg(par_model) 231 | 232 | Yorig <- Y 233 | if (!is.null(dim(Y))) { 234 | if (dim(Y)[2] > 1) { 235 | if (missing(X)) { 236 | X <- Y[,2:ncol(Y),drop=FALSE] 237 | } else { 238 | stop("Y must be a single column") 239 | } 240 | } 241 | Y <- Y[,1] 242 | } 243 | 244 | if (include_alpha) { 245 | fit.pci.a (Y, X, pci_opt_method, par_model, lambda, robust, nu) 246 | } else { 247 | fit.pci.a0 (Y, X, pci_opt_method, par_model, lambda, robust, nu) 248 | } 249 | 250 | 251 | } 252 | 253 | print.pci.fit <- function (x, ...) { 254 | # Given a PCI structure A, prints it in summary form 255 | print.internal.pci.fit(x) 256 | } 257 | 258 | print.internal.pci.fit <- function (A, ...) { 259 | # Prints a PCI fit 260 | cat("Fitted values for PCI model\n") 261 | if ("alpha" %in% names(A)) { 262 | cat(" Y[t] = alpha + X[t] %*% beta + M[t] + R[t]\n") 263 | } else { 264 | cat(" Y[t] = X[t] %*% beta + M[t] + R[t]\n") 265 | } 266 | cat(" M[t] = rho * M[t-1] + eps_M [t], eps_M[t] ~ N(0, sigma_M^2)\n") 267 | cat(" R[t] = R[t-1] + eps_R [t], eps_R[t] ~ N(0, sigma_R^2)\n\n") 268 | 269 | cat(sprintf("%-10s %8s %8s\n", "", "Estimate", "Std. Err")) 270 | p <- function(n, v, s) { cat(sprintf("%-10s %8.4f %8.4f\n", n, v, s)) } 271 | p("alpha", A$alpha, A$alpha.se) 272 | for (i in 1:length(A$beta)) { 273 | p(names(A$beta[i]), A$beta[i], A$beta.se[i]) 274 | } 275 | p("rho", A$rho, A$rho.se) 276 | p("sigma_M", A$sigma_M, A$sigma_M.se) 277 | p("sigma_R", A$sigma_R, A$sigma_R.se) 278 | # p("M0", A$M0, A$M0.se) 279 | # p("R0", A$R0, A$R0.se) 280 | 281 | cat(sprintf("\n-LL = %.2f, R^2[MR] = %.3f\n", A$negloglik, A$pvmr)) 282 | } 283 | 284 | statehistory.pci <- function(A, data=A$data, basis=A$basis) { 285 | # On input, A is an pci.fit object as produced by fit.pci. 286 | # Creates a data.frame containing the inferred values of 287 | # the states of the mean-revering and random walk components 288 | # of the process, based upon the model parameters that were fit. 289 | # 290 | # Returns a data.frame containing the following columns: 291 | # Y: The value of the process at this time 292 | # X: The value of the hedge at this time 293 | # M: The inferred state of the mean reverting component 294 | # R: The inferred state of the random walk component 295 | # eps_M: The inferred shock to the mean reverting component 296 | # eps_R: The inferred shock to the random walk component 297 | 298 | if (is.null(dim(basis))) basis <- as.matrix(basis, ncol=1) 299 | if ("alpha" %in% names(A)) { 300 | Yhat <- basis %*% A$beta + A$alpha 301 | } else { 302 | Yhat <- basis %*% A$beta 303 | } 304 | Z <- data - Yhat 305 | 306 | n <- length(data) 307 | M <- numeric(n) 308 | R <- numeric(n) 309 | eps_M <- numeric(n) 310 | eps_R <- numeric(n) 311 | 312 | # Mprev <- A$M0 313 | # Mprev <- 0 314 | # Rprev <- A$R0 315 | Mprev <- 0 316 | Rprev <- data[1] - Yhat[1] 317 | 318 | K <- kalman.gain.par(rho=A$rho, sigma_M=A$sigma_M, sigma_R=A$sigma_R) 319 | 320 | for (i in 1:n) { 321 | xhat <- A$rho * Mprev + Rprev 322 | e <- Z[i] - xhat 323 | eps_M[i] <- e * K[1] 324 | eps_R[i] <- e * K[2] 325 | M[i] <- A$rho * Mprev + eps_M[i] 326 | R[i] <- Rprev + eps_R[i] 327 | Mprev <- M[i] 328 | Rprev <- R[i] 329 | } 330 | 331 | df <- data.frame(Y=data, Yhat=Yhat, Z=Z, M=M, R=R, eps_M=eps_M, eps_R=eps_R) 332 | colnames(df) <- c("Y", "Yhat", "Z", "M", "R", "eps_M", "eps_R") 333 | df 334 | } 335 | 336 | plot.pci.fit <- function (x, ...) { 337 | # Given a PCI structure A, plots it. 
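  # Note: this is the S3 plot method for objects of class "pci.fit", so a fitted
  # model can be plotted directly, e.g. plot(fit.pci(YX[,1], YX[,2:ncol(YX)])),
  # where YX is an illustrative data matrix such as one simulated with rpci().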
338 | 339 | plot.internal.pci.fit(x) 340 | } 341 | 342 | plot.internal.pci.fit <- function (A, ...) { 343 | # Plots an partially cointegrated model 344 | 345 | # Initial definition for Date, Value, Label 346 | Date <- Value <- Label<- NULL 347 | 348 | 349 | sh <- statehistory.pci(A) 350 | n <- nrow(sh) 351 | RW <- sh$Yhat + sh$R 352 | df1.1 <- data.frame(Date=A$index, Label="Actual", Value=sh$Y) 353 | df1.2 <- data.frame(Date=A$index, Label="Hedge", Value=sh$Yhat) 354 | df1.3 <- data.frame(Date=A$index, Label="Model", Value=RW) 355 | df1 <- rbind(df1.1, df1.2, df1.3) 356 | # p1 <- ggplot (df1, aes(x=Date, y=Value, colour=Label)) + geom_line () + 357 | # ylab("Price") + xlab("") + theme(legend.position="top") + 358 | # scale_colour_discrete(name="") + 359 | # scale_size_manual(values=c(2,0.5,0.5)) + 360 | # ggtitle("Price Series") 361 | 362 | labels <- c("Target", "Hedge", "Model") 363 | if (A$target_name != "Y") { 364 | labels[1] <- A$target_name 365 | } 366 | if (A$factor_names[1] != "X1") { 367 | factor_strings <- sapply(1:length(A$beta), 368 | function(i) sprintf("%5.2f %s", 369 | A$beta[i], A$factor_names[i])) 370 | labels[2] <- paste(factor_strings, collapse=" +") 371 | } 372 | 373 | p1 <- ggplot (df1, aes(x=Date, y=Value, colour=Label, size=Label, linetype=Label)) + geom_line () + 374 | ylab("Price") + xlab("") + theme(legend.position="top") + 375 | scale_colour_manual(name="", 376 | breaks=c("Actual", "Hedge", "Model"), 377 | labels=labels, 378 | # values=c("Black", "#0054A6", "#00AEEF")) + # Black, Blue, Cyan 379 | values=c("Black", "#00A651", "#00AEEF")) + # Black, Green, Cyan 380 | scale_size_manual(name="", 381 | breaks=c("Actual", "Hedge", "Model"), 382 | labels=labels, 383 | values=c(1.0,1.0,0.5)) + 384 | # values=c(0.75,0.75,1)) + 385 | scale_linetype_manual(name="", 386 | breaks=c("Actual", "Hedge", "Model"), 387 | labels=labels, 388 | values=c("solid", "solid", "solid"))+ 389 | # values=c("solid", "solid", "dashed"))+ 390 | ggtitle("Price Series") 391 | 392 | 393 | df2 <- data.frame(Date=A$index, Label="M[t]", Value=sh$M) 394 | sdR <- sd(sh$M) 395 | hlines <- data.frame(Value=c(2 * sdR, sdR, -sdR, -2 * sdR), 396 | Facet=c("two","one","one","two")) 397 | p2 <- ggplot(df2, aes(x=Date, y=Value)) + geom_line() + 398 | ggtitle("Mean Reverting Component") + ylab("Price") + xlab("") + 399 | geom_hline(data=hlines, aes(yintercept=Value, colour=Facet), linetype="dashed") + 400 | theme(legend.position="none") 401 | 402 | grid.newpage() 403 | pushViewport(viewport(layout=grid.layout(9, 1))) 404 | print(p1, vp=viewport(layout.pos.row=1:5, layout.pos.col=1)) 405 | print(p2, vp=viewport(layout.pos.row=6:9, layout.pos.col=1)) 406 | 407 | } 408 | 409 | as.data.frame.pci.fit <- function (x, row.names, optional, ...) { 410 | as.data.frame.internal.pci.fit (x) 411 | } 412 | 413 | 414 | 415 | as.data.frame.internal.pci.fit <- function (A, ...) 
{ 416 | # Given an partially cointegrated model A, converts it into a one row data.frame 417 | # containing the following values from the model: 418 | # alpha 419 | # beta 420 | # rho 421 | # sigma_M 422 | # sigma_R 423 | 424 | if ("alpha" %in% names(A)) { 425 | df1 <- data.frame(alpha = A$alpha, beta=A$beta) 426 | } else { 427 | df1 <- data.frame(beta = A$beta) 428 | } 429 | df2 <- data.frame(rho = A$rho, sigma_M=A$sigma_M, sigma_R=A$sigma_R, M0=A$M0, R0=A$R0) 430 | if ("alpha.se" %in% names(A)) { 431 | df3 <- data.frame(alpha.se = A$alpha.se, beta.se = A$beta.se) 432 | } else { 433 | df3 <- data.frame(beta.se = A$beta.se) 434 | } 435 | df4 <- data.frame(rho.se=A$rho.se, sigma_M.se=A$sigma_M.se, sigma_R.se=A$sigma_R.se, 436 | M0.se=A$M0.se, R0.se=A$R0.se) 437 | df5 <- data.frame(negloglik=A$negloglik, pvmr=A$pvmr) 438 | df <- cbind(df1, df2, df3, df4, df5) 439 | df 440 | } 441 | 442 | -------------------------------------------------------------------------------- /vignettes/pci_vignette.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "A partialCI Guide" 3 | author: 4 | - "Matthew Clegg" 5 | - "Christopher Krauss" 6 | - "Jonas Rende" 7 | date: "`r Sys.Date()`" 8 | output: rmarkdown::html_vignette 9 | bibliography: rende.bib 10 | vignette: > 11 | %\VignetteIndexEntry{A partialCI Guide} 12 | %\VignetteEngine{knitr::rmarkdown} 13 | %\VignetteEncoding{UTF-8} 14 | --- 15 | 16 | 17 | 18 | ## Introduction 19 | 20 | The **partialCI** package fits a partial cointegration model to describe a time series. Partial cointegration (PCI) is a weakening of cointegration, allowing for the residual series to contain a mean-reverting and a random walk component. Analytically, this residual series is described by a partially autoregressive process (PAR -- see @Summers.1986, @Poterba.1988, and @Clegg.2015a), consisting of a stationary AR-process and a random walk. Whereas classic cointegration in the sense of @Engle.1987 requires all shocks to be transient, PCI is more flexible and allows for permanent shocks as well -- a realistic assumption across many (macro)economic applications. Even though neither the residual series, nor its mean-reverting and permanent component are directly observable, estimation is still possible in state space -- see @Brockwell.2010 and @Durbin.2012. 21 | 22 | ## The partial cointegration framework 23 | 24 | ### Model definition 25 | 26 | Based on @Engle.1987, @Clegg.2016 define the concept of partial cointegration as follows: \dfn{Definition}: "The components of the vector $X_t$ are said to be partially cointegrated of order $d$, $b$, denoted $X_t \sim PCI\left(d,b\right)$, if (i) all components of $X_t$ are $I\left(d\right)$; (ii) there exists a vector $\alpha$ so that $Z_t = \alpha \prime X_t$ and $Z_t$ can be decomposed as a sum $Z_t = R_t + M_t$, where $R_t \sim I\left(d\right)$ and $M_t \sim I\left(d - b\right)$." 27 | 28 | Let $Y_t$ denote the target time series and $X_{j,t}$ the $j^{th}$ factor time series at time $t$, where $j = \lbrace 1, 2, \dots, k \rbrace$. The target time series and the $k$ factor time series are partially cointegrated, if a parameter vector $\iota = \left\lbrace\beta_1, \beta_2, \dots, \beta_k, \rho, \sigma_M, \sigma_R, M_0, M_R\right\rbrace$ exists such that the subsequent model equations are satisfied: 29 | 30 | $$ 31 | \begin{aligned} 32 | Y_{t} &= \beta_1 X_{1,t} + \beta_2 X_{2,t} + ... 
+ \beta_k X_{k,t} + W_t \\ 33 | W_t &= M_t + R_t \\ 34 | M_t &= \rho M_{t-1} + \varepsilon_{M,t}\\ 35 | R_t &= R_{t-1} + \varepsilon_{R,t}\\ 36 | \varepsilon_{M,t} &\sim \mathcal{N}\left(0, \sigma^2_M\right)\\ 37 | \varepsilon_{R,t} &\sim \mathcal{N}\left(0, \sigma^2_R\right)\\ 38 | \beta_j \in \mathbb{R}; \rho &\in \left(-1, 1\right);\sigma^2_M, \sigma^2_R \in \mathbb{R}_0^+. \\ 39 | \end{aligned} 40 | $$ 41 | Thereby, $W_t$ denotes the partially autoregressive process, $R_t$ the permanent component, $M_t$ the transient component and $\beta = \lbrace \beta_1, \beta_2, \dots, \beta_k \rbrace$ is the partially cointegrating vector. The permanent component is modeled as a random walk and the transient component as an AR(1)-process with $AR(1)$-coefficient $\rho$. The corresponding error terms $\varepsilon_{M,t}$ and $\varepsilon_{R,t}$ are assumed to follow mutually independent, normally distributed white noise processes with mean zero and variances $\sigma^2_M$ and $\sigma^2_R$. A key advantage of modeling the cointegrating process as a partially autoregressive process is that we are able to calculate the proportion of variance attributable to mean-reversion (PVMR), defined as (@Clegg.2016), 42 | $$ 43 | R^2_{MR} = \frac{VAR\left[\left(1-B\right)M_t\right]}{VAR\left[\left(1-B\right)W_t\right]} = \frac{2\sigma^2_M}{2\sigma^2_M + \left(1+\rho\right)\sigma^2_R} , \hspace{0.2cm} R^2_{MR} \in \left[0,1\right], 44 | $$ 45 | where $B$ denotes the backshift operator. The statistic $R^2_{MR}$ is useful to assess how close the cointegration process is to either a pure random walk $\left(R^2_{MR} = 0\right)$ or a pure AR(1)-process $\left(R^2_{MR} = 1\right)$. 46 | 47 | ### State space represenation 48 | 49 | The applied state space transformation is in line with @Clegg.2016. Given that the PAR process $W_t$ is not observable, we convert the PCI model into the following state space model, consisting of an observation and a state equation: 50 | $$ 51 | \begin{align} 52 | X_t &= H Z_t \\ 53 | Z_t &= FZ_{t-1} + W_t. 54 | \end{align} 55 | $$ 56 | Thereby, $Z_t$ denotes the state which is assumed to be influenced linearly by the state in the last period and a noise term $W_t$. The matrix $F$ is assumed to be time invariant. The observable part is denoted by $X_t$. By assumption, there is a linear dependence between $X_t$ and $Z_t$, captured in the time invariant matrix $H$. 57 | 58 | ### Estimation of a partial cointegration model 59 | Parameters are estimated via the maximum likelihood (ML) method. Using a quasi-Newton algorithm, the ML method searches for the parameters $\rho$, $\sigma^2_M$, $\sigma^2_R$ and the parameter vector $\beta$ which maximizes the likelihood function of the associated Kalman filter. 60 | 61 | 62 | 63 | ### A likelihood ratio test routine for partial cointegration} 64 | The likelihood ratio test (LRT) implemented in the **partialCI** package adopts the LRT routine for PAR models proposed by @Clegg.2015a. In a PCI scenario the null hypothesis consists of two conditions -- namely the hypothesis that the residual series is a pure random walk ($\mathcal{H}^R_0$) or a pure AR(1)-process $(\mathcal{H}^M_0)$. The two conditions are separately tested. Only if both, $\mathcal{H}^R_0$ and $\mathcal{H}^M_0$ are individually rejected, the null hypothesis of no partial cointegration is rejected. 65 | 66 | ## Using the PCI package 67 | 68 | The main functions of the **partialCI** package are fit.pci(), test.pci(), statehistory.pci(), and hedge.pci(). 
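
Before turning to the individual functions, the following self-contained sketch (it uses no partialCI functions and all parameter values are chosen arbitrarily for illustration) simulates a pair of series that satisfies the PCI model equations above and computes the PVMR implied by the chosen parameters:

```
set.seed(1)
n       <- 1000
rho     <- 0.9    # AR(1) coefficient of the transient component
sigma_M <- 1.0    # innovation sd of the transient component
sigma_R <- 0.5    # innovation sd of the permanent component
beta    <- 1.2    # partially cointegrating coefficient

X <- cumsum(rnorm(n))                          # I(1) factor series
M <- as.numeric(stats::filter(rnorm(n, sd = sigma_M), rho,
                              method = "recursive"))   # transient AR(1), M0 = 0
R <- cumsum(rnorm(n, sd = sigma_R))            # permanent random-walk component
Y <- beta * X + M + R                          # partially cointegrated target

# PVMR implied by the chosen parameters
2 * sigma_M^2 / (2 * sigma_M^2 + (1 + rho) * sigma_R^2)
# [1] 0.8080808
```

A series simulated in this way can be passed to fit.pci() and test.pci() as described below.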
69 | 70 | ### fit.pci() 71 | 72 | The function **fit.pci()** fits a partial cointegration model to a given collection of time series. 73 | 74 | ``` 75 | fit.pci(Y, X, pci_opt_method = c("jp", "twostep"), par_model = c("par", "ar1", "rw"), lambda = 0, robust = FALSE, nu = 5, include_alpha=FALSE)} 76 | ``` 77 | * `Y`: Denotes the target time series and `X` is a matrix containing the `k` factors used to model `Y`. 78 | * `pci_opt_method`: Specifies, whether the joint-penalty method (`"jp"`) or the (`"twostep"`) method is applied to obtain the model with the best fit. If `pci_opt_method` is specified as `"twostep"`, a two-step procedure similar to the method introduced by @Engle.1987 is performed. Which model is fitted to the residual series, depends on the specification for the argument `par_model`. In case of `"par"`, a partial autoregressive model is used, in case of `"ar1"`, an AR(1)-process and in case of `"rw"` a random walk (default: `par_model = "par"`). On the other hand, if the `pci_opt_method` is specified as `"jp"`, the joint-penalty method is applied, to estimate $\beta$, $\rho$, $\sigma_M^2$ and $\sigma_R^2$ jointly via ML. The likelihood score of the associated Kalman filter is extended by a penalty value $\lambda\sigma_R^2$ (default: `lambda = 0`), where $\lambda \in \mathbb{R}_0^+$ (default: `pci_opt_method = "jp"`). 79 | * `robust`: Determines whether the residuals are assumed to be normally (`FALSE`) or $t$-distributed (`TRUE`) (default: `robust = TRUE`). If `robust` is set to `TRUE` the degrees of freedom can be specified, using the argument `nu` (default: `nu = 5`). 80 | * `include_alpha`: If `TRUE`, an intercept $\alpha$ is added to the PCI relationship (default: `include_alpha = FALSE`). 81 | 82 | 83 | ### test.pci() 84 | 85 | The **test.pci()** function tests the goodness of fit of a PCI model. 86 | 87 | ``` 88 | test.pci(Y, X, alpha = 0.05, null_hyp = c("rw", "ar1"), robust = FALSE, pci_opt_method = c("jp", "twostep"))} 89 | ``` 90 | * `alpha`: Determines at which significance level the null hypothesis is rejected (default: `alpha = 0.05`). 91 | * `null_hyp`: Specifies whether the null hypothesis is a random walk (`"rw"`), an AR(1)-process (`"ar1"`) or a union of both hypotheses (`c("rw", "ar1")`) (default: `null_hyp = c("rw", "ar1")`). 92 | 93 | ### statehistory.pci() 94 | 95 | To estimate the sequence of hidden states the **statehistory.pci()** function can be applied. 96 | 97 | ``` 98 | statehistory.pci(A, data = A\$data, basis = A\$basis)} 99 | ``` 100 | * `A`: Denotes a fit.pci() object. 101 | * `data`: Is a matrix consisting of the target time series and the `k` factor time series (default: `data = A\$data`). 102 | * `code{basis`: Captures the coefficients of the factor time series (default: `basis = A\$basis`). 103 | 104 | ###hedge.pci() 105 | 106 | The function **hedge.pci()** finds those `k` factors from a predefined set of factors which yield the best fit to the target time series. 107 | 108 | ``` 109 | hedge.pci(Y, X, maxfact = 10, lambda = 0, use.multicore = TRUE, minimum.stepsize = 0, verbose = TRUE, exclude.cols = c(), search_type = c("lasso", "full", "limited"), pci_opt_method=c("jp", "twostep"))} 110 | ``` 111 | 112 | * `maxfact`: Denotes the maximum number of considered factors (default: * `maxfact = 10`). 113 | * `use.multicore`: If `TRUE`, parallel processing is activated (default: * `use.multicore = TRUE`). 114 | * `verbose`: Controls whether detailed information are printed (default: `verbose = TRUE`). 
115 | * `exclude.cols`: Defines a set of factors which should be excluded from the search routine (default: `exclude.cols = c()`). 116 | * `search_type`: Determines the search algorithm applied to find the model that fits best to the target time series. The likelihood ratio score (LRT score) is used to compare the model fits, whereby lower scores are associated with better fits. If the option `"lasso"` is specified the lasso algorithm as implemented in the R package **glmnet** [@Friedman.2010] is deployed to search for the portfolio of factors that yields the best linear fit to the target time series. If the option `"full"` is specified, then at each step, all possible additions to the portfolio are considered and the one which yields the highest likelihood score improvement is chosen. If the option `"limited"` is specified, then at each step, the correlation of the residuals of the current portfolio is computed with respect to each of the candidate series in the input set $X$, and the top $B$ series are chosen for further consideration. Among these top $B$ candidates, the one which improves the likelihood score by the greatest amount is chosen. The parameter $B$ can be controled via `maxfact` (default: `search_type = "lasso"`). 117 | 118 | ## Example 119 | 120 | As an introductory example, we explore the relationship between Royal Dutch Shell plc A (RDS-A) and Royal Dutch Shell plc B (RDS-B), using daily (closing) price data from 1 January 2006 to 1 December 2016.RDS-A (@RDSA.2016) and RDS-B (@RDSB.2016) data are downloaded from Yahoo Finance. To download the price data we use the getYahooData() function, implemented in the R package **TTR** (@Ulrich.2016). 121 | 122 | ``` 123 | library(partialCI) 124 | library(TTR) 125 | 126 | RDSA<-getYahooData("RDS-A", 20060101, 20161201)$Close 127 | RDSB<-getYahooData("RDS-B", 20060101, 20161201)$Close 128 | ``` 129 | 130 | A classic cointegration analysis yields that the two time series are not cointegrated. 131 | 132 | ``` 133 | library(egcm) 134 | 135 | egcm_finance <- egcm(RDSA,RDSB,include.const = FALSE) 136 | ``` 137 | 138 | In particular, we apply the two-step approach of @Engle.1987 implemented in the R package **egcm** (@Clegg.2015c). 139 | 140 | The following residual plot (code: `plot(egcm\_finance\$residuals,type = "l")`) suggests that the residual series is not purely mean-reverting, but rather shows a stochastical trend as well as a mean-reverting behavior. 141 | 142 | 143 | ![Residual plot classic cointegration: RDS-A and RDS-B (1.01.2006 - 1.12.2016, daily)](figures/OLSRES_RSARSB_2006_2016.png) 144 | 145 | Hence, it is not suprising that RDS-A and RDS-B are not cointegrated. Using the PCI framework, we are able to fit a PCI model to RDS-A and RDS-B. 146 | 147 | ``` 148 | PCI_RDSA_RDSB<-fit.pci(RDSA, RDSB, pci_opt_method = c("jp"), par_model =c("par"), lambda = 0, robust = FALSE, nu = 5, include_alpha = FALSE)) 149 | ``` 150 | 151 | The R output is given as 152 | 153 | ``` 154 | Fitted values for PCI model 155 | Y[t] = X[t] %*% beta + M[t] + R[t] 156 | M[t] = rho * M[t-1] + eps_M [t], eps_M[t] ~ N(0, sigma_M^2) 157 | R[t] = R[t-1] + eps_R [t], eps_R[t] ~ N(0, sigma_R^2) 158 | 159 | Estimate Std. Err 160 | beta_Close 0.9274 0.0038 161 | rho 0.3959 0.0965 162 | sigma_M 0.1081 0.0083 163 | sigma_R 0.1195 0.0076 164 | 165 | -LL = -1117.29, R^2[MR] = 0.540, 166 | ``` 167 | where `beta_Close` denotes the partially cointegrating coefficient. The PVMR of 0.54 suggests that the spread time series also exhibits a clear mean-reverting behavior. 
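
As a quick sanity check, the reported PVMR can be reproduced directly from the parameter estimates shown in the output above, using the formula for $R^2_{MR}$ given earlier:

```
rho     <- 0.3959
sigma_M <- 0.1081
sigma_R <- 0.1195
2 * sigma_M^2 / (2 * sigma_M^2 + (1 + rho) * sigma_R^2)
# approximately 0.540, matching the reported R^2[MR]
```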
168 | 169 | In the subsequent step, we utilize the test.pci() function to check whether RDS-A and RDS-B are partially cointegrated. 170 | 171 | The R code 172 | 173 | ``` 174 | test.pci(RDSA, RDSB, alpha = 0.05, null_hyp = c("rw", "ar1"), robust = FALSE, pci_opt_method = c("jp")) 175 | ``` 176 | 177 | leads to the following output: 178 | 179 | ``` 180 | Likelihood ratio test of [Random Walk or CI(1)] vs Almost PCI(1) 181 | (joint penalty method) 182 | 183 | data: StockA 184 | 185 | Hypothesis Statistic p-value 186 | Random Walk -55.09 0.010 187 | AR(1) -52.88 0.010 188 | Combined 0.010. 189 | ``` 190 | A time series is classified as partially cointegrated, if and only if the random walk as well as the AR(1)-hypotheses are rejected. The $p$-value of 0.010 for the combined null hypothesis indicates that RDS-A and RDS-B are partially cointegrated in the considered period of time. 191 | 192 | Next, we demonstrate the use of the statehistory.pci() function which allows to estimate and extract the hidden states. 193 | The R code, 194 | 195 | ``` 196 | statehistory.pci(PCI_RDSA_RDSB)}, 197 | 198 | ``` 199 | 200 | results in the R output: 201 | ``` 202 | Y Yhat Z M R eps_M eps_R 203 | 2006-01-03 35.87002 35.26781 0.6022031 0.00000000 0.6022031 0.00000000 0.00000000 204 | 2006-01-04 36.23993 35.57175 0.6681755 0.02030490 0.6478706 0.02030490 0.04566752 205 | 2006-01-05 35.80276 35.24161 0.5611509 -0.02112621 0.5822771 -0.02916450 -0.06559352 206 | 2006-01-06 36.48653 35.83377 0.6527591 0.01590352 0.6368556 0.02426695 0.05457850 207 | ... 208 | 2016-11-25 50.18000 49.52231 0.6576906 -0.08762384 0.7453144 -0.07643882 -0.17191764 209 | 2016-11-28 49.20000 48.22397 0.9760311 0.04699758 0.9290335 0.08168603 0.18371909 210 | 2016-11-29 49.06000 48.02922 1.0307808 0.04419468 0.9865862 0.02558931 0.05755262 211 | 2016-11-30 51.10000 50.23639 0.8636066 -0.02573955 0.8893462 -0.04323530 -0.09724000 212 | 2016-12-01 51.78000 51.15450 0.6254956 -0.08826115 0.7137567 -0.07807140 -0.17558945. 213 | ``` 214 | The latter table covers the estimates of the hidden states $M$ and $R$ as well as the corresponding error terms `eps_M` and `eps_R`. `Z` is equal to the sum of $M$ and $R$. The estimate of the target time series is denoted by `Yhat`. 215 | 216 | The subsequent figure illustrates a plot of the extracted mean-reverting component of the spread associated with the RDS-A and RDS-B price time series (``plot(statehistory.pci(PCI_RDSA_RDSB)[,4],type = "l",ylab = "", xlab = "")`). 217 | 218 | ![Mean-reverting component RDS-A and RDS-B (1.01.2006 - 1.12.2016, daily)](figures/MCSDHIST_RSARSB_2006_2016.png) 219 | 220 | 221 | The horizontal blue lines are equal to two times the historical standard deviation in absolute terms of the mean-reverting component. A pairs trading strategy could exploit the mean-reverting behavior of $M_t$. Note that this example is in-sample; for a true out-of-sample application see @Clegg.2016. 222 | 223 | We continue with using hedge.pci() to find the set of sector ETFs forming the best hedging portfolio for the SPY index (S&P500 index). 
224 | 225 | Thereby, the R code, 226 | ```` 227 | sectorETFS <- c("XLB", "XLE", "XLF", "XLI", "XLK", "XLP", "XLU", "XLV", "XLY") 228 | prices <- multigetYahooPrices(c("SPY", sectorETFS), start=20060101) 229 | 230 | hedge.pci(prices[,"SPY"], prices), 231 | ```` 232 | 233 | results in the subsequent output: 234 | ```` 235 | -LL LR[rw] p[rw] p[mr] rho R^2[MR] Factor | Factor coefficients 236 | 2320.00 -23.3743 0.0100 0.0100 0.5759 0.4526 XLI | 3.1106 237 | 1765.50 -46.5925 0.0100 0.0100 0.3170 0.4713 XLY | 1.8951 1.1989 238 | 1494.95 -53.7256 0.0100 0.0100 0.3244 0.5038 XLV | 1.6999 0.9106 0.6619 239 | 972.58 -65.9058 0.0100 0.0100 0.4060 0.5904 XLK | 1.3089 0.4933 0.5320. 1.5182 240 | ```` 241 | The table summarizes information about the best hedging portfolio, where each row corresponds to an increasing number of factors. Row 1: The best single-factor hedging portfolio comprises XLI (industrials) as only factor. Row 2: The best two-factor hedging portfolio consists of XLI and XLY (consumer discretionary). As such, XLY leads to the best improvement of the LRT score among all remaining factors. Row 3 includes XLV (health care) for the three-factor portfolio and row 4 XLK (technology) for the best four-factor portfolio. The last row corresponds to the overall best fit out of the nine potential sector ETFs, based on the LRT score. Note that for all rows, the union of random walk and AR(1)-null hypothesis is rejected at the 5 percent significant level, so we find a PCI model at each step. 242 | 243 | ## References -------------------------------------------------------------------------------- /R/fit_pci_wo_alpha.R: -------------------------------------------------------------------------------- 1 | # Functions pertaining to fitting partially cointegrated (PCI) series 2 | # Copyright (C) 2016 Matthew Clegg 3 | 4 | # This program is free software; you can redistribute it and/or modify 5 | # it under the terms of the GNU General Public License as published by 6 | # the Free Software Foundation; either version 2 of the License, or 7 | # (at your option) any later version. 8 | # 9 | # This program is distributed in the hope that it will be useful, 10 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | # GNU General Public License for more details. 13 | # 14 | # A copy of the GNU General Public License is available at 15 | # http://www.r-project.org/Licenses/ 16 | 17 | utils::globalVariables("p0") 18 | 19 | 20 | 21 | fit.pci.twostep.a0 <- function (Y, X, par_model=c("par", "ar1", "rw"), robust=FALSE, nu=5) { 22 | # On input, Y is an n x 1 column vector, and X is an n x k matrix. 23 | # Fits the model 24 | # 25 | # Y[t] = beta[1] X[t,1] + beta[2] X[t,2] + ... + beta [k] X[t,k] 26 | # + m[t] + r[t] 27 | # 28 | # where 29 | # 30 | # m[t] = rho * m[t-1] + eps_M[t] 31 | # r[t] = r[t-1] + eps_R[t] 32 | # eps_M,t ~ N(0,1) 33 | # eps_R,t ~ N(0,1) 34 | # m[0] = r[0] = 0 35 | # 36 | # Estimates the values of beta, rho, sigma_M and sigma_R using 37 | # the two step procedure: 38 | # 1. Perform a linear regression of Y on X to obtain beta 39 | # 2. Determine rho, sigma_M and sigma_R through fitting a PAR model 40 | # to the residuals of the linear regression performed in step 1. 41 | # 42 | # The parameter par_model specifies which parameters of the PAR model are 43 | # to be optimized. 
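  # Returns an S3 object of class "pci.fit" containing the estimated
  # coefficients (beta, rho, sigma_M, sigma_R, M0, R0), their standard errors,
  # the negative log likelihood of the fit and the PVMR. In this no-intercept
  # variant, alpha is fixed at 0.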
44 | 45 | par_model <- match.arg(par_model) 46 | if (robust) { 47 | L <- summary(rlm(Y~X)) 48 | } else { 49 | L <- summary(lm(Y~X)) 50 | } 51 | PAR <- fit.par(L$residuals, model=par_model, robust=robust, nu=nu) 52 | 53 | fit <- structure(list( 54 | data=Y, 55 | basis=X, 56 | residuals=L$residuals, 57 | index=1:length(Y), 58 | alpha = 0, 59 | beta = coef(L)[2:nrow(coef(L)),1], 60 | rho = PAR$rho, 61 | sigma_M = PAR$sigma_M, 62 | sigma_R = PAR$sigma_R, 63 | M0 = PAR$par[4], 64 | R0 = PAR$par[5], 65 | beta.se = coef(L)[2:nrow(coef(L)),2], 66 | rho.se = PAR$stderr[1], 67 | sigma_M.se = PAR$stderr[2], 68 | sigma_R.se = PAR$stderr[3], 69 | M0.se = PAR$stderr[4], 70 | R0.se = PAR$stderr[5], 71 | negloglik = c(negloglik=PAR$negloglik), 72 | pvmr = PAR$pvmr, 73 | par.fit = PAR, 74 | par_model=par_model, 75 | robust=robust, 76 | pci.fit="twostep"), 77 | class="pci.fit") 78 | 79 | names(fit$beta) <- paste("beta_", colnames(X), sep="") 80 | names(fit$beta.se) <- paste(paste("beta_", colnames(X), sep=""), ".se", sep="") 81 | 82 | fit 83 | } 84 | 85 | pci.jointpenalty.guess.a0 <- function (Y, X) { 86 | # On input, Y is an n x 1 column vector, and X is an n x k matrix. 87 | # Generates a guess for the starting point of optimization for an PCI 88 | # fit as follows: 89 | # 1. Estimates beta by fitting the differenced X series to the 90 | # differenced Y series. 91 | # 2. Estimates rho, sigma_M and sigma_R by fitting a PAR series 92 | # to Y - X %*% beta - alpha 93 | # Returns the guess that is found 94 | 95 | X <- as.matrix(X) 96 | DY <- diff(Y) 97 | DX <- apply(X, 2, diff) 98 | beta0 <- coef(lm(DY ~ DX + 0)) 99 | res0 <- Y - X %*% beta0 100 | PAR0 <- fit.par(res0) 101 | rho0 <- PAR0$rho 102 | sigma_M0 <- PAR0$sigma_M 103 | sigma_R0 <- PAR0$sigma_R 104 | M00 <- 0 105 | p0 <- c(beta0, rho0, sigma_M0, sigma_R0, M00, res0[1]) 106 | names(p0)[1:ncol(X)] <- paste("beta_", colnames(X), sep="") 107 | names(p0)[length(p0)-1] <- "M0" 108 | names(p0)[length(p0)] <- "R0" 109 | p0 110 | } 111 | 112 | fit.pci.jointpenalty.rw.a0 <- function (Y, X, lambda=0, p0=pci.jointpenalty.guess(Y,X), 113 | robust=FALSE, nu=5, pgtol=1e-8) { 114 | # On input, Y is an n x 1 column vector, and X is an n x k matrix. 115 | # Fits an PCI model to Y,X where the residual series is modeled as 116 | # a random walk. 
Returns a three-component list: 117 | # par: The parameter estimates 118 | # se: The estimated standard error of the parameter estimates 119 | # nll: The negative log likelihood of the fit 120 | 121 | if (is.null(dim(Y))) Y <- as.matrix(Y, ncol=1) 122 | if (is.null(dim(X))) X <- as.matrix(X, ncol=1) 123 | 124 | n <- ncol(X) 125 | highval <- NA 126 | ll_calc_method <- if (robust) "csst" else "css" 127 | 128 | rw.ofun <- function (p) { 129 | beta <- p[1:n] 130 | sigma_R <- p[n+1] 131 | if (sigma_R <= 0.0) return(highval) 132 | Z <- Y - X %*% beta 133 | R0 <- Z[1] 134 | loglik.par (Z, 0, 0, sigma_R, 0, R0, ll_calc_method, nu=nu) + lambda * sigma_R^2 135 | } 136 | 137 | beta0 <- p0[1:n] 138 | res0 <- Y - X %*% beta0 139 | sdmax <- 2.0 * sd(diff(Y)) 140 | rw.p0 <- c(beta0, sdmax * 0.5) 141 | rw.pmin <- c(rep(-Inf, n), 0.0) 142 | rw.pmax <- c(rep(Inf, n), sdmax) 143 | highval <- rw.ofun(rw.p0) + 1.0 144 | 145 | rw.val <- optim(rw.p0, rw.ofun, method="L-BFGS-B", lower=rw.pmin, upper=rw.pmax, hessian=TRUE, control=list(pgtol=pgtol)) 146 | 147 | R0.par <- (Y - X %*% rw.val$par[1:n]) [1] 148 | rw.par <- c(rw.val$par[1:n], rho=0, sigma_M=0, sigma_R=rw.val$par[n+1], M0=0, R0=R0.par) 149 | names(rw.par)[1:n] <- paste("beta_", colnames(X), sep="") 150 | rw.se <- rep(NA_real_, nrow(rw.val$hessian)) 151 | suppressWarnings(try(rw.se <- sqrt(diag(solve(rw.val$hessian))), silent=TRUE)) 152 | rw.se <- c(rw.se[1:n], rho=NA, sigma_M=NA, sigma_R=rw.se[n+1], M0=NA, R0=NA) 153 | names(rw.se)[1:n] <- paste("beta_", colnames(X), sep="") 154 | 155 | return(list(par=rw.par, se=rw.se, nll=c(negloglik=rw.val$value))) 156 | } 157 | 158 | fit.pci.jointpenalty.mr.a0 <- function (Y, X, lambda=0, p0=pci.jointpenalty.guess.a0(Y,X), 159 | robust=FALSE, nu=5, pgtol=1e-8) { 160 | # On input, Y is an n x 1 column vector, and X is an n x k matrix. 161 | # Fits an PCI model to Y,X where the residual series is modeled as 162 | # a pure AR(1) series. 
Returns a three-component list: 163 | # par: The parameter estimates 164 | # se: The estimated standard error of the parameter estimates 165 | # nll: The negative log likelihood of the fit 166 | 167 | if (is.null(dim(Y))) Y <- as.matrix(Y, ncol=1) 168 | if (is.null(dim(X))) X <- as.matrix(X, ncol=1) 169 | 170 | n <- ncol(X) 171 | highval <- NA 172 | ll_calc_method <- if (robust) "csst" else "css" 173 | 174 | mr.ofun <- function (p) { 175 | beta <- p[1:n] 176 | rho <- p[n+1] 177 | sigma_M <- p[n+2] 178 | if (rho < -1 || rho > 1 || sigma_M <= 0.0) return(highval) 179 | Z <- Y - X %*% beta 180 | loglik.par (Z, rho, sigma_M, 0, 0, Z[1], ll_calc_method, nu=nu) 181 | } 182 | 183 | rw.fit <- fit.pci.jointpenalty.rw.a0 (Y, X, lambda, p0, robust=robust, nu=nu) 184 | rw.p0 <- c(rw.fit$par[1:n], rho=1, rw.fit$par[n+3]) 185 | names(rw.p0)[n+2] <- "sigma_M" 186 | 187 | beta0 <- p0[1:n] 188 | res0 <- Y - X %*% beta0 189 | PAR0.mr <- fit.par(res0, model="ar1", robust=robust, nu=nu) 190 | mr.p0 <- c(beta0, PAR0.mr$rho, PAR0.mr$sigma_M) 191 | names(mr.p0)[n+2] <- "sigma_M" 192 | 193 | mr.pmin <- c(rep(-Inf, n), -1, 0) 194 | mr.pmax <- c(rep(Inf, n), 1, 2.0 * rw.p0[n+3]) 195 | 196 | highval <- max(mr.ofun(rw.p0), mr.ofun(mr.p0)) + 1 197 | fit1 <- optim(rw.p0, mr.ofun, method="L-BFGS-B", lower=mr.pmin, upper=mr.pmax, hessian=TRUE, control=list(pgtol=pgtol)) 198 | fit2 <- optim(mr.p0, mr.ofun, method="L-BFGS-B", lower=mr.pmin, upper=mr.pmax, hessian=TRUE, control=list(pgtol=pgtol)) 199 | fit <- if(fit1$value < fit2$value) fit1 else fit2 200 | 201 | fit.R0 <- (Y - X %*% fit$par[1:n]) [1] 202 | mr.par <- c(fit$par[1:(n+2)], sigma_R=0, M0=0, R0=fit.R0) 203 | 204 | mr.se <- rep(NA_real_, nrow(fit$hessian)) 205 | suppressWarnings(try(mr.se <- sqrt(diag(solve(fit$hessian))), silent=TRUE)) 206 | mr.se <- c(mr.se[1:(n+2)], sigma_R=NA, M0=NA, R0=NA) 207 | 208 | return(list(par=mr.par, se=mr.se, nll=c(negloglik=fit$value))) 209 | } 210 | 211 | fit.pci.jointpenalty.both.a0 <- function (Y, X, lambda=0, p0=pci.jointpenalty.guess.a0(Y,X), 212 | robust=FALSE, nu=5, pgtol=1e-8) { 213 | # On input, Y is an n x 1 column vector, and X is an n x k matrix. 214 | # Fits an PCI model to Y,X. 
Returns a three-component list: 215 | # par: The parameter estimates 216 | # se: The estimated standard error of the parameter estimates 217 | # nll: The negative log likelihood of the fit 218 | 219 | if (is.null(dim(Y))) Y <- as.matrix(Y, ncol=1) 220 | if (is.null(dim(X))) X <- as.matrix(X, ncol=1) 221 | 222 | n <- ncol(X) 223 | highval <- NA 224 | ll_calc_method <- if (robust) "csst" else "css" 225 | 226 | fit.rw <- fit.pci.jointpenalty.rw.a0 (Y, X, lambda, p0, robust=robust, nu=nu) 227 | fit.mr <- fit.pci.jointpenalty.mr.a0 (Y, X, lambda, p0, robust=robust, nu=nu) 228 | fit.twostep <- fit.pci.twostep.a0 (Y, X, robust=robust, nu=nu) 229 | twostep.par <- c(fit.twostep$beta, 230 | fit.twostep$rho, fit.twostep$sigma_M, fit.twostep$sigma_R) 231 | 232 | start_list <- list(p0[1:(n+3)], fit.rw$par[1:(n+3)], fit.mr$par[1:(n+3)], twostep.par) 233 | 234 | objective <- function (p) { 235 | # print(p) 236 | beta <- p[1:n] 237 | rho <- p[n+1] 238 | sigma_M <- p[n+2] 239 | sigma_R <- p[n+3] 240 | if (rho < -1 || rho > 1) return(highval) 241 | if ((sigma_M < 0) || (sigma_R < 0)) return(highval) 242 | if ((sigma_M == 0) && (sigma_R == 0)) return(highval) 243 | Z <- Y - X %*% beta 244 | M0 <- 0 245 | R0 <- Z[1] 246 | ll <- loglik.par (Z, rho, sigma_M, sigma_R, M0, R0, ll_calc_method, nu=nu) + lambda * sigma_R^2 247 | # print(c(p, ll)) 248 | ll 249 | } 250 | # debug(objective) 251 | 252 | maxsig <- fit.rw$par[["sigma_R"]] 253 | pmin <- c(rep(-Inf, n), -1, 0, 0) 254 | pmax <- c(rep(Inf, n), 1, 2.0 * maxsig, 2.0 * maxsig) 255 | 256 | best_value <- objective(start_list[[1]])+1 257 | for (start in start_list) { 258 | highval <- objective(start) + 1 259 | rfit <- optim(start, objective, hessian=TRUE, method="L-BFGS-B", lower=pmin, upper=pmax,control=list(pgtol=pgtol)) 260 | if (rfit$value < best_value) { 261 | bestfit <- rfit 262 | best_value <- rfit$value 263 | # cat(sprintf("r %6.2f rho %8.4f sigma_M %8.4f sigma_R %8.4f -> %8.4f\n", 264 | # rrho, bestfit$par[1], bestfit$par[2], bestfit$par[3], bestfit$value)) 265 | } 266 | } 267 | 268 | bestfit.R0 <- (Y - X %*% bestfit$par[1:n]) [1] 269 | bestfit.par <- c(bestfit$par[1:(n+3)], M0=0, R0=bestfit.R0) 270 | bestfit.se <- rep(NA_real_, nrow(bestfit$hessian)) 271 | suppressWarnings(try(bestfit.se <- sqrt(diag(solve(bestfit$hessian))), silent=TRUE)) 272 | bestfit.se <- c(bestfit.se[1:(n+3)], M0=NA, R0=NA) 273 | 274 | # names(bestfit.par)[n+6] <- "R0" 275 | # names(bestfit.se)[n+6] <- "R0" 276 | 277 | return(list(par=bestfit.par, se=bestfit.se, nll=c(negloglik=bestfit$value))) 278 | 279 | } 280 | 281 | fit.pci.jointpenalty.a0 <- function (Y, X, par_model=c("par", "ar1", "rw"), lambda=0, 282 | robust=FALSE, nu=5) { 283 | # On input, Y is an n x 1 column vector, and X is an n x k matrix. 284 | # lambda is a penalty value that drives the optimization towards a 285 | # solution where sigma_R has the lowest possible value. 286 | # 287 | # Fits the model 288 | # 289 | # Y[t] = beta[1] X[t,1] + beta[2] X[t,2] + ... + beta [k] X[t,k] 290 | # + m[t] + r[t] 291 | # 292 | # where 293 | # 294 | # m[t] = rho * m[t-1] + eps_M[t] 295 | # r[t] = r[t-1] + eps_R[t] 296 | # eps_M,t ~ N(0,sigma_M^2) 297 | # eps_R,t ~ N(0,sigma_R^2) 298 | # m[0] = 0 299 | # r[0] = Y[0] - beta[1] X[0,1] - beta[2] X[0,2] - ... - beta[k] X[0,k] 300 | # 301 | # Estimates the values of beta, rho, sigma_M and sigma_R using 302 | # the following procedure: 303 | # 1. Initial estimates of beta are obtained by regressing the 304 | # first differences of X[,i] on the first difference of Y. 305 | # 2. 
Initial estimates of rho, sigma_M and sigma_R are obtained by 306 | # fitting a PAR model to the residuals 307 | # 3. Having obtained these initial estimates for all of the parameters, 308 | # the following cost function is maximized to final parameter 309 | # estimates: 310 | # C[rho, sigma_M, sigma_R] = -LL[rho, sigma_M, sigma_R] + lambda * sigma_R^2 311 | # In the above, LL is the log likelihood function for the steady 312 | # state Kalman filter with parameters rho, sigma_M and sigma_R. 313 | # 314 | # The parameter par_model specifies which parameters of the PAR model are 315 | # to be optimized. 316 | 317 | if (is.null(dim(Y))) Y <- as.matrix(Y, ncol=1) 318 | if (is.null(dim(X))) X <- as.matrix(X, ncol=1) 319 | 320 | par_model <- match.arg(par_model) 321 | p0 = pci.jointpenalty.guess.a0(Y,X) 322 | res <- switch (par_model, 323 | rw = fit.pci.jointpenalty.rw.a0(Y, X, lambda, p0, robust=robust, nu=nu), 324 | ar1 = fit.pci.jointpenalty.mr.a0(Y, X, lambda, p0, robust=robust, nu=nu), 325 | par = fit.pci.jointpenalty.both.a0(Y, X, lambda, p0, robust=robust, nu=nu)) 326 | n <- ncol(X) 327 | 328 | beta <- res$par[1:n] 329 | R <- Y - X %*% beta 330 | 331 | fit <- structure(list( 332 | data=Y, 333 | basis=X, 334 | residuals=R, 335 | index=1:length(Y), 336 | beta = beta, 337 | rho = res$par[n+1], 338 | sigma_M = res$par[n+2], 339 | sigma_R = res$par[n+3], 340 | M0 = res$par[n+4], 341 | R0 = res$par[n+5], 342 | beta.se = res$se[1:n], 343 | rho.se = res$se[n+1], 344 | sigma_M.se = res$se[n+2], 345 | sigma_R.se = res$se[n+3], 346 | M0.se = res$se[n+4], 347 | R0.se = res$se[n+5], 348 | negloglik = res$nll, 349 | pvmr = pvmr.par(res$par[n+1], res$par[n+2], res$par[n+3]), 350 | par.model=par_model, 351 | robust=robust, 352 | pci.fit="jointpenalty"), 353 | class="pci.fit") 354 | 355 | names(fit$beta) <- paste("beta_", colnames(X), sep="") 356 | names(fit$beta.se) <- paste(paste("beta_", colnames(X), sep=""), ".se", sep="") 357 | 358 | fit 359 | } 360 | 361 | fit.pci.a0 <- function (Y, X, pci_opt_method=c("jp", "twostep"), par_model=c("par", "ar1", "rw"), 362 | lambda=0, robust=FALSE, nu=5) { 363 | # On input, Y is an n x 1 column vector, and X is an n x k matrix. 364 | # lambda is a penalty value that drives the optimization towards a 365 | # solution where sigma_R has the lowest possible value. 366 | # 367 | # Fits the model 368 | # 369 | # Y[t] = beta[1] X[t,1] + beta[2] X[t,2] + ... + beta [k] X[t,k] 370 | # + m[t] + r[t] 371 | # 372 | # where 373 | # 374 | # m[t] = rho * m[t-1] + eps_M[t] 375 | # r[t] = r[t-1] + eps_R[t] 376 | # eps_M,t ~ N(0,sigma_M^2) 377 | # eps_R,t ~ N(0,sigma_R^2) 378 | # m[0] = 0 379 | # r[0] = Y[0] - beta[1] X[0,1] - beta[2] X[0,2] - ... - beta[k] X[0,k] 380 | # 381 | # Fits the model using either the two step procedure or the joint penalty 382 | # procedure, according to the value of the parameter 'pci_opt_method'. Returns 383 | # an S3 object of class "pci.fit" representing the fit that was obtained. 384 | # 385 | # The parameter par_model specifies which parameters of the PAR model are 386 | # to be optimized. 
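  # Note: the ".a0" functions in this file fit the model without an intercept
  # (alpha is fixed at 0); the corresponding variants with an intercept are the
  # ".a" functions in fit_pci_with_alpha.R.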
387 | 388 | pci_opt_method <- match.arg(pci_opt_method) 389 | par_model <- match.arg(par_model) 390 | 391 | Yorig <- Y 392 | if (!is.null(dim(Y))) { 393 | if (dim(Y)[2] > 1) { 394 | if (missing(X)) { 395 | X <- Y[,2:ncol(Y),drop=FALSE] 396 | } else { 397 | stop("Y must be a single column") 398 | } 399 | } 400 | Y <- Y[,1] 401 | } 402 | Y <- coredata(Y) 403 | 404 | Xorig <- X 405 | X <- as.matrix(X) 406 | 407 | A <- switch (pci_opt_method, 408 | twostep = fit.pci.twostep.a0(Y, X, par_model=par_model, robust=robust, nu=nu), 409 | jp = fit.pci.jointpenalty.a0(Y, X, par_model=par_model, lambda, robust=robust, nu=nu)) 410 | 411 | if (is.zoo(Yorig)) { 412 | A$index <- index(Yorig) 413 | } else { 414 | A$index <- 1:length(Y) 415 | } 416 | 417 | if (!is.null(names(Yorig))) { 418 | A$target_name <- names(Yorig)[1] 419 | } else if (!is.null(colnames(Yorig))) { 420 | A$target_name <- colnames(Yorig)[1] 421 | } else { 422 | A$target_name <- "Y" 423 | } 424 | 425 | if (!is.null(colnames(X))) { 426 | A$factor_names <- colnames(X) 427 | } else if (!is.null(names(X))) { 428 | A$factor_names <- names(X) 429 | } else { 430 | A$factor_names <- paste("X", 1:ncol(X), sep="") 431 | } 432 | 433 | A 434 | } 435 | 436 | -------------------------------------------------------------------------------- /R/fit_pci_with_alpha.R: -------------------------------------------------------------------------------- 1 | # Functions pertaining to fitting partially cointegrated (PCI) series 2 | # Copyright (C) 2016 Matthew Clegg 3 | 4 | # This program is free software; you can redistribute it and/or modify 5 | # it under the terms of the GNU General Public License as published by 6 | # the Free Software Foundation; either version 2 of the License, or 7 | # (at your option) any later version. 8 | # 9 | # This program is distributed in the hope that it will be useful, 10 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | # GNU General Public License for more details. 13 | # 14 | # A copy of the GNU General Public License is available at 15 | # http://www.r-project.org/Licenses/ 16 | 17 | utils::globalVariables("p0") 18 | 19 | fit.pci.twostep.a <- function (Y, X, par_model=c("par", "ar1", "rw"), robust=FALSE, nu=5) { 20 | # On input, Y is an n x 1 column vector, and X is an n x k matrix. 21 | # Fits the model 22 | # 23 | # Y[t] = alpha + beta[1] X[t,1] + beta[2] X[t,2] + ... + beta [k] X[t,k] 24 | # + m[t] + r[t] 25 | # 26 | # where 27 | # 28 | # m[t] = rho * m[t-1] + eps_M[t] 29 | # r[t] = r[t-1] + eps_R[t] 30 | # eps_M,t ~ N(0,1) 31 | # eps_R,t ~ N(0,1) 32 | # m[0] = r[0] = 0 33 | # 34 | # Estimates the values of alpha, beta, rho, sigma_M and sigma_R using 35 | # the two step procedure: 36 | # 1. Perform a linear regression of Y on X to obtain alpha and beta 37 | # 2. Determine rho, sigma_M and sigma_R through fitting a PAR model 38 | # to the residuals of the linear regression performed in step 1. 39 | # 40 | # The parameter par_model specifies which parameters of the PAR model are 41 | # to be optimized. 
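  # Returns an S3 object of class "pci.fit" with the same fields as the
  # no-intercept variant, plus the estimated intercept "alpha" and its
  # standard error "alpha.se".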
42 | 43 | par_model <- match.arg(par_model) 44 | if (robust) { 45 | L <- summary(rlm(Y~X)) 46 | } else { 47 | L <- summary(lm(Y~X)) 48 | } 49 | PAR <- fit.par(L$residuals, model=par_model, robust=robust, nu=nu) 50 | 51 | fit <- structure(list( 52 | data=Y, 53 | basis=X, 54 | residuals=L$residuals, 55 | index=1:length(Y), 56 | alpha = c(alpha=coef(L)[1,1]), 57 | beta = coef(L)[2:nrow(coef(L)),1], 58 | rho = PAR$rho, 59 | sigma_M = PAR$sigma_M, 60 | sigma_R = PAR$sigma_R, 61 | M0 = PAR$par[4], 62 | R0 = PAR$par[5], 63 | alpha.se = c(alpha.se=coef(L)[1,2]), 64 | beta.se = coef(L)[2:nrow(coef(L)),2], 65 | rho.se = PAR$stderr[1], 66 | sigma_M.se = PAR$stderr[2], 67 | sigma_R.se = PAR$stderr[3], 68 | M0.se = PAR$stderr[4], 69 | R0.se = PAR$stderr[5], 70 | negloglik = c(negloglik=PAR$negloglik), 71 | pvmr = PAR$pvmr, 72 | par.fit = PAR, 73 | par_model=par_model, 74 | robust=robust, 75 | pci.fit="twostep"), 76 | class="pci.fit") 77 | 78 | names(fit$beta) <- paste("beta_", colnames(X), sep="") 79 | names(fit$beta.se) <- paste(paste("beta_", colnames(X), sep=""), ".se", sep="") 80 | 81 | fit 82 | } 83 | 84 | pci.jointpenalty.guess.a <- function (Y, X) { 85 | # On input, Y is an n x 1 column vector, and X is an n x k matrix. 86 | # Generates a guess for the starting point of optimization for an PCI 87 | # fit as follows: 88 | # 1. Estimates beta by fitting the differenced X series to the 89 | # differenced Y series. 90 | # 2. Estimates alpha by a Y[1] - X[1,] %*% beta 91 | # 3. Estimates rho, sigma_M and sigma_R by fitting a PAR series 92 | # to Y - X %*% beta - alpha 93 | # Returns the guess that is found 94 | 95 | X <- as.matrix(X) 96 | DY <- diff(Y) 97 | DX <- apply(X, 2, diff) 98 | beta0 <- coef(lm(DY ~ DX + 0)) 99 | alpha0 <- as.numeric(Y[1] - X[1,] %*% beta0) 100 | res0 <- Y - X %*% beta0 - alpha0 101 | PAR0 <- fit.par(res0) 102 | rho0 <- PAR0$rho 103 | sigma_M0 <- PAR0$sigma_M 104 | sigma_R0 <- PAR0$sigma_R 105 | M00 <- 0 106 | p0 <- c(alpha0, beta0, rho0, sigma_M0, sigma_R0, M00, 0) 107 | names(p0)[1] <- "alpha" 108 | names(p0)[2:(ncol(X)+1)] <- paste("beta_", colnames(X), sep="") 109 | names(p0)[length(p0)-1] <- "M0" 110 | names(p0)[length(p0)] <- "R0" 111 | p0 112 | } 113 | 114 | fit.pci.jointpenalty.rw.a <- function (Y, X, lambda=0, p0=pci.jointpenalty.guess.a(Y,X), 115 | robust=FALSE, nu=5, pgtol=1e-8) { 116 | # On input, Y is an n x 1 column vector, and X is an n x k matrix. 117 | # Fits an PCI model to Y,X where the residual series is modeled as 118 | # a random walk. 
Returns a three-component list: 119 | # par: The parameter estimates 120 | # se: The estimated standard error of the parameter estimates 121 | # nll: The negative log likelihood of the fit 122 | 123 | if (is.null(dim(Y))) Y <- as.matrix(Y, ncol=1) 124 | if (is.null(dim(X))) X <- as.matrix(X, ncol=1) 125 | 126 | n <- ncol(X) 127 | highval <- NA 128 | ll_calc_method <- if (robust) "csst" else "css" 129 | 130 | rw.ofun <- function (p) { 131 | alpha <- p[1] 132 | beta <- p[2:(n+1)] 133 | sigma_R <- p[n+2] 134 | if (sigma_R <= 0.0) return(highval) 135 | Z <- Y - X %*% beta - alpha 136 | loglik.par (Z, 0, 0, sigma_R, 0, 0, ll_calc_method, nu=nu) + lambda * sigma_R^2 137 | } 138 | 139 | alpha0 <- p0[1] 140 | beta0 <- p0[2:(ncol(X)+1)] 141 | res0 <- Y - X %*% beta0 - alpha0 142 | sdmax <- 2.0 * sd(diff(Y)) 143 | rw.p0 <- c(alpha0, beta0, sdmax * 0.5) 144 | rw.pmin <- c(-Inf, rep(-Inf, n), 0.0) 145 | rw.pmax <- c(Inf, rep(Inf, n), sdmax) 146 | highval <- rw.ofun(rw.p0) + 1.0 147 | 148 | rw.val <- optim(rw.p0, rw.ofun, method="L-BFGS-B", lower=rw.pmin, upper=rw.pmax, hessian=TRUE, control=list(pgtol=pgtol)) 149 | rw.par <- c(rw.val$par[1:(n+1)], rho=0, sigma_M=0, sigma_R=rw.val$par[n+2], M0=0, R0=0) 150 | rw.se <- rep(NA_real_, nrow(rw.val$hessian)) 151 | suppressWarnings(try(rw.se <- sqrt(diag(solve(rw.val$hessian))), silent=TRUE)) 152 | rw.se <- c(rw.se[1:(n+1)], rho=NA, sigma_M=NA, sigma_R=rw.se[n+2], M0=NA, R0=NA) 153 | 154 | return(list(par=rw.par, se=rw.se, nll=c(negloglik=rw.val$value))) 155 | } 156 | 157 | fit.pci.jointpenalty.mr.a <- function (Y, X, lambda=0, p0=pci.jointpenalty.guess.a(Y,X), 158 | robust=FALSE, nu=5, pgtol=1e-8) { 159 | # On input, Y is an n x 1 column vector, and X is an n x k matrix. 160 | # Fits an PCI model to Y,X where the residual series is modeled as 161 | # a pure AR(1) series. 
Returns a three-component list: 162 | # par: The parameter estimates 163 | # se: The estimated standard error of the parameter estimates 164 | # nll: The negative log likelihood of the fit 165 | 166 | if (is.null(dim(Y))) Y <- as.matrix(Y, ncol=1) 167 | if (is.null(dim(X))) X <- as.matrix(X, ncol=1) 168 | 169 | n <- ncol(X) 170 | highval <- NA 171 | ll_calc_method <- if (robust) "csst" else "css" 172 | 173 | mr.ofun <- function (p) { 174 | alpha <- p[1] 175 | beta <- p[2:(n+1)] 176 | rho <- p[n+2] 177 | sigma_M <- p[n+3] 178 | if (rho < -1 || rho > 1 || sigma_M <= 0.0) return(highval) 179 | Z <- Y - X %*% beta - alpha 180 | loglik.par (Z, rho, sigma_M, 0, 0, 0, ll_calc_method, nu=nu) 181 | } 182 | 183 | rw.fit <- fit.pci.jointpenalty.rw.a (Y, X, lambda, p0, robust=robust, nu=nu) 184 | rw.p0 <- c(rw.fit$par[1:(n+1)], rho=1, rw.fit$par[n+4]) 185 | names(rw.p0)[n+3] <- "sigma_M" 186 | 187 | alpha0 <- p0[1] 188 | beta0 <- p0[2:(ncol(X)+1)] 189 | res0 <- Y - X %*% beta0 - alpha0 190 | PAR0.mr <- fit.par(res0, model="ar1", robust=robust, nu=nu) 191 | mr.p0 <- c(alpha0, beta0, PAR0.mr$rho, PAR0.mr$sigma_M) 192 | names(mr.p0)[n+3] <- "sigma_M" 193 | 194 | mr.pmin <- c(-Inf, rep(-Inf, n), -1, 0) 195 | mr.pmax <- c(Inf, rep(Inf, n), 1, 2.0 * rw.p0[n+3]) 196 | 197 | highval <- max(mr.ofun(rw.p0), mr.ofun(mr.p0)) + 1 198 | fit1 <- optim(rw.p0, mr.ofun, method="L-BFGS-B", lower=mr.pmin, upper=mr.pmax, hessian=TRUE, control=list(pgtol=pgtol)) 199 | fit2 <- optim(mr.p0, mr.ofun, method="L-BFGS-B", lower=mr.pmin, upper=mr.pmax, hessian=TRUE, control=list(pgtol=pgtol)) 200 | fit <- if(fit1$value < fit2$value) fit1 else fit2 201 | 202 | mr.par <- c(fit$par[1:(n+3)], sigma_R=0, M0=0, R0=0) 203 | 204 | mr.se <- rep(NA_real_, nrow(fit$hessian)) 205 | suppressWarnings(try(mr.se <- sqrt(diag(solve(fit$hessian))), silent=TRUE)) 206 | mr.se <- c(mr.se[1:(n+3)], sigma_R=NA, M0=NA, R0=NA) 207 | 208 | return(list(par=mr.par, se=mr.se, nll=c(negloglik=fit$value))) 209 | } 210 | 211 | fit.pci.jointpenalty.both.a <- function (Y, X, lambda=0, p0=pci.jointpenalty.guess.a(Y,X), 212 | robust=FALSE, nu=5, pgtol=1e-8) { 213 | # On input, Y is an n x 1 column vector, and X is an n x k matrix. 214 | # Fits an PCI model to Y,X. 
Returns a three-component list: 215 | # par: The parameter estimates 216 | # se: The estimated standard error of the parameter estimates 217 | # nll: The negative log likelihood of the fit 218 | 219 | if (is.null(dim(Y))) Y <- as.matrix(Y, ncol=1) 220 | if (is.null(dim(X))) X <- as.matrix(X, ncol=1) 221 | 222 | n <- ncol(X) 223 | highval <- NA 224 | ll_calc_method <- if (robust) "csst" else "css" 225 | 226 | fit.rw <- fit.pci.jointpenalty.rw.a (Y, X, lambda, p0, robust=robust, nu=nu) 227 | fit.mr <- fit.pci.jointpenalty.mr.a (Y, X, lambda, p0, robust=robust, nu=nu) 228 | fit.twostep <- fit.pci.twostep.a (Y, X, robust=robust, nu=nu) 229 | twostep.par <- c(fit.twostep$alpha, fit.twostep$beta, 230 | fit.twostep$rho, fit.twostep$sigma_M, fit.twostep$sigma_R) 231 | 232 | start_list <- list(p0[1:(n+4)], fit.rw$par[1:(n+4)], fit.mr$par[1:(n+4)], twostep.par) 233 | 234 | objective <- function (p) { 235 | # print(p) 236 | alpha <- p[1] 237 | beta <- p[2:(n+1)] 238 | rho <- p[n+2] 239 | sigma_M <- p[n+3] 240 | sigma_R <- p[n+4] 241 | if (rho < -1 || rho > 1) return(highval) 242 | if ((sigma_M < 0) || (sigma_R < 0)) return(highval) 243 | if ((sigma_M == 0) && (sigma_R == 0)) return(highval) 244 | M0 <- 0 245 | R0 <- 0 246 | Z <- Y - X %*% beta - alpha 247 | ll <- loglik.par (Z, rho, sigma_M, sigma_R, M0, R0, ll_calc_method, nu=nu) + lambda * sigma_R^2 248 | # print(c(p, ll)) 249 | ll 250 | } 251 | # debug(objective) 252 | 253 | maxsig <- fit.rw$par[["sigma_R"]] 254 | pmin <- c(-Inf, rep(-Inf, n), -1, 0, 0) 255 | pmax <- c(Inf, rep(Inf, n), 1, 2.0 * maxsig, 2.0 * maxsig) 256 | 257 | best_value <- objective(start_list[[1]])+1 258 | for (start in start_list) { 259 | highval <- objective(start) + 1 260 | rfit <- optim(start, objective, hessian=TRUE, method="L-BFGS-B", lower=pmin, upper=pmax,control=list(pgtol=pgtol)) 261 | if (rfit$value < best_value) { 262 | bestfit <- rfit 263 | best_value <- rfit$value 264 | # cat(sprintf("r %6.2f rho %8.4f sigma_M %8.4f sigma_R %8.4f -> %8.4f\n", 265 | # rrho, bestfit$par[1], bestfit$par[2], bestfit$par[3], bestfit$value)) 266 | } 267 | } 268 | 269 | bestfit.par <- c(bestfit$par[1:(n+4)], M0=0, R0=0) 270 | bestfit.se <- rep(NA_real_, nrow(bestfit$hessian)) 271 | suppressWarnings(try(bestfit.se <- sqrt(diag(solve(bestfit$hessian))), silent=TRUE)) 272 | bestfit.se <- c(bestfit.se[1:(n+4)], M0=NA, R0=NA) 273 | 274 | # names(bestfit.par)[n+6] <- "R0" 275 | # names(bestfit.se)[n+6] <- "R0" 276 | 277 | return(list(par=bestfit.par, se=bestfit.se, nll=c(negloglik=bestfit$value))) 278 | 279 | } 280 | 281 | fit.pci.jointpenalty.a <- function (Y, X, par_model=c("par", "ar1", "rw"), lambda=0, 282 | robust=FALSE, nu=5) { 283 | # On input, Y is an n x 1 column vector, and X is an n x k matrix. 284 | # lambda is a penalty value that drives the optimization towards a 285 | # solution where sigma_R has the lowest possible value. 286 | # 287 | # Fits the model 288 | # 289 | # Y[t] = alpha + beta[1] X[t,1] + beta[2] X[t,2] + ... + beta [k] X[t,k] 290 | # + m[t] + r[t] 291 | # 292 | # where 293 | # 294 | # m[t] = rho * m[t-1] + eps_M[t] 295 | # r[t] = r[t-1] + eps_R[t] 296 | # eps_M,t ~ N(0,sigma_M^2) 297 | # eps_R,t ~ N(0,sigma_R^2) 298 | # m[0] = r[0] = 0 299 | # 300 | # Estimates the values of alpha, beta, rho, sigma_M and sigma_R using 301 | # the following procedure: 302 | # 1. Initial estimates of beta are obtained by regressing the 303 | # first differences of X[,i] on the first difference of Y. 304 | # 2. 
Given these estimates for beta, an initial estimate for 305 | # alpha is obtained as alpha = Y[1] - beta X[1,]. 306 | # 3. Initial estimates of rho, sigma_M and sigma_R are obtained by 307 | # fitting a PAR model to the residuals 308 | # 4. Having obtained these initial estimates for all of the parameters, 309 | # the following cost function is maximized to final parameter 310 | # estimates: 311 | # C[rho, sigma_M, sigma_R] = -LL[rho, sigma_M, sigma_R] + lambda * sigma_R^2 312 | # In the above, LL is the log likelihood function for the steady 313 | # state Kalman filter with parameters rho, sigma_M and sigma_R. 314 | # 315 | # The parameter par_model specifies which parameters of the PAR model are 316 | # to be optimized. 317 | 318 | if (is.null(dim(Y))) Y <- as.matrix(Y, ncol=1) 319 | if (is.null(dim(X))) X <- as.matrix(X, ncol=1) 320 | 321 | par_model <- match.arg(par_model) 322 | p0 = pci.jointpenalty.guess(Y,X) 323 | res <- switch (par_model, 324 | rw = fit.pci.jointpenalty.rw.a(Y, X, lambda, p0, robust=robust, nu=nu), 325 | ar1 = fit.pci.jointpenalty.mr.a(Y, X, lambda, p0, robust=robust, nu=nu), 326 | par = fit.pci.jointpenalty.both.a(Y, X, lambda, p0, robust=robust, nu=nu)) 327 | n <- ncol(X) 328 | 329 | alpha <- res$par[1] 330 | beta <- res$par[2:(n+1)] 331 | R <- Y - alpha - X %*% beta 332 | 333 | fit <- structure(list( 334 | data=Y, 335 | basis=X, 336 | residuals=R, 337 | index=1:length(Y), 338 | alpha = alpha, 339 | beta = beta, 340 | rho = res$par[n+2], 341 | sigma_M = res$par[n+3], 342 | sigma_R = res$par[n+4], 343 | M0 = res$par[n+5], 344 | R0 = res$par[n+6], 345 | alpha.se = res$se[1], 346 | beta.se = res$se[2:(n+1)], 347 | rho.se = res$se[n+2], 348 | sigma_M.se = res$se[n+3], 349 | sigma_R.se = res$se[n+4], 350 | M0.se = res$se[n+5], 351 | R0.se = res$se[n+6], 352 | negloglik = res$nll, 353 | pvmr = pvmr.par(res$par[n+2], res$par[n+3], res$par[n+4]), 354 | par.model=par_model, 355 | robust=robust, 356 | pci.fit="jointpenalty"), 357 | class="pci.fit") 358 | 359 | names(fit$beta) <- paste("beta_", colnames(X), sep="") 360 | names(fit$beta.se) <- paste(paste("beta_", colnames(X), sep=""), ".se", sep="") 361 | 362 | fit 363 | } 364 | 365 | fit.pci.a <- function (Y, X, pci_opt_method=c("jp", "twostep"), par_model=c("par", "ar1", "rw"), 366 | lambda=0, robust=FALSE, nu=5) { 367 | # On input, Y is an n x 1 column vector, and X is an n x k matrix. 368 | # lambda is a penalty value that drives the optimization towards a 369 | # solution where sigma_R has the lowest possible value. 370 | # 371 | # Fits the model 372 | # 373 | # Y[t] = alpha + beta[1] X[t,1] + beta[2] X[t,2] + ... + beta [k] X[t,k] 374 | # + m[t] + r[t] 375 | # 376 | # where 377 | # 378 | # m[t] = rho * m[t-1] + eps_M[t] 379 | # r[t] = r[t-1] + eps_R[t] 380 | # eps_M,t ~ N(0,sigma_M^2) 381 | # eps_R,t ~ N(0,sigma_R^2) 382 | # m[0] = r[0] = 0 383 | # 384 | # Fits the model using either the two step procedure or the joint penalty 385 | # procedure, according to the value of the parameter 'pci_opt_method'. Returns 386 | # an S3 object of class "pci.fit" representing the fit that was obtained. 387 | # 388 | # The parameter par_model specifies which parameters of the PAR model are 389 | # to be optimized. 
390 | 391 | pci_opt_method <- match.arg(pci_opt_method) 392 | par_model <- match.arg(par_model) 393 | 394 | Yorig <- Y 395 | if (!is.null(dim(Y))) { 396 | if (dim(Y)[2] > 1) { 397 | if (missing(X)) { 398 | X <- Y[,2:ncol(Y),drop=FALSE] 399 | } else { 400 | stop("Y must be a single column") 401 | } 402 | } 403 | Y <- Y[,1] 404 | } 405 | Y <- coredata(Y) 406 | 407 | Xorig <- X 408 | X <- as.matrix(X) 409 | 410 | A <- switch (pci_opt_method, 411 | twostep = fit.pci.twostep.a(Y, X, par_model=par_model, robust=robust, nu=nu), 412 | jp = fit.pci.jointpenalty.a(Y, X, par_model=par_model, lambda, robust=robust, nu=nu)) 413 | 414 | if (is.zoo(Yorig)) { 415 | A$index <- index(Yorig) 416 | } else { 417 | A$index <- 1:length(Y) 418 | } 419 | 420 | if (!is.null(names(Yorig))) { 421 | A$target_name <- names(Yorig)[1] 422 | } else if (!is.null(colnames(Yorig))) { 423 | A$target_name <- colnames(Yorig)[1] 424 | } else { 425 | A$target_name <- "Y" 426 | } 427 | 428 | if (!is.null(colnames(X))) { 429 | A$factor_names <- colnames(X) 430 | } else if (!is.null(names(X))) { 431 | A$factor_names <- names(X) 432 | } else { 433 | A$factor_names <- paste("X", 1:ncol(X), sep="") 434 | } 435 | 436 | A 437 | } 438 | 439 | -------------------------------------------------------------------------------- /R/hedge.R: -------------------------------------------------------------------------------- 1 | # hedge.R -- functions for finding the optimal hedging portfolio 2 | # for a given security, using the partially AR(1) and partially cointegrated models. 3 | 4 | # Copyright (C) 2016 Matthew Clegg 5 | 6 | # This program is free software; you can redistribute it and/or modify 7 | # it under the terms of the GNU General Public License as published by 8 | # the Free Software Foundation; either version 2 of the License, or 9 | # (at your option) any later version. 10 | # 11 | # This program is distributed in the hope that it will be useful, 12 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 | # GNU General Public License for more details. 
15 | # 16 | # A copy of the GNU General Public License is available at 17 | # http://www.r-project.org/Licenses/ 18 | 19 | 20 | LARGECAPS <- c("TLT", "AAPL", "GOOGL", "GOOG", "MSFT", "BRK-B", "XOM", "AMZN", 21 | "GE", "FB", "JNJ", "WFC", "JPM", "PG", "T", "PFE", "WMT", "DIS", 22 | "KO", "VZ", "BAC", "V", "HD", "CVX", "ORCL", "INTC", "C", "GILD", 23 | "MRK", "CMCSK", "CMCSA", "PEP", "CSCO", "PM", "IBM", "AGN", "AMGN", 24 | "BMY", "MO", "NKE", "UNH", "MA", "MDT", "MCD", "CVS", "BA", "MMM", 25 | "LLY", "SLB", "ABBV", "WBA", "UPS", "SBUX", "KHC", "CELG", "UTX", 26 | "GS", "HON", "AIG", "QCOM", "USB", "COST", "MDLZ", "LOW", "AXP", 27 | "MS", "ACN", "ABT", "LMT", "UNP", "RAI", "DHR", "PCLN", "BIIB", 28 | "COP", "DOW", "TXN", "CL", "DD", "BLK", "FOX", "ESRX", "REGN", 29 | "SPG", "FOXA", "GM", "MET", "TWX", "F", "NFLX", "OXY", "TMO", 30 | "CRM", "TWC", "EMC", "PNC", "BK", "PSX", "TJX", "ADBE", "DUK", 31 | "TGT", "GD", "SCHW", "NEE", "EOG", "KMB", "FDX", "MCK", "PYPL", 32 | "LYB", "MON", "COF", "PSA", "AMT", "SO", "CAT", "AVGO", "ADP", 33 | "D", "ALXN", "KR", "DAL", "PRU", "CTSH", "ACE", "KMI", "RTN", 34 | "ABC", "AET", "CI", "SYK", "EBAY", "ECL", "BRCM", "GIS", "TRV", 35 | "ANTM", "VLO", "ITW", "NOC", "CME", "YHOO", "YUM", "BDX", "HAL", 36 | "EMR", "LUV", "PCP", "MNST", "EL", "PX", "VRTX", "BBT", "CB", 37 | "CCL", "APD", "STT", "MMC", "MPC", "CAH", "EQR", "ICE", "LB", 38 | "APC", "PPG", "ATVI", "STZ", "CCI", "AAL", "JCI", "NSC", "VFC", 39 | "HCA", "AEP", "HPE", "AFL", "SYF", "MHFI", "AON", "CSX", "ILMN", 40 | "DE", "PCG", "INTU", "ETN", "TEL", "ORLY", "SHW", "HUM", "MYL", 41 | "ALL", "AVB", "BSX", "K", "BEN", "DLPH", "EXC", "BXLT", "SRE", 42 | "DFS", "CBS", "WM", "WMB", "GGP", "SYY", "AZO", "ZTS", "BHI", 43 | "AMAT", "PPL", "STI", "HCN", "PLD", "HPQ", "ROST", "UAL", "EA", 44 | "PXD", "FISV", "PRGO", "BF-B", "ADM", "GLW", "FIS", "RCL", "CERN", 45 | "MCO", "BAX", "HRL", "ZBH", "DG", "AMP", "EIX", "PEG", "ISRG", 46 | "PAYX", "BXP", "HIG", "ROP", "TROW", "TSN", "UA", "HSY", "ADI", 47 | "LVLT", "VNO", "VIAB", "PGR", "MAR", "OMC", "ED", "NVDA", "DLTR", 48 | "EQIX", "CAG", "XEL", "VTR", "STJ", "PCAR", "TAP", "CMG", "EW", 49 | "NTRS", "DPS", "ADS", "APA", "NLSN", "APH", "MU", "SWKS", "MTB", 50 | "FITB", "IP", "DVN", "HCP", "CLX", "SWK", "CPB", "CMI", "WY", 51 | "EXPE", "ALTR", "ES", "SE", "MJN", "HES", "WEC", "ESS", "RSG", 52 | "IR", "DVA", "SJM", "SNDK", "RHT", "WDC", "ADSK", "CTL", "NBL", 53 | "DTE", "PFG", "TYC", "IVZ", "ENDP", "ROK", "MHK", "PH", "FE", 54 | "BCR", "LNC", "NOV", "A", "L", "SYMC", "TSO", "GPC", "RF", "VMC", 55 | "EFX", "DISCA", "AME", "NUE", "HSIC", "WRK", "VRSK", "MSI", "CAM", 56 | "DISCK", "LH", "GWW", "XLNX", "WHR", "MAC", "O", "DHI", "M", 57 | "LRCX", "HST", "CBG", "AA", "HBI", "TRIP", "CA", "COL", "KMX", 58 | "NWL", "HOT", "TSCO", "UHS", "ETR", "SLG", "CTXS", "XL", "FAST", 59 | "TXT", "JNPR", "AAP", "CCE", "KEY", "MKC", "LLTC", "MRO", "EMN", 60 | "MOS", "NEM", "LEN", "XEC", "WAT", "BBY", "XRX", "GPS", "KIM", 61 | "KLAC", "AEE", "JWN", "HRS", "VRSN", "CF", "SIG", "TSS", "STX", 62 | "MLM", "HRB", "RL", "MAS", "SNA", "CINF", "WFM", "AKAM", "PNR", 63 | "DOV", "CTAS", "ARG", "SRCL", "TIF", "DGX", "IFF", "LLL", "CMS", 64 | "BWA", "NDAQ", "MCHP", "WU", "IPG", "BLL", "AMG", "GT", "HBAN", 65 | "KSS", "NTAP", "HOG", "BBBY", "SEE", "FCX", "CHRW", "ETFC", "UNM", 66 | "KSU", "EXPD", "JBHT", "WYN", "XRAY", "CVC", "PCL", "MAT", "MNK", 67 | "HAS", "COH", "SCG", "QRVO", "EQT", "NWS", "CMA", "SPLS", "KORS", 68 | "VAR", "GMCR", "SNI", "GAS", "TMK", "FTI", "FFIV", "DRI", "GRMN", 69 | "COG", "CNP", 
"HAR", "AN", "PNW", "URI", "FLR", "PHM", "PVH", 70 | "XYL", "RHI", "POM", "WYNN", "LUK", "ALLE", "LEG", "AES", "TGNA", 71 | "TE", "ZION", "NI", "AIV", "FSLR", "CPGX", "AVY", "FTR", "NFX", 72 | "FLS", "PKI", "IRM", "ADT", "HP", "AIZ", "FMC", "JEC", "NWSA", 73 | "PBCT", "OKE", "RIG") 74 | 75 | SECTORETFS <- c("XLB", # Materials Select Sector SPDR 76 | "XLE", # Energy Select Sector SPDR 77 | "XLF", # Financial Select Sector SPDR 78 | "XLI", # Industrial Select Sector SPDR 79 | "XLK", # Technology Select Sector SPDR 80 | "XLP", # Consumer Staples Select Sector SPDR 81 | "XLU", # Utilities Select Sector SPDR 82 | "XLV", # Health Care Select Sector SPDR 83 | "XLY" # Consumer Discretionary SPDR 84 | ) 85 | 86 | hedge.pci <- function (Y, X, 87 | maxfact=10, # Maximum number of factors to include in hedge 88 | lambda=0, # Penalty factor to be applied to random walk portion of fit 89 | use.multicore=TRUE, # TRUE if parallel processing should be used 90 | minimum.stepsize=0, # Required amount by which the score of the fit should be improved 91 | verbose = TRUE, # TRUE if information should be displayed as fitting progresses 92 | exclude.cols=c(), # A list of columns from X that should be excluded from consideration 93 | search_type=c("lasso","full", "limited"), # The type of search that is to be performed 94 | pci_opt_method=c("jp", "twostep"), # The method used to perform PCI fitting 95 | ... 96 | ) { 97 | # Searches for an optimal hedge of Y using the securities given in X. 98 | # 99 | # Input values 100 | # Y: An N x 1 column vector or data data.frame, 101 | # representing the series that is to be hedged. 102 | # X: An N x L data.frame, where each column represents a possible factor to 103 | # be used in an partially cointegrated fit. 104 | # maxfact: The maximum number of factors to be used in modeling Y. 105 | # lambda: This specifies a penalty to be applied to the random walk 106 | # portion of the AR(1) model, driving the solution towards one with a minimal value 107 | # of sigma_RW 108 | # use.multicore: If TRUE, parallel processing will be used to improve performance. 109 | # minimum.stepsize: If this is non-NA, then the search stops if an improvement 110 | # cannot be found of at least this much. 111 | # verbose: If TRUE, information is printed about the search as it progresses. 112 | # exclude.cols: Specifies the indices of columns from X which should be excluded 113 | # from consideration as part of the hedge. Alternative, the list of excluded 114 | # columns may be given as a set of strings, in which case they are interpreted 115 | # as column names. 116 | # search_type: If "lasso", then the lasso algorithm is used to find the factors 117 | # that give the best linear fit. If "full", then a greedy algorithm is 118 | # used to search for factors to be used in the hedge. At each step, 119 | # all possible additions to the portfolio are considered, and the best one 120 | # is chosen for inclusion. If "limited", then at each iteration, a preliminary 121 | # screening step is performed to identify the securities with the highest 122 | # correlations to the residuals of the currently selected portfolio. The 123 | # top securities from this list are then checked for whether they would 124 | # improve the portfolio, and the best one is included. 125 | # 126 | # Fits the model 127 | # 128 | # Y[t] = alpha + beta[1] X_i1[t,1] + beta[2] X_i2[t,2] + ... 
+ beta [m] X_iM[t,k] 129 | # + m[t] + r[t] 130 | # 131 | # where 132 | # 133 | # X_ij is a column from X, 1 <= ij <= M 134 | # m[t] = rho * m[t-1] + eps_M[t] 135 | # r[t] = r[t-1] + eps_R[t] 136 | # eps_M,t ~ N(0,sigma_M^2) 137 | # eps_R,t ~ N(0,sigma_R^2) 138 | # r[0] = 0 139 | # 140 | # Searches for up to maxfact factors from X which provided the best 141 | # fit of an partially cointegrated model to Y. 142 | # 143 | # Returns an S3 object of class pci.hedge which describes the hedge that was found. 144 | 145 | pci_opt_method <- match.arg(pci_opt_method) 146 | if (length(exclude.cols) > 0 && is(exclude.cols, "character")) { 147 | if (!all(exclude.cols %in% colnames(X))) { 148 | missing.cols <- exclude.cols[!(exclude.cols %in% colnames(X))] 149 | stop("exclude.cols contains column names not found in X: ", missing.cols) 150 | } 151 | ch.exclude.cols <- exclude.cols 152 | exclude.cols <- match(ch.exclude.cols, colnames(X)) 153 | } 154 | 155 | target_in_source <- which(sapply(1:ncol(X), function(k) all(Y == X[,k]))) 156 | if (length(target_in_source) > 0) { 157 | # cat("Found target at column ", target_in_source, "\n\n") 158 | exclude.cols <- union(exclude.cols, target_in_source) 159 | } 160 | 161 | Imap <- setdiff(1:ncol(X), exclude.cols) 162 | Xexcl <- X[,Imap] 163 | 164 | fit <- switch(match.arg(search_type), 165 | lasso = multihedge.pci.lasso (Y, Xexcl, maxfact=maxfact, 166 | lambda=lambda, use.multicore=use.multicore, 167 | minimum.stepsize=minimum.stepsize, 168 | verbose=verbose, 169 | pci_opt_method=pci_opt_method, ...), 170 | full = multihedge.pci.greedy (Y, Xexcl, maxfact=maxfact, 171 | lambda=lambda, use.multicore=use.multicore, 172 | minimum.stepsize=minimum.stepsize, 173 | verbose=verbose, 174 | pci_opt_method=pci_opt_method, ...), 175 | limited = multihedge.pci.branching (Y, Xexcl, maxfact=maxfact, 176 | lambda=lambda, use.multicore=use.multicore, 177 | minimum.stepsize=minimum.stepsize, 178 | verbose=verbose, 179 | pci_opt_method=pci_opt_method, ...) 180 | ) 181 | 182 | fit$indexes <- Imap[fit$indexes] 183 | fit$target <- Y 184 | fit$factors <- X 185 | fit 186 | } 187 | 188 | print.pci.hedge <- function (x, ...) { 189 | 190 | print.internal.pci.hedge(x) 191 | } 192 | 193 | print.internal.pci.hedge <- function (AH) { 194 | # Prints summary information for an partially cointegrated hedge model 195 | print(AH$pci) 196 | } 197 | 198 | plot.pci.hedge <- function (x, ...) { 199 | 200 | 201 | plot.internal.pci.hedge(x) 202 | } 203 | 204 | 205 | plot.internal.pci.hedge <- function (AH) { 206 | plot(AH$pci) 207 | } 208 | 209 | multihedge.pci.general <- function ( 210 | Y, # Target that is to be hedged 211 | X, # A matrix of factors to be considered for use in hedging 212 | find_branches, # A function which returns a list of branches to be 213 | # examined from the current node. 214 | maxfact=10, # Maximum number of factors to include in hedge 215 | use.multicore=TRUE, # TRUE if multiple CPU's should be used 216 | minimum.stepsize=0.5, # Cutoff score used to terminate early 217 | verbose = TRUE, # TRUE if information should be printed about execution 218 | lambda=0, # Penalty factor to be applied to random walk portion of fit 219 | ... # Additional parameters to be passed to PCI routines 220 | ) { 221 | # Constructs a sequence of hedges of Y in terms of securities chosen from X. 222 | # Uses a modified greedy algorithm to search for the best hedge. 
Given a
223 | # current node specified as a collection I of columns from X, the function
224 | # find_branches(I) returns a list of nodes that should be explored next.
225 | # The best of these nodes is added to I and then the search continues.
226 |
227 |
228 | factor_score <- function (I) {
229 | # On input, I is a collection of indexes. Computes the hedge score
230 | # for Y hedged against X[,I]. Lower scores are better.
231 | # tf <- test.pci(Y, X[,I,drop=FALSE], null_hyp="rw")
232 | # tf$statistic
233 | score <- likelihood_ratio.pci(Y, X[,I,drop=FALSE], null_model="rw", ...)
234 | score
235 | }
236 |
237 | find_factor <- function (I) {
238 | # On input, I is a collection of indexes. For each index j not in I,
239 | # computes the hedge score of that index. Returns I \cup {j*}, where j*
240 | # is the index of the factor that most improves the factor score.
241 | J <- find_branches(I)
242 | if (length(J) == 0) return(I)
243 | if (use.multicore) {
244 | jscores <- mclapply(J, function(js) factor_score(c(I,js)))
245 | } else {
246 | # jscores <- sapply(J, function (js) factor_score(c(I, js)))
247 | jscores <- lapply(J, function (js) factor_score(c(I, js)))
248 | }
249 | jscores <- c(jscores, recursive=TRUE)
250 | jstar <- J[which.min(jscores)]
251 | c(I, jstar)
252 | }
253 |
254 | if (verbose && !is.null(names(Y))) {
255 | cat(sprintf("Hedges computed for %s\n\n", names(Y)[1]))
256 | }
257 |
258 | if (verbose) cat(sprintf("%8s %8s %8s %8s %8s %8s %8s | Factor coefficients\n", "-LL", "LR[rw]", "p[rw]", "p[mr]", "rho", "R^2[MR]", "Factor"))
259 |
260 | I <- c()
261 | # debug (find_factor)
262 | score <- 0
263 | pci.fit <- NULL
264 | while (length(I) < maxfact && (length(I) < ncol(X))) {
265 | Inew <- find_factor(I)
266 | if (length(Inew) == length(I)) break
267 | pci.fit <- fit.pci(Y, X[,Inew,drop=FALSE], lambda=lambda, ...)
268 | tr <- test.pci(Y, X[,Inew,drop=FALSE], null_hyp="rw", ...)
269 | tm <- test.pci(Y, X[,Inew,drop=FALSE], null_hyp="ar1", ...)
270 | score.new <- factor_score(Inew)
271 | if ((length(I) > 0) && !is.na(minimum.stepsize) &&
272 | (score.new + minimum.stepsize > score)) break;
273 | if (verbose) {
274 | cat(sprintf("%8.2f %8.4f %8.4f %8.4f %8.4f %8.4f %8s | ", pci.fit$negloglik, score.new,
275 | tr$p.value, tm$p.value,
276 | pci.fit$rho, pci.fit$pvmr, colnames(X)[Inew[length(Inew)]]))
277 | for (j in 1:length(Inew)) {
278 | betaj_str <- paste("beta_", colnames(X)[Inew[j]], sep="")
279 | betaj <- pci.fit$beta[[betaj_str]]
280 | cat(sprintf("%8.4f ", betaj))
281 | }
282 | cat("\n")
283 | }
284 | score <- score.new
285 | I <- Inew
286 | }
287 |
288 | pci.fit <- fit.pci(Y, X[,I,drop=FALSE], lambda=lambda, ...)
289 |
290 | fit <- structure(list(pci = pci.fit,
291 | indexes = I,
292 | index_names = colnames(X)[I]
293 | ),
294 | class="pci.hedge")
295 |
296 | if (verbose) printf("\n")
297 | fit
298 | }
299 |
300 | multihedge.pci.lasso <- function (
301 | Y,
302 | X,
303 | ...
304 | ) {
305 | # Constructs a sequence of hedges of Y in terms of securities chosen from X
306 | # Uses the lasso to search for a hedge of Y in terms of securities
307 | # from X that is significant as a PCI model.
308 |
309 | lasso <- glmnet(X, Y)
310 |
311 | find_branches <- function (I) {
312 | # On input, I is the set of indexes already included in the hedge. Steps
313 | # through the lasso solution path until a variable that is not in I becomes
314 | # active, and returns the index of the first such variable.
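# (How it works: each column of lasso$beta holds the fitted coefficients for one
# value of lambda, ordered from the largest penalty to the smallest, so advancing
# ix below walks down the regularization path until a variable outside I becomes
# active.)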
315 | J <- c()
316 | ix <- 1
317 | while (all(J %in% I)) {
318 | ix <- ix + 1
319 | J <- which(lasso$beta[,ix] != 0)
320 | }
321 | return(setdiff(J, I)[1])
322 | }
323 |
324 | multihedge.pci.general(Y, X, find_branches, ...)
325 | }
326 |
327 | multihedge.pci.branching <- function (
328 | Y,
329 | X,
330 | max.branch = 10,
331 | lambda=0, # Penalty factor to be applied to random walk portion of fit
332 | pci_opt_method=c("jp", "twostep"), # The method used to perform PCI fitting
333 | robust=FALSE, # TRUE if robust fitting should be used
334 | ...
335 | ) {
336 | # Constructs a sequence of hedges of Y in terms of securities chosen from X
337 | # Uses a modified greedy algorithm to search for a hedge of Y in terms of securities
338 | # from X that is significant as a PCI model.
339 |
340 | pci_opt_method <- match.arg(pci_opt_method)
341 | DX <- diff(X)
342 |
343 | find_branches <- function (I) {
344 | # On input, I is a collection of indexes. For each index j not in I,
345 | # calculates a ranking for that index. Returns the top max.branch
346 | # indexes.
347 | J <- setdiff(1:ncol(X), I)
348 | if (length(J) <= max.branch) return(J)
349 |
350 | if (length(I) == 0) {
351 | Z <- Y
352 | } else {
353 | pci.fit <- fit.pci(Y, X[,I], lambda=lambda,
354 | pci_opt_method=pci_opt_method, robust=robust)
355 | Z <- pci.fit$residuals
356 | }
357 |
358 | DZ <- diff(Z)
359 | scores <- cor(DZ, DX[,J])
360 | best.scores <- J[order(scores, decreasing=TRUE)][1:max.branch]
361 | best.scores
362 | }
363 |
364 | multihedge.pci.general(Y, X, find_branches, lambda=lambda,
365 | pci_opt_method=pci_opt_method, robust=robust, ...)
366 | }
367 |
368 | multihedge.pci.greedy <- function (
369 | Y,
370 | X,
371 | ...
372 | ) {
373 |
374 | # Constructs a sequence of hedges of Y in terms of securities chosen from X
375 | # Uses a greedy algorithm to search for a hedge of Y in terms of securities
376 | # from X that is significant as a PCI model.
377 |
378 | find_branches <- function (I) {
379 | setdiff(1:ncol(X), I)
380 | }
381 |
382 | multihedge.pci.general(Y, X, find_branches, ...)
383 | }
384 |
385 |
-------------------------------------------------------------------------------- /tests/tests.R: --------------------------------------------------------------------------------
1 | all.tests.pass <- TRUE
2 | all.tests.error.count <- 0
3 |
4 | library(zoo)
5 | library(xts)
6 |
7 | test <- function(expr, out="", val=eval.parent(parse(text=expr), 1), tol=1e-4) {
8 | # expr is a string representing an R expression, and
9 | # out is the output that is expected. Prints and evaluates
10 | # expr. If out is given and it matches the output of
11 | # evaluating expr, returns TRUE. Otherwise, returns FALSE.
12 |
13 | cat(expr, "-> ")
14 |
15 | p <- function (v) {
16 | if (length(v) < 5) {
17 | cat(v)
18 | } else {
19 | cat(class(v), "(", length(v), ")")
20 | }
21 | }
22 | p(val)
23 |
24 | result <- all.equal(val, out, tolerance=tol)
25 | if (!isTRUE(result)) {
26 | if (!missing(out)) {
27 | cat(" (Expecting ")
28 | p(out)
29 | cat(")")
30 | }
31 | cat("\nERROR: ", result, "\n")
32 | all.tests.pass <<- FALSE
33 | all.tests.error.count <<- all.tests.error.count + 1
34 | } else {
35 | cat(" OK\n")
36 | }
37 |
38 | isTRUE(result)
39 | }
40 |
41 | assert <- function (expr, out) {
42 | # expr is a string representing an R expression,
43 | # and out is the output that is expected. Prints
44 | # and evaluates expr. If out matches the output of
45 | # evaluating expr, returns TRUE.
Otherwise, stops 46 | # the execution with an error message. 47 | if (!test(expr, out)) { 48 | stop("Expression ", deparse(substitute(expr)), 49 | " does not evaluate to its expected value\n") 50 | } 51 | } 52 | 53 | test_support <- function () { 54 | test("partialCI:::ctext(\"\", 0)", "") 55 | test("partialCI:::ctext(\"\", 1)", " ") 56 | test("partialCI:::ctext(\"hello\", 0)", "") 57 | test("partialCI:::ctext(\"hello\", 1)", "h") 58 | test("partialCI:::ctext(\"hello\", 5)", "hello") 59 | test("partialCI:::ctext(\"hello\", 6)", "hello ") 60 | test("partialCI:::ctext(\"hello\", 7)", " hello ") 61 | test("partialCI:::ctext(\"hello\", 9)", " hello ") 62 | } 63 | 64 | OIL <- structure(c(95.14, 93.66, 93.12, 93.31, 91.9, 91.36, 92.39, 91.45, 65 | 92.15, 93.78, 93.54, 93.96, 94.51, 96.35, 97.23, 96.66, 95.82, 66 | 97.49, 97.34, 98.25, 97.55, 96.44, 97.24, 97.4, 97.84, 99.98, 67 | 100.12, 99.96, 100.38, 100.27, 100.31, 102.54, 103.46, 103.2, 68 | 102.53, 103.17, 102.2, 102.93, 102.68, 102.88, 105.34, 103.64, 69 | 101.75, 101.82, 102.82, 101.39, 100.29, 98.29, 98.57, 99.23, 70 | 98.43, 100.08, 100.71, 99.68, 99.97, 100.05, 99.66, 100.61, 101.25, 71 | 101.73, 101.57, 99.69, 99.6, 100.29, 101.16, 100.43, 102.57, 72 | 103.55, 103.37, 103.68, 104.05, 103.7, 103.71, 104.33, 104.35, 73 | 101.69, 101.47, 102.2, 100.85, 101.13, 101.56, 100.07, 99.69, 74 | 100.09, 99.74, 99.81, 101.06, 100.52, 100.32, 100.89, 102.01, 75 | 102.63, 101.74, 102.31, 102.95, 102.8, 104.31, 104.03, 105.01, 76 | 104.78, 103.37, 104.26, 103.4, 103.07, 103.34, 103.27, 103.17, 77 | 103.32, 105.09, 105.02, 105.04, 107.2, 107.49, 107.52, 106.95, 78 | 106.64, 107.08, 107.95, 106.83, 106.64, 107.04, 106.49, 106.46, 79 | 106.07, 106.06, 105.18, 104.76, 104.19, 104.06, 102.93, 103.61, 80 | 101.48, 101.73, 100.56, 101.88, 103.84, 103.83, 105.34, 104.59, 81 | 103.81, 102.76, 105.23, 105.68, 104.91, 104.29, 98.23, 97.86, 82 | 98.26, 97.34, 96.93, 97.34, 97.61, 98.09, 97.36, 97.57, 95.54, 83 | 97.3, 96.44, 94.35, 96.4, 93.97, 93.61, 95.39, 95.78, 95.82, 84 | 96.44, 97.86, 92.92, 95.5, 94.51, 93.32, 92.64, 92.73, 91.71, 85 | 92.89, 92.18, 92.86, 94.91, 94.33, 93.07, 92.43, 91.46, 91.55, 86 | 93.6, 93.59, 95.55, 94.53, 91.17, 90.74, 91.02, 89.76, 90.33, 87 | 88.89, 87.29, 85.76, 85.87, 85.73, 81.72, 81.82, 82.33, 82.8, 88 | 82.76, 83.25, 80.52, 82.81, 81.27, 81.26, 81.36, 82.25, 81.06, 89 | 80.53, 78.77, 77.15, 78.71, 77.87, 78.71, 77.43, 77.85, 77.16, 90 | 74.13, 75.91, 75.64, 74.55, 74.55, 75.63, 76.52, 75.74, 74.04, 91 | 73.7, 65.94, 68.98, 66.99, 67.3, 66.73, 65.89, 63.13, 63.74, 92 | 60.99, 60.01, 57.81, 55.96, 55.97, 56.43, 54.18, 56.91, 55.25, 93 | 56.78, 55.7, 54.59, 53.46, 54.14, 53.45, 52.72, 50.05, 47.98, 94 | 48.69, 48.8, 48.35, 46.06, 45.92, 48.49, 46.37, 48.49, 46.79, 95 | 47.85, 45.93, 45.26, 44.8, 45.84, 44.08, 44.12, 47.79, 49.25, 96 | 53.04, 48.45, 50.48, 51.66, 52.99, 107.94, 106.57, 106.71, 107.01, 97 | 107.42, 107.49, 106.44, 108.02, 107.12, 108.09, 107.46, 108.45, 98 | 109.17, 109.69, 109.69, 109.14, 108.72, 109.1, 108.83, 109.36, 99 | 108.16, 106.55, 107.04, 106.81, 108.15, 110.12, 110.18, 109.21, 100 | 108.62, 108.98, 108.63, 110.14, 110.37, 109.42, 109.03, 109.76, 101 | 109.19, 109.39, 108.54, 108.98, 111.26, 109.17, 108.15, 107.99, 102 | 109.14, 108.27, 108.35, 107.88, 107.48, 108.08, 106.99, 106.79, 103 | 105.95, 105.73, 107.2, 106.59, 107.01, 105.9, 106.58, 106.64, 104 | 105.95, 105.7, 103.37, 104.88, 106.41, 104.89, 105.83, 107.39, 105 | 107.1, 107.34, 107.68, 109.1, 109.71, 109.79, 109.69, 108.54, 106 | 108.48, 
109.79, 109.53, 109.12, 109.89, 108.63, 108.63, 109.48, 107 | 109.48, 108.3, 108.17, 108.19, 108.26, 108.37, 108.78, 109.87, 108 | 109.74, 110.9, 110.84, 110.35, 111.32, 110.89, 110.19, 109.81, 109 | 109.09, 109.98, 109.21, 109.34, 108.87, 109.07, 108.43, 109.21, 110 | 110.55, 109.18, 109.83, 112.18, 113.15, 113.42, 114.02, 114.25, 111 | 115.19, 114.55, 113.62, 113.74, 112.84, 112.61, 112.62, 111.03, 112 | 110.84, 110.18, 108.98, 108.7, 107.65, 106.84, 106.2, 105.77, 113 | 104.73, 104.73, 105.41, 106.04, 106.03, 105.71, 106.48, 106.85, 114 | 105.78, 106.89, 106.7, 106.98, 106.47, 104.94, 103.45, 103.63, 115 | 102.82, 104.17, 104.02, 103.36, 103.47, 101.68, 102.27, 101.15, 116 | 101.13, 99.37, 99.74, 99.92, 100.28, 100.09, 100.49, 100.5, 100.4, 117 | 100.71, 101.12, 100.21, 100.88, 101.21, 99.51, 99.53, 98.08, 118 | 96.26, 96.42, 96.31, 96.43, 97.39, 97.7, 96.82, 96.75, 95.37, 119 | 94.87, 94.53, 95.2, 95.08, 95.7, 94.67, 94.57, 91.29, 90.8, 90.65, 120 | 90.9, 90.25, 90.47, 88.66, 87.82, 86.36, 84.02, 84.02, 85.27, 121 | 84.42, 85.17, 86.38, 85.94, 86, 85.64, 85.57, 86.91, 85.5, 84.17, 122 | 84.9, 82.12, 82.88, 82.08, 83.2, 82.9, 80.94, 80.42, 77.74, 77.51, 123 | 76.86, 77.23, 77.21, 77.61, 79.2, 79.62, 77.62, 77.39, 71.89, 124 | 70.87, 71.13, 70.13, 68.48, 68, 65.64, 66.11, 63.32, 63.65, 61.67, 125 | 61.09, 60.26, 59.84, 58.81, 58.87, 58.31, 59.07, 58.67, 58.72, 126 | 57.86, 55.6, 55.27, 55.38, 51.08, 50.12, 49.06, 49.43, 47.64, 127 | 46.9, 45.13, 45.82, 47.66, 47.38, 46.49, 46.5, 46.09, 46.69, 128 | 46.07, 46.55, 47.07, 46.61, 47.52, 51.74, 54.41, 55.07, 55.98, 129 | 55.88, 57, 26.95, 26.63, 26.52, 26.45, 26.14, 26.14, 26.27, 25.94, 130 | 26.07, 26.52, 26.44, 26.47, 26.74, 27.08, 26.95, 26.9, 26.71, 131 | 27.06, 27.05, 27.11, 26.88, 26.71, 26.92, 26.98, 27.31, 27.78, 132 | 27.73, 27.8, 27.94, 28.02, 28.07, 28.48, 28.6, 28.69, 28.56, 133 | 28.68, 28.5, 28.59, 28.54, 28.65, 29.2, 28.85, 28.31, 28.57, 134 | 28.73, 28.36, 28.04, 27.74, 27.72, 27.91, 27.55, 27.81, 27.78, 135 | 27.79, 28.06, 28.01, 28.02, 28.23, 28.56, 28.64, 28.62, 28.05, 136 | 28.01, 28.34, 28.57, 28.44, 28.9, 29.11, 29.06, 29.02, 29.24, 137 | 29.24, 29.32, 29.47, 29.43, 29, 28.92, 29.08, 28.7, 28.76, 28.76, 138 | 28.46, 28.31, 28.4, 28.28, 28.3, 28.65, 28.53, 28.48, 28.67, 139 | 29.01, 29.13, 28.98, 29.14, 29.3, 29.44, 29.75, 29.79, 29.92, 140 | 29.86, 29.49, 29.68, 29.47, 29.36, 29.48, 29.38, 29.41, 29.45, 141 | 29.79, 29.85, 29.88, 30.43, 30.35, 30.53, 30.45, 30.6, 30.69, 142 | 30.82, 30.78, 30.82, 30.94, 30.75, 30.79, 30.76, 30.75, 30.43, 143 | 30.35, 30.28, 30.21, 29.91, 30.15, 29.61, 29.82, 29.44, 29.63, 144 | 29.8, 29.68, 29.97, 29.88, 29.96, 29.83, 29.86, 29.67, 29.52, 145 | 29.22, 28.9, 28.77, 28.99, 28.85, 28.8, 28.99, 28.89, 28.93, 146 | 28.74, 28.84, 28.13, 28.33, 27.98, 27.83, 27.89, 28.07, 28, 27.99, 147 | 28.09, 28.05, 28.16, 28.4, 27.76, 28.3, 28.16, 27.92, 27.67, 148 | 27.55, 27.27, 27.58, 27.29, 27.4, 27.91, 27.74, 27.46, 27.43, 149 | 27.13, 27.25, 27.65, 27.53, 27.74, 27.94, 27.13, 26.87, 26.91, 150 | 26.47, 26.73, 26.31, 26.01, 25.28, 25.39, 25.16, 24.22, 24.07, 151 | 24.47, 24.63, 24.46, 24.6, 24, 24.46, 24.34, 24.13, 24.28, 24.64, 152 | 24.23, 24.18, 23.49, 23.09, 23.63, 23.35, 23.48, 23.11, 23.18, 153 | 22.97, 22.28, 22.71, 22.57, 22.2, 22.22, 22.62, 22.92, 22.64, 154 | 22.04, 21.94, 20.03, 20.6, 20.04, 20.04, 19.82, 19.51, 18.65, 155 | 18.86, 18.13, 17.61, 17.03, 16.24, 16.49, 16.52, 16.05, 17.03, 156 | 16.16, 16.68, 16.28, 16.06, 15.66, 15.65, 15.7, 15.28, 14.43, 157 | 13.82, 14.05, 14.2, 14, 13.27, 
13.43, 13.99, 13.3, 14.04, 13.32, 158 | 13.57, 13.27, 12.94, 12.82, 13.11, 12.57, 12.65, 13.62, 14.27, 159 | 15.09, 14.09, 14.56, 14.76, 14.98), 160 | .indexTZ = "UTC", .indexCLASS = "Date", tclass = "Date", tzone = "UTC", src = "FRED", 161 | updated = structure(1423948051.10322, class = c("POSIXct", 162 | "POSIXt")), 163 | index = structure(c(1388620800, 1388707200, 1388966400, 164 | 1389052800, 1389139200, 1389225600, 1389312000, 1389571200, 1389657600, 165 | 1389744000, 1389830400, 1389916800, 1390262400, 1390348800, 1390435200, 166 | 1390521600, 1390780800, 1390867200, 1390953600, 1391040000, 1391126400, 167 | 1391385600, 1391472000, 1391558400, 1391644800, 1391731200, 1391990400, 168 | 1392076800, 1392163200, 1392249600, 1392336000, 1392681600, 1392768000, 169 | 1392854400, 1392940800, 1393200000, 1393286400, 1393372800, 1393459200, 170 | 1393545600, 1393804800, 1393891200, 1393977600, 1394064000, 1394150400, 171 | 1394409600, 1394496000, 1394582400, 1394668800, 1394755200, 1395014400, 172 | 1395100800, 1395187200, 1395273600, 1395360000, 1395619200, 1395705600, 173 | 1395792000, 1395878400, 1395964800, 1396224000, 1396310400, 1396396800, 174 | 1396483200, 1396569600, 1396828800, 1396915200, 1397001600, 1397088000, 175 | 1397174400, 1397433600, 1397520000, 1397606400, 1397692800, 1398038400, 176 | 1398124800, 1398211200, 1398297600, 1398384000, 1398643200, 1398729600, 177 | 1398816000, 1398902400, 1398988800, 1399248000, 1399334400, 1399420800, 178 | 1399507200, 1399593600, 1399852800, 1399939200, 1400025600, 1400112000, 179 | 1400198400, 1400457600, 1400544000, 1400630400, 1400716800, 1400803200, 180 | 1401148800, 1401235200, 1401321600, 1401408000, 1401667200, 1401753600, 181 | 1401840000, 1401926400, 1402012800, 1402272000, 1402358400, 1402444800, 182 | 1402531200, 1402617600, 1402876800, 1402963200, 1403049600, 1403136000, 183 | 1403222400, 1403481600, 1403568000, 1403654400, 1403740800, 1403827200, 184 | 1404086400, 1404172800, 1404259200, 1404345600, 1404691200, 1404777600, 185 | 1404864000, 1404950400, 1405036800, 1405296000, 1405382400, 1405468800, 186 | 1405555200, 1405641600, 1405900800, 1405987200, 1406073600, 1406160000, 187 | 1406246400, 1406505600, 1406592000, 1406678400, 1406764800, 1406851200, 188 | 1407110400, 1407196800, 1407283200, 1407369600, 1407456000, 1407715200, 189 | 1407801600, 1407888000, 1407974400, 1408060800, 1408320000, 1408406400, 190 | 1408492800, 1408579200, 1408665600, 1408924800, 1409011200, 1409097600, 191 | 1409184000, 1409270400, 1409616000, 1409702400, 1409788800, 1409875200, 192 | 1410134400, 1410220800, 1410307200, 1410393600, 1410480000, 1410739200, 193 | 1410825600, 1410912000, 1410998400, 1411084800, 1411344000, 1411430400, 194 | 1411516800, 1411603200, 1411689600, 1411948800, 1412035200, 1412121600, 195 | 1412208000, 1412294400, 1412553600, 1412640000, 1412726400, 1412812800, 196 | 1412899200, 1413158400, 1413244800, 1413331200, 1413417600, 1413504000, 197 | 1413763200, 1413849600, 1413936000, 1414022400, 1414108800, 1414368000, 198 | 1414454400, 1414540800, 1414627200, 1414713600, 1414972800, 1415059200, 199 | 1415145600, 1415232000, 1415318400, 1415577600, 1415664000, 1415750400, 200 | 1415836800, 1415923200, 1416182400, 1416268800, 1416355200, 1416441600, 201 | 1416528000, 1416787200, 1416873600, 1416960000, 1417132800, 1417392000, 202 | 1417478400, 1417564800, 1417651200, 1417737600, 1417996800, 1418083200, 203 | 1418169600, 1418256000, 1418342400, 1418601600, 1418688000, 1418774400, 204 | 1418860800, 1418947200, 1419206400, 
1419292800, 1419379200, 1419552000, 205 | 1419811200, 1419897600, 1419984000, 1420156800, 1420416000, 1420502400, 206 | 1420588800, 1420675200, 1420761600, 1421020800, 1421107200, 1421193600, 207 | 1421280000, 1421366400, 1421712000, 1421798400, 1421884800, 1421971200, 208 | 1422230400, 1422316800, 1422403200, 1422489600, 1422576000, 1422835200, 209 | 1422921600, 1423008000, 1423094400, 1423180800, 1423440000), 210 | tzone = "UTC", tclass = "Date"), .Dim = c(278L, 3L), 211 | .Dimnames = list(NULL, c("WTI", "BRENT", "DBO")), class = c("xts", "zoo")) 212 | 213 | test_lr <- function (fast_only=FALSE) { 214 | test("partialCI:::loglik.pci.fkf(OIL$WTI, OIL$BRENT, 0, 1, -0.0195, 0.8483, 0.8364)", 447.8704) 215 | test("partialCI:::loglik.pci.ss(OIL$WTI, OIL$BRENT, 0, 1, -0.0195, 0.8483, 0.8364)", 443.1509) 216 | test("partialCI:::loglik.pci.css(OIL$WTI, OIL$BRENT, 0, 1, -0.0195, 0.8483, 0.8364)", 443.1509) 217 | test("partialCI:::loglik.pci.sst(OIL$WTI, OIL$BRENT, 0, 1, -0.0195, 0.8483, 0.8364)", 444.8827) 218 | test("partialCI:::loglik.pci.csst(OIL$WTI, OIL$BRENT, 0, 1, -0.0195, 0.8483, 0.8364)", 444.8827) 219 | 220 | } 221 | 222 | test.likelihood_ratio.pci <- function (fast_only=FALSE) { 223 | test("partialCI:::likelihood_ratio.pci(OIL$WTI, OIL$BRENT)", c(negloglik=-19.88135), tol=0.1) 224 | test("partialCI:::likelihood_ratio.pci(OIL$WTI, OIL$BRENT, null_model=\"ar1\")", c(negloglik=-18.71606), tol=0.1) 225 | test("partialCI:::likelihood_ratio.pci(OIL$WTI, OIL$BRENT, pci_opt_method=\"twostep\")", c(negloglik=-21.24474)) 226 | test("partialCI:::likelihood_ratio.pci(OIL$WTI, OIL$BRENT, null_model=\"ar1\", pci_opt_method=\"twostep\")", c(negloglik=-19.97211)) 227 | test("partialCI:::likelihood_ratio.pci(OIL$WTI, OIL$BRENT, robust=TRUE)", c(negloglik=-16.94564), tol=0.1) 228 | test("partialCI:::likelihood_ratio.pci(OIL$WTI, OIL$BRENT, robust=TRUE, null_model=\"ar1\")", c(negloglik=-16.71372), tol=0.1) 229 | test("partialCI:::likelihood_ratio.pci(OIL$WTI, OIL$BRENT, robust=TRUE, pci_opt_method=\"twostep\")", c(negloglik=-18.84097)) 230 | test("partialCI:::likelihood_ratio.pci(OIL$WTI, OIL$BRENT, robust=TRUE, null_model=\"ar1\", pci_opt_method=\"twostep\")", c(negloglik=-16.87271)) 231 | } 232 | 233 | test_lr2 <- function(fast_only=FALSE) { 234 | # test.likelihood_ratio.par(fast_only) 235 | 236 | test("partialAR:::par.rw.pvalue(-3.5,400) < 0.05", TRUE) 237 | test("partialAR:::par.rw.pvalue(-1,500) > 0.10", TRUE) 238 | test("partialAR:::par.mr.pvalue(-1,600) < 0.05", TRUE) 239 | test("partialAR:::par.mr.pvalue(-0.1, 700) > 0.05", TRUE) 240 | test("partialAR:::par.rw.pvalue(-3.5,400, robust=TRUE) < 0.05", TRUE) 241 | test("partialAR:::par.rw.pvalue(-1,500, robust=TRUE) > 0.10", TRUE) 242 | test("partialAR:::par.mr.pvalue(-1,600, robust=TRUE) < 0.05", TRUE) 243 | test("partialAR:::par.mr.pvalue(-0.1, 700, robust=TRUE) > 0.05", TRUE) 244 | 245 | test("partialAR:::par.mr.pvalue(-2,400,ar1test='kpss') < 0.05", TRUE) 246 | test("partialAR:::par.mr.pvalue(-0.5, 500,ar1test='kpss') > 0.05", TRUE) 247 | test("partialAR:::par.mr.pvalue(-2,600, robust=TRUE,ar1test='kpss') < 0.05", TRUE) 248 | test("partialAR:::par.mr.pvalue(-0.5, 700, robust=TRUE,ar1test='kpss') > 0.05", TRUE) 249 | 250 | test("partialAR:::par.joint.pvalue(-4,-0.5,500) < 0.05", TRUE) 251 | test("partialAR:::par.joint.pvalue(-1,-0.25,500) > 0.05", TRUE) 252 | test("partialAR:::par.joint.pvalue(-5,-0.8,500, robust=TRUE) < 0.05", TRUE) 253 | test("partialAR:::par.joint.pvalue(-3,-0.1,500, robust=TRUE) > 0.05", TRUE) 254 | 
test("partialAR:::par.joint.pvalue(-5,-2,500, ar1test='kpss') < 0.05", TRUE) 255 | test("partialAR:::par.joint.pvalue(-3,-1,500, ar1test='kpss') > 0.05", TRUE) 256 | test("partialAR:::par.joint.pvalue(-4,-0.5,50000)", 0.03) 257 | test("partialAR:::par.joint.pvalue(-4,-0.5,50)", 0.10) 258 | test("partialAR:::par.joint.pvalue(4,-0.5,50)", 1) 259 | test("partialAR:::par.joint.pvalue(-4,-0.5,49)", 1) 260 | 261 | test("partialAR:::test.par.nullrw(data.L)$p.value < 0.05", TRUE) 262 | test("partialAR:::test.par.nullrw(data.IBM)$p.value > 0.05", TRUE) 263 | test("partialAR:::test.par.nullrw(data.L, robust=TRUE)$p.value < 0.10", TRUE) 264 | test("partialAR:::test.par.nullrw(data.IBM, robust=TRUE)$p.value > 0.10", TRUE) 265 | 266 | test("partialAR:::test.par.nullmr(data.L)$p.value <= 0.01", TRUE) 267 | test("partialAR:::test.par.nullmr(data.L, robust=TRUE)$p.value <= 0.01", TRUE) 268 | test("partialAR:::test.par.nullmr(data.L, ar1test='kpss')$p.value <= 0.01", TRUE) 269 | test("partialAR:::test.par.nullmr(data.L, robust=TRUE, ar1test='kpss')$p.value <= 0.01", TRUE) 270 | 271 | test("partialAR:::test.par.nullmr(data.IBM)$p.value < 0.05", TRUE) 272 | test("partialAR:::test.par.nullmr(data.IBM, robust=TRUE)$p.value < 0.10", TRUE) 273 | test("partialAR:::test.par.nullmr(data.IBM, ar1test='kpss')$p.value > 0.10", TRUE) 274 | test("partialAR:::test.par.nullmr(data.IBM, ar1test='kpss', robust=TRUE)$p.value > 0.10", TRUE) 275 | 276 | test("partialAR:::test.par(data.L, null_hyp='rw')$p.value == partialAR:::test.par.nullrw(data.L)$p.value", TRUE) 277 | test("partialAR:::test.par(data.IBM, null_hyp='rw')$p.value == partialAR:::test.par.nullrw(data.IBM)$p.value", TRUE) 278 | test("partialAR:::test.par(data.L, null_hyp='mr')$p.value == partialAR:::test.par.nullmr(data.L)$p.value", TRUE) 279 | test("partialAR:::test.par(data.IBM, null_hyp='mr')$p.value == partialAR:::test.par.nullmr(data.IBM)$p.value", TRUE) 280 | 281 | test("partialAR:::test.par(data.L)$p.value['PAR'] <= 0.01", c(PAR=TRUE)) 282 | test("partialAR:::test.par(data.L, robust=TRUE)$p.value['PAR'] <= 0.10", c(PAR=TRUE)) 283 | test("partialAR:::test.par(data.IBM)$p.value['PAR'] > 0.10", c(PAR=TRUE)) 284 | test("partialAR:::test.par(data.IBM, robust=TRUE)$p.value['PAR'] > 0.10", c(PAR=TRUE)) 285 | test("partialAR:::test.par(data.L, ar1test='kpss')$p.value['PAR'] <= 0.01", c(PAR=TRUE)) 286 | test("partialAR:::test.par(data.L, ar1test='kpss',robust=TRUE)$p.value['PAR'] <= 0.10", c(PAR=TRUE)) 287 | test("partialAR:::test.par(data.IBM, ar1test='kpss')$p.value['PAR'] > 0.10", c(PAR=TRUE)) 288 | 289 | print(partialAR:::test.par(data.L)) 290 | print(partialAR:::test.par(data.L, robust=TRUE)) 291 | 292 | test("partialAR:::which.hypothesis.partest(partialAR:::test.par(data.L))", "PAR") 293 | test("partialAR:::which.hypothesis.partest(partialAR:::test.par(data.L, robust=TRUE))", "RRW") 294 | test("partialAR:::which.hypothesis.partest(partialAR:::test.par(data.IBM))", "RW") 295 | 296 | partialAR:::print.par.lrt(); cat("\n\n") 297 | partialAR:::print.par.lrt(robust=TRUE); cat("\n\n") 298 | partialAR:::print.par.lrt(latex=TRUE); cat("\n\n") 299 | 300 | # partialAR:::print.par.lrt.mr(); cat("\n\n") 301 | # partialAR:::print.par.lrt.mr(robust=TRUE); cat("\n\n") 302 | # partialAR:::print.par.lrt.mr(latex=TRUE); cat("\n\n") 303 | 304 | partialAR:::print.par.lrt.rw(); cat("\n\n") 305 | partialAR:::print.par.lrt.rw(robust=TRUE); cat("\n\n") 306 | partialAR:::print.par.lrt.rw(latex=TRUE); cat("\n\n") 307 | 308 | } 309 | 310 | test_fit_twostep <- function (fast_only = 
FALSE) { 311 | F <- partialCI:::fit.pci.twostep(OIL$WTI, OIL$BRENT, include_alpha=TRUE) 312 | test("F$alpha", c(alpha=3.988743)) 313 | test("F$beta", c(beta_BRENT=0.9014), tol=0.01) 314 | test("F$rho", c(rho=-0.002161227)) 315 | test("F$sigma_M", c(sigma_M=0.8509465)) 316 | test("F$sigma_R", c(sigma_R=0.7937507)) 317 | test("F$alpha.se", c(alpha.se=0.7837437)) 318 | test("F$rho.se", c(rho.se=0.1696716)) 319 | test("F$sigma_M.se", c(sigma_M.se=0.1056687)) 320 | test("F$sigma_R.se", c(sigma_R.se=0.111671)) 321 | test("F$negloglik", c(negloglik=436.6048)) 322 | test("F$pvmr", c(pvmr=0.6972993)) 323 | 324 | F0 <- partialCI:::fit.pci.twostep(OIL$WTI, OIL$BRENT) 325 | test("F0$beta", c(beta_BRENT=0.9014), tol=0.01) 326 | test("F0$rho", c(rho=-0.002161227)) 327 | test("F0$sigma_M", c(sigma_M=0.8509465)) 328 | test("F0$sigma_R", c(sigma_R=0.7937507)) 329 | test("F0$rho.se", c(rho.se=0.1696716)) 330 | test("F0$sigma_M.se", c(sigma_M.se=0.1056687)) 331 | test("F0$sigma_R.se", c(sigma_R.se=0.111671)) 332 | test("F0$negloglik", c(negloglik=436.6048)) 333 | test("F0$pvmr", c(pvmr=0.6972993)) 334 | 335 | FR0 <- partialCI:::fit.pci.twostep(OIL$WTI, OIL$BRENT, robust=TRUE) 336 | test("FR0$beta", c(beta_BRENT=0.911297), tol=0.01) 337 | test("FR0$rho", c(rho=-0.006326975)) 338 | test("FR0$sigma_M", c(sigma_M=0.6299893)) 339 | test("FR0$sigma_R", c(sigma_R=0.6845977)) 340 | test("FR0$rho.se", c(rho.se=0.180226)) 341 | test("FR0$sigma_M.se", c(sigma_M.se=0.09403069)) 342 | test("FR0$sigma_R.se", c(sigma_R.se=0.08726811)) 343 | test("FR0$negloglik", c(negloglik=429.9065)) 344 | test("FR0$pvmr", c(pvmr=0.6302378)) 345 | 346 | FR <- partialCI:::fit.pci.twostep(OIL$WTI, OIL$BRENT, robust=TRUE, include_alpha=TRUE) 347 | test("FR$alpha", c(alpha=3.344333)) 348 | test("FR$beta", c(beta_BRENT=0.911297), tol=0.01) 349 | test("FR$rho", c(rho=-0.006326975)) 350 | test("FR$sigma_M", c(sigma_M=0.6299893)) 351 | test("FR$sigma_R", c(sigma_R=0.6845977)) 352 | test("FR$alpha.se", c(alpha.se=0.6301423)) 353 | test("FR$rho.se", c(rho.se=0.180226)) 354 | test("FR$sigma_M.se", c(sigma_M.se=0.09403069)) 355 | test("FR$sigma_R.se", c(sigma_R.se=0.08726811)) 356 | test("FR$negloglik", c(negloglik=429.9065)) 357 | test("FR$pvmr", c(pvmr=0.6302378)) 358 | 359 | } 360 | 361 | test_fit_jointpenalty <- function (fast_only = FALSE) { 362 | guess <- c(alpha = 22.415066, beta_BRENT = 0.67375332, rho=-0.04976431, 363 | sigma_M=0.78760586, sigma_R=0.83733729, M0=0, R0=0) 364 | test("partialCI:::pci.jointpenalty.guess(as.zoo(OIL$WTI), as.zoo(OIL$BRENT), include_alpha=TRUE)", guess) 365 | 366 | guess0 <- structure(c(0.673753323158499, -0.0499696397350701, 0.787500882009433, 367 | 0.837430798633094, 0, 22.4150662982716), .Names = c("beta_BRENT", 368 | "rho", "sigma_M", "sigma_R", "M0", "R0")) 369 | test("partialCI:::pci.jointpenalty.guess(as.zoo(OIL$WTI), as.zoo(OIL$BRENT))", guess0) 370 | 371 | rw.par <- structure(c(22.4150662982132, 0.673753316670566, 0, 0, 1.23155657754959, 0, 0), 372 | .Names = c("alpha", "beta_BRENT", "rho", "sigma_M", "sigma_R", "M0", "R0")) 373 | test("partialCI:::fit.pci.jointpenalty.rw(as.zoo(OIL$WTI), as.zoo(OIL$BRENT), include_alpha=TRUE)$par", rw.par) 374 | 375 | rw.par0 <- structure(c(0.673753316670566, 0, 0, 1.23155657754959, 0, 22.415066298274), 376 | .Names = c("beta_BRENT", "rho", "sigma_M", "sigma_R", "M0", "R0")) 377 | test("partialCI:::fit.pci.jointpenalty.rw(as.zoo(OIL$WTI), as.zoo(OIL$BRENT))$par", rw.par0) 378 | 379 | mr.par <- structure(c(19.0448876678824, 0.707408539377214, 0.979236271148003, 380 | 
1.22640513769963, 0, 0, 0), 381 | .Names = c("alpha", "beta_BRENT", 382 | "rho", "sigma_M", "sigma_R", "M0", "R0")) 383 | test("partialCI:::fit.pci.jointpenalty.mr(as.zoo(OIL$WTI), as.zoo(OIL$BRENT),include_alpha=TRUE)$par", mr.par) 384 | 385 | mr.par0 <- structure(c(0.704082607377607, 0.98035806173428, 1.22650034762294, 386 | 0, 0, 19.1413233596611), .Names = c("beta_BRENT", "rho", "sigma_M", 387 | "sigma_R", "M0", "R0")) 388 | test("partialCI:::fit.pci.jointpenalty.mr(as.zoo(OIL$WTI), as.zoo(OIL$BRENT))$par", mr.par0) 389 | 390 | both.par <- structure(c(14.5845792002844, 0.743252984476208, 0, 0.826758135981716, 391 | 0.794395597894045, 0, 0), 392 | .Names = c("alpha", "beta_BRENT", "rho", 393 | "sigma_M", "sigma_R", "M0", "R0")) 394 | 395 | test("partialCI:::fit.pci.jointpenalty.both(as.zoo(OIL$WTI), as.zoo(OIL$BRENT),include_alpha=TRUE)$par", both.par, tol=0.05) 396 | 397 | both.par0 <- structure(c(0.743252984476208, 0, 0.826758135981716, 398 | 0.794395597894045, 0, 14.5845792002844), 399 | .Names = c("beta_BRENT", "rho", 400 | "sigma_M", "sigma_R", "M0", "R0")) 401 | 402 | test("partialCI:::fit.pci.jointpenalty.both(as.zoo(OIL$WTI), as.zoo(OIL$BRENT))$par", both.par0, tol=0.05) 403 | 404 | rwrob.par <- structure(c(28.1949715154527, 0.620210939294621, 0, 0, 0.948726114365134, 405 | 0, 0), .Names = c("alpha", "beta_BRENT", "rho", "sigma_M", "sigma_R", 406 | "M0", "R0")) 407 | test("partialCI:::fit.pci.jointpenalty.rw(as.zoo(OIL$WTI), as.zoo(OIL$BRENT), robust=TRUE, include_alpha=TRUE)$par", rwrob.par) 408 | 409 | rwrob.par0 <- structure(c(0.620210939294621, 0, 0, 0.948726114365134, 410 | 0, 28.1949715154527), .Names = c("beta_BRENT", "rho", "sigma_M", "sigma_R", 411 | "M0", "R0")) 412 | test("partialCI:::fit.pci.jointpenalty.rw(as.zoo(OIL$WTI), as.zoo(OIL$BRENT), robust=TRUE)$par", rwrob.par0) 413 | 414 | mrrob.par <- structure(c(28.1948554986119, 0.62081912655694, 0.993012863223067, 415 | 0.948691236006445, 0, 0, 0), .Names = c("alpha", "beta_BRENT", 416 | "rho", "sigma_M", "sigma_R", "M0", "R0")) 417 | test("partialCI:::fit.pci.jointpenalty.mr(as.zoo(OIL$WTI), as.zoo(OIL$BRENT), robust=TRUE, include_alpha=TRUE)$par", mrrob.par) 418 | 419 | mrrob.par0 <- structure(c(0.634017098277415, 0.992411921549702, 0.948914138555383, 420 | 0, 0, 26.7041944119358), .Names = c("beta_BRENT", "rho", "sigma_M", 421 | "sigma_R", "M0", "R0")) 422 | test("partialCI:::fit.pci.jointpenalty.mr(as.zoo(OIL$WTI), as.zoo(OIL$BRENT), robust=TRUE)$par", mrrob.par0) 423 | 424 | bothrob.par <- structure(c(20.098585528449, 0.692830550628575, 0, 0.599736912154181, 425 | 0.67441954256228, 0, 0), .Names = c("alpha", "beta_BRENT", "rho", 426 | "sigma_M", "sigma_R", "M0", "R0")) 427 | test("partialCI:::fit.pci.jointpenalty.both(as.zoo(OIL$WTI), as.zoo(OIL$BRENT), robust=TRUE, include_alpha=TRUE)$par", bothrob.par, tol=0.05) 428 | 429 | bothrob.par0 <- structure(c(0.692830550628575, 0, 0.599736912154181, 430 | 0.67441954256228, 0, 20.098585528449), .Names = c("beta_BRENT", "rho", 431 | "sigma_M", "sigma_R", "M0", "R0")) 432 | test("partialCI:::fit.pci.jointpenalty.both(as.zoo(OIL$WTI), as.zoo(OIL$BRENT), robust=TRUE)$par", bothrob.par0, tol=0.05) 433 | 434 | } 435 | 436 | test_fit <- function (fast_only = FALSE) { 437 | test_fit_twostep(fast_only) 438 | test_fit_jointpenalty(fast_only) 439 | 440 | test("partialCI:::fit.pci(as.zoo(OIL$WTI), as.zoo(OIL$BRENT))$negloglik", c(negloglik=432.4849)) 441 | test("partialCI:::fit.pci(as.zoo(OIL$WTI), as.zoo(OIL$BRENT), robust=TRUE)$negloglik", c(negloglik=422.9159)) 442 | 
test("partialCI:::fit.pci(as.zoo(OIL$WTI), as.zoo(OIL$BRENT), \"twostep\")$negloglik", c(negloglik=436.6048)) 443 | test("partialCI:::fit.pci(as.zoo(OIL$WTI), as.zoo(OIL$BRENT), \"twostep\", robust=TRUE)$negloglik", c(negloglik=429.9065)) 444 | test("partialCI:::fit.pci(as.zoo(OIL$WTI), as.zoo(OIL$BRENT), par_model=\"ar1\")$negloglik", c(negloglik=451.201)) 445 | test("partialCI:::fit.pci(as.zoo(OIL$WTI), as.zoo(OIL$BRENT), par_model=\"rw\")$negloglik", c(negloglik=452.3663)) 446 | test("partialCI:::fit.pci(as.zoo(OIL$WTI), as.zoo(OIL$BRENT), par_model=\"ar1\", robust=TRUE)$negloglik", c(negloglik=439.6296)) 447 | test("partialCI:::fit.pci(as.zoo(OIL$WTI), as.zoo(OIL$BRENT), par_model=\"rw\", robust=TRUE)$negloglik", c(negloglik=439.8615)) 448 | 449 | f <- partialCI:::fit.pci(as.zoo(OIL$WTI), as.zoo(OIL$BRENT), include_alpha=TRUE) 450 | last_state <- structure(list(Y = 52.99, Yhat = 57.0022715118106, Z = -4.01227151181061, 451 | M = 0.359169355426614, R = -4.37144086723722, eps_M = 0.369402426645854, 452 | eps_R = 0.579553335373364), .Names = c("Y", "Yhat", "Z", 453 | "M", "R", "eps_M", "eps_R"), row.names = "2015-02-09", class = "data.frame") 454 | test("tail(partialCI:::statehistory.pci(partialCI:::fit.pci(as.zoo(OIL$WTI), as.zoo(OIL$BRENT), include_alpha=TRUE)),1)", last_state, tol=0.05) 455 | 456 | f0 <- partialCI:::fit.pci(as.zoo(OIL$WTI), as.zoo(OIL$BRENT)) 457 | last_state0 <- structure(list(Y = 52.99, Yhat = 42.3231449335717, Z = 10.6668550664283, 458 | M = 0.355883860455669, R = 10.3109712059726, eps_M = 0.368030242920157, 459 | eps_R = 0.58348828109853), .Names = c("Y", "Yhat", "Z", "M", 460 | "R", "eps_M", "eps_R"), row.names = "2015-02-09", class = "data.frame") 461 | 462 | test("tail(partialCI:::statehistory.pci(partialCI:::fit.pci(as.zoo(OIL$WTI), as.zoo(OIL$BRENT),)),1)", last_state0, tol=0.05) 463 | 464 | 465 | 466 | df <- structure(list(alpha = 14.6829401164186, beta = 0.742455481013507, 467 | rho = -0.0232576651129888, sigma_M = 0.816415189337928, sigma_R = 0.804964566979587, 468 | M0 = 0, R0 = 0, alpha.se = 6.1306344674343, beta.se = 0.0559775667569459, 469 | rho.se = 0.172275543552826, sigma_M.se = 0.108139711899864, 470 | sigma_R.se = 0.109367951001435, M0.se = NA_real_, R0.se = NA_real_, 471 | negloglik = 432.475807192823, pvmr = 0.67807277837379), .Names = c("alpha", 472 | "beta", "rho", "sigma_M", "sigma_R", "M0", "R0", "alpha.se", 473 | "beta.se", "rho.se", "sigma_M.se", "sigma_R.se", "M0.se", "R0.se", 474 | "negloglik", "pvmr"), row.names = "beta_BRENT", class = "data.frame") 475 | 476 | test("as.data.frame(partialCI:::fit.pci(as.zoo(OIL$WTI), as.zoo(OIL$BRENT), include_alpha=TRUE))", df, tol=0.05) 477 | 478 | df0 <- structure(list(beta = 0.742511314624065, rho = -0.0275437658638479, 479 | sigma_M = 0.813095055217725, sigma_R = 0.808578327606414, 480 | M0 = 0, R0 = 14.9933286994785, beta.se = 0.0560546241014983, 481 | rho.se = 0.17155905646127, sigma_M.se = 0.107698809017417, 482 | sigma_R.se = 0.108177642504822, M0.se = NA_real_, R0.se = NA_real_, 483 | negloglik = 432.520303570585, pvmr = 0.675291879436992), .Names = c("beta", 484 | "rho", "sigma_M", "sigma_R", "M0", "R0", "beta.se", "rho.se", 485 | "sigma_M.se", "sigma_R.se", "M0.se", "R0.se", "negloglik", "pvmr" 486 | ), row.names = "beta_BRENT", class = "data.frame") 487 | test("as.data.frame(partialCI:::fit.pci(as.zoo(OIL$WTI), as.zoo(OIL$BRENT)))", df0, tol=0.05) 488 | } 489 | 490 | test_pci <- function (fast_only=FALSE) { 491 | # Comprehensive unit testing for PAR package 492 | 493 | options(warn=1) 494 | 
library(zoo) 495 | 496 | test_lr(fast_only) 497 | test.likelihood_ratio.pci(fast_only) 498 | test_fit(fast_only) 499 | # test_lr2(fast_only) 500 | 501 | if (all.tests.pass) { 502 | cat("SUCCESS! All tests passed.\n") 503 | } else { 504 | stop("ERRORS! ", all.tests.error.count," tests failed\n") 505 | } 506 | } 507 | 508 | test_pci(TRUE) 509 | --------------------------------------------------------------------------------
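
Usage sketch (illustrative, not part of the package sources): the snippet below shows one way hedge.pci(), defined above, might be called on the OIL xts object (columns WTI, BRENT, DBO) from tests/tests.R. It assumes partialCI is installed and that hedge.pci is exported from the package namespace (otherwise call partialCI:::hedge.pci); as in the tests, the series are converted with as.zoo before fitting, and the argument names follow the hedge.pci signature.

library(partialCI)
library(xts)                         # OIL is an xts object; xts also provides as.zoo

OILz <- as.zoo(OIL)                  # mirror the tests, which pass zoo series to fit.pci

# Hedge WTI against the two candidate factors using the greedy ("full") search
# and the joint-penalty ("jp") fitting method; use.multicore = FALSE avoids
# mclapply, which is not available on Windows.
h <- hedge.pci(OILz[, "WTI"], OILz[, c("BRENT", "DBO")],
               maxfact = 2, search_type = "full", pci_opt_method = "jp",
               use.multicore = FALSE)

h$index_names                        # columns of X chosen for the hedge
print(h)                             # print.pci.hedge: prints the underlying PCI fit (h$pci)
plot(h)                              # plot.pci.hedge: plots the underlying PCI fit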