├── DESCRIPTION
├── LICENSE
├── NAMESPACE
├── R
    ├── additional.R
    ├── dfm.R
    ├── dfmMS.R
    ├── em_algorithm.R
    ├── kalman.R
    ├── kim.R
    ├── methods.R
    └── otherlang
    │   ├── kalman.c
    │   ├── kim.jl
    │   └── max.jl
├── README.md
├── data
    └── NBBsurvey.rda
├── inst
    └── doc
    │   ├── dynamic-factors.R
    │   ├── dynamic-factors.Rnw
    │   └── dynamic-factors.pdf
├── man
    ├── Estep.Rd
    ├── K_filter.Rd
    ├── K_smoother.Rd
    ├── KimFilter.Rd
    ├── KimSmoother.Rd
    ├── NBBsurvey.Rd
    ├── VAR.Rd
    ├── dfm.Rd
    ├── dfmMS.Rd
    ├── em_converged.Rd
    ├── predict.dfm.Rd
    └── summary.dfm.Rd
├── tests
    └── testthat.R
└── vignettes
    └── dynamic-factors.Rnw


/DESCRIPTION:
--------------------------------------------------------------------------------
 1 | Package: dynfactoR
 2 | Title: Dynamic factor model estimation for nowcasting
 3 | Version: 0.1
 4 | Authors@R: person("Rytis", "Bagdziunas", , "rytis.bagdziunas@uclouvain.be", role = c("aut", "cre"))
 5 | Description: This package implements a subset of state space modelling, namely models with dynamic factors. These models are commonly used in economics for short-term forecasting due to possibility to summarize information from large datasets in a small number of factors.
 6 | Depends: R (>= 3.1.3), MASS, optimx
 7 | License: MIT + file LICENSE
 8 | LazyData: true
 9 | Suggests: xts, testthat
10 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | YEAR: 2015
2 | COPYRIGHT HOLDER: Rytis Bagdziunas
3 | 


--------------------------------------------------------------------------------
/NAMESPACE:
--------------------------------------------------------------------------------
1 | exportPattern("^[^\\.]")
2 | import(MASS)
3 | import(optimx)
4 | 


--------------------------------------------------------------------------------
/R/additional.R:
--------------------------------------------------------------------------------
 1 | #' Estimate a p-th order vector autoregressive (VAR) model
 2 | #'
 3 | #' @param x Data matrix (T x n)
 4 | #' @param p Maximum lag order, i.e. VAR(p) will be estimated
 5 | #' @return Estimated parameter matrix, residuals and regression model
 6 | #' independent and dependent variables
 7 | #' @examples
 8 | #' x <- matrix(rnorm(50*2), nrow=50, ncol=2)
 9 | #' VAR(x, 2)
10 | VAR <- function(x, p) {
11 |     T <- nrow(x)
12 |     Y <- x[(p + 1):T, ]
13 |     X <- c()
14 |     for (i in 1:p) {
15 |         X <- cbind(X, x[(p + 1 - i):(T - i), ])
16 |     }
17 |     A <- solve(t(X) %*% X) %*% t(X) %*% Y
18 |     res <- Y - X %*% A
19 |     
20 |     return(list(Y = Y, X = X, A = A, res = res))
21 | }
22 | 


--------------------------------------------------------------------------------
/R/dfm.R:
--------------------------------------------------------------------------------
  1 | #' Estimates a dynamic factor model based on Doz, Gianone & Reichlin (2011)
  2 | #'
  3 | #' @param X Data matrix (T x n)
  4 | #' @param r Number of static factors
  5 | #' @param p Lag order for factors
  6 | #' @param q Number of dynamic factors, must be equal to or less than r
  7 | #' @param max_iter Maximum number of iterations in EM-algorithm
  8 | #' @param threshold Threshold for algorithm convergence
  9 | #' @param rQ Restrictions on system state covariance
 10 | #' @param rC Restrictions on factor loading matrix
 11 | #' @return 3 types of factor estimates, namely principal component estimate, two step
 12 | #' estimate based on PCA and Kalman filtering and QML estimate based on EM-algorithm
 13 | #' @examples
 14 | #' x <- matrix(rnorm(50*10), 50, 10)
 15 | #' W <- as.logical(matrix(rbinom(50*10, 1, 0.1), 50, 10))
 16 | #' x[W] <- NA
 17 | #' dfm(x, 2, 2, 1)
 18 | dfm <- function(X, r, p, q, max_iter=100, threshold=1e-4, rQ, rC) {
 19 | 
 20 |   if(missing(rQ)) { rQ <- '' }
 21 |   if(missing(rC)) { rC <- '' }
 22 |   if(missing(q)) { q <- r }
 23 |   if (q > r) { stop("r must be larger than q.")}
 24 | 
 25 |   T <- dim(X)[1]; N <- dim(X)[2]
 26 |   x <- apply(X, 2, function(z) { (z - mean(z, na.rm=TRUE))/sd(z, na.rm=TRUE) })
 27 | 
 28 |   Mx <- apply(X, 2, mean, na.rm=TRUE)
 29 |   Wx <- apply(X, 2, sd, na.rm=TRUE)
 30 |   W <- !is.na(x)
 31 |   # A consists of two parts. In particular, the upper dynamic part and the
 32 |   # lower invariable identity part to ensure time lag coherence.
 33 | 
 34 |   A <- rbind(matrix(0, nrow=r, ncol=r*p),
 35 |              diag(1, nrow=r*(p-1), ncol=r*p))
 36 | 
 37 |   Q <- matrix(0, nrow=p*r, ncol=p*r)
 38 |   Q[1:r, 1:r] <- diag(1, r)
 39 | 
 40 |   eigen.decomp <- eigen(cov(x, use="complete.obs"))
 41 |   v <- eigen.decomp$vectors[,1:r]
 42 |   d <- eigen.decomp$values[1:r]
 43 | 
 44 |   chi <- x %*% v %*% t(v)
 45 |   d <- diag(1, r)
 46 |   F <- x %*% v
 47 |   F_pc <- F
 48 |   F <- na.omit(F)
 49 |   # If there are any dynamic factors, VAR(p) model is estimated
 50 |   # to initialize their parameters.
 51 | 
 52 |   if (p > 0)
 53 |   {
 54 |     # ML estimator for VAR(p) model when Q is restricted
 55 |     if (rQ == 'identity')
 56 |     {
 57 |       fit <- VAR(F,p)
 58 |       A[1:r, 1:(r*p)] <- t(fit$A)
 59 |       Q[1:r, 1:r] <- diag(1, r)
 60 |     }
 61 |     else
 62 |     {
 63 |       fit <- VAR(F, p)
 64 |       A[1:r, 1:(r*p)] <- t(fit$A)
 65 |       H <- cov(fit$res)
 66 | 
 67 |       # This only extracts the variance explained by 
 68 |       # the dynamic components
 69 | 
 70 |       if (r > q)
 71 |       {
 72 |         q.decomp <- eigen(H)
 73 |         P <- q.decomp$vectors[,1:q, drop=FALSE]
 74 |         M <- q.decomp$values[1:q]
 75 |         if (q == 1)
 76 |         {
 77 |           P <- P * P[1,]
 78 |           Q[1:r, 1:r] <- P %*% t(P) * M
 79 |         } else {
 80 |           P <- P %*% diag(sign(P[1,]))
 81 |           Q[1:r, 1:r] <- P %*% diag(M) %*% t(P)
 82 |         }
 83 |       } else
 84 |       {
 85 |         Q[1:r, 1:r] <- H
 86 |       }
 87 |     }
 88 |   }
 89 |   R <- diag(diag(cov(x - chi, use="complete.obs")))
 90 |   Z <- fit$X
 91 |   initx <- Z[1,]
 92 |   initV <- matrix(ginv(kronecker(A,A)) %*% as.numeric(Q),
 93 |                   ncol=r*p, nrow=r*p)
 94 |   C <- cbind(v, matrix(0, nrow=N, ncol=r*(p-1)))
 95 | 
 96 |   previous_loglik <- -.Machine$double.xmax
 97 |   loglik <- 0
 98 |   num_iter <- 0
 99 |   LL <- c()
100 | 
101 |   converged <- 0
102 |   kf_res <- K_filter(initx, initV, x, A, C, R, Q)
103 |   ks_res <- K_smoother(A, kf_res$xitt, kf_res$xittm,
104 |                        kf_res$Ptt, kf_res$Pttm, C, R, W)
105 | 
106 |   xsmooth <- ks_res$xitT
107 |   Vsmooth <- ks_res$PtT
108 |   Wsmooth <- ks_res$PtTm
109 | 
110 |   F_kal <- t(xsmooth[1:r,, drop=FALSE])
111 |   if (rC == 'upper' & (r > 1))
112 |   {
113 |     dimC <- dim(C[,1:r])
114 |     rK <- rep(0, (r-1)*r/2)
115 |     irC <- which(matrix(upper.tri(C[,1:r]) + 0) == 1)
116 |     rH <- matrix(0, nrow=length(rK), ncol=prod(dimC))
117 |     for (i in 1:length(rK))
118 |     {
119 |       rH[i,irC[i]] <- 1
120 |     }
121 |   }
122 | 
123 |   while ((num_iter < max_iter) & !converged)
124 |   {
125 | 
126 |     # E-step will return a list of sufficient statistics, namely second (cross)-moments
127 |     # for latent and observed data. This is then plugged back into M-step.
128 |     em_res <- Estep(t(x), A, C, Q, R, initx, initV, W)
129 |     beta <- em_res$beta_t
130 |     gamma <- em_res$gamma_t
131 |     delta <- em_res$delta_t
132 |     gamma1 <- em_res$gamma1_t
133 |     gamma2 <- em_res$gamma2_t
134 |     P1sum <- em_res$V1 + em_res$x1 %*% t(em_res$x1)
135 |     x1sum <- em_res$x1
136 |     loglik <- em_res$loglik_t
137 | 
138 |     num_iter <- num_iter + 1
139 | 
140 |     # M-step computes model parameters as a function of the sufficient statistics that
141 |     # were computed with the E-step. Iterate the procedure until convergence. Due to the
142 |     # model specification, likelihood maximiation in the M-step is just an OLS estimation.
143 |     # In particular, X_t = C*F_t and F_t = A*F_(t-1).
144 | 
145 |     if (rC == 'upper' & (r > 1))
146 |     {
147 |       fp <- matrix(delta[,1:r] %*% ginv(gamma[1:r,1:r]))
148 |       kronCR <- kronecker(ginv(gamma[1:r,1:r]), R)
149 |       sp <- kronCR %*% t(rH) %*% ginv(rH %*% kronCR %*% t(rH)) %*% (rK - rH %*% fp)
150 |       C[,1:r] <- matrix(fp + sp, nrow=dimC[1], ncol=dimC[2])  
151 |     }
152 |     else
153 |     {
154 |       C[,1:r] <- delta[,1:r] %*% ginv(gamma[1:r,1:r])
155 |     }
156 | 
157 |     if (p > 0)
158 |     {
159 | 
160 |       A_update <- beta[1:r,1:(r*p), drop=FALSE] %*% solve(gamma1[1:(r*p),1:(r*p)])
161 |       A[1:r,1:(r*p)] <- A_update
162 |       if (rQ != 'identity')
163 |       {
164 |         H <- (gamma2[1:r, 1:r] - A_update %*% t(beta[1:r, 1:(r*p), drop=FALSE])) / (T-1)
165 |         if (r > q)
166 |         {
167 |           h.decomp <- svd(H)
168 |           P <- h.decomp$v[,1:q, drop=FALSE]
169 |           M <- h.decomp$d[1:q]
170 |           if (q == 1)
171 |           {
172 |             P <- P * P[1,]
173 |             Q[1:r, 1:r] <- P %*% t(P) * M
174 |           } else {
175 |             P <- P %*% diag(sign(P[1,]))
176 |             Q[1:r, 1:r] <- P %*% diag(M) %*% t(P)
177 |           }
178 |         } else {
179 |           Q[1:r, 1:r] <- H
180 |         }
181 |       }
182 |     }
183 |     xx <- as.matrix(na.omit(x))
184 |     R <- (t(xx) %*% xx - C %*% t(delta)) / T
185 |     RR <- diag(R); RR[RR < 1e-7] <- 1e-7; R <- diag(RR)
186 | 
187 |     R <- diag(diag(R))
188 |     LL <- c(LL, loglik)
189 | 
190 |     initx <- x1sum
191 |     initV <- P1sum - initx %*% t(initx)
192 | 
193 |     converged <- em_converged(loglik, previous_loglik, threshold=threshold)
194 |     previous_loglik <- loglik
195 |     if (num_iter < 25) { converged <- FALSE }
196 | 
197 |   }
198 | 
199 |   if (converged == TRUE)
200 |   {
201 |     cat("Converged after", num_iter, "iterations.\n")
202 |   } else
203 |   {
204 |     cat("Maximum number of iterations reached.\n")
205 |   }
206 | 
207 |   kf <- K_filter(initx, initV, x, A, C, R, Q)
208 |   ks <- K_smoother(A, kf$xitt, kf$xittm, kf$Ptt, kf$Pttm, C, R, W)
209 | 
210 |   xsmooth <- ks$xitT
211 | 
212 |   chi <- t(xsmooth) %*% t(C) %*% diag(Wx) + kronecker(matrix(1,T,1), t(Mx))
213 |   F_hat <- t(xsmooth[1:r,, drop=FALSE])
214 | 
215 |   final_object <- list("pca"=F_pc, "qml"=F_hat, "twostep"=F_kal,
216 |                        "A"=A[1:r,], "C"=C[,1:r], "Q"=Q[1:q,1:q], "R"=R,
217 |                        "p"=p, "data"=x)
218 |   class(final_object) <- c("dfm", "list")
219 | 
220 |   return(final_object)
221 | }
222 | 
223 | #' National Bank of Belgium business and consumer surveys
224 | #'
225 | #' @format A time-stamped dataframe (xts object) with
226 | #' @source \url{http://stat.nbb.be}
227 | "NBBsurvey"
228 | 


--------------------------------------------------------------------------------
/R/dfmMS.R:
--------------------------------------------------------------------------------
 1 | #' Dynamic factor model with Markov-switching states
 2 | #'
 3 | #' @param X Data matrix (Txn)
 4 | #' @param s Number of states. Only \code{2} are supported for now
 5 | #' @param J Number of factors. Currently, only \code{1} is supported
 6 | #' @param p Lag number for VAR model of factors
 7 | #' @param x0 Initial value for state
 8 | #' @param P0 Initial value for state covariance
 9 | #' @return Return an estimator. Currently, it runs via a likelihood
10 | #' maximization an so is rather slow. Do not call it with large number
11 | #' of input variables or missing data. Some box constraints are currently
12 | #' to speed up the process, these are based on usual rationale for economic
13 | #' data.
14 | dfmMS <- function(X, J=1, s=2, p=1, x0, P0)
15 | {
16 |   
17 |   T <- nrow(X)
18 |   n <- ncol(X) 
19 |   x <- apply(X, 2, function(z) { (z-mean(z, na.rm=TRUE))/sd(z, na.rm=TRUE) })
20 | 
21 |   J <- 1
22 |   s <- 2
23 | 
24 |   if (missing(x0)) { x0 <- rep(0,J) }
25 |   if (missing(P0)) { P0 <- diag(1,J) }
26 |   
27 |   initV <- list()
28 |   initV$A <- c(0.96, 0.6)
29 |   initV$F <- runif(n*J*s, -1, 1)
30 |   initV$R <- rep(1,n)
31 |   initV$p <- c(0.98, 0.95)
32 | 
33 |   # State equation covariance is restricted to identity
34 |   Q <- diag(1, J)
35 | 
36 |   dimA <- J*J*s; dimF <- n*J*s; dimAF <- dimA + dimF; dimAFR <- dimAF + n
37 | 
38 |   KimFilterOptim <- function(pars)
39 |   {
40 |     A <- array(head(pars, dimA), c(J,J,s))
41 |     F <- array(pars[(dimA+1):(dimAF)], c(n,J,s))
42 |     R <- diag(pars[(dimAF+1):(dimAFR)])
43 |     dp <- tail(pars, s); r <- 1-dp
44 |     p <- diag(dp) + r - diag(r)
45 |     results <- KimFilter(x0, P0, x, F, A, R, Q, p)
46 |     return(results$result)
47 |   }
48 | 
49 |   optimP <- optimx(par=unlist(initV),
50 |                    fn = KimFilterOptim,
51 |                    lower = c(rep(-0.2, J*J*s), # A
52 |                              rep(-2.5, n*J*s), # F
53 |                              rep(0.15, n),     # R
54 |                              rep(0.9,s)),      # p
55 |                    upper = c(rep(0.98, J*J*s), # A
56 |                              rep(2.5, n*J*s),  # F
57 |                              rep(1.5, n),      # R
58 |                              rep(0.991,2)),    # p
59 |                    method = "L-BFGS-B",
60 |                    control=list(maximize=TRUE,maxit=1000, trace=1, kkt=FALSE))
61 | 
62 |   pars <- as.numeric(optimP)[1:length(unlist(initV))]
63 |   A_hat <- array(pars[1:dimA], c(J,J,s))
64 |   F_hat <- array(pars[(dimA+1):dimAF], c(n,J,s))
65 |   R_hat <- diag(pars[(dimAF+1):dimAFR])
66 |   p_hat <- tail(pars, s); p_hat <- diag(p_hat) + (1-p_hat) - diag(1-p_hat)
67 | 
68 |   kf <- KimFilter(x0, P0, x, F_hat, A_hat, R_hat, Q, p_hat)
69 |   ks <- KimSmoother(kf$xA, kf$Pa, A_hat, kf$P, kf$x, p_hat, kf$stateP, kf$stateP_fut)
70 | 
71 | return(list("A"=A_hat, "F"=F_hat, "R"=R_hat, "p"=p_hat,
72 |             "xF"=ks$xF, "Pf"=ks$Pf, "ProbS"=ks$ProbS))
73 | }
74 | 


--------------------------------------------------------------------------------
/R/em_algorithm.R:
--------------------------------------------------------------------------------
 1 | #' Computation of the expectation step in the EM-algorithm.
 2 | 
 3 | #' @param y Data matrix
 4 | #' @param A System state matrix
 5 | #' @param C Observation matrix
 6 | #' @param R Observation equation variance 
 7 | #' @param Q System state equation variance
 8 | #' @param W Logical matrix with dim(W) = dim(y) 
 9 | #' indicating missing observations
10 | #' @param initx Initial value for state variable
11 | #' @param initV Initial value for state matrix
12 | #' @return Sufficient statistics used for M-step
13 | Estep <- function(y, A, C, Q, R, initx, initV, W) {
14 |     
15 |     os <- dim(y)[1]
16 |     T <- dim(y)[2]
17 |     ss <- nrow(A)
18 |     
19 |     kf <- K_filter(initx, initV, t(y), A, C, R, Q)
20 |     ks <- K_smoother(A, kf$xitt, kf$xittm, kf$Ptt, kf$Pttm, C, R, W)
21 | 
22 |     xsmooth <- ks$xitT
23 |     Vsmooth <- ks$PtT
24 |     Wsmooth <- ks$PtTm
25 | 
26 |     delta <- matrix(0, os, ss)
27 |     gamma <- matrix(0, ss, ss)
28 |     beta <- matrix(0, ss, ss)
29 |     
30 |     for (t in 1:T) {
31 |       z <- y[,t]; z[is.na(z)] <- 0
32 | # There seems to be a problem here
33 |       delta <- delta + z %*% t(xsmooth[,t])
34 |       gamma <- gamma + xsmooth[,t] %*% t(xsmooth[,t]) + Vsmooth[,,t]
35 |       if (t > 1) {
36 |         beta <- beta + xsmooth[,t] %*% t(xsmooth[,(t-1)]) + Wsmooth[,,t]
37 |       }
38 |     }
39 |     
40 |     gamma1 <- gamma - xsmooth[, T] %*% t(xsmooth[, T]) - Vsmooth[, , T]
41 |     gamma2 <- gamma - xsmooth[, 1] %*% t(xsmooth[, 1]) - Vsmooth[, , 1]
42 |     x1 <- xsmooth[, 1]
43 |     V1 <- Vsmooth[, , 1]
44 | 
45 |     return(list(beta_t = beta, gamma_t = gamma, delta_t = delta, gamma1_t = gamma1,
46 |                 gamma2_t = gamma2, x1 = x1, V1 = V1, loglik_t = kf$loglik, xsmooth = xsmooth))
47 |     
48 | }
49 | 
50 | #' Convergence test for EM-algorithm.
51 | #'
52 | #' @param loglik Current value of the log-likelihood function
53 | #' @param previous_loglik Value of the log-likelihood function at the previous
54 | #  iteration
55 | #' @param threshold If difference is less than threshold, then algorithm has
56 | #' converged
57 | #' @param check_increased TO DOCUMENT
58 | #' @return A logical statement indicating whether EM algorithm has converged
59 | #' according to slope convergence test
60 | em_converged <- function(loglik, previous_loglik, threshold=1e-4, check_increased=TRUE) {
61 |     
62 |     converged <- FALSE
63 |     decrease <- 0
64 |     
65 |     if (check_increased == TRUE) {
66 |         if (loglik - previous_loglik < -0.001) {
67 | #            cat("*** Likelihood decreased from ", previous_loglik, " to ", loglik, "\n")
68 |             decrease <- 1
69 |         }
70 |     }
71 |     
72 |     delta_loglik <- abs(loglik - previous_loglik)
73 |     avg_loglik <- (abs(loglik) + abs(previous_loglik) + .Machine$double.eps)/2
74 |     
75 |     if ((delta_loglik/avg_loglik) < threshold) {
76 |         converged <- TRUE
77 |     }
78 |     return(converged)
79 |     # return(list('converged'=converged, 'decrease'=decrease))
80 | }
81 | 


--------------------------------------------------------------------------------
/R/kalman.R:
--------------------------------------------------------------------------------
  1 | #' Implements a Kalman for dynamic factor model.
  2 | #'
  3 | #' @param initx Initial value for state space observations
  4 | #' @param initV Initial value for state covariance
  5 | #' @param x Observation matrix
  6 | #' @param A State space matrix
  7 | #' @param C System matrix
  8 | #' @param R State space covariance
  9 | #' @param Q System covariance
 10 | #' @return Filtered state space variable and its covariance matrix
 11 | #' as well as their forecast for next period for further iterations
 12 | K_filter <- function(initx, initV, x, A, C, R, Q) {
 13 |     
 14 |     T <- dim(x)[1]
 15 |     N <- dim(x)[2]
 16 |     r <- dim(A)[1]
 17 |     W <- !is.na(x)    
 18 |     y <- t(x)
 19 |     
 20 |     xittm <- matrix(0, r, (T+1))
 21 |     xitt <- matrix(0, r, T)
 22 |     
 23 |     Pttm <- array(0, c(r, r, (T+1)))
 24 |     Ptt <- array(0, c(r, r, T))
 25 | 
 26 |     xittm[,1] <- initx
 27 |     Pttm[,,1] <- initV
 28 |    
 29 |     logl <- c()
 30 |     Ci <- C
 31 |     Ri <- R
 32 |     for (j in 1:T) {
 33 | #      missing_data <- MissData(y[,j], C, R)
 34 | #      C <- missing_data$C
 35 | #      R <- missing_data$R
 36 |       C <- Ci[W[j,],, drop=FALSE]
 37 |       R <- Ri[W[j,], W[j,], drop=FALSE]
 38 |       if (FALSE) #(all(!W[j,])) #(all(is.na(missing_data$y) == TRUE))
 39 |       {
 40 |          xitt[,,j] <- A %*% xittm[,,j]
 41 |          Ptt[,,j] <- C %*% Pttm[,,j] %*% t(C) + R
 42 |       } else
 43 |       {
 44 |          # Innovation covariance (inverse)
 45 |          Icov <- C %*% Pttm[,,j] %*% t(C) + R
 46 |          L <- solve(Icov)
 47 |          # Innovation residual
 48 |          Ires <- as.numeric(na.omit(y[,j])) - C %*% xittm[,j]
 49 |          # Optimal Kalman gain
 50 |          G <- Pttm[,,j] %*% t(C) %*% L
 51 |          # Updated state estimate: predicted + (Kalman gain)*fitted
 52 |          xitt[,j] <- xittm[,j] + G %*% Ires
 53 |          # Updated covariance estimate
 54 |          Ptt[,,j] <- Pttm[,,j] - G %*% C %*% Pttm[,,j]
 55 |          # State space variable and covariance predictions E[f_t | t-1]
 56 |          xittm[,(j+1)] <- A %*% xitt[,j]
 57 |          Pttm[,,(j+1)] <- A %*% Ptt[,,j] %*% t(A) + Q
 58 | 
 59 |          # Compute log-likelihood with Mahalanobis distance
 60 |          d <- length(Ires)
 61 |          S <- C %*% Pttm[,,j] %*% t(C) + R
 62 |          Sinv <- solve(S)
 63 |          if (nrow(R) == 1)
 64 |          {
 65 |            GG <- t(C) %*% solve(R) %*% C
 66 |            detS <- prod(R) %*% det(diag(1, r) + Pttm[,,j] %*% GG)
 67 |          } else {
 68 |            GG <- t(C) %*% diag(1/diag(R)) %*% C
 69 |            detS <- prod(diag(R)) * det(diag(1, r) + Pttm[,,j] %*% GG)
 70 |          }
 71 |          denom <- (2 * pi)^(d/2) * sqrt(abs(detS))
 72 |          mahal <- sum(t(Ires) %*% Sinv %*% Ires)
 73 |          logl[j] <- -0.5 * mahal - log(denom)
 74 |         }
 75 |     }
 76 |     loglik <- sum(logl, na.rm=TRUE)
 77 |     return(list(xitt = xitt, xittm = xittm, Ptt = Ptt, Pttm = Pttm, loglik = loglik))
 78 | }
 79 | 
 80 | #' Implements Kalman smoothing and is used along with Kalman filter.
 81 | #' Kalman filter outputs enter Kalman smoother as inputs.
 82 | #'
 83 | #' @param A State space matrix
 84 | #' @param xitt State space variable
 85 | #' @param xittm Predicted state space variable
 86 | #' @param Ptt State space covariance
 87 | #' @param Pttm Predicted state space covariance
 88 | #' @param C System matrix
 89 | #' @param R State space covariance
 90 | #' @param W Logical matrix (T x n) indicating missing data.
 91 | #' TRUE if observation is present, FALSE if it is missing.
 92 | #' @return Smoothed state space variable and state space covariance matrix
 93 | K_smoother <- function(A, xitt, xittm, Ptt, Pttm, C, R, W) {
 94 |     T <- dim(xitt)[2]
 95 |     r <- dim(A)[1]
 96 |     
 97 |     Pttm <- Pttm[,,(1:(dim(Pttm)[3] - 1)), drop = FALSE]
 98 |     xittm <- xittm[,(1:(dim(xittm)[2] - 1)), drop = FALSE]
 99 |    
100 |     # Whereas J is of constant dimension, L and K dimensions may vary
101 |     # depending on existence of NAs
102 |     J <- array(0, c(r, r, T))
103 |     L <- list()
104 |     K <- list()
105 | 
106 |     for (i in 1:(T-1)) {
107 |         J[,,i] <- Ptt[,,i] %*% t(A) %*% solve(Pttm[,,(i+1)], tol = 1e-32)
108 |     }
109 | 
110 |     Ci <- C 
111 |     Ri <- R
112 |     for (i in 1:T) {
113 |       # Only keep entries for non-missing data
114 |       C <- Ci[W[i,],, drop=FALSE]
115 |       R <- Ri[W[i,], W[i,], drop=FALSE]
116 |       L[[i]] <- solve(C %*% Pttm[,,i] %*% t(C) + R)
117 |       K[[i]] <- Pttm[,,i] %*% t(C) %*% L[[i]]
118 |     }
119 |     
120 |     xitT <- cbind(matrix(0, r, (T-1)), xitt[,T])
121 |     PtT <- array(0, c(r, r, T))
122 |     PtTm <- array(0, c(r, r, T))
123 |     PtT[,,T] <- Ptt[,,T]
124 |     PtTm[,,T] <- (diag(1, r) - K[[T]] %*% C) %*% A %*% Ptt[,,(T-1)]
125 | 
126 |     for (j in 1:(T-1)) {
127 |         xitT[,(T-j)] <- xitt[,(T-j)] + J[,,(T-j)] %*% (xitT[,(T+1-j)] - xittm[,(T+1-j)])
128 |         PtT[,,(T-j)] <- Ptt[,,(T-j)] + J[,,(T-j)] %*% (PtT[,,(T+1-j)] - Pttm[,,(T+1-j)]) %*% t(J[,,(T-j)])
129 |     }
130 |     
131 |     for (j in 1:(T-2)) {
132 |         PtTm[,,(T-j)] <- Ptt[,,(T-j)] %*% t(J[,,(T-j-1)]) + J[,,(T-j)] %*% (PtTm[,,(T-j+1)] - A %*% Ptt[,,(T-j)]) %*% t(J[,,(T-j-1)])
133 |     }
134 | 
135 |     return(list(xitT = xitT, PtT = PtT, PtTm = PtTm))
136 | }
137 | 


--------------------------------------------------------------------------------
/R/kim.R:
--------------------------------------------------------------------------------
  1 | #' Implementation of Kim (1994) filter, an extension to Kalman filter
  2 | #' for dynamic linear models with Markov-switching. Documentation
  3 | #' is incomplete, rudimentary and needs to be rechecked!
  4 | #'
  5 | #' @param x0 Initial condition for state vector
  6 | #' @param P0 Initial condition for state variance
  7 | #' @param y Data matrix (Txn)
  8 | #' @param F System matrix for measurement equation
  9 | #' @param A Transition matrix for state equation
 10 | #' @param R Error covariance for measurement equation
 11 | #' @param Q Error covariance for state equation
 12 | #' @param p Transition probability matrix
 13 | #' @return Filtered states and covariances with associated probability matrices.
 14 | KimFilter <- function(x0, P0, y, F, A, R, Q, p)
 15 | {
 16 |   # Define all containers for further computations. Notations for variables and indices,
 17 |   # where appropriate, carefully follow Kim (1994). State vector is denoted as 'x', its
 18 |   # covariance as 'P'. Appended letters explicit whether these are updated, approximated
 19 |   # or smoothed.
 20 | 
 21 |   if (is.vector(y) || (ncol(y) <= length(x0)))
 22 |   {
 23 |     stop("Number of factors should be strictly lower than number of variables. \n
 24 |           Increase number of variables or estimate a VAR model instead.")
 25 |   }
 26 | 
 27 |   T <- nrow(y)
 28 |   n <- dim(F)[1]
 29 |   J <- length(x0)
 30 |   s <- dim(p)[1]
 31 | 
 32 |   ## x:   x^(i,j)_(t|t-1): predicted state vector - (2.6)
 33 |   ## xU:  x^(i,j)_(t|t): updated state vector - (2.11)
 34 |   ## P:   P^(i,j)_(t|t-1): predicted state covariance - (2.7)
 35 |   ## Pu:  P^(i,j)_(t|t): updated state covariance - (2.12)
 36 |   ## eta: eta^(i,j)_(t|t-1): conditional forecast error - (2.8)
 37 |   ## H:   H^(i,j)_(t): conditional variance of forecast error - (2.9)
 38 |   ## K:   K^(i,j)_(t): Kalman gain - (2.10)
 39 |   ## lik: f(y_t, S_(t-1)=i, S_t = j | t-1): joint conditional density - (2.16)
 40 |   ## loglik: log of (2.16)
 41 |   x <- array(NA, c(T,J,s,s))
 42 |   xU <- array(NA, c(T,J,s,s))
 43 |   P <- array(NA, c(T,J,J,s,s))
 44 |   Pu <- array(NA, c(T,J,J,s,s))
 45 |   eta <- array(NA, c(T,n,s,s))
 46 |   H <- array(NA, c(T,n,n,s,s))
 47 |   K <- array(NA, c(T,J,n,s,s))
 48 |   lik <- array(NA, c(T,s,s))
 49 |   loglik <- array(NA, c(T,s,s))
 50 |   ## Pr[S_(t-1) = i, S_t = j | t-1 ]: (2.15)
 51 |   ## Pr[S_(t-1) = i, S_t = j | t ]: (2.17)
 52 |   ## Pr[S_t = j | t-1 ]: used only for the smoothing part
 53 |   ## Pr[S_t = j | t ]: (2.18)
 54 |   jointP_fut <- array(NA, c(T,s,s))
 55 |   jointP_cur <- array(NA, c((T+1),s,s))
 56 |   stateP_fut <- array(NA, c(T,s))
 57 |   stateP <- array(NA, c(T,s))
 58 | 
 59 |   ## x^(j)_(t|t): approximate state vector conditional on S_j - (2.13)
 60 |   ## P^(j)_(t|t): approximate state covariance conditional on S_j - (2.14)
 61 |   xA <- array(NA, c(T,J,s))
 62 |   Pa <- array(0, c(T,J,J,s))
 63 |   result <- array(0, c(T,1))
 64 | 
 65 |   # Some initial conditions to get started
 66 |   for (i in 1:s) { xA[1,,i] <- x0 }
 67 |   for (i in 1:s) { Pa[1,,,i] <- P0 }
 68 |   jointP_cur[1,,] <- matrix(c(0.25,0.25,0.25,0.25), ncol=2)
 69 | 
 70 |   for (t in 2:T)
 71 |   {
 72 |     for (j in 1:s)
 73 |     {
 74 |       for (i in 1:s)
 75 |       {
 76 |         x[t,,i,j] <- A[,,j] %*% xA[(t-1),,i]
 77 |         P[t,,,i,j] <- A[,,j] %*% Pa[(t-1),,,i] %*% t(A[,,j]) + Q
 78 |         eta[t,,i,j] <- y[t,] - as(F[,,j], "matrix") %*% x[t,,i,j]
 79 |         H[t,,,i,j] <- F[,,j] %*% as(P[t,,,i,j], "matrix") %*% t(F[,,j]) + R
 80 |         K[t,,,i,j] <- P[t,,,i,j] %*% t(F[,,j]) %*% solve(H[t,,,i,j])
 81 |         xU[t,,i,j] <- x[t,,i,j] + K[t,,,i,j] %*% eta[t,,i,j]
 82 |         Pu[t,,,i,j] <- (diag(1,J) - K[t,,,i,j] %*% F[,,j]) %*% P[t,,,i,j]
 83 |         jointP_fut[t,i,j] <- p[i,j]*sum(jointP_cur[(t-1),,i]) # is everything alright here?
 84 |         lik[t,i,j] <- (2*pi)^(-n/2) * det(H[t,,,i,j])^(-1/2) *
 85 |                       exp(-1/2*t(eta[t,,i,j]) %*% solve(H[t,,,i,j]) %*% eta[t,,i,j]) *
 86 |                       jointP_fut[t,i,j]
 87 |         loglik[t,i,j] <- log(lik[t,i,j])
 88 |         jointP_cur[t,i,j] <- lik[t,i,j]
 89 |       }
 90 |       # Technically, there should be sum(lik[t,,]) term but it cancels out and is computed later
 91 |       stateP[t,j] <- sum(jointP_cur[t,,j])
 92 |       stateP_fut[t,j] <- sum(jointP_fut[t,,j])
 93 |       # Compute probability-filtered state process and its covariance
 94 |       xA[t,,j] <- xU[t,,,j] %*% jointP_cur[t,,j] / stateP[t,j]
 95 |       for (i in 1:s)
 96 |       {
 97 |         Pa[t,,,j] <- Pa[t,,,j] +
 98 |                      (Pu[t,,,i,j] + (xA[t,,j] - xU[t,,i,j]) %*% t(xA[t,,j] - xU[t,,i,j])) * 
 99 |                      exp(log(jointP_cur[t,i,j]) - log(stateP[t,j]))
100 |       }
101 |     }
102 |     jointP_cur[t,,] <- exp(log(jointP_cur[t,,]) - log(sum(lik[t,,])))
103 |     stateP[t,] <- exp(log(stateP[t,]) - log(sum(lik[t,,])))
104 |     result[t,1] <- log(sum(lik[t,,]))
105 |   }
106 | 
107 |   return(list("result"=sum(result), "xA"=xA, "Pa"=Pa, "x"=x, "P"=P, "stateP"=stateP, "stateP_fut"=stateP_fut))
108 |   
109 | }
110 | 
111 | #' Smoothing algorithm from Kim (1994) to be used following a run
112 | #' of KimFilter function.
113 | #'
114 | #' @param xA Filtered state vector to be smoothed
115 | #' @param Pa Filtered state covariance to be smoothed
116 | #' @param x State-dependent state vector
117 | #' @param P State-dependent state covariance
118 | #' @param A Array with transition matrices
119 | #' @param p Markov transition matrix
120 | #' @param stateP Evolving current probability matrix
121 | #' @param stateP_fut Predicted probability matrix
122 | #' @return Smoothed states and covariance matrices. This is the equivalent
123 | #' of Kalman smoother in Markov-switching case.
124 | KimSmoother <- function(xA, Pa, A, P, x, p, stateP, stateP_fut)
125 | {
126 |   # Define all containers for further computations. Notations for variables and indices,
127 |   # where appropriate, carefully follow Kim (1994). State vector is denoted as 'x', its
128 |   # covariance as 'P'. Appended letters explicit whether these are updated, approximated
129 |   # or smoothed.
130 | 
131 |   T <- dim(xA)[1]
132 |   J <- dim(xA)[2]
133 |   s <- dim(xA)[3]
134 | 
135 |   ## Pr[S_t = j, S_(t+1) = k | T]: (2.20)
136 |   ## Pr[S_t = j | T]: (2.21)
137 |   jointPs <- array(NA, c(T,s,s))
138 |   ProbS <- array(NA, c(T,s))
139 | 
140 |   ## xS: x^(j,k)_(t|T): inference of x_t based on full sample - (2.24)
141 |   ## Ps: P^(j,k)_(t|T): covariance matrix of x^(j,k)_(t|T) - (2.25)
142 |   ## Ptilde: helper matrix as defined after (2.25) 
143 |   xS <- array(0, c(T,J,s,s))
144 |   Ps <- array(0, c(T,J,J,s,s))
145 |   Ptilde <- array(NA, c(T,J,J,s,s))
146 | 
147 |   ## xAS: x^(j)_(t|T): smoothed and approximated state vector conditional on S_j (2.26)
148 |   ## Pas: P^(j)_(t|T): smoothed and approximated state covariance conditional on S_j (2.27)
149 |   ## xF: x_(t|T): state-weighted [F]inal state vector (2.28)
150 |   ## Pf: P_(t|T): state-weighted [f]inal state covariance
151 |   xAS <- array(0, c(T,J,s))
152 |   Pas <- array(0, c(T,J,J,s))
153 |   xF <- array(0, c(T,J))
154 |   Pf <- array(0, c(T,J,J))
155 |   # Initial conditions for smoothing loop
156 |   ProbS[T,] <- stateP[T,]
157 | 
158 |   for (t in seq(T-1,1,-1))
159 |   {
160 |     for (j in 1:s)
161 |     {
162 |       for (k in 1:s)
163 |       {
164 |         jointPs[t,j,k] <- ProbS[(t+1),k]*stateP[t,j]*p[j,k] / stateP_fut[(t+1),k]
165 |         Ptilde[t,,,j,k] <- Pa[t,,,j] %*% t(A[,,k]) %*% solve(P[(t+1),,,j,k])
166 |         xS[t,,j,k] <- xA[t,,j] + Ptilde[t,,,j,k] %*% (xA[(t+1),,k] - x[(t+1),,j,k])
167 |         Ps[t,,,j,k] <- Pa[t,,,j] +
168 |                        Ptilde[t,,,j,k] %*% (Pa[(t+1),,,k] - P[(t+1),,,j,k]) %*% t(Ptilde[t,,,j,k])
169 |         xAS[t,,j] <- xAS[t,,j] + jointPs[t,j,k]*xS[t,,j,k]
170 |         Pas[t,,,j] <- Pas[t,,,j] + jointPs[t,j,k]*(Ps[t,,,j,k] +
171 |                                           (xAS[t,,j] - xS[t,,j,k]) %*% t(xAS[t,,j] - xS[t,,j,k]))
172 |       }
173 |       ProbS[t,j] <- sum(jointPs[t,j,])
174 |       xAS[t,,j] <- xAS[t,,j] / ProbS[t,j]
175 |       Pas[t,,,j] <- Pas[t,,,j] / ProbS[t,j]
176 |     }
177 |   }
178 |   for (t in 1:T)
179 |   {
180 |     for (j in 1:s)
181 |     {
182 |       xF[t,] <- xF[t,] + xAS[t,,j]*ProbS[t,j]
183 |       Pf[t,,] <- Pf[t,,] + Pas[t,,,j]*ProbS[t,j]
184 |     }
185 |   }
186 | 
187 |   return(list("xF"=xF, "Pf"=Pf, "ProbS"=ProbS))
188 | }
189 | 


--------------------------------------------------------------------------------
/R/methods.R:
--------------------------------------------------------------------------------
 1 | #' Summary information on dynamic factor model estimation
 2 | #' 
 3 | #' @param x An object of \code{dfm} class
 4 | #' @param plot If \code{TRUE}, returns a bunch of summary plots
 5 | #' @return Prints out a summary information following a dynamic factor
 6 | #' model estimation. Also can return summary plots.
 7 | summary.dfm <- function(x, plot=FALSE) {
 8 | 
 9 |   cl <- match.call()
10 |   
11 |   nf <- dim(x$qml)[2]
12 | 
13 |   if (plot == TRUE)
14 |   {
15 |     par(mfrow=c((nf+1),1))
16 |     for (i in 1:nf)
17 |     {
18 |       plot.title <- paste0("QML estimated factor ", i)
19 |       plot(x$qml[,i], type='l', main=plot.title, ylab="Value", xlab="Time") 
20 |     }
21 |     boxplot(x$data - t(x$C %*% t(x$qml)), main="Residuals by input variable")
22 |   }
23 |   cat("Observation equation matrix: \n")
24 |   print(x$C)
25 | 
26 |   cat("\nObservation residual covariance matrix: \n")
27 |   print(cov(x$data - t(x$C %*% t(x$qml))))
28 | 
29 |   cat("\nSystem equation transition matrix: \n")
30 |   print(x$A)
31 | 
32 | }
33 | 
34 | #' Predict factors and observables based on an estimated dynamic
35 | #' factor model
36 | #'
37 | #' @param x An object of \code{dfm} class
38 | #' @param h Forecasting horizon
39 | #' @return Returns prediction matrices for factors and input variables
40 | #' based on model estimation. Each row corresponds to a prediction
41 | #' horizon. First row is \code{T+1}, second -\code{T+2} and so on...
42 | predict.dfm <- function(x, h=1) {
43 |   
44 |   cl <- match.call()
45 | 
46 |   nf <- dim(x$qml)[2]
47 |   ny <- dim(x$C)[1]
48 | 
49 |   factor_forecast <- matrix(NA, nrow=h, ncol=nf)
50 |   y_forecast <- matrix(NA, nrow=h, ncol=ny)
51 |   factor_last <- matrix(NA, nrow=x$p, ncol=nf)
52 | 
53 |   factor_last <- tail(x$qml, x$p)
54 | 
55 |   for (i in 1:h)
56 |   {
57 |     reg_factor <- tail(factor_last, x$p)
58 |     factor_forecast[i,] <- x$A %*% matrix(t(reg_factor[rev(1:nrow(reg_factor)),]))
59 |     y_forecast[i,] <- x$C %*% factor_forecast[i,]
60 | 
61 |     factor_last <- rbind(factor_last, factor_forecast[i,])
62 |   }
63 |   return(list("y"=y_forecast, "f"=factor_forecast))
64 | }
65 | 


--------------------------------------------------------------------------------
/R/otherlang/kalman.c:
--------------------------------------------------------------------------------
 1 | #include <RcppArmadillo.h>
 2 | 
 3 | using namespace Rcpp;
 4 | 
 5 | class Kalman {
 6 | private:
 7 |     mat A, H, Q, R, xest, pest;
 8 |     double dt;
 9 | 
10 | public:
11 |     // constructor, sets up data structures
12 |     Kalman() : dt(1.0) {
13 |         A.eye(6,6);
14 |         A(0,2) = A(1,3) = A(2,4) = A(3,5) = dt;
15 |         H.zeros(2,6);
16 |         H(0,0) = H(1,1) = 1.0;
17 |         Q.eye(6,6);
18 |         R = 1000 * eye(2,2);
19 |         xest.zeros(6,1);
20 |         pest.zeros(6,6);
21 |     }
22 | 
23 |     // sole member function: estimate model
24 |     mat estimate(const mat & Z) {
25 |        unsigned int n = Z.n_rows, k = Z.n_cols;
26 |        mat Y = zeros(n, k);
27 |        mat xprd, pprd, S, B, kalmangain;
28 |        colvec z, y;
29 | 
30 |        for (unsigned int i = 0; i<n; i++) {
31 |            z = Z.row(i).t();
32 |            // predicted state and covariance
33 |            xprd = A * xest;
34 |            pprd = A * pest * A.t() + Q;
35 |            // estimation
36 |            S = H * pprd.t() * H.t() + R;
37 |            B = H * pprd.t();
38 |            kalmangain = (solve(S, B)).t();
39 |            // estimated state and covariance
40 |            xest = xprd + kalmangain * (z - H * xprd);
41 |            pest = pprd - kalmangain * H * pprd;
42 |            // compute the estimated measurements
43 |            y = H * xest;
44 |            Y.row(i) = y.t();
45 |        }
46 |        return Y;
47 |     }
48 | };
49 | 
50 | mat KalmanCpp(mat Z) {
51 |   Kalman K;
52 |   mat Y = K.estimate(Z);
53 |   return Y;
54 | }
55 | 


--------------------------------------------------------------------------------
/R/otherlang/kim.jl:
--------------------------------------------------------------------------------
 1 | function KimFilter(y,F,x0,P0,A,R,Q,p)
 2 |   T = size(y)[1]
 3 |   n = size(F)[1]
 4 |   J = size(x0)[1]
 5 |   s = size(p)[1]
 6 | 
 7 |   x = fill(0.0, T,J,s,s)
 8 |   xU = fill(0.0, T,J,s,s)
 9 |   P = fill(0.0, T,J,J,s,s)
10 |   Pu = fill(0.0, T,J,J,s,s)
11 |   eta = fill(0.0, T,n,s,s)
12 |   H = fill(0.0, T,n,n,s,s)
13 |   K = fill(0.0, T,J,n,s,s)
14 |   lik = fill(0.0, T,s,s)
15 |   loglik = fill(0.0, T,s,s)
16 |   jointP_fut = fill(0.0, T,s,s)
17 |   jointP_cur = fill(0.0, (T+1),s,s)
18 |   stateP_fut = fill(0.0, T,s)
19 |   stateP = fill(0.0, T,s)
20 |   xA = fill(0.0, T,J,s)
21 |   Pa = fill(0.0, T,J,J,s)
22 |   result = fill(0.0 , T,1)
23 | 
24 |   for i in 1:s
25 |     xA[1,:,i] = x0
26 |   end
27 |   for (i in 1:s)
28 |     Pa[1,:,:,i] = P0
29 |   end
30 |   jointP_cur[1,:,:] = [0.25 0.25; 0.25 0.25]
31 | 
32 |   for t in 2:T
33 |     for j in 1:s
34 |       for i in 1:s
35 |         x[t,:,i,j] = A[:,:,j] * xA[(t-1),:,i]
36 |         P[t,:,:,i,j] = A[:,:,j] * squeeze(Pa[(t-1),:,:,i],1) * A[:,:,j]' + Q
37 |         eta[t,:,i,j] = y[t,:]' - F[:,:,j] * x[t,:,i,j]
38 |         H[t,:,:,i,j] = F[:,:,j] * squeeze(P[t,:,:,i,j],1) * F[:,:,j]' + R
39 |         K[t,:,:,i,j] = squeeze(P[t,:,:,i,j],1) * F[:,:,j]' * inv(squeeze(H[t,:,:,i,j],1))
40 |         xU[t,:,i,j] = x[t,:,i,j] + squeeze(K[t,:,:,i,j],1) * eta[t,:,i,j]'
41 |         Pu[t,:,:,i,j] = (eye(J) - squeeze(K[t,:,:,i,j],1) * F[:,:,j]) * squeeze(P[t,:,:,i,j],1)
42 |         jointP_fut[t,i,j] = p[i,j]*sum(jointP_cur[(t-1),:,i])
43 |         lik[t,i,j] = abs((2*pi)^(-n/2) * det(squeeze(H[t,:,:,i,j],1))^(-1/2) * jointP_fut[t,i,j] *
44 |                      exp(-1/2*eta[t,:,i,j] * inv(squeeze(H[t,:,:,i,j],1)) * eta[t,:,i,j]')[1,1])
45 |         loglik[t,i,j] = log(lik[t,i,j])
46 |         jointP_cur[t,i,j] = lik[t,i,j]
47 |       end
48 |       stateP[t,j] = sum(jointP_cur[t,:,j])
49 |       stateP_fut[t,j] = sum(jointP_fut[t,:,j])
50 |       xA[t,:,j] = squeeze(xU[t,:,:,j], 1) * squeeze(jointP_cur[t,:,j],1) / stateP[t,j]
51 |       for (i in 1:s)
52 |         Pa[t,:,:,j] = Pa[t,:,:,j] +
53 |                       (Pu[t,:,:,i,j] + (xA[t,:,j] - xU[t,:,i,j]) * transpose(xA[t,:,j] - xU[t,:,i,j])) * 
54 |                       exp(log(jointP_cur[t,i,j]) - log(stateP[t,j]))
55 |       end
56 |     end
57 |     jointP_cur[t,:,:] = exp(log(jointP_cur[t,:,:]) - log(sum(lik[t,:,:])))
58 |     stateP[t,:] = exp(log(stateP[t,:]) - log(sum(lik[t,:,:])))
59 |     result[t,1] = log(sum(lik[t,:,:]))
60 |   end
61 | return sum(result)
62 | end
63 | 


--------------------------------------------------------------------------------
/R/otherlang/max.jl:
--------------------------------------------------------------------------------
 1 | using Optim
 2 | include("kim.jl")
 3 | 
 4 | y = readcsv("data.txt")
 5 | 
 6 | T = size(y)[1]; n = size(y)[2]
 7 | J = 1; s = 2
 8 | 
 9 | Q = eye(J)
10 | P0 = eye(J)*1.0
11 | x0 = [1.0]
12 | 
13 | l = [ kron(0.2, ones(1, J*J*s)) kron(-2.5, ones(1,n*J*s)) kron(0.4, ones(1,n)) kron(0.9, ones(1,s)) ]'
14 | u = [ kron(0.997, ones(1, J*J*s)) kron(2.5, ones(1,n*J*s)) kron(1.0, ones(1,n)) kron(0.999, ones(1,s)) ]'
15 | 
16 | dimA = J*J*s; dimAF = dimA + n*J*s; dimAFR = dimAF + n
17 | 
18 | function KimFilterOptim(pars)
19 |   A = reshape(pars[1:dimA], J, J, s)
20 |   F = reshape(pars[(dimA+1):dimAF], n, J, s)
21 |   R = diagm(pars[(dimAF+1):dimAFR])
22 |   p = diagm(pars[(dimAFR+1):(dimAFR+2)])
23 |   p[1,2] = 1 - p[1,1]; p[2,1] = 1 - p[2,2]
24 |   lik = KimFilter(y,F,x0,P0,A,R,Q,p)
25 |   return -lik
26 | end
27 | 
28 | 
29 | init = [reshape((rand(J*J*s) +0.3)/1.3, J*J*s, 1), reshape(rand(n*J*s), n*J*s, 1), kron(0.5, ones(1,n))', 0.95,0.95]
30 | # opt = optimize(KimFilterOptim, init[:,1] , method=:l_bfgs, show_trace=true, iterations=5000)
31 | 
32 | opt = fminbox(DifferentiableFunction(KimFilterOptim), init[:,1], l[:,1], u[:,1], show_trace=true)
33 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | 
 2 | This repository is deprecated in favor of [dfms](https://github.com/SebKrantz/dfms), a vastly improved version of whatever you can find here.
 3 | 
 4 | ---
 5 | 
 6 | This is a public repository for `dynfactoR`, a package for `R` which facilitates estimation of dynamic factor models.
 7 | 
 8 | Current implementation of main `dfm` function supports vector auto-regressive type dynamics for factors, missing observations and some statistical identification restrictions. A dynamic factor model estimation will typically return 3 estimates, namely principal component estimator, a two-step estimator as well as quasi-maximum likelihood (QML) estimator. The two latter estimators are based on Kalman filtering and QML estimator is a particular case of EM-algorithm.
 9 | 
10 | `dynfactoR` is easy to install with the help of `devtools`:
11 | ```
12 | devtools::install_github("rbagd/dynfactoR")
13 | ```
14 | 
15 | `dynfactoR` also provides a dataset from National Bank of Belgium which contains monthly Belgian business and consumer survey data over a sufficiently long period. You can call it with:
16 | 
17 | ```
18 | data("NBBsurvey")
19 | ```
20 | 
21 | If you are not familiar with related literature, it could be useful to read a short introduction to dynamic factor models which is packaged as a vignette. Some academic references are also made available.
22 | 
23 | ```
24 | vignette("dynamic-factors")
25 | ```
26 | 
27 | In the future, the package will be extended to support more general dynamics as well as Markov-switching factor loading or transition matrices. Currently, these features are early experimental.
28 | 


--------------------------------------------------------------------------------
/data/NBBsurvey.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rbagd/dynfactoR/cd92d901e5dce4d1d3c68f0f336a632035b872d4/data/NBBsurvey.rda


--------------------------------------------------------------------------------
/inst/doc/dynamic-factors.R:
--------------------------------------------------------------------------------
1 | ### R code from vignette source 'dynamic-factors.Rnw'
2 | ### Encoding: UTF-8
3 | 
4 | 


--------------------------------------------------------------------------------
/inst/doc/dynamic-factors.Rnw:
--------------------------------------------------------------------------------
  1 | %\VignetteIndexEntry{Modelling with dynfactoR}
  2 | 
  3 | \documentclass{article}
  4 | \usepackage{amsmath}
  5 | \usepackage[utf8]{inputenc}
  6 | 
  7 | \parindent 0.0in
  8 | \parskip 0.07in
  9 | 
 10 | \DeclareMathOperator{\id}{I}
 11 | 
 12 | \author{Rytis Bagd\v{z}i\={u}nas \\ \small{CORE, Universit\'{e} catholique de Louvain}}
 13 | \title{Modelling with \texttt{dynfactoR}}
 14 | 
 15 | %%% Reduce section header size
 16 | \makeatletter
 17 | \renewcommand\section{\@startsection{section}{1}{\z@}%
 18 |                            {-3.5ex \@plus -1ex \@minus -.2ex}%
 19 |                            {2.3ex \@plus.2ex}%
 20 |                            {\normalfont\large\bfseries}}
 21 | \makeatother
 22 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 23 | 
 24 | \begin{document}
 25 | 
 26 | \maketitle
 27 | 
 28 | \texttt{dynfactoR} package is aimed at researchers dealing with economic modelling. In particular, it is useful at extracting underlying trends from a potentially large number of economic time series.
 29 | 
 30 | Dynamic factor model is a special case of a state space equation. In its general form it can be written as 
 31 | 
 32 | \begin{equation} \label{eq:statespace}
 33 | \begin{array}{ll}
 34 |   \mathbf{X}_{t} = \mathbf{C}\mathbf{f}_{t} + \boldsymbol{\varepsilon}_{t}, & \boldsymbol{\varepsilon}_{t} \sim \mathcal{N} \left(\mathbf{0}, \mathbf{R} \right) \\
 35 |   \mathbf{f}_{t} = \mathbf{A}\mathbf{f}_{t-1} + \mathbf{u}_{t}, & u_{t} \sim \mathcal{N} \left(\mathbf{0}, \mathbf{Q} \right)
 36 | \end{array}
 37 | \end{equation}
 38 | 
 39 | where $\mathbf{X}_{t}$ is a vector of observable data which might contain missing data. It is assumed that observable data is linearly driven by a low-dimensional unobservable process $\mathbf{f}_{t}$.
 40 | 
 41 | \subsubsection*{Use case}
 42 | 
 43 | A typical use case for such a model is nowcasting gross domestic product (GDP). GDP data is published every $3$ months, so that economic growth from January to March would only be known in early April. However, many business and consumer surveys are published monthly and are often correlated with GDP growth. Therefore, predictions of economic growth for first quarter should be different depending on whether they are done in December or in February (hence the term \textit{nowcasting}). Dynamic factor model is one way to do that by extracting an underlying trend which often follows economic growth pattern. Besides, if restrictions are imposed on $\mathbf{C}$, economic growth can be modelled explicitly by factors.
 44 | 
 45 | \section{State space equations}
 46 | 
 47 | It is assumed that $\mathbf{A}(L)\mathbf{f}_{t} = \mathbf{u}_{t}$ where $\mathbf{u}_{t} \sim N \left( \mathbf{0}, \id_{r} \right)$ and $\mathbf{A}(L)$ is a lag polynomial with roots inside the unit circle, that is $\mathbf{f}_{t}$ is a covariance-stationary process.
 48 | 
 49 | Let $\mathbf{f}_{t}$ follow a $p$-th order autoregressive process, so that
 50 | \begin{equation}\label{eq:factorvar}
 51 |   \mathbf{f}_{t} = \mathbf{A}_{1}\mathbf{f}_{t-1} + \cdots + \mathbf{A}_{p}\mathbf{f}_{t-p} + \mathbf{u}_{t}
 52 | \end{equation}
 53 | Suppose that there are $r$-factors, that is $\mathbf{f}_{t}$ and $\mathbf{u}_{t}$ are $r$-variate processes. If $\mathbf{f}_{t}$ were observed, it would be easy to estimate $\left\lbrace \mathbf{A}_{1}, \dots, \mathbf{A}_{p} \right\rbrace$ with a simple OLS procedure in the context of VAR($p$) model. This estimator would also be a maximum likelihood estimator.
 54 | 
 55 | Let $\mathbf{f}_{t} = \left(f_{1,t}, \dots, f_{r,t}\right)$ and $\mathbf{u}_{t} = \left(u_{1,t}, \dots, u_{r,t}\right)$. Then \eqref{eq:factorvar} can be rewritten in state space equation form of \eqref{eq:statespace} as
 56 | \begin{equation}
 57 |   \begin{pmatrix} f_{1,t} \\ f_{2,t} \\ \cdots \\ f_{r, t} \\ f_{1,t-1} \\ \dots \\ f_{r,t-p+1} \end{pmatrix}
 58 |   = \begin{bmatrix} \mathbf{A}_{1} & \cdots & \cdots & \mathbf{A}_{p} \\ \id_{r} & 0 & 0 \\ \dots & \dots & \dots & \dots\\ \dots & 0 & \id_{r} & 0 \end{bmatrix}
 59 |   \begin{pmatrix} f_{1,t-1} \\ f_{2,t-1} \\ \dots \\ f_{r,t-1} \\ f_{1,t-2} \\ \dots \\ f_{r,t-p} \end{pmatrix}
 60 |   + \begin{pmatrix} u_{1,t} \\ \cdots \\ u_{r,t} \\ 0 \\ \dots \\ 0 \end{pmatrix}
 61 | \end{equation}
 62 | Evidently, state space covariance matrix has to be extended by zeroes. With this in mind, measurement equation then becomes
 63 | \begin{equation}
 64 |   \mathbf{X}_{t} = \begin{bmatrix} \mathbf{C} & \mathbf{0}_{n \times r} & \dots & \mathbf{0}_{n \times r} \end{bmatrix} 
 65 |   \begin{pmatrix} f_{1,t} \\ f_{2,t} \\ \cdots \\ f_{r, t} \\ f_{1,t-1} \\ \dots \\ f_{r,t-p+1} \end{pmatrix} + \boldsymbol{\varepsilon}_{t}
 66 | \end{equation}
 67 | where $\mathbf{0}_{n \times r}$ is zero-matrix with $n$ rows and $r$ columns.
 68 | 
 69 | \section{Estimators}
 70 | 
 71 | \begin{itemize}
 72 | 
 73 |     \item PCA estimator where factors are estimated as principal components of observable data. Its properties are well discussed in Stock \& Watson (2002). If missing data is present, other estimators should be used.
 74 |     \item Two-step estimator by Doz, Gianone and Reichlin (2011). This methods uses PCA estimates and runs them through Kalman filter.
 75 |     \item QML estimator by Doz, Gianone and Reichlin (2012). Similar to two-step estimator, however Kalman filtering procedure is iterated until EM convergence.
 76 | 
 77 | \end{itemize}
 78 | 
 79 | \section{Forecasting}
 80 | 
 81 | Once a dynamic factor model is estimated, $\left(\mathbf{X}_{1:T}, \hat{\mathbf{A}}, \hat{\mathbf{C}}\right)$ are available. Predicting $\left(\mathbf{X}_{f, T+h}, \mathbf{f}_{T+h}\right)$ for some horizon $h \in \left\lbrace1, \dots \right\rbrace$ consists in three steps:
 82 | \begin{enumerate}
 83 |   \item predict $\mathbf{f}_{f, T+1}$ given estimate of $\mathbf{f}_{T}$ and VAR($p$) process, i.e.
 84 |     \begin{equation*}
 85 |       \mathbf{f}_{f,T+1} = \hat{\mathbf{A}}_{1}\mathbf{f}_{T} + \dots + \hat{\mathbf{A}}_{p}\mathbf{f}_{T-p}
 86 |     \end{equation*}
 87 |   \item predict $\mathbf{X}_{f,T+1}$ based on factor loading and factor estimates, i.e.
 88 |     \begin{equation*}
 89 |       \mathbf{X}_{f,T+1} = \hat{\mathbf{C}} \mathbf{f}_{f,T+1}
 90 |     \end{equation*}
 91 |   \item repeat steps 1 and 2 until desired horizon level is reached
 92 | \end{enumerate}
 93 | 
 94 | \section{Future and ongoing work}
 95 | 
 96 | Some of the ideas for dynamic factor modelling are not yet implemented completely or even at all. This section gathers future ideas for the package.
 97 | 
 98 | \begin{enumerate}
 99 |   \item Introduce Markov-switching possibility so that, for example, $\mathbf{C}$ or $\mathbf{A}$ could be state dependent. It would be in particular interesting if this could capture business cycle growth variations. In case of this model, approximation to Kalman filter can be used, e.g. Kim filter, in order to avoid exponentially growing number of calculations.
100 |   \item Analyze and propose some defaults for restrictions on $\mathbf{C}$. Typically, this includes identification constraints and currently, only upper triangular $\mathbf{C}$ is implemented. Constraints can also be specified based on economic reasoning and render model hierarchical.
101 |   \item Kalman and Kim filters are slow in \texttt{R} due to their recursive nature. They should be reimplemented in \texttt{C} or call another package such as \texttt{FKF}. Kim filter will probably need to be rewritten in \texttt{C} anyway.
102 |   \item As presented, dynamic factor model is only dynamic in the state equation. It can be generalized to have dynamics in the measurement equation as well, i.e. $\mathbf{X}_{t}$ depending on current and past values of $\mathbf{f}_{t}$. This should not be difficult to implement as such model would be eventually reduced to \eqref{eq:statespace}.
103 |   \item Currently, there is no automated testing for the package. This should be fixed.
104 | \end{enumerate}
105 | 
106 | \end{document}
107 | 


--------------------------------------------------------------------------------
/inst/doc/dynamic-factors.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rbagd/dynfactoR/cd92d901e5dce4d1d3c68f0f336a632035b872d4/inst/doc/dynamic-factors.pdf


--------------------------------------------------------------------------------
/man/Estep.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2 (4.1.1): do not edit by hand
 2 | % Please edit documentation in R/em_algorithm.R
 3 | \name{Estep}
 4 | \alias{Estep}
 5 | \title{Computation of the expectation step in the EM-algorithm.}
 6 | \usage{
 7 | Estep(y, A, C, Q, R, initx, initV, W)
 8 | }
 9 | \arguments{
10 | \item{y}{Data matrix}
11 | 
12 | \item{A}{System state matrix}
13 | 
14 | \item{C}{Observation matrix}
15 | 
16 | \item{Q}{System state equation variance}
17 | 
18 | \item{R}{Observation equation variance}
19 | 
20 | \item{initx}{Initial value for state variable}
21 | 
22 | \item{initV}{Initial value for state matrix}
23 | 
24 | \item{W}{Logical matrix with dim(W) = dim(y)
25 | indicating missing observations}
26 | }
27 | \value{
28 | Sufficient statistics used for M-step
29 | }
30 | \description{
31 | Computation of the expectation step in the EM-algorithm.
32 | }
33 | 
34 | 


--------------------------------------------------------------------------------
/man/K_filter.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2 (4.1.1): do not edit by hand
 2 | % Please edit documentation in R/kalman.R
 3 | \name{K_filter}
 4 | \alias{K_filter}
 5 | \title{Implements a Kalman for dynamic factor model.}
 6 | \usage{
 7 | K_filter(initx, initV, x, A, C, R, Q)
 8 | }
 9 | \arguments{
10 | \item{initx}{Initial value for state space observations}
11 | 
12 | \item{initV}{Initial value for state covariance}
13 | 
14 | \item{x}{Observation matrix}
15 | 
16 | \item{A}{State space matrix}
17 | 
18 | \item{C}{System matrix}
19 | 
20 | \item{R}{State space covariance}
21 | 
22 | \item{Q}{System covariance}
23 | }
24 | \value{
25 | Filtered state space variable and its covariance matrix
26 | as well as their forecast for next period for further iterations
27 | }
28 | \description{
29 | Implements a Kalman for dynamic factor model.
30 | }
31 | 
32 | 


--------------------------------------------------------------------------------
/man/K_smoother.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2 (4.1.1): do not edit by hand
 2 | % Please edit documentation in R/kalman.R
 3 | \name{K_smoother}
 4 | \alias{K_smoother}
 5 | \title{Implements Kalman smoothing and is used along with Kalman filter.
 6 | Kalman filter outputs enter Kalman smoother as inputs.}
 7 | \usage{
 8 | K_smoother(A, xitt, xittm, Ptt, Pttm, C, R, W)
 9 | }
10 | \arguments{
11 | \item{A}{State space matrix}
12 | 
13 | \item{xitt}{State space variable}
14 | 
15 | \item{xittm}{Predicted state space variable}
16 | 
17 | \item{Ptt}{State space covariance}
18 | 
19 | \item{Pttm}{Predicted state space covariance}
20 | 
21 | \item{C}{System matrix}
22 | 
23 | \item{R}{State space covariance}
24 | 
25 | \item{W}{Logical matrix (T x n) indicating missing data.
26 | TRUE if observation is present, FALSE if it is missing.}
27 | }
28 | \value{
29 | Smoothed state space variable and state space covariance matrix
30 | }
31 | \description{
32 | Implements Kalman smoothing and is used along with Kalman filter.
33 | Kalman filter outputs enter Kalman smoother as inputs.
34 | }
35 | 
36 | 


--------------------------------------------------------------------------------
/man/KimFilter.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2 (4.1.1): do not edit by hand
 2 | % Please edit documentation in R/kim.R
 3 | \name{KimFilter}
 4 | \alias{KimFilter}
 5 | \title{Implementation of Kim (1994) filter, an extension to Kalman filter
 6 | for dynamic linear models with Markov-switching. Documentation
 7 | is incomplete, rudimentary and needs to be rechecked!}
 8 | \usage{
 9 | KimFilter(x0, P0, y, F, A, R, Q, p)
10 | }
11 | \arguments{
12 | \item{x0}{Initial condition for state vector}
13 | 
14 | \item{P0}{Initial condition for state variance}
15 | 
16 | \item{y}{Data matrix (Txn)}
17 | 
18 | \item{F}{System matrix for measurement equation}
19 | 
20 | \item{A}{Transition matrix for state equation}
21 | 
22 | \item{R}{Error covariance for measurement equation}
23 | 
24 | \item{Q}{Error covariance for state equation}
25 | 
26 | \item{p}{Transition probability matrix}
27 | }
28 | \value{
29 | Filtered states and covariances with associated probability matrices.
30 | }
31 | \description{
32 | Implementation of Kim (1994) filter, an extension to Kalman filter
33 | for dynamic linear models with Markov-switching. Documentation
34 | is incomplete, rudimentary and needs to be rechecked!
35 | }
36 | 
37 | 


--------------------------------------------------------------------------------
/man/KimSmoother.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2 (4.1.1): do not edit by hand
 2 | % Please edit documentation in R/kim.R
 3 | \name{KimSmoother}
 4 | \alias{KimSmoother}
 5 | \title{Smoothing algorithm from Kim (1994) to be used following a run
 6 | of KimFilter function.}
 7 | \usage{
 8 | KimSmoother(xA, Pa, A, P, x, p, stateP, stateP_fut)
 9 | }
10 | \arguments{
11 | \item{xA}{Filtered state vector to be smoothed}
12 | 
13 | \item{Pa}{Filtered state covariance to be smoothed}
14 | 
15 | \item{A}{Array with transition matrices}
16 | 
17 | \item{P}{State-dependent state covariance}
18 | 
19 | \item{x}{State-dependent state vector}
20 | 
21 | \item{p}{Markov transition matrix}
22 | 
23 | \item{stateP}{Evolving current probability matrix}
24 | 
25 | \item{stateP_fut}{Predicted probability matrix}
26 | }
27 | \value{
28 | Smoothed states and covariance matrices. This is the equivalent
29 | of Kalman smoother in Markov-switching case.
30 | }
31 | \description{
32 | Smoothing algorithm from Kim (1994) to be used following a run
33 | of KimFilter function.
34 | }
35 | 
36 | 


--------------------------------------------------------------------------------
/man/NBBsurvey.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2 (4.1.1): do not edit by hand
 2 | % Please edit documentation in R/dfm.R
 3 | \docType{data}
 4 | \name{NBBsurvey}
 5 | \alias{NBBsurvey}
 6 | \title{National Bank of Belgium business and consumer surveys}
 7 | \format{A time-stamped dataframe (xts object) with}
 8 | \source{
 9 | \url{http://stat.nbb.be}
10 | }
11 | \usage{
12 | NBBsurvey
13 | }
14 | \description{
15 | National Bank of Belgium business and consumer surveys
16 | }
17 | \keyword{datasets}
18 | 
19 | 


--------------------------------------------------------------------------------
/man/VAR.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2 (4.1.1): do not edit by hand
 2 | % Please edit documentation in R/additional.R
 3 | \name{VAR}
 4 | \alias{VAR}
 5 | \title{Estimate a p-th order vector autoregressive (VAR) model}
 6 | \usage{
 7 | VAR(x, p)
 8 | }
 9 | \arguments{
10 | \item{x}{Data matrix (T x n)}
11 | 
12 | \item{p}{Maximum lag order, i.e. VAR(p) will be estimated}
13 | }
14 | \value{
15 | Estimated parameter matrix, residuals and regression model
16 | independent and dependent variables
17 | }
18 | \description{
19 | Estimate a p-th order vector autoregressive (VAR) model
20 | }
21 | \examples{
22 | x <- matrix(rnorm(50*2), nrow=50, ncol=2)
23 | VAR(x, 2)
24 | }
25 | 
26 | 


--------------------------------------------------------------------------------
/man/dfm.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2 (4.1.1): do not edit by hand
 2 | % Please edit documentation in R/dfm.R
 3 | \name{dfm}
 4 | \alias{dfm}
 5 | \title{Estimates a dynamic factor model based on Doz, Gianone & Reichlin (2011)}
 6 | \usage{
 7 | dfm(X, r, p, q, max_iter = 100, threshold = 1e-04, rQ, rC)
 8 | }
 9 | \arguments{
10 | \item{X}{Data matrix (T x n)}
11 | 
12 | \item{r}{Number of static factors}
13 | 
14 | \item{p}{Lag order for factors}
15 | 
16 | \item{q}{Number of dynamic factors, must be equal to or less than r}
17 | 
18 | \item{max_iter}{Maximum number of iterations in EM-algorithm}
19 | 
20 | \item{threshold}{Threshold for algorithm convergence}
21 | 
22 | \item{rQ}{Restrictions on system state covariance}
23 | 
24 | \item{rC}{Restrictions on factor loading matrix}
25 | }
26 | \value{
27 | 3 types of factor estimates, namely principal component estimate, two step
28 | estimate based on PCA and Kalman filtering and QML estimate based on EM-algorithm
29 | }
30 | \description{
31 | Estimates a dynamic factor model based on Doz, Gianone & Reichlin (2011)
32 | }
33 | \examples{
34 | x <- matrix(rnorm(50*10), 50, 10)
35 | W <- as.logical(matrix(rbinom(50*10, 1, 0.1), 50, 10))
36 | x[W] <- NA
37 | dfm(x, 2, 2, 1)
38 | }
39 | 
40 | 


--------------------------------------------------------------------------------
/man/dfmMS.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2 (4.1.1): do not edit by hand
 2 | % Please edit documentation in R/dfmMS.R
 3 | \name{dfmMS}
 4 | \alias{dfmMS}
 5 | \title{Dynamic factor model with Markov-switching states}
 6 | \usage{
 7 | dfmMS(X, J = 1, s = 2, p = 1, x0, P0)
 8 | }
 9 | \arguments{
10 | \item{X}{Data matrix (Txn)}
11 | 
12 | \item{J}{Number of factors. Currently, only \code{1} is supported}
13 | 
14 | \item{s}{Number of states. Only \code{2} are supported for now}
15 | 
16 | \item{p}{Lag number for VAR model of factors}
17 | 
18 | \item{x0}{Initial value for state}
19 | 
20 | \item{P0}{Initial value for state covariance}
21 | }
22 | \value{
23 | Return an estimator. Currently, it runs via a likelihood
24 | maximization an so is rather slow. Do not call it with large number
25 | of input variables or missing data. Some box constraints are currently
26 | to speed up the process, these are based on usual rationale for economic
27 | data.
28 | }
29 | \description{
30 | Dynamic factor model with Markov-switching states
31 | }
32 | 
33 | 


--------------------------------------------------------------------------------
/man/em_converged.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2 (4.1.1): do not edit by hand
 2 | % Please edit documentation in R/em_algorithm.R
 3 | \name{em_converged}
 4 | \alias{em_converged}
 5 | \title{Convergence test for EM-algorithm.}
 6 | \usage{
 7 | em_converged(loglik, previous_loglik, threshold = 1e-04,
 8 |   check_increased = TRUE)
 9 | }
10 | \arguments{
11 | \item{loglik}{Current value of the log-likelihood function}
12 | 
13 | \item{previous_loglik}{Value of the log-likelihood function at the previous}
14 | 
15 | \item{threshold}{If difference is less than threshold, then algorithm has
16 | converged}
17 | 
18 | \item{check_increased}{TO DOCUMENT}
19 | }
20 | \value{
21 | A logical statement indicating whether EM algorithm has converged
22 | according to slope convergence test
23 | }
24 | \description{
25 | Convergence test for EM-algorithm.
26 | }
27 | 
28 | 


--------------------------------------------------------------------------------
/man/predict.dfm.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2 (4.1.1): do not edit by hand
 2 | % Please edit documentation in R/methods.R
 3 | \name{predict.dfm}
 4 | \alias{predict.dfm}
 5 | \title{Predict factors and observables based on an estimated dynamic
 6 | factor model}
 7 | \usage{
 8 | \method{predict}{dfm}(x, h = 1)
 9 | }
10 | \arguments{
11 | \item{x}{An object of \code{dfm} class}
12 | 
13 | \item{h}{Forecasting horizon}
14 | }
15 | \value{
16 | Returns prediction matrices for factors and input variables
17 | based on model estimation. Each row corresponds to a prediction
18 | horizon. First row is \code{T+1}, second -\code{T+2} and so on...
19 | }
20 | \description{
21 | Predict factors and observables based on an estimated dynamic
22 | factor model
23 | }
24 | 
25 | 


--------------------------------------------------------------------------------
/man/summary.dfm.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2 (4.1.1): do not edit by hand
 2 | % Please edit documentation in R/methods.R
 3 | \name{summary.dfm}
 4 | \alias{summary.dfm}
 5 | \title{Summary information on dynamic factor model estimation}
 6 | \usage{
 7 | \method{summary}{dfm}(x, plot = FALSE)
 8 | }
 9 | \arguments{
10 | \item{x}{An object of \code{dfm} class}
11 | 
12 | \item{plot}{If \code{TRUE}, returns a bunch of summary plots}
13 | }
14 | \value{
15 | Prints out a summary information following a dynamic factor
16 | model estimation. Also can return summary plots.
17 | }
18 | \description{
19 | Summary information on dynamic factor model estimation
20 | }
21 | 
22 | 


--------------------------------------------------------------------------------
/tests/testthat.R:
--------------------------------------------------------------------------------
1 | library(testthat)
2 | library(dynfactoR)
3 | 
4 | # test_check("dynfactoR")
5 | 


--------------------------------------------------------------------------------
/vignettes/dynamic-factors.Rnw:
--------------------------------------------------------------------------------
  1 | %\VignetteIndexEntry{Modelling with dynfactoR}
  2 | 
  3 | \documentclass{article}
  4 | \usepackage{amsmath}
  5 | \usepackage[utf8]{inputenc}
  6 | 
  7 | \parindent 0.0in
  8 | \parskip 0.07in
  9 | 
 10 | \DeclareMathOperator{\id}{I}
 11 | 
 12 | \author{Rytis Bagd\v{z}i\={u}nas \\ \small{CORE, Universit\'{e} catholique de Louvain}}
 13 | \title{Modelling with \texttt{dynfactoR}}
 14 | 
 15 | %%% Reduce section header size
 16 | \makeatletter
 17 | \renewcommand\section{\@startsection{section}{1}{\z@}%
 18 |                            {-3.5ex \@plus -1ex \@minus -.2ex}%
 19 |                            {2.3ex \@plus.2ex}%
 20 |                            {\normalfont\large\bfseries}}
 21 | \makeatother
 22 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 23 | 
 24 | \begin{document}
 25 | 
 26 | \maketitle
 27 | 
 28 | \texttt{dynfactoR} package is aimed at researchers dealing with economic modelling. In particular, it is useful at extracting underlying trends from a potentially large number of economic time series.
 29 | 
 30 | Dynamic factor model is a special case of a state space equation. In its general form it can be written as 
 31 | 
 32 | \begin{equation} \label{eq:statespace}
 33 | \begin{array}{ll}
 34 |   \mathbf{X}_{t} = \mathbf{C}\mathbf{f}_{t} + \boldsymbol{\varepsilon}_{t}, & \boldsymbol{\varepsilon}_{t} \sim \mathcal{N} \left(\mathbf{0}, \mathbf{R} \right) \\
 35 |   \mathbf{f}_{t} = \mathbf{A}\mathbf{f}_{t-1} + \mathbf{u}_{t}, & u_{t} \sim \mathcal{N} \left(\mathbf{0}, \mathbf{Q} \right)
 36 | \end{array}
 37 | \end{equation}
 38 | 
 39 | where $\mathbf{X}_{t}$ is a vector of observable data which might contain missing data. It is assumed that observable data is linearly driven by a low-dimensional unobservable process $\mathbf{f}_{t}$.
 40 | 
 41 | \subsubsection*{Use case}
 42 | 
 43 | A typical use case for such a model is nowcasting gross domestic product (GDP). GDP data is published every $3$ months, so that economic growth from January to March would only be known in early April. However, many business and consumer surveys are published monthly and are often correlated with GDP growth. Therefore, predictions of economic growth for first quarter should be different depending on whether they are done in December or in February (hence the term \textit{nowcasting}). Dynamic factor model is one way to do that by extracting an underlying trend which often follows economic growth pattern. Besides, if restrictions are imposed on $\mathbf{C}$, economic growth can be modelled explicitly by factors.
 44 | 
 45 | \section{State space equations}
 46 | 
 47 | It is assumed that $\mathbf{A}(L)\mathbf{f}_{t} = \mathbf{u}_{t}$ where $\mathbf{u}_{t} \sim N \left( \mathbf{0}, \id_{r} \right)$ and $\mathbf{A}(L)$ is a lag polynomial with roots inside the unit circle, that is $\mathbf{f}_{t}$ is a covariance-stationary process.
 48 | 
 49 | Let $\mathbf{f}_{t}$ follow a $p$-th order autoregressive process, so that
 50 | \begin{equation}\label{eq:factorvar}
 51 |   \mathbf{f}_{t} = \mathbf{A}_{1}\mathbf{f}_{t-1} + \cdots + \mathbf{A}_{p}\mathbf{f}_{t-p} + \mathbf{u}_{t}
 52 | \end{equation}
 53 | Suppose that there are $r$-factors, that is $\mathbf{f}_{t}$ and $\mathbf{u}_{t}$ are $r$-variate processes. If $\mathbf{f}_{t}$ were observed, it would be easy to estimate $\left\lbrace \mathbf{A}_{1}, \dots, \mathbf{A}_{p} \right\rbrace$ with a simple OLS procedure in the context of VAR($p$) model. This estimator would also be a maximum likelihood estimator.
 54 | 
 55 | Let $\mathbf{f}_{t} = \left(f_{1,t}, \dots, f_{r,t}\right)$ and $\mathbf{u}_{t} = \left(u_{1,t}, \dots, u_{r,t}\right)$. Then \eqref{eq:factorvar} can be rewritten in state space equation form of \eqref{eq:statespace} as
 56 | \begin{equation}
 57 |   \begin{pmatrix} f_{1,t} \\ f_{2,t} \\ \cdots \\ f_{r, t} \\ f_{1,t-1} \\ \dots \\ f_{r,t-p+1} \end{pmatrix}
 58 |   = \begin{bmatrix} \mathbf{A}_{1} & \cdots & \cdots & \mathbf{A}_{p} \\ \id_{r} & 0 & 0 \\ \dots & \dots & \dots & \dots\\ \dots & 0 & \id_{r} & 0 \end{bmatrix}
 59 |   \begin{pmatrix} f_{1,t-1} \\ f_{2,t-1} \\ \dots \\ f_{r,t-1} \\ f_{1,t-2} \\ \dots \\ f_{r,t-p} \end{pmatrix}
 60 |   + \begin{pmatrix} u_{1,t} \\ \cdots \\ u_{r,t} \\ 0 \\ \dots \\ 0 \end{pmatrix}
 61 | \end{equation}
 62 | Evidently, state space covariance matrix has to be extended by zeroes. With this in mind, measurement equation then becomes
 63 | \begin{equation}
 64 |   \mathbf{X}_{t} = \begin{bmatrix} \mathbf{C} & \mathbf{0}_{n \times r} & \dots & \mathbf{0}_{n \times r} \end{bmatrix} 
 65 |   \begin{pmatrix} f_{1,t} \\ f_{2,t} \\ \cdots \\ f_{r, t} \\ f_{1,t-1} \\ \dots \\ f_{r,t-p+1} \end{pmatrix} + \boldsymbol{\varepsilon}_{t}
 66 | \end{equation}
 67 | where $\mathbf{0}_{n \times r}$ is zero-matrix with $n$ rows and $r$ columns.
 68 | 
 69 | \section{Estimators}
 70 | 
 71 | \begin{itemize}
 72 | 
 73 |     \item PCA estimator where factors are estimated as principal components of observable data. Its properties are well discussed in Stock \& Watson (2002). If missing data is present, other estimators should be used.
 74 |     \item Two-step estimator by Doz, Gianone and Reichlin (2011). This methods uses PCA estimates and runs them through Kalman filter.
 75 |     \item QML estimator by Doz, Gianone and Reichlin (2012). Similar to two-step estimator, however Kalman filtering procedure is iterated until EM convergence.
 76 | 
 77 | \end{itemize}
 78 | 
 79 | \section{Forecasting}
 80 | 
 81 | Once a dynamic factor model is estimated, $\left(\mathbf{X}_{1:T}, \hat{\mathbf{A}}, \hat{\mathbf{C}}\right)$ are available. Predicting $\left(\mathbf{X}_{f, T+h}, \mathbf{f}_{T+h}\right)$ for some horizon $h \in \left\lbrace1, \dots \right\rbrace$ consists in three steps:
 82 | \begin{enumerate}
 83 |   \item predict $\mathbf{f}_{f, T+1}$ given estimate of $\mathbf{f}_{T}$ and VAR($p$) process, i.e.
 84 |     \begin{equation*}
 85 |       \mathbf{f}_{f,T+1} = \hat{\mathbf{A}}_{1}\mathbf{f}_{T} + \dots + \hat{\mathbf{A}}_{p}\mathbf{f}_{T-p}
 86 |     \end{equation*}
 87 |   \item predict $\mathbf{X}_{f,T+1}$ based on factor loading and factor estimates, i.e.
 88 |     \begin{equation*}
 89 |       \mathbf{X}_{f,T+1} = \hat{\mathbf{C}} \mathbf{f}_{f,T+1}
 90 |     \end{equation*}
 91 |   \item repeat steps 1 and 2 until desired horizon level is reached
 92 | \end{enumerate}
 93 | 
 94 | \section{Future and ongoing work}
 95 | 
 96 | Some of the ideas for dynamic factor modelling are not yet implemented completely or even at all. This section gathers future ideas for the package.
 97 | 
 98 | \begin{enumerate}
 99 |   \item Introduce Markov-switching possibility so that, for example, $\mathbf{C}$ or $\mathbf{A}$ could be state dependent. It would be in particular interesting if this could capture business cycle growth variations. In case of this model, approximation to Kalman filter can be used, e.g. Kim filter, in order to avoid exponentially growing number of calculations.
100 |   \item Analyze and propose some defaults for restrictions on $\mathbf{C}$. Typically, this includes identification constraints and currently, only upper triangular $\mathbf{C}$ is implemented. Constraints can also be specified based on economic reasoning and render model hierarchical.
101 |   \item Kalman and Kim filters are slow in \texttt{R} due to their recursive nature. They should be reimplemented in \texttt{C} or call another package such as \texttt{FKF}. Kim filter will probably need to be rewritten in \texttt{C} anyway.
102 |   \item As presented, dynamic factor model is only dynamic in the state equation. It can be generalized to have dynamics in the measurement equation as well, i.e. $\mathbf{X}_{t}$ depending on current and past values of $\mathbf{f}_{t}$. This should not be difficult to implement as such model would be eventually reduced to \eqref{eq:statespace}.
103 |   \item Currently, there is no automated testing for the package. This should be fixed.
104 | \end{enumerate}
105 | 
106 | \end{document}
107 | 


--------------------------------------------------------------------------------