├── DESCRIPTION ├── LICENSE ├── NAMESPACE ├── R ├── additional.R ├── dfm.R ├── dfmMS.R ├── em_algorithm.R ├── kalman.R ├── kim.R ├── methods.R └── otherlang │ ├── kalman.c │ ├── kim.jl │ └── max.jl ├── README.md ├── data └── NBBsurvey.rda ├── inst └── doc │ ├── dynamic-factors.R │ ├── dynamic-factors.Rnw │ └── dynamic-factors.pdf ├── man ├── Estep.Rd ├── K_filter.Rd ├── K_smoother.Rd ├── KimFilter.Rd ├── KimSmoother.Rd ├── NBBsurvey.Rd ├── VAR.Rd ├── dfm.Rd ├── dfmMS.Rd ├── em_converged.Rd ├── predict.dfm.Rd └── summary.dfm.Rd ├── tests └── testthat.R └── vignettes └── dynamic-factors.Rnw /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: dynfactoR 2 | Title: Dynamic factor model estimation for nowcasting 3 | Version: 0.1 4 | Authors@R: person("Rytis", "Bagdziunas", , "rytis.bagdziunas@uclouvain.be", role = c("aut", "cre")) 5 | Description: This package implements a subset of state space modelling, namely models with dynamic factors. These models are commonly used in economics for short-term forecasting due to possibility to summarize information from large datasets in a small number of factors. 6 | Depends: R (>= 3.1.3), MASS, optimx 7 | License: MIT + file LICENSE 8 | LazyData: true 9 | Suggests: xts, testthat 10 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | YEAR: 2015 2 | COPYRIGHT HOLDER: Rytis Bagdziunas 3 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | exportPattern("^[^\\.]") 2 | import(MASS) 3 | import(optimx) 4 | -------------------------------------------------------------------------------- /R/additional.R: -------------------------------------------------------------------------------- 1 | #' Estimate a p-th order vector autoregressive (VAR) model 2 | #' 3 | #' @param x Data matrix (T x n) 4 | #' @param p Maximum lag order, i.e. VAR(p) will be estimated 5 | #' @return Estimated parameter matrix, residuals and regression model 6 | #' independent and dependent variables 7 | #' @examples 8 | #' x <- matrix(rnorm(50*2), nrow=50, ncol=2) 9 | #' VAR(x, 2) 10 | VAR <- function(x, p) { 11 | T <- nrow(x) 12 | Y <- x[(p + 1):T, ] 13 | X <- c() 14 | for (i in 1:p) { 15 | X <- cbind(X, x[(p + 1 - i):(T - i), ]) 16 | } 17 | A <- solve(t(X) %*% X) %*% t(X) %*% Y 18 | res <- Y - X %*% A 19 | 20 | return(list(Y = Y, X = X, A = A, res = res)) 21 | } 22 | -------------------------------------------------------------------------------- /R/dfm.R: -------------------------------------------------------------------------------- 1 | #' Estimates a dynamic factor model based on Doz, Gianone & Reichlin (2011) 2 | #' 3 | #' @param X Data matrix (T x n) 4 | #' @param r Number of static factors 5 | #' @param p Lag order for factors 6 | #' @param q Number of dynamic factors, must be equal to or less than r 7 | #' @param max_iter Maximum number of iterations in EM-algorithm 8 | #' @param threshold Threshold for algorithm convergence 9 | #' @param rQ Restrictions on system state covariance 10 | #' @param rC Restrictions on factor loading matrix 11 | #' @return 3 types of factor estimates, namely principal component estimate, two step 12 | #' estimate based on PCA and Kalman filtering and QML estimate based on EM-algorithm 13 | #' @examples 14 | #' x <- matrix(rnorm(50*10), 50, 10) 15 | #' W <- as.logical(matrix(rbinom(50*10, 1, 0.1), 50, 10)) 16 | #' x[W] <- NA 17 | #' dfm(x, 2, 2, 1) 18 | dfm <- function(X, r, p, q, max_iter=100, threshold=1e-4, rQ, rC) { 19 | 20 | if(missing(rQ)) { rQ <- '' } 21 | if(missing(rC)) { rC <- '' } 22 | if(missing(q)) { q <- r } 23 | if (q > r) { stop("r must be larger than q.")} 24 | 25 | T <- dim(X)[1]; N <- dim(X)[2] 26 | x <- apply(X, 2, function(z) { (z - mean(z, na.rm=TRUE))/sd(z, na.rm=TRUE) }) 27 | 28 | Mx <- apply(X, 2, mean, na.rm=TRUE) 29 | Wx <- apply(X, 2, sd, na.rm=TRUE) 30 | W <- !is.na(x) 31 | # A consists of two parts. In particular, the upper dynamic part and the 32 | # lower invariable identity part to ensure time lag coherence. 33 | 34 | A <- rbind(matrix(0, nrow=r, ncol=r*p), 35 | diag(1, nrow=r*(p-1), ncol=r*p)) 36 | 37 | Q <- matrix(0, nrow=p*r, ncol=p*r) 38 | Q[1:r, 1:r] <- diag(1, r) 39 | 40 | eigen.decomp <- eigen(cov(x, use="complete.obs")) 41 | v <- eigen.decomp$vectors[,1:r] 42 | d <- eigen.decomp$values[1:r] 43 | 44 | chi <- x %*% v %*% t(v) 45 | d <- diag(1, r) 46 | F <- x %*% v 47 | F_pc <- F 48 | F <- na.omit(F) 49 | # If there are any dynamic factors, VAR(p) model is estimated 50 | # to initialize their parameters. 51 | 52 | if (p > 0) 53 | { 54 | # ML estimator for VAR(p) model when Q is restricted 55 | if (rQ == 'identity') 56 | { 57 | fit <- VAR(F,p) 58 | A[1:r, 1:(r*p)] <- t(fit$A) 59 | Q[1:r, 1:r] <- diag(1, r) 60 | } 61 | else 62 | { 63 | fit <- VAR(F, p) 64 | A[1:r, 1:(r*p)] <- t(fit$A) 65 | H <- cov(fit$res) 66 | 67 | # This only extracts the variance explained by 68 | # the dynamic components 69 | 70 | if (r > q) 71 | { 72 | q.decomp <- eigen(H) 73 | P <- q.decomp$vectors[,1:q, drop=FALSE] 74 | M <- q.decomp$values[1:q] 75 | if (q == 1) 76 | { 77 | P <- P * P[1,] 78 | Q[1:r, 1:r] <- P %*% t(P) * M 79 | } else { 80 | P <- P %*% diag(sign(P[1,])) 81 | Q[1:r, 1:r] <- P %*% diag(M) %*% t(P) 82 | } 83 | } else 84 | { 85 | Q[1:r, 1:r] <- H 86 | } 87 | } 88 | } 89 | R <- diag(diag(cov(x - chi, use="complete.obs"))) 90 | Z <- fit$X 91 | initx <- Z[1,] 92 | initV <- matrix(ginv(kronecker(A,A)) %*% as.numeric(Q), 93 | ncol=r*p, nrow=r*p) 94 | C <- cbind(v, matrix(0, nrow=N, ncol=r*(p-1))) 95 | 96 | previous_loglik <- -.Machine$double.xmax 97 | loglik <- 0 98 | num_iter <- 0 99 | LL <- c() 100 | 101 | converged <- 0 102 | kf_res <- K_filter(initx, initV, x, A, C, R, Q) 103 | ks_res <- K_smoother(A, kf_res$xitt, kf_res$xittm, 104 | kf_res$Ptt, kf_res$Pttm, C, R, W) 105 | 106 | xsmooth <- ks_res$xitT 107 | Vsmooth <- ks_res$PtT 108 | Wsmooth <- ks_res$PtTm 109 | 110 | F_kal <- t(xsmooth[1:r,, drop=FALSE]) 111 | if (rC == 'upper' & (r > 1)) 112 | { 113 | dimC <- dim(C[,1:r]) 114 | rK <- rep(0, (r-1)*r/2) 115 | irC <- which(matrix(upper.tri(C[,1:r]) + 0) == 1) 116 | rH <- matrix(0, nrow=length(rK), ncol=prod(dimC)) 117 | for (i in 1:length(rK)) 118 | { 119 | rH[i,irC[i]] <- 1 120 | } 121 | } 122 | 123 | while ((num_iter < max_iter) & !converged) 124 | { 125 | 126 | # E-step will return a list of sufficient statistics, namely second (cross)-moments 127 | # for latent and observed data. This is then plugged back into M-step. 128 | em_res <- Estep(t(x), A, C, Q, R, initx, initV, W) 129 | beta <- em_res$beta_t 130 | gamma <- em_res$gamma_t 131 | delta <- em_res$delta_t 132 | gamma1 <- em_res$gamma1_t 133 | gamma2 <- em_res$gamma2_t 134 | P1sum <- em_res$V1 + em_res$x1 %*% t(em_res$x1) 135 | x1sum <- em_res$x1 136 | loglik <- em_res$loglik_t 137 | 138 | num_iter <- num_iter + 1 139 | 140 | # M-step computes model parameters as a function of the sufficient statistics that 141 | # were computed with the E-step. Iterate the procedure until convergence. Due to the 142 | # model specification, likelihood maximiation in the M-step is just an OLS estimation. 143 | # In particular, X_t = C*F_t and F_t = A*F_(t-1). 144 | 145 | if (rC == 'upper' & (r > 1)) 146 | { 147 | fp <- matrix(delta[,1:r] %*% ginv(gamma[1:r,1:r])) 148 | kronCR <- kronecker(ginv(gamma[1:r,1:r]), R) 149 | sp <- kronCR %*% t(rH) %*% ginv(rH %*% kronCR %*% t(rH)) %*% (rK - rH %*% fp) 150 | C[,1:r] <- matrix(fp + sp, nrow=dimC[1], ncol=dimC[2]) 151 | } 152 | else 153 | { 154 | C[,1:r] <- delta[,1:r] %*% ginv(gamma[1:r,1:r]) 155 | } 156 | 157 | if (p > 0) 158 | { 159 | 160 | A_update <- beta[1:r,1:(r*p), drop=FALSE] %*% solve(gamma1[1:(r*p),1:(r*p)]) 161 | A[1:r,1:(r*p)] <- A_update 162 | if (rQ != 'identity') 163 | { 164 | H <- (gamma2[1:r, 1:r] - A_update %*% t(beta[1:r, 1:(r*p), drop=FALSE])) / (T-1) 165 | if (r > q) 166 | { 167 | h.decomp <- svd(H) 168 | P <- h.decomp$v[,1:q, drop=FALSE] 169 | M <- h.decomp$d[1:q] 170 | if (q == 1) 171 | { 172 | P <- P * P[1,] 173 | Q[1:r, 1:r] <- P %*% t(P) * M 174 | } else { 175 | P <- P %*% diag(sign(P[1,])) 176 | Q[1:r, 1:r] <- P %*% diag(M) %*% t(P) 177 | } 178 | } else { 179 | Q[1:r, 1:r] <- H 180 | } 181 | } 182 | } 183 | xx <- as.matrix(na.omit(x)) 184 | R <- (t(xx) %*% xx - C %*% t(delta)) / T 185 | RR <- diag(R); RR[RR < 1e-7] <- 1e-7; R <- diag(RR) 186 | 187 | R <- diag(diag(R)) 188 | LL <- c(LL, loglik) 189 | 190 | initx <- x1sum 191 | initV <- P1sum - initx %*% t(initx) 192 | 193 | converged <- em_converged(loglik, previous_loglik, threshold=threshold) 194 | previous_loglik <- loglik 195 | if (num_iter < 25) { converged <- FALSE } 196 | 197 | } 198 | 199 | if (converged == TRUE) 200 | { 201 | cat("Converged after", num_iter, "iterations.\n") 202 | } else 203 | { 204 | cat("Maximum number of iterations reached.\n") 205 | } 206 | 207 | kf <- K_filter(initx, initV, x, A, C, R, Q) 208 | ks <- K_smoother(A, kf$xitt, kf$xittm, kf$Ptt, kf$Pttm, C, R, W) 209 | 210 | xsmooth <- ks$xitT 211 | 212 | chi <- t(xsmooth) %*% t(C) %*% diag(Wx) + kronecker(matrix(1,T,1), t(Mx)) 213 | F_hat <- t(xsmooth[1:r,, drop=FALSE]) 214 | 215 | final_object <- list("pca"=F_pc, "qml"=F_hat, "twostep"=F_kal, 216 | "A"=A[1:r,], "C"=C[,1:r], "Q"=Q[1:q,1:q], "R"=R, 217 | "p"=p, "data"=x) 218 | class(final_object) <- c("dfm", "list") 219 | 220 | return(final_object) 221 | } 222 | 223 | #' National Bank of Belgium business and consumer surveys 224 | #' 225 | #' @format A time-stamped dataframe (xts object) with 226 | #' @source \url{http://stat.nbb.be} 227 | "NBBsurvey" 228 | -------------------------------------------------------------------------------- /R/dfmMS.R: -------------------------------------------------------------------------------- 1 | #' Dynamic factor model with Markov-switching states 2 | #' 3 | #' @param X Data matrix (Txn) 4 | #' @param s Number of states. Only \code{2} are supported for now 5 | #' @param J Number of factors. Currently, only \code{1} is supported 6 | #' @param p Lag number for VAR model of factors 7 | #' @param x0 Initial value for state 8 | #' @param P0 Initial value for state covariance 9 | #' @return Return an estimator. Currently, it runs via a likelihood 10 | #' maximization an so is rather slow. Do not call it with large number 11 | #' of input variables or missing data. Some box constraints are currently 12 | #' to speed up the process, these are based on usual rationale for economic 13 | #' data. 14 | dfmMS <- function(X, J=1, s=2, p=1, x0, P0) 15 | { 16 | 17 | T <- nrow(X) 18 | n <- ncol(X) 19 | x <- apply(X, 2, function(z) { (z-mean(z, na.rm=TRUE))/sd(z, na.rm=TRUE) }) 20 | 21 | J <- 1 22 | s <- 2 23 | 24 | if (missing(x0)) { x0 <- rep(0,J) } 25 | if (missing(P0)) { P0 <- diag(1,J) } 26 | 27 | initV <- list() 28 | initV$A <- c(0.96, 0.6) 29 | initV$F <- runif(n*J*s, -1, 1) 30 | initV$R <- rep(1,n) 31 | initV$p <- c(0.98, 0.95) 32 | 33 | # State equation covariance is restricted to identity 34 | Q <- diag(1, J) 35 | 36 | dimA <- J*J*s; dimF <- n*J*s; dimAF <- dimA + dimF; dimAFR <- dimAF + n 37 | 38 | KimFilterOptim <- function(pars) 39 | { 40 | A <- array(head(pars, dimA), c(J,J,s)) 41 | F <- array(pars[(dimA+1):(dimAF)], c(n,J,s)) 42 | R <- diag(pars[(dimAF+1):(dimAFR)]) 43 | dp <- tail(pars, s); r <- 1-dp 44 | p <- diag(dp) + r - diag(r) 45 | results <- KimFilter(x0, P0, x, F, A, R, Q, p) 46 | return(results$result) 47 | } 48 | 49 | optimP <- optimx(par=unlist(initV), 50 | fn = KimFilterOptim, 51 | lower = c(rep(-0.2, J*J*s), # A 52 | rep(-2.5, n*J*s), # F 53 | rep(0.15, n), # R 54 | rep(0.9,s)), # p 55 | upper = c(rep(0.98, J*J*s), # A 56 | rep(2.5, n*J*s), # F 57 | rep(1.5, n), # R 58 | rep(0.991,2)), # p 59 | method = "L-BFGS-B", 60 | control=list(maximize=TRUE,maxit=1000, trace=1, kkt=FALSE)) 61 | 62 | pars <- as.numeric(optimP)[1:length(unlist(initV))] 63 | A_hat <- array(pars[1:dimA], c(J,J,s)) 64 | F_hat <- array(pars[(dimA+1):dimAF], c(n,J,s)) 65 | R_hat <- diag(pars[(dimAF+1):dimAFR]) 66 | p_hat <- tail(pars, s); p_hat <- diag(p_hat) + (1-p_hat) - diag(1-p_hat) 67 | 68 | kf <- KimFilter(x0, P0, x, F_hat, A_hat, R_hat, Q, p_hat) 69 | ks <- KimSmoother(kf$xA, kf$Pa, A_hat, kf$P, kf$x, p_hat, kf$stateP, kf$stateP_fut) 70 | 71 | return(list("A"=A_hat, "F"=F_hat, "R"=R_hat, "p"=p_hat, 72 | "xF"=ks$xF, "Pf"=ks$Pf, "ProbS"=ks$ProbS)) 73 | } 74 | -------------------------------------------------------------------------------- /R/em_algorithm.R: -------------------------------------------------------------------------------- 1 | #' Computation of the expectation step in the EM-algorithm. 2 | 3 | #' @param y Data matrix 4 | #' @param A System state matrix 5 | #' @param C Observation matrix 6 | #' @param R Observation equation variance 7 | #' @param Q System state equation variance 8 | #' @param W Logical matrix with dim(W) = dim(y) 9 | #' indicating missing observations 10 | #' @param initx Initial value for state variable 11 | #' @param initV Initial value for state matrix 12 | #' @return Sufficient statistics used for M-step 13 | Estep <- function(y, A, C, Q, R, initx, initV, W) { 14 | 15 | os <- dim(y)[1] 16 | T <- dim(y)[2] 17 | ss <- nrow(A) 18 | 19 | kf <- K_filter(initx, initV, t(y), A, C, R, Q) 20 | ks <- K_smoother(A, kf$xitt, kf$xittm, kf$Ptt, kf$Pttm, C, R, W) 21 | 22 | xsmooth <- ks$xitT 23 | Vsmooth <- ks$PtT 24 | Wsmooth <- ks$PtTm 25 | 26 | delta <- matrix(0, os, ss) 27 | gamma <- matrix(0, ss, ss) 28 | beta <- matrix(0, ss, ss) 29 | 30 | for (t in 1:T) { 31 | z <- y[,t]; z[is.na(z)] <- 0 32 | # There seems to be a problem here 33 | delta <- delta + z %*% t(xsmooth[,t]) 34 | gamma <- gamma + xsmooth[,t] %*% t(xsmooth[,t]) + Vsmooth[,,t] 35 | if (t > 1) { 36 | beta <- beta + xsmooth[,t] %*% t(xsmooth[,(t-1)]) + Wsmooth[,,t] 37 | } 38 | } 39 | 40 | gamma1 <- gamma - xsmooth[, T] %*% t(xsmooth[, T]) - Vsmooth[, , T] 41 | gamma2 <- gamma - xsmooth[, 1] %*% t(xsmooth[, 1]) - Vsmooth[, , 1] 42 | x1 <- xsmooth[, 1] 43 | V1 <- Vsmooth[, , 1] 44 | 45 | return(list(beta_t = beta, gamma_t = gamma, delta_t = delta, gamma1_t = gamma1, 46 | gamma2_t = gamma2, x1 = x1, V1 = V1, loglik_t = kf$loglik, xsmooth = xsmooth)) 47 | 48 | } 49 | 50 | #' Convergence test for EM-algorithm. 51 | #' 52 | #' @param loglik Current value of the log-likelihood function 53 | #' @param previous_loglik Value of the log-likelihood function at the previous 54 | # iteration 55 | #' @param threshold If difference is less than threshold, then algorithm has 56 | #' converged 57 | #' @param check_increased TO DOCUMENT 58 | #' @return A logical statement indicating whether EM algorithm has converged 59 | #' according to slope convergence test 60 | em_converged <- function(loglik, previous_loglik, threshold=1e-4, check_increased=TRUE) { 61 | 62 | converged <- FALSE 63 | decrease <- 0 64 | 65 | if (check_increased == TRUE) { 66 | if (loglik - previous_loglik < -0.001) { 67 | # cat("*** Likelihood decreased from ", previous_loglik, " to ", loglik, "\n") 68 | decrease <- 1 69 | } 70 | } 71 | 72 | delta_loglik <- abs(loglik - previous_loglik) 73 | avg_loglik <- (abs(loglik) + abs(previous_loglik) + .Machine$double.eps)/2 74 | 75 | if ((delta_loglik/avg_loglik) < threshold) { 76 | converged <- TRUE 77 | } 78 | return(converged) 79 | # return(list('converged'=converged, 'decrease'=decrease)) 80 | } 81 | -------------------------------------------------------------------------------- /R/kalman.R: -------------------------------------------------------------------------------- 1 | #' Implements a Kalman for dynamic factor model. 2 | #' 3 | #' @param initx Initial value for state space observations 4 | #' @param initV Initial value for state covariance 5 | #' @param x Observation matrix 6 | #' @param A State space matrix 7 | #' @param C System matrix 8 | #' @param R State space covariance 9 | #' @param Q System covariance 10 | #' @return Filtered state space variable and its covariance matrix 11 | #' as well as their forecast for next period for further iterations 12 | K_filter <- function(initx, initV, x, A, C, R, Q) { 13 | 14 | T <- dim(x)[1] 15 | N <- dim(x)[2] 16 | r <- dim(A)[1] 17 | W <- !is.na(x) 18 | y <- t(x) 19 | 20 | xittm <- matrix(0, r, (T+1)) 21 | xitt <- matrix(0, r, T) 22 | 23 | Pttm <- array(0, c(r, r, (T+1))) 24 | Ptt <- array(0, c(r, r, T)) 25 | 26 | xittm[,1] <- initx 27 | Pttm[,,1] <- initV 28 | 29 | logl <- c() 30 | Ci <- C 31 | Ri <- R 32 | for (j in 1:T) { 33 | # missing_data <- MissData(y[,j], C, R) 34 | # C <- missing_data$C 35 | # R <- missing_data$R 36 | C <- Ci[W[j,],, drop=FALSE] 37 | R <- Ri[W[j,], W[j,], drop=FALSE] 38 | if (FALSE) #(all(!W[j,])) #(all(is.na(missing_data$y) == TRUE)) 39 | { 40 | xitt[,,j] <- A %*% xittm[,,j] 41 | Ptt[,,j] <- C %*% Pttm[,,j] %*% t(C) + R 42 | } else 43 | { 44 | # Innovation covariance (inverse) 45 | Icov <- C %*% Pttm[,,j] %*% t(C) + R 46 | L <- solve(Icov) 47 | # Innovation residual 48 | Ires <- as.numeric(na.omit(y[,j])) - C %*% xittm[,j] 49 | # Optimal Kalman gain 50 | G <- Pttm[,,j] %*% t(C) %*% L 51 | # Updated state estimate: predicted + (Kalman gain)*fitted 52 | xitt[,j] <- xittm[,j] + G %*% Ires 53 | # Updated covariance estimate 54 | Ptt[,,j] <- Pttm[,,j] - G %*% C %*% Pttm[,,j] 55 | # State space variable and covariance predictions E[f_t | t-1] 56 | xittm[,(j+1)] <- A %*% xitt[,j] 57 | Pttm[,,(j+1)] <- A %*% Ptt[,,j] %*% t(A) + Q 58 | 59 | # Compute log-likelihood with Mahalanobis distance 60 | d <- length(Ires) 61 | S <- C %*% Pttm[,,j] %*% t(C) + R 62 | Sinv <- solve(S) 63 | if (nrow(R) == 1) 64 | { 65 | GG <- t(C) %*% solve(R) %*% C 66 | detS <- prod(R) %*% det(diag(1, r) + Pttm[,,j] %*% GG) 67 | } else { 68 | GG <- t(C) %*% diag(1/diag(R)) %*% C 69 | detS <- prod(diag(R)) * det(diag(1, r) + Pttm[,,j] %*% GG) 70 | } 71 | denom <- (2 * pi)^(d/2) * sqrt(abs(detS)) 72 | mahal <- sum(t(Ires) %*% Sinv %*% Ires) 73 | logl[j] <- -0.5 * mahal - log(denom) 74 | } 75 | } 76 | loglik <- sum(logl, na.rm=TRUE) 77 | return(list(xitt = xitt, xittm = xittm, Ptt = Ptt, Pttm = Pttm, loglik = loglik)) 78 | } 79 | 80 | #' Implements Kalman smoothing and is used along with Kalman filter. 81 | #' Kalman filter outputs enter Kalman smoother as inputs. 82 | #' 83 | #' @param A State space matrix 84 | #' @param xitt State space variable 85 | #' @param xittm Predicted state space variable 86 | #' @param Ptt State space covariance 87 | #' @param Pttm Predicted state space covariance 88 | #' @param C System matrix 89 | #' @param R State space covariance 90 | #' @param W Logical matrix (T x n) indicating missing data. 91 | #' TRUE if observation is present, FALSE if it is missing. 92 | #' @return Smoothed state space variable and state space covariance matrix 93 | K_smoother <- function(A, xitt, xittm, Ptt, Pttm, C, R, W) { 94 | T <- dim(xitt)[2] 95 | r <- dim(A)[1] 96 | 97 | Pttm <- Pttm[,,(1:(dim(Pttm)[3] - 1)), drop = FALSE] 98 | xittm <- xittm[,(1:(dim(xittm)[2] - 1)), drop = FALSE] 99 | 100 | # Whereas J is of constant dimension, L and K dimensions may vary 101 | # depending on existence of NAs 102 | J <- array(0, c(r, r, T)) 103 | L <- list() 104 | K <- list() 105 | 106 | for (i in 1:(T-1)) { 107 | J[,,i] <- Ptt[,,i] %*% t(A) %*% solve(Pttm[,,(i+1)], tol = 1e-32) 108 | } 109 | 110 | Ci <- C 111 | Ri <- R 112 | for (i in 1:T) { 113 | # Only keep entries for non-missing data 114 | C <- Ci[W[i,],, drop=FALSE] 115 | R <- Ri[W[i,], W[i,], drop=FALSE] 116 | L[[i]] <- solve(C %*% Pttm[,,i] %*% t(C) + R) 117 | K[[i]] <- Pttm[,,i] %*% t(C) %*% L[[i]] 118 | } 119 | 120 | xitT <- cbind(matrix(0, r, (T-1)), xitt[,T]) 121 | PtT <- array(0, c(r, r, T)) 122 | PtTm <- array(0, c(r, r, T)) 123 | PtT[,,T] <- Ptt[,,T] 124 | PtTm[,,T] <- (diag(1, r) - K[[T]] %*% C) %*% A %*% Ptt[,,(T-1)] 125 | 126 | for (j in 1:(T-1)) { 127 | xitT[,(T-j)] <- xitt[,(T-j)] + J[,,(T-j)] %*% (xitT[,(T+1-j)] - xittm[,(T+1-j)]) 128 | PtT[,,(T-j)] <- Ptt[,,(T-j)] + J[,,(T-j)] %*% (PtT[,,(T+1-j)] - Pttm[,,(T+1-j)]) %*% t(J[,,(T-j)]) 129 | } 130 | 131 | for (j in 1:(T-2)) { 132 | PtTm[,,(T-j)] <- Ptt[,,(T-j)] %*% t(J[,,(T-j-1)]) + J[,,(T-j)] %*% (PtTm[,,(T-j+1)] - A %*% Ptt[,,(T-j)]) %*% t(J[,,(T-j-1)]) 133 | } 134 | 135 | return(list(xitT = xitT, PtT = PtT, PtTm = PtTm)) 136 | } 137 | -------------------------------------------------------------------------------- /R/kim.R: -------------------------------------------------------------------------------- 1 | #' Implementation of Kim (1994) filter, an extension to Kalman filter 2 | #' for dynamic linear models with Markov-switching. Documentation 3 | #' is incomplete, rudimentary and needs to be rechecked! 4 | #' 5 | #' @param x0 Initial condition for state vector 6 | #' @param P0 Initial condition for state variance 7 | #' @param y Data matrix (Txn) 8 | #' @param F System matrix for measurement equation 9 | #' @param A Transition matrix for state equation 10 | #' @param R Error covariance for measurement equation 11 | #' @param Q Error covariance for state equation 12 | #' @param p Transition probability matrix 13 | #' @return Filtered states and covariances with associated probability matrices. 14 | KimFilter <- function(x0, P0, y, F, A, R, Q, p) 15 | { 16 | # Define all containers for further computations. Notations for variables and indices, 17 | # where appropriate, carefully follow Kim (1994). State vector is denoted as 'x', its 18 | # covariance as 'P'. Appended letters explicit whether these are updated, approximated 19 | # or smoothed. 20 | 21 | if (is.vector(y) || (ncol(y) <= length(x0))) 22 | { 23 | stop("Number of factors should be strictly lower than number of variables. \n 24 | Increase number of variables or estimate a VAR model instead.") 25 | } 26 | 27 | T <- nrow(y) 28 | n <- dim(F)[1] 29 | J <- length(x0) 30 | s <- dim(p)[1] 31 | 32 | ## x: x^(i,j)_(t|t-1): predicted state vector - (2.6) 33 | ## xU: x^(i,j)_(t|t): updated state vector - (2.11) 34 | ## P: P^(i,j)_(t|t-1): predicted state covariance - (2.7) 35 | ## Pu: P^(i,j)_(t|t): updated state covariance - (2.12) 36 | ## eta: eta^(i,j)_(t|t-1): conditional forecast error - (2.8) 37 | ## H: H^(i,j)_(t): conditional variance of forecast error - (2.9) 38 | ## K: K^(i,j)_(t): Kalman gain - (2.10) 39 | ## lik: f(y_t, S_(t-1)=i, S_t = j | t-1): joint conditional density - (2.16) 40 | ## loglik: log of (2.16) 41 | x <- array(NA, c(T,J,s,s)) 42 | xU <- array(NA, c(T,J,s,s)) 43 | P <- array(NA, c(T,J,J,s,s)) 44 | Pu <- array(NA, c(T,J,J,s,s)) 45 | eta <- array(NA, c(T,n,s,s)) 46 | H <- array(NA, c(T,n,n,s,s)) 47 | K <- array(NA, c(T,J,n,s,s)) 48 | lik <- array(NA, c(T,s,s)) 49 | loglik <- array(NA, c(T,s,s)) 50 | ## Pr[S_(t-1) = i, S_t = j | t-1 ]: (2.15) 51 | ## Pr[S_(t-1) = i, S_t = j | t ]: (2.17) 52 | ## Pr[S_t = j | t-1 ]: used only for the smoothing part 53 | ## Pr[S_t = j | t ]: (2.18) 54 | jointP_fut <- array(NA, c(T,s,s)) 55 | jointP_cur <- array(NA, c((T+1),s,s)) 56 | stateP_fut <- array(NA, c(T,s)) 57 | stateP <- array(NA, c(T,s)) 58 | 59 | ## x^(j)_(t|t): approximate state vector conditional on S_j - (2.13) 60 | ## P^(j)_(t|t): approximate state covariance conditional on S_j - (2.14) 61 | xA <- array(NA, c(T,J,s)) 62 | Pa <- array(0, c(T,J,J,s)) 63 | result <- array(0, c(T,1)) 64 | 65 | # Some initial conditions to get started 66 | for (i in 1:s) { xA[1,,i] <- x0 } 67 | for (i in 1:s) { Pa[1,,,i] <- P0 } 68 | jointP_cur[1,,] <- matrix(c(0.25,0.25,0.25,0.25), ncol=2) 69 | 70 | for (t in 2:T) 71 | { 72 | for (j in 1:s) 73 | { 74 | for (i in 1:s) 75 | { 76 | x[t,,i,j] <- A[,,j] %*% xA[(t-1),,i] 77 | P[t,,,i,j] <- A[,,j] %*% Pa[(t-1),,,i] %*% t(A[,,j]) + Q 78 | eta[t,,i,j] <- y[t,] - as(F[,,j], "matrix") %*% x[t,,i,j] 79 | H[t,,,i,j] <- F[,,j] %*% as(P[t,,,i,j], "matrix") %*% t(F[,,j]) + R 80 | K[t,,,i,j] <- P[t,,,i,j] %*% t(F[,,j]) %*% solve(H[t,,,i,j]) 81 | xU[t,,i,j] <- x[t,,i,j] + K[t,,,i,j] %*% eta[t,,i,j] 82 | Pu[t,,,i,j] <- (diag(1,J) - K[t,,,i,j] %*% F[,,j]) %*% P[t,,,i,j] 83 | jointP_fut[t,i,j] <- p[i,j]*sum(jointP_cur[(t-1),,i]) # is everything alright here? 84 | lik[t,i,j] <- (2*pi)^(-n/2) * det(H[t,,,i,j])^(-1/2) * 85 | exp(-1/2*t(eta[t,,i,j]) %*% solve(H[t,,,i,j]) %*% eta[t,,i,j]) * 86 | jointP_fut[t,i,j] 87 | loglik[t,i,j] <- log(lik[t,i,j]) 88 | jointP_cur[t,i,j] <- lik[t,i,j] 89 | } 90 | # Technically, there should be sum(lik[t,,]) term but it cancels out and is computed later 91 | stateP[t,j] <- sum(jointP_cur[t,,j]) 92 | stateP_fut[t,j] <- sum(jointP_fut[t,,j]) 93 | # Compute probability-filtered state process and its covariance 94 | xA[t,,j] <- xU[t,,,j] %*% jointP_cur[t,,j] / stateP[t,j] 95 | for (i in 1:s) 96 | { 97 | Pa[t,,,j] <- Pa[t,,,j] + 98 | (Pu[t,,,i,j] + (xA[t,,j] - xU[t,,i,j]) %*% t(xA[t,,j] - xU[t,,i,j])) * 99 | exp(log(jointP_cur[t,i,j]) - log(stateP[t,j])) 100 | } 101 | } 102 | jointP_cur[t,,] <- exp(log(jointP_cur[t,,]) - log(sum(lik[t,,]))) 103 | stateP[t,] <- exp(log(stateP[t,]) - log(sum(lik[t,,]))) 104 | result[t,1] <- log(sum(lik[t,,])) 105 | } 106 | 107 | return(list("result"=sum(result), "xA"=xA, "Pa"=Pa, "x"=x, "P"=P, "stateP"=stateP, "stateP_fut"=stateP_fut)) 108 | 109 | } 110 | 111 | #' Smoothing algorithm from Kim (1994) to be used following a run 112 | #' of KimFilter function. 113 | #' 114 | #' @param xA Filtered state vector to be smoothed 115 | #' @param Pa Filtered state covariance to be smoothed 116 | #' @param x State-dependent state vector 117 | #' @param P State-dependent state covariance 118 | #' @param A Array with transition matrices 119 | #' @param p Markov transition matrix 120 | #' @param stateP Evolving current probability matrix 121 | #' @param stateP_fut Predicted probability matrix 122 | #' @return Smoothed states and covariance matrices. This is the equivalent 123 | #' of Kalman smoother in Markov-switching case. 124 | KimSmoother <- function(xA, Pa, A, P, x, p, stateP, stateP_fut) 125 | { 126 | # Define all containers for further computations. Notations for variables and indices, 127 | # where appropriate, carefully follow Kim (1994). State vector is denoted as 'x', its 128 | # covariance as 'P'. Appended letters explicit whether these are updated, approximated 129 | # or smoothed. 130 | 131 | T <- dim(xA)[1] 132 | J <- dim(xA)[2] 133 | s <- dim(xA)[3] 134 | 135 | ## Pr[S_t = j, S_(t+1) = k | T]: (2.20) 136 | ## Pr[S_t = j | T]: (2.21) 137 | jointPs <- array(NA, c(T,s,s)) 138 | ProbS <- array(NA, c(T,s)) 139 | 140 | ## xS: x^(j,k)_(t|T): inference of x_t based on full sample - (2.24) 141 | ## Ps: P^(j,k)_(t|T): covariance matrix of x^(j,k)_(t|T) - (2.25) 142 | ## Ptilde: helper matrix as defined after (2.25) 143 | xS <- array(0, c(T,J,s,s)) 144 | Ps <- array(0, c(T,J,J,s,s)) 145 | Ptilde <- array(NA, c(T,J,J,s,s)) 146 | 147 | ## xAS: x^(j)_(t|T): smoothed and approximated state vector conditional on S_j (2.26) 148 | ## Pas: P^(j)_(t|T): smoothed and approximated state covariance conditional on S_j (2.27) 149 | ## xF: x_(t|T): state-weighted [F]inal state vector (2.28) 150 | ## Pf: P_(t|T): state-weighted [f]inal state covariance 151 | xAS <- array(0, c(T,J,s)) 152 | Pas <- array(0, c(T,J,J,s)) 153 | xF <- array(0, c(T,J)) 154 | Pf <- array(0, c(T,J,J)) 155 | # Initial conditions for smoothing loop 156 | ProbS[T,] <- stateP[T,] 157 | 158 | for (t in seq(T-1,1,-1)) 159 | { 160 | for (j in 1:s) 161 | { 162 | for (k in 1:s) 163 | { 164 | jointPs[t,j,k] <- ProbS[(t+1),k]*stateP[t,j]*p[j,k] / stateP_fut[(t+1),k] 165 | Ptilde[t,,,j,k] <- Pa[t,,,j] %*% t(A[,,k]) %*% solve(P[(t+1),,,j,k]) 166 | xS[t,,j,k] <- xA[t,,j] + Ptilde[t,,,j,k] %*% (xA[(t+1),,k] - x[(t+1),,j,k]) 167 | Ps[t,,,j,k] <- Pa[t,,,j] + 168 | Ptilde[t,,,j,k] %*% (Pa[(t+1),,,k] - P[(t+1),,,j,k]) %*% t(Ptilde[t,,,j,k]) 169 | xAS[t,,j] <- xAS[t,,j] + jointPs[t,j,k]*xS[t,,j,k] 170 | Pas[t,,,j] <- Pas[t,,,j] + jointPs[t,j,k]*(Ps[t,,,j,k] + 171 | (xAS[t,,j] - xS[t,,j,k]) %*% t(xAS[t,,j] - xS[t,,j,k])) 172 | } 173 | ProbS[t,j] <- sum(jointPs[t,j,]) 174 | xAS[t,,j] <- xAS[t,,j] / ProbS[t,j] 175 | Pas[t,,,j] <- Pas[t,,,j] / ProbS[t,j] 176 | } 177 | } 178 | for (t in 1:T) 179 | { 180 | for (j in 1:s) 181 | { 182 | xF[t,] <- xF[t,] + xAS[t,,j]*ProbS[t,j] 183 | Pf[t,,] <- Pf[t,,] + Pas[t,,,j]*ProbS[t,j] 184 | } 185 | } 186 | 187 | return(list("xF"=xF, "Pf"=Pf, "ProbS"=ProbS)) 188 | } 189 | -------------------------------------------------------------------------------- /R/methods.R: -------------------------------------------------------------------------------- 1 | #' Summary information on dynamic factor model estimation 2 | #' 3 | #' @param x An object of \code{dfm} class 4 | #' @param plot If \code{TRUE}, returns a bunch of summary plots 5 | #' @return Prints out a summary information following a dynamic factor 6 | #' model estimation. Also can return summary plots. 7 | summary.dfm <- function(x, plot=FALSE) { 8 | 9 | cl <- match.call() 10 | 11 | nf <- dim(x$qml)[2] 12 | 13 | if (plot == TRUE) 14 | { 15 | par(mfrow=c((nf+1),1)) 16 | for (i in 1:nf) 17 | { 18 | plot.title <- paste0("QML estimated factor ", i) 19 | plot(x$qml[,i], type='l', main=plot.title, ylab="Value", xlab="Time") 20 | } 21 | boxplot(x$data - t(x$C %*% t(x$qml)), main="Residuals by input variable") 22 | } 23 | cat("Observation equation matrix: \n") 24 | print(x$C) 25 | 26 | cat("\nObservation residual covariance matrix: \n") 27 | print(cov(x$data - t(x$C %*% t(x$qml)))) 28 | 29 | cat("\nSystem equation transition matrix: \n") 30 | print(x$A) 31 | 32 | } 33 | 34 | #' Predict factors and observables based on an estimated dynamic 35 | #' factor model 36 | #' 37 | #' @param x An object of \code{dfm} class 38 | #' @param h Forecasting horizon 39 | #' @return Returns prediction matrices for factors and input variables 40 | #' based on model estimation. Each row corresponds to a prediction 41 | #' horizon. First row is \code{T+1}, second -\code{T+2} and so on... 42 | predict.dfm <- function(x, h=1) { 43 | 44 | cl <- match.call() 45 | 46 | nf <- dim(x$qml)[2] 47 | ny <- dim(x$C)[1] 48 | 49 | factor_forecast <- matrix(NA, nrow=h, ncol=nf) 50 | y_forecast <- matrix(NA, nrow=h, ncol=ny) 51 | factor_last <- matrix(NA, nrow=x$p, ncol=nf) 52 | 53 | factor_last <- tail(x$qml, x$p) 54 | 55 | for (i in 1:h) 56 | { 57 | reg_factor <- tail(factor_last, x$p) 58 | factor_forecast[i,] <- x$A %*% matrix(t(reg_factor[rev(1:nrow(reg_factor)),])) 59 | y_forecast[i,] <- x$C %*% factor_forecast[i,] 60 | 61 | factor_last <- rbind(factor_last, factor_forecast[i,]) 62 | } 63 | return(list("y"=y_forecast, "f"=factor_forecast)) 64 | } 65 | -------------------------------------------------------------------------------- /R/otherlang/kalman.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | using namespace Rcpp; 4 | 5 | class Kalman { 6 | private: 7 | mat A, H, Q, R, xest, pest; 8 | double dt; 9 | 10 | public: 11 | // constructor, sets up data structures 12 | Kalman() : dt(1.0) { 13 | A.eye(6,6); 14 | A(0,2) = A(1,3) = A(2,4) = A(3,5) = dt; 15 | H.zeros(2,6); 16 | H(0,0) = H(1,1) = 1.0; 17 | Q.eye(6,6); 18 | R = 1000 * eye(2,2); 19 | xest.zeros(6,1); 20 | pest.zeros(6,6); 21 | } 22 | 23 | // sole member function: estimate model 24 | mat estimate(const mat & Z) { 25 | unsigned int n = Z.n_rows, k = Z.n_cols; 26 | mat Y = zeros(n, k); 27 | mat xprd, pprd, S, B, kalmangain; 28 | colvec z, y; 29 | 30 | for (unsigned int i = 0; i