├── .gitignore ├── LICENSE ├── README.md ├── Stock_Watson.ipynb ├── data ├── hom_fac_1.xlsx └── hpfilter_trend.asc ├── dfm_functions.ipynb └── readin_functions.jl /.gitignore: -------------------------------------------------------------------------------- 1 | .ipynb_checkpoints/ -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2018, QuantEcon 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | * Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | * Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | * Neither the name of the copyright holder nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # dynamic_factor_models 2 | 3 | This is a respository for the project to replicate some results of dynamic factor models. Tentatively planned papers are 4 | - [Stock, J. H., & Watson, M. W. (2016). Dynamic factor models, factor-augmented vector autoregressions, and structural vector autoregressions in macroeconomics. In Handbook of macroeconomics (Vol. 2, pp. 415-525). Elsevier.](https://www.sciencedirect.com/science/article/pii/S1574004816300027) 5 | - [Barigozzi, M., Conti, A. M., & Luciani, M. (2014). Do euro area countries respond asymmetrically to the common monetary policy?. Oxford bulletin of economics and statistics, 76(5), 693-714.](https://onlinelibrary.wiley.com/doi/abs/10.1111/obes.12038) 6 | - [Forni, M., & Gambetti, L. (2010). The dynamic effects of monetary policy: A structural factor model approach. Journal of Monetary Economics, 57(2), 203-216.](https://www.sciencedirect.com/science/article/pii/S0304393209001597) 7 | 8 | **Any contribution would be appreciated!!** 9 | - post issue if you find any error or if you want a new feature 10 | - send pull request if you can contribute to the repository 11 | -------------------------------------------------------------------------------- /data/hom_fac_1.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QuantEcon/dynamic_factor_models/ca9dbde34cdbd70360642ce612e78461c113d1ea/data/hom_fac_1.xlsx -------------------------------------------------------------------------------- /data/hpfilter_trend.asc: -------------------------------------------------------------------------------- 1 | 0.000000 2 | 0.000000 3 | -0.000000 4 | -0.000001 5 | -0.000001 6 | -0.000001 7 | -0.000002 8 | -0.000002 9 | -0.000003 10 | -0.000003 11 | -0.000003 12 | -0.000004 13 | -0.000004 14 | -0.000004 15 | -0.000004 16 | -0.000005 17 | -0.000005 18 | -0.000005 19 | -0.000004 20 | -0.000004 21 | -0.000003 22 | -0.000002 23 | -0.000001 24 | 0.000000 25 | 0.000002 26 | 0.000005 27 | 0.000007 28 | 0.000010 29 | 0.000014 30 | 0.000018 31 | 0.000023 32 | 0.000028 33 | 0.000034 34 | 0.000040 35 | 0.000047 36 | 0.000054 37 | 0.000061 38 | 0.000068 39 | 0.000076 40 | 0.000083 41 | 0.000090 42 | 0.000095 43 | 0.000100 44 | 0.000103 45 | 0.000103 46 | 0.000101 47 | 0.000096 48 | 0.000086 49 | 0.000072 50 | 0.000052 51 | 0.000026 52 | -0.000007 53 | -0.000049 54 | -0.000100 55 | -0.000161 56 | -0.000232 57 | -0.000315 58 | -0.000410 59 | -0.000517 60 | -0.000637 61 | -0.000769 62 | -0.000913 63 | -0.001067 64 | -0.001230 65 | -0.001400 66 | -0.001573 67 | -0.001745 68 | -0.001912 69 | -0.002067 70 | -0.002204 71 | -0.002313 72 | -0.002384 73 | -0.002407 74 | -0.002369 75 | -0.002254 76 | -0.002048 77 | -0.001733 78 | -0.001290 79 | -0.000700 80 | 0.000058 81 | 0.001004 82 | 0.002159 83 | 0.003543 84 | 0.005176 85 | 0.007074 86 | 0.009250 87 | 0.011714 88 | 0.014468 89 | 0.017508 90 | 0.020821 91 | 0.024384 92 | 0.028158 93 | 0.032092 94 | 0.036115 95 | 0.040139 96 | 0.044049 97 | 0.047708 98 | 0.050952 99 | 0.053584 100 | 0.055379 101 | 0.056076 102 | 0.055379 103 | 0.053584 104 | 0.050952 105 | 0.047708 106 | 0.044049 107 | 0.040139 108 | 0.036115 109 | 0.032092 110 | 0.028158 111 | 0.024384 112 | 0.020821 113 | 0.017508 114 | 0.014468 115 | 0.011714 116 | 0.009250 117 | 0.007074 118 | 0.005176 119 | 0.003543 120 | 0.002159 121 | 0.001004 122 | 0.000058 123 | -0.000700 124 | -0.001290 125 | -0.001733 126 | -0.002048 127 | -0.002254 128 | -0.002369 129 | -0.002407 130 | -0.002384 131 | -0.002313 132 | -0.002204 133 | -0.002067 134 | -0.001912 135 | -0.001745 136 | -0.001573 137 | -0.001400 138 | -0.001230 139 | -0.001067 140 | -0.000913 141 | -0.000769 142 | -0.000637 143 | -0.000517 144 | -0.000410 145 | -0.000315 146 | -0.000232 147 | -0.000161 148 | -0.000100 149 | -0.000049 150 | -0.000007 151 | 0.000026 152 | 0.000052 153 | 0.000072 154 | 0.000086 155 | 0.000096 156 | 0.000101 157 | 0.000103 158 | 0.000103 159 | 0.000100 160 | 0.000095 161 | 0.000090 162 | 0.000083 163 | 0.000076 164 | 0.000068 165 | 0.000061 166 | 0.000054 167 | 0.000047 168 | 0.000040 169 | 0.000034 170 | 0.000028 171 | 0.000023 172 | 0.000018 173 | 0.000014 174 | 0.000010 175 | 0.000007 176 | 0.000005 177 | 0.000002 178 | 0.000000 179 | -0.000001 180 | -0.000002 181 | -0.000003 182 | -0.000004 183 | -0.000004 184 | -0.000005 185 | -0.000005 186 | -0.000005 187 | -0.000004 188 | -0.000004 189 | -0.000004 190 | -0.000004 191 | -0.000003 192 | -0.000003 193 | -0.000003 194 | -0.000002 195 | -0.000002 196 | -0.000001 197 | -0.000001 198 | -0.000001 199 | -0.000000 200 | 0.000000 201 | 0.000000 202 | -------------------------------------------------------------------------------- /dfm_functions.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "using Statistics\n", 10 | "using DelimitedFiles\n", 11 | "using LinearAlgebra\n", 12 | "using MultivariateStats" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": null, 18 | "metadata": {}, 19 | "outputs": [], 20 | "source": [ 21 | "abstract type EstimationMethod end\n", 22 | "struct NonParametric <: EstimationMethod end\n", 23 | "struct Parametric <: EstimationMethod end" 24 | ] 25 | }, 26 | { 27 | "cell_type": "markdown", 28 | "metadata": {}, 29 | "source": [ 30 | "##### `VARModel`\n", 31 | "VAR model can be expressed in state-space form:\n", 32 | "$$y_t = Qz_t$$\n", 33 | "$$z_t = Mz_{t-1}+Gu_t$$\n", 34 | "where $z_t$ has $y_t$ and lagged $y_t$" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": null, 40 | "metadata": {}, 41 | "outputs": [], 42 | "source": [ 43 | "struct VARModel{TA <: AbstractArray, TAm <: AbstractArray, TMm <: AbstractMatrix}\n", 44 | " y::TA\n", 45 | " nlag::Int\n", 46 | " withconst::Bool\n", 47 | " initperiod::Int\n", 48 | " lastperiod::Int\n", 49 | " T::Int\n", 50 | " ns::Int\n", 51 | " resid::TAm\n", 52 | " betahat::TAm\n", 53 | " M::TMm\n", 54 | " Q::TMm\n", 55 | " G::TMm\n", 56 | " seps::TMm\n", 57 | "end" 58 | ] 59 | }, 60 | { 61 | "cell_type": "code", 62 | "execution_count": null, 63 | "metadata": {}, 64 | "outputs": [], 65 | "source": [ 66 | "mutable struct FactorEstimateStats\n", 67 | " T::Int # number of data periods used for factor estimation\n", 68 | " ns::Int # number of data series\n", 69 | " nobs # total number of observations (=sum_i T_i)\n", 70 | " tss\n", 71 | " ssr\n", 72 | " R2::Vector{Union{Missing, Float64}}\n", 73 | "end" 74 | ] 75 | }, 76 | { 77 | "cell_type": "markdown", 78 | "metadata": {}, 79 | "source": [ 80 | "Note: `factor` and `factor_var_model.y` are actually using same memory." 81 | ] 82 | }, 83 | { 84 | "cell_type": "code", 85 | "execution_count": null, 86 | "metadata": {}, 87 | "outputs": [], 88 | "source": [ 89 | "struct DFMModel\n", 90 | " data::AbstractArray\n", 91 | " inclcode::Vector{Int}\n", 92 | " T::Int # number of whole data periods\n", 93 | " ns::Int\n", 94 | " nt_min_factor_estimation::Int\n", 95 | " nt_min_factorloading_estimation::Int\n", 96 | " initperiod::Int\n", 97 | " lastperiod::Int\n", 98 | " nfac_o::Int\n", 99 | " nfac_u::Int\n", 100 | " nfac_t::Int\n", 101 | " tol::Float64\n", 102 | " fes::FactorEstimateStats\n", 103 | " factor::AbstractArray\n", 104 | " lambda::AbstractArray\n", 105 | " uar_coef::AbstractArray\n", 106 | " uar_ser::Vector{Float64}\n", 107 | " n_uarlag::Int\n", 108 | " n_factorlag::Int\n", 109 | " factor_var_model::VARModel\n", 110 | " r2::Vector{Union{Missing, Float64}}\n", 111 | "end" 112 | ] 113 | }, 114 | { 115 | "cell_type": "code", 116 | "execution_count": null, 117 | "metadata": {}, 118 | "outputs": [], 119 | "source": [ 120 | "function DFMModel(data, inclcode,\n", 121 | " nt_min_factor_estimation::Integer, nt_min_factorloading_estimation::Integer,\n", 122 | " initperiod::Integer, lastperiod::Integer,\n", 123 | " nfac_o::Integer, nfac_u::Integer, tol, n_uarlag::Integer, n_factorlag::Integer)\n", 124 | " size(data, 2) == length(inclcode) || error(\"length of inclcode must equal to number of data series\")\n", 125 | " initperiod < lastperiod || error(\"initperiod must be smaller than lastperiod\")\n", 126 | " ((n_uarlag > 0) && (n_factorlag > 0)) || error(\"n_uarlag and n_factorlag must be positive\")\n", 127 | "\n", 128 | " T, ns = size(data)\n", 129 | " nfac_t = nfac_o+nfac_u\n", 130 | " fes = FactorEstimateStats(lastperiod - initperiod + 1,\n", 131 | " count(inclcode.==1),\n", 132 | " missing, missing, missing,\n", 133 | " Vector{Union{Missing, Float64}}(undef, count(inclcode.==1)))\n", 134 | " factor = Matrix{Union{Missing, Float64}}(missing, T, nfac_t)\n", 135 | " lambda = Matrix{Float64}(undef, ns, nfac_t)\n", 136 | " uar_coef = Matrix{Float64}(undef, ns, n_uarlag)\n", 137 | " uar_ser = Vector{Float64}(undef, ns)\n", 138 | " factor_var_model = VARModel(factor, n_factorlag, initperiod= initperiod,\n", 139 | " lastperiod=lastperiod)\n", 140 | " return DFMModel(data, vec(inclcode), T, ns,\n", 141 | " nt_min_factor_estimation, nt_min_factorloading_estimation,\n", 142 | " initperiod, lastperiod, nfac_o, nfac_u, nfac_t,\n", 143 | " tol, fes, factor, lambda, uar_coef, uar_ser,\n", 144 | " n_uarlag, n_factorlag, factor_var_model,\n", 145 | " Vector{Union{Missing, Float64}}(undef, ns))\n", 146 | "end" 147 | ] 148 | }, 149 | { 150 | "cell_type": "code", 151 | "execution_count": null, 152 | "metadata": {}, 153 | "outputs": [], 154 | "source": [ 155 | "function drop_missing_row(A::AbstractMatrix)\n", 156 | " tmp = .!any(ismissing.(A), dims=2)\n", 157 | " return Float64.(A[vec(tmp), :]), tmp\n", 158 | "end" 159 | ] 160 | }, 161 | { 162 | "cell_type": "code", 163 | "execution_count": null, 164 | "metadata": {}, 165 | "outputs": [], 166 | "source": [ 167 | "function drop_missing_col(A::AbstractMatrix)\n", 168 | " tmp = drop_missing_row(A')\n", 169 | " return tmp[1]', tmp[2]'\n", 170 | "end" 171 | ] 172 | }, 173 | { 174 | "cell_type": "code", 175 | "execution_count": null, 176 | "metadata": {}, 177 | "outputs": [], 178 | "source": [ 179 | "function pca_score(X, nfac_u::Integer)\n", 180 | " _, _, V = svd(X)\n", 181 | " score = (X*V)[:, 1:nfac_u]\n", 182 | " return score\n", 183 | "end" 184 | ] 185 | }, 186 | { 187 | "cell_type": "markdown", 188 | "metadata": {}, 189 | "source": [ 190 | "##### `ols`\n", 191 | "###### Arguments\n", 192 | "- `y::AbstractVector`: length `T` Vector or `TxN` Matrix where `T` is sample size and `N` is the number of regressed variables\n", 193 | "- `X::AbstractArray`: `TxK` Matrix where `K` is the number of regressors\n", 194 | "###### Outputs\n", 195 | "- `b`: OLS estimator of the coefficients\n", 196 | "- `e`: residual" 197 | ] 198 | }, 199 | { 200 | "cell_type": "code", 201 | "execution_count": null, 202 | "metadata": {}, 203 | "outputs": [], 204 | "source": [ 205 | "function ols(y::AbstractArray, X::AbstractArray)\n", 206 | "# b = llsq(X, y; bias=false)\n", 207 | " b = X\\y\n", 208 | " e = y - X*b\n", 209 | " return b, e\n", 210 | "end" 211 | ] 212 | }, 213 | { 214 | "cell_type": "code", 215 | "execution_count": null, 216 | "metadata": {}, 217 | "outputs": [], 218 | "source": [ 219 | "abstract type OLSSkipRule end\n", 220 | "struct Balanced <: OLSSkipRule end\n", 221 | "struct Unbalanced <: OLSSkipRule end" 222 | ] 223 | }, 224 | { 225 | "cell_type": "markdown", 226 | "metadata": {}, 227 | "source": [ 228 | "##### `ols_skipmissing`\n", 229 | "do OLS ignoring samples containing missing values\n", 230 | "###### Outputs\n", 231 | "- `b`: OLS estimator of the coefficients\n", 232 | "- `e`: residual\n", 233 | "- `numrow`: BitArray inidicating rows used to estimate" 234 | ] 235 | }, 236 | { 237 | "cell_type": "code", 238 | "execution_count": null, 239 | "metadata": {}, 240 | "outputs": [], 241 | "source": [ 242 | "function ols_skipmissing(y::AbstractMatrix, X::AbstractArray, ::Balanced)\n", 243 | " N = size(y, 2)\n", 244 | " tmp, numrow = drop_missing_row([y X])\n", 245 | " y_used, x_used = tmp[:, 1:N], tmp[:, N+1:end]\n", 246 | " b, e = ols(y_used, x_used)\n", 247 | " return b, e, vec(numrow)\n", 248 | "end\n", 249 | "function ols_skipmissing(y::AbstractVector, X::AbstractArray, method::Balanced)\n", 250 | " b, e, numrow = ols_skipmissing(reshape(y, size(y, 1), size(y, 2)), X, method)\n", 251 | " return b, vec(e), numrow\n", 252 | "end" 253 | ] 254 | }, 255 | { 256 | "cell_type": "markdown", 257 | "metadata": {}, 258 | "source": [ 259 | "##### `ols_skipmissing`\n", 260 | "###### Arguments\n", 261 | "- `y::AbstractMatrix`: `TxN`\n", 262 | "- `X::AbstractArray`: `TxK` Matrix or `T` Vector" 263 | ] 264 | }, 265 | { 266 | "cell_type": "code", 267 | "execution_count": null, 268 | "metadata": {}, 269 | "outputs": [], 270 | "source": [ 271 | "function ols_skipmissing(y::AbstractMatrix, X::AbstractArray, ::Unbalanced)\n", 272 | " if size(y, 1) != size(X, 1)\n", 273 | " error(\"Sample size must be same\")\n", 274 | " end\n", 275 | " T, N = size(y)\n", 276 | " b = Matrix{Float64}(undef, size(X, 2), N)\n", 277 | " e = Matrix{Union{Missing, Float64}}(missing, T, N)\n", 278 | " numrow = BitArray(undef, T, N)\n", 279 | " for i=1:N\n", 280 | " tmp = ols_skipmissing(y[:, i], X, Balanced())\n", 281 | " b[:, i] = tmp[1]\n", 282 | " e[tmp[3], i] = tmp[2]\n", 283 | " numrow[:, i] = tmp[3]\n", 284 | " end\n", 285 | " return b, e, numrow\n", 286 | "end" 287 | ] 288 | }, 289 | { 290 | "cell_type": "code", 291 | "execution_count": null, 292 | "metadata": {}, 293 | "outputs": [], 294 | "source": [ 295 | "function lagmat(X::AbstractArray, lags::AbstractVector)\n", 296 | " nc = size(X, 2)\n", 297 | " Xlag = Matrix{Union{Missing, Float64}}(missing, size(X, 1), nc*length(lags))\n", 298 | " for (i, lag) in enumerate(lags)\n", 299 | " Xlag[lag+1:end, nc*(i-1)+1:nc*i] .= X[1:end-lag, :]\n", 300 | " end\n", 301 | " return Xlag\n", 302 | "end\n", 303 | "lagmat(X::AbstractArray, lag::Integer) = lagmat(X, [lag])\n", 304 | "\n", 305 | "function uar(y::AbstractVector, n_lags::Integer)\n", 306 | " x = lagmat(y, 1:n_lags)\n", 307 | " arcoef, ehat, _ = ols_skipmissing(y, x, Balanced())\n", 308 | " ssr = dot(ehat, ehat)\n", 309 | " ser = sqrt(ssr/(size(x, 1)-size(x, 2)))\n", 310 | " return arcoef, ser\n", 311 | "end" 312 | ] 313 | }, 314 | { 315 | "cell_type": "markdown", 316 | "metadata": {}, 317 | "source": [ 318 | "##### `estimate_factor!`\n", 319 | "estimate factor by iteration using balanced data\n" 320 | ] 321 | }, 322 | { 323 | "cell_type": "code", 324 | "execution_count": null, 325 | "metadata": {}, 326 | "outputs": [], 327 | "source": [ 328 | "function estimate_factor!(m::DFMModel, max_iter::Integer=100000000,\n", 329 | " computeR2::Bool=true; lam_constr=nothing)\n", 330 | " data = m.data\n", 331 | " initperiod, lastperiod, nt_min, nfac_u, nfac_o, tol =\n", 332 | " m.initperiod, m.lastperiod, m.nt_min_factor_estimation,\n", 333 | " m.nfac_u, m.nfac_o, m.tol\n", 334 | " # use part of the data\n", 335 | " est_data = data[:, m.inclcode.==1]\n", 336 | " xdata = est_data[initperiod:lastperiod, :]\n", 337 | "\n", 338 | " # preprocess data to have unit standard error\n", 339 | " xdata_standardized, xdatastd = standardize_data(xdata)\n", 340 | " standardize_constraint!(lam_constr, xdatastd)\n", 341 | "\n", 342 | " m.fes.tss = sum(skipmissing(xdata_standardized.^2))\n", 343 | " m.fes.nobs = length(xdata_standardized[.!ismissing.(xdata_standardized)])\n", 344 | "\n", 345 | " xbal, _ = drop_missing_col(xdata_standardized)\n", 346 | "\n", 347 | " # Get initial F_t given Lambda_t using PCA\n", 348 | " f = pca_score(xbal, nfac_u)\n", 349 | " m.fes.ssr = 0\n", 350 | " diff = 1000\n", 351 | " lambda = Array{Union{Missing, Float64}}(undef, m.fes.ns, m.nfac_t)\n", 352 | " for iter = 1:max_iter\n", 353 | " ssr_old = m.fes.ssr\n", 354 | " # given F_t, get Lambda_t\n", 355 | " for i = 1:m.fes.ns\n", 356 | " tmp, usedrows = drop_missing_row([xdata_standardized[:, i] f])\n", 357 | " if count(usedrows) >= nt_min\n", 358 | " lambda[i, :] =\n", 359 | " ols_skipmissing(xdata_standardized[:, i], f, Balanced())[1]'\n", 360 | " impose_constraint!(view(lambda, i, :), i, f, lam_constr, :factor)\n", 361 | " end\n", 362 | " end\n", 363 | " # given Lambda_t, get F_t by regressing X_t on Lambda_t for each t\n", 364 | " tmp = ols_skipmissing(xdata_standardized', lambda[:, nfac_o+1:end], Unbalanced())\n", 365 | " f, ehat = tmp[1]', tmp[2]\n", 366 | " m.fes.ssr = sum(sum(skipmissing(ehat.^2)))\n", 367 | " diff = abs(ssr_old - m.fes.ssr)\n", 368 | " diff >= tol*m.fes.T*m.fes.ns || break\n", 369 | " # println(\"diff = \", diff)\n", 370 | " end\n", 371 | " m.factor[initperiod:lastperiod, :] = f\n", 372 | " if computeR2\n", 373 | " for i=1:m.fes.ns\n", 374 | " tmp = drop_missing_row([xdata_standardized[:, i] f])[1]\n", 375 | " if size(tmp, 1) >= nt_min\n", 376 | " bhat, ehat = ols(tmp[:, 1], tmp[:, 2:end])\n", 377 | " m.fes.R2[i] = compute_r2(tmp[:, 1], ehat)[1]\n", 378 | " end\n", 379 | " end\n", 380 | " end\n", 381 | " return nothing\n", 382 | "end" 383 | ] 384 | }, 385 | { 386 | "cell_type": "code", 387 | "execution_count": null, 388 | "metadata": {}, 389 | "outputs": [], 390 | "source": [ 391 | "function estimate_factor_loading!(m::DFMModel; lam_constr=nothing)\n", 392 | " data, initperiod, lastperiod, fac, nt_min, nfac_t, n_uarlag =\n", 393 | " m.data, m.initperiod, m.lastperiod, m.factor,\n", 394 | " m.nt_min_factorloading_estimation, m.nfac_t, m.n_uarlag\n", 395 | " n_series = size(data, 2)\n", 396 | " for is = 1:n_series\n", 397 | " tmp, numrow = drop_missing_row([data[initperiod:lastperiod, is] fac[initperiod:lastperiod, :]])\n", 398 | " if count(numrow) >= nt_min # if available sample size is large enough\n", 399 | " X = [tmp[:, 2:end] ones(count(numrow))]\n", 400 | " b, uhat = ols(tmp[:, 1], X)\n", 401 | " impose_constraint!(b, uhat, tmp[:, 1], is, X, lam_constr, :loading)\n", 402 | " y_used = data[initperiod:lastperiod, is][vec(numrow), :]\n", 403 | " m.lambda[is, :] .= b[1:end-1]\n", 404 | " m.r2[is], _, _ = compute_r2(y_used, uhat)\n", 405 | " if m.r2[is] < 0.9999\n", 406 | " arcoef, ser = uar(uhat, n_uarlag)\n", 407 | " else\n", 408 | " arcoef, ser = zeros(n_uarlag, 1), 0.0\n", 409 | " end\n", 410 | " end\n", 411 | " m.uar_coef[is, :] = arcoef'\n", 412 | " m.uar_ser[is] = ser\n", 413 | " end\n", 414 | " return nothing\n", 415 | "end" 416 | ] 417 | }, 418 | { 419 | "cell_type": "code", 420 | "execution_count": null, 421 | "metadata": {}, 422 | "outputs": [], 423 | "source": [ 424 | "function VARModel(y::AbstractArray, nlag::Integer=1;\n", 425 | " withconst::Bool=true,\n", 426 | " initperiod::Integer=1, lastperiod::Integer=size(y, 1))\n", 427 | " T, ns = size(y, 1), size(y, 2)\n", 428 | " resid = Array{Union{Missing, Float64}}(missing, size(y))\n", 429 | " betahat = Matrix{Union{Missing, Float64}}(missing, ns*nlag+withconst, ns)\n", 430 | " M = Matrix{Union{Missing, Float64}}(missing, ns*nlag, ns*nlag)\n", 431 | " Q = Matrix{Union{Missing, Float64}}(missing, ns, ns*nlag)\n", 432 | " G = Matrix{Union{Missing, Float64}}(missing, ns*nlag, ns)\n", 433 | " seps = Matrix{Union{Missing, Float64}}(missing, ns, ns)\n", 434 | " return VARModel(y, nlag, withconst, initperiod, lastperiod, T, ns, resid, betahat, M, Q, G, seps)\n", 435 | "end" 436 | ] 437 | }, 438 | { 439 | "cell_type": "code", 440 | "execution_count": null, 441 | "metadata": {}, 442 | "outputs": [], 443 | "source": [ 444 | "function estimate_var!(varm::VARModel, compute_matrices::Bool=true)\n", 445 | " initperiod, lastperiod = varm.initperiod, varm.lastperiod\n", 446 | " withconst, nlag = varm.withconst, varm.nlag\n", 447 | " resid, seps = varm.resid, varm.seps\n", 448 | "\n", 449 | " y_restricted = varm.y[initperiod:lastperiod, :]\n", 450 | "\n", 451 | " # regressors\n", 452 | " withconst || (x = lagmat(y_restricted, 1:nlag))\n", 453 | " !withconst || (x = [ones(lastperiod-initperiod+1) lagmat(y_restricted, 1:nlag)])\n", 454 | "\n", 455 | " # do OLS ignoring the samples containing NaN\n", 456 | " betahat, ehat, numrows = ols_skipmissing(y_restricted, x, Balanced())\n", 457 | " varm.betahat .= betahat\n", 458 | "\n", 459 | " T_used = count(numrows) # used sample size\n", 460 | " K = size(x, 2) # number of regressors\n", 461 | "\n", 462 | " ndf = T_used - K # degree of freedom (T-K)\n", 463 | " seps .= ehat'*ehat/ndf # covariance matrix of error term\n", 464 | " resid[initperiod- 1 .+ findall(numrows), :] .= ehat\n", 465 | "\n", 466 | " !compute_matrices || fill_matrices!(varm, betahat)\n", 467 | " return nothing\n", 468 | "end" 469 | ] 470 | }, 471 | { 472 | "cell_type": "code", 473 | "execution_count": null, 474 | "metadata": {}, 475 | "outputs": [], 476 | "source": [ 477 | "function fill_matrices!(varm::VARModel, betahat::Array)\n", 478 | " ns, nlag = varm.ns, varm.nlag\n", 479 | " M, Q, G = varm.M, varm.Q, varm.G\n", 480 | "\n", 481 | " b = betahat[2:end, :]' # now, each row corresponds to each equation\n", 482 | "\n", 483 | " M .= zeros(ns*nlag, ns*nlag)\n", 484 | " M[1:ns, :] .= b # coefficients of VAR\n", 485 | " M[ns+1:end, 1:end-ns] .= Matrix{Float64}(I, ns*nlag-ns, ns*nlag-ns) # lag part\n", 486 | "\n", 487 | " Q .= zeros(ns, ns*nlag)\n", 488 | " Q[1:ns, 1:ns] .= Matrix{Float64}(I, ns, ns)\n", 489 | " G .= zeros(ns*nlag, ns)\n", 490 | " G[1:ns, 1:ns] .= ((cholesky(varm.seps)).U)'\n", 491 | " return nothing\n", 492 | "end" 493 | ] 494 | }, 495 | { 496 | "cell_type": "code", 497 | "execution_count": null, 498 | "metadata": {}, 499 | "outputs": [], 500 | "source": [ 501 | "function standardize_data(data::AbstractArray)\n", 502 | " datamean = [mean(collect(skipmissing(data[:, i]))) for i = 1:size(data, 2)]'\n", 503 | " # # make correction (which I don't understand why being needed)\n", 504 | " tmp = size(data, 1) .- sum(ismissing.(data), dims = 1)\n", 505 | " tmp = (tmp.-1)./tmp\n", 506 | " datastd = [std(collect(skipmissing(data[:, i]))) for i = 1:size(data, 2)]'.*(tmp.^.5)\n", 507 | " data_standardized = (data .- datamean)./datastd\n", 508 | " return data_standardized, datastd\n", 509 | "end" 510 | ] 511 | }, 512 | { 513 | "cell_type": "markdown", 514 | "metadata": {}, 515 | "source": [ 516 | "##### `estimate!`\n", 517 | "estimate DFM Model non-parametrically\n", 518 | "###### Procedure\n", 519 | "1. estimate factor $F$ using balanced data\n", 520 | "1. estimate factor loading $\\Lambda$ using full sample\n", 521 | "1. estimate the equation of factor evolution" 522 | ] 523 | }, 524 | { 525 | "cell_type": "code", 526 | "execution_count": null, 527 | "metadata": {}, 528 | "outputs": [], 529 | "source": [ 530 | "function estimate!(m::DFMModel, ::NonParametric=NonParametric();\n", 531 | " lam_constr_f=nothing, lam_constr_fl=nothing)\n", 532 | "\n", 533 | " # estimate factor using balanced data\n", 534 | " estimate_factor!(m, lam_constr=lam_constr_f)\n", 535 | "\n", 536 | " # estimate factor loading using full sample\n", 537 | " estimate_factor_loading!(m, lam_constr=lam_constr_fl)\n", 538 | "\n", 539 | " # estimate the equation of factor evolution\n", 540 | " estimate_var!(m.factor_var_model)\n", 541 | "\n", 542 | " return nothing\n", 543 | "end" 544 | ] 545 | }, 546 | { 547 | "cell_type": "code", 548 | "execution_count": null, 549 | "metadata": {}, 550 | "outputs": [], 551 | "source": [ 552 | "compute_series(dfmm::DFMModel, is::Integer) = dfmm.factor*dfmm.lambda[is, :]\n", 553 | "detrended_year_growth(y::AbstractVector) = vec(sum(lagmat(y, 0:3), dims=2))\n", 554 | "\n", 555 | "find_row_number(date::Tuple{Int, Int}, dates) =\n", 556 | " findall([date == dataset.calds[i] for i=1:length(dataset.calds)])[1]" 557 | ] 558 | }, 559 | { 560 | "cell_type": "code", 561 | "execution_count": null, 562 | "metadata": {}, 563 | "outputs": [], 564 | "source": [ 565 | "function compute_r2(y::AbstractArray, e::AbstractVector)\n", 566 | " ssr = dot(e, e)\n", 567 | " tss = dot(y.-mean(y), y.-mean(y))\n", 568 | " return 1-(ssr/tss), ssr, tss\n", 569 | "end" 570 | ] 571 | }, 572 | { 573 | "cell_type": "markdown", 574 | "metadata": {}, 575 | "source": [ 576 | "##### `compute_bw_weight`\n", 577 | "compute Tukey’s biweight filter $w(L)$:\n", 578 | "$$w_j=\\left\\{\\begin{array}{ll}\n", 579 | "c\\left(1-\\left(\\frac{j}{B}\\right)^2\\right)^2 &\\text{ for }|j|\\leq B\\\\\n", 580 | "0 &\\text{ otherwise }\n", 581 | "\\end{array}\\right.$$\n", 582 | "where $B$ is the bandwidth (truncation) parameter and $c$ is a normalization constant such that $w(1)=\\sum_{j=-B}^B w_j=1$." 583 | ] 584 | }, 585 | { 586 | "cell_type": "code", 587 | "execution_count": null, 588 | "metadata": {}, 589 | "outputs": [], 590 | "source": [ 591 | "function compute_bw_weight(B::Integer)\n", 592 | " bw_weight = zeros(2B+1)\n", 593 | " for i=0:B\n", 594 | " bw_weight[B+1+i] = bw_weight[B+1-i] = (1-(i/B)^2)^2\n", 595 | " end\n", 596 | " bw_weight = bw_weight./sum(bw_weight) # make normalization\n", 597 | " return bw_weight\n", 598 | "end" 599 | ] 600 | }, 601 | { 602 | "cell_type": "markdown", 603 | "metadata": {}, 604 | "source": [ 605 | "##### `compute_gain`\n", 606 | "Calculate the spectral gain $\\|w(e^{i\\lambda})\\| $ of filter $w(L)$ at frequency $\\lambda$ where $\\|\\cdot\\|$ is complex norm \n", 607 | "\n", 608 | "$$gain=\\left\\|w_{-B}e^{-Bi\\lambda}+w_{-(B-1)}e^{-(B-1)i\\lambda}+\\dots+w_{0}+\\dots+w_{B-1}e^{(B-1)i\\lambda}+w_{B}e^{Bi\\lambda}\\right\\|$$\n", 609 | "###### Detail\n", 610 | "\n", 611 | "###### Arguments\n", 612 | "- `w`: weight \n", 613 | "- `lambda`: frequency " 614 | ] 615 | }, 616 | { 617 | "cell_type": "code", 618 | "execution_count": null, 619 | "metadata": {}, 620 | "outputs": [], 621 | "source": [ 622 | "function compute_gain(w::AbstractVector, lambda::Real)\n", 623 | " B = (length(w)-1)/2\n", 624 | " z = exp(lambda*im)\n", 625 | " z1 = exp(-lambda*im*B)\n", 626 | " gain = w[1]*z1\n", 627 | " for i = 2:length(w)\n", 628 | " z1 = z1*z\n", 629 | " gain = gain + w[i]*z1\n", 630 | " end\n", 631 | " return abs(gain)\n", 632 | "end" 633 | ] 634 | }, 635 | { 636 | "cell_type": "markdown", 637 | "metadata": {}, 638 | "source": [ 639 | "##### `bai_ng_critetion`\n" 640 | ] 641 | }, 642 | { 643 | "cell_type": "code", 644 | "execution_count": null, 645 | "metadata": {}, 646 | "outputs": [], 647 | "source": [ 648 | "function bai_ng_criterion(m::DFMModel)\n", 649 | " fes = m.fes\n", 650 | " nbar = fes.nobs/fes.T # average observation per period\n", 651 | " g = log(min(nbar, fes.T))*(nbar+fes.T)/fes.nobs\n", 652 | " bn_icp = log(fes.ssr/fes.nobs)+ m.nfac_t*g\n", 653 | " return bn_icp\n", 654 | "end" 655 | ] 656 | }, 657 | { 658 | "cell_type": "markdown", 659 | "metadata": {}, 660 | "source": [ 661 | "##### `FactorNumberEstimateStats`" 662 | ] 663 | }, 664 | { 665 | "cell_type": "code", 666 | "execution_count": null, 667 | "metadata": {}, 668 | "outputs": [], 669 | "source": [ 670 | "struct FactorNumberEstimateStats\n", 671 | " bn_icp\n", 672 | " ssr_static\n", 673 | " R2_static\n", 674 | " aw_icp\n", 675 | " ssr_dynamic\n", 676 | " R2_dynamic\n", 677 | " tss::Float64\n", 678 | " nobs::Int\n", 679 | " T::Int\n", 680 | "end" 681 | ] 682 | }, 683 | { 684 | "cell_type": "markdown", 685 | "metadata": {}, 686 | "source": [ 687 | "##### `estimate_factor_numbers`\n", 688 | "###### Arguments\n", 689 | "- `m::DFMModel`: `DFMModel` specifying the model except number of unobservable factors." 690 | ] 691 | }, 692 | { 693 | "cell_type": "code", 694 | "execution_count": null, 695 | "metadata": {}, 696 | "outputs": [], 697 | "source": [ 698 | "function estimate_factor_numbers(m::DFMModel, nfacs::Union{Real, AbstractVector})\n", 699 | " max_nfac = maximum(nfacs)\n", 700 | " bn_icp = Vector{Union{Missing, Float64}}(undef, max_nfac)\n", 701 | " ssr_static = Vector{Float64}(undef, max_nfac)\n", 702 | " R2_static = Matrix{Union{Missing, Float64}}(undef, m.fes.ns, max_nfac)\n", 703 | " aw_icp = Matrix{Union{Missing, Float64}}(undef, max_nfac, max_nfac)\n", 704 | " ssr_dynamic = Matrix{Union{Missing, Float64}}(undef, max_nfac, max_nfac)\n", 705 | " R2_dynamic = Array{Union{Missing, Float64}}(undef, m.fes.ns, max_nfac, max_nfac)\n", 706 | "\n", 707 | "# global tss, nobs, T\n", 708 | " for (i, nfac) = enumerate(1:max_nfac)\n", 709 | " dfmm = DFMModel(m.data, m.inclcode,\n", 710 | " m.nt_min_factor_estimation, m.nt_min_factorloading_estimation,\n", 711 | " m.initperiod, m.lastperiod, m.nfac_o, nfac, m.tol, m.n_uarlag, m.n_factorlag)\n", 712 | " estimate_factor!(dfmm)\n", 713 | " bn_icp[i] = bai_ng_criterion(dfmm)\n", 714 | " ssr_static[i] = dfmm.fes.ssr\n", 715 | " R2_static[:, i] = dfmm.fes.R2\n", 716 | " aw_icp[1:nfac, i], ssr_dynamic[1:nfac, i], R2_dynamic[:, 1:nfac, i] =\n", 717 | " amengual_watson_test(dfmm, 4)\n", 718 | " global tss = dfmm.fes.tss\n", 719 | " global nobs = dfmm.fes.nobs\n", 720 | " global T = dfmm.fes.T\n", 721 | " end\n", 722 | " return FactorNumberEstimateStats(bn_icp, ssr_static, R2_static,\n", 723 | " aw_icp, ssr_dynamic, R2_dynamic,\n", 724 | " tss, nobs, T)\n", 725 | "end" 726 | ] 727 | }, 728 | { 729 | "cell_type": "code", 730 | "execution_count": null, 731 | "metadata": {}, 732 | "outputs": [], 733 | "source": [ 734 | "function amengual_watson_test(m::DFMModel, nper::Integer)\n", 735 | " factor = m.factor\n", 736 | " T, ns, nfac_static = m.T, m.fes.ns, m.nfac_t\n", 737 | " nvar_lag = m.factor_var_model.nlag\n", 738 | " est_data = m.data[:, m.inclcode.==1]\n", 739 | "\n", 740 | " # Construct lags of factors and residuals for est_data\n", 741 | " x = [ones(T) lagmat(factor, 1:nvar_lag)]\n", 742 | " est_data_res = Array{Union{Missing, Float64}}(undef, T, ns)\n", 743 | " for is = 1:ns\n", 744 | " tmp, usedrows = drop_missing_row([est_data[:, is] x])\n", 745 | " y = tmp[:, 1]\n", 746 | " z = tmp[:, 2:end]\n", 747 | " ndf = size(z, 1)-size(z, 2)\n", 748 | " if ndf >= m.nt_min_factor_estimation # Minimum degrees of freedom for series\n", 749 | " b, e = ols(y, z)\n", 750 | " est_data_res[findall(vec(usedrows)), is] = e\n", 751 | " end\n", 752 | " end\n", 753 | "\n", 754 | " # Carry out calculations for number of dynamic factors\n", 755 | " ssr = Array{Float64}(undef, nfac_static)\n", 756 | " r2 = Array{Union{Missing, Float64}}(undef, ns, nfac_static)\n", 757 | " aw = Array{Float64}(undef, nfac_static)\n", 758 | " for nfac = 1:nfac_static\n", 759 | " dfmm = DFMModel(est_data_res, ones(count(m.inclcode.==1)),\n", 760 | " m.nt_min_factor_estimation, m.nt_min_factorloading_estimation,\n", 761 | " m.initperiod+4, m.lastperiod, 0, nfac, m.tol, m.n_uarlag, m.n_factorlag)\n", 762 | " estimate_factor!(dfmm)\n", 763 | " aw[nfac] = bai_ng_criterion(dfmm)\n", 764 | " ssr[nfac] = dfmm.fes.ssr\n", 765 | " r2[:, nfac] = dfmm.fes.R2\n", 766 | " end\n", 767 | " return aw, ssr, r2\n", 768 | "end" 769 | ] 770 | }, 771 | { 772 | "cell_type": "markdown", 773 | "metadata": {}, 774 | "source": [ 775 | "##### `impulse_response`\n", 776 | "compute impulse response function of `VARModel` with one standard error shock\n", 777 | "$$y_t = Qz_t$$\n", 778 | "$$z_t = Mz_{t-1}+Gu_t$$\n", 779 | "###### Arguments\n", 780 | "- `varm::VARModel`\n", 781 | "- `shock_ids`: `<:AbstractVector` or `<:Real` or `:all`.\n", 782 | "- `T::Integer`: horizon of IRF\n", 783 | "###### Return\n", 784 | "- `irfs::Array` IRFs. It is 2D if `shock_ids<:Real` and 3D otherwise. First dimension is each series, second dimension is time, third dimension is type of shock (if exist)." 785 | ] 786 | }, 787 | { 788 | "cell_type": "code", 789 | "execution_count": null, 790 | "metadata": {}, 791 | "outputs": [], 792 | "source": [ 793 | "function impulse_response(varm::VARModel, shock_ids::AbstractVector, T::Integer)\n", 794 | " irfs = Array{Float64, 3}(undef, size(varm.Q, 1), T, length(shock_ids))\n", 795 | " for (i, shock_id) in enumerate(shock_ids)\n", 796 | " compute_irf_single_shock!(irfs, varm, i, shock_id, T)\n", 797 | " end\n", 798 | " return irfs\n", 799 | "end" 800 | ] 801 | }, 802 | { 803 | "cell_type": "code", 804 | "execution_count": null, 805 | "metadata": {}, 806 | "outputs": [], 807 | "source": [ 808 | "function compute_irf_single_shock!(irfs::AbstractArray, varm::VARModel,\n", 809 | " i::Integer, shock_id::Integer, T::Integer)\n", 810 | " x = varm.G[:, shock_id]\n", 811 | " for t = 1:T\n", 812 | " irfs[:, t, i] = varm.Q * x\n", 813 | " x .= varm.M * x\n", 814 | " end\n", 815 | " return nothing\n", 816 | "end\n", 817 | "function impulse_response(varm::VARModel, shock_id::Real, T::Integer)\n", 818 | " irfs = Matrix{Float64}(undef, size(varm.Q, 1), T)\n", 819 | " compute_irf_single_shock!(irfs, varm, x, 1, shock_id, T)\n", 820 | " return irfs\n", 821 | "end\n", 822 | "impulse_response(varm::VARModel, shock_id::Symbol, T::Integer) =\n", 823 | " impulse_response(varm, Val(shock_id), T)\n", 824 | "impulse_response(varm::VARModel, shock_ids::Val{:all}, T::Integer) =\n", 825 | " impulse_response(varm, 1:size(varm.G, 2), T)\n" 826 | ] 827 | }, 828 | { 829 | "cell_type": "markdown", 830 | "metadata": {}, 831 | "source": [ 832 | "##### `form_kernel`\n", 833 | "create kernel $$1-\\frac{v}{q+1}$$ for all $i\\leq q$.\n", 834 | "\n", 835 | "###### Arguments\n", 836 | "- `q::Integer`: bandwidth parameter" 837 | ] 838 | }, 839 | { 840 | "cell_type": "code", 841 | "execution_count": null, 842 | "metadata": {}, 843 | "outputs": [], 844 | "source": [ 845 | "# abstract type KernelType end\n", 846 | "# struct White <: KernelType end\n", 847 | "# struct Triangular <: KernelType \n", 848 | "# nma::Int\n", 849 | "# end\n", 850 | "# struct Rectangular <: KernelType\n", 851 | "# nma::Int\n", 852 | "# end" 853 | ] 854 | }, 855 | { 856 | "cell_type": "code", 857 | "execution_count": null, 858 | "metadata": {}, 859 | "outputs": [], 860 | "source": [ 861 | "form_kernel(q::Integer) = [1-i/(q+1) for i = 0:q]" 862 | ] 863 | }, 864 | { 865 | "cell_type": "code", 866 | "execution_count": null, 867 | "metadata": {}, 868 | "outputs": [], 869 | "source": [ 870 | "# form_kernel(kt::Triangular) = [1-i/(kt.nma+1) for i = 0:kt.nma]\n", 871 | "# form_kernel(kt::Rectangular) = ones(kt.nma+1)\n", 872 | "# form_kernel(kt::White) = ones(1, 1)" 873 | ] 874 | }, 875 | { 876 | "cell_type": "markdown", 877 | "metadata": {}, 878 | "source": [ 879 | "##### `compute_chow`\n", 880 | "Letting $D_t$ take 1 in post sample period ($t> T_{break}$) and 0 otherwise ($t\\leq T_{break}$), regress \n", 881 | "$$ y_t = \\beta X_t+\\gamma(X_tD_t)$$\n", 882 | "and compute statics for F-test of $\\gamma=0$" 883 | ] 884 | }, 885 | { 886 | "cell_type": "code", 887 | "execution_count": null, 888 | "metadata": {}, 889 | "outputs": [], 890 | "source": [ 891 | "function compute_chow(y::AbstractVector, X::AbstractArray,\n", 892 | " q::Integer, T_break::Integer)\n", 893 | " k = size(X, 2)\n", 894 | " T = length(y)\n", 895 | " D = [zeros(T_break);\n", 896 | " ones(T-T_break)]\n", 897 | " betahat, vbeta, _ = regress_hac(y, [X X.*D], q)\n", 898 | " gamma = betahat[k+1:end]\n", 899 | " v1 = vbeta[k+1:end, k+1:end]\n", 900 | " chow = gamma'/v1*gamma\n", 901 | " return chow\n", 902 | "end" 903 | ] 904 | }, 905 | { 906 | "cell_type": "markdown", 907 | "metadata": {}, 908 | "source": [ 909 | "##### `regress_hac`" 910 | ] 911 | }, 912 | { 913 | "cell_type": "code", 914 | "execution_count": null, 915 | "metadata": {}, 916 | "outputs": [], 917 | "source": [ 918 | "function regress_hac(y::AbstractVector, X::AbstractArray, q::Integer)\n", 919 | " betahat = X\\y\n", 920 | " ehat = y - X*betahat\n", 921 | " vbeta, se_beta = hac(ehat, X, q)\n", 922 | " return betahat, vbeta, se_beta\n", 923 | "end" 924 | ] 925 | }, 926 | { 927 | "cell_type": "markdown", 928 | "metadata": {}, 929 | "source": [ 930 | "##### `hac`\n", 931 | "\n", 932 | "###### Arguments\n", 933 | "- `u::AbstractVector`: `T`\n", 934 | "- `X::AbstractArray`: `Txk`\n", 935 | "- `q::Integer`: bandwidth parameter" 936 | ] 937 | }, 938 | { 939 | "cell_type": "code", 940 | "execution_count": null, 941 | "metadata": {}, 942 | "outputs": [], 943 | "source": [ 944 | "function hac(u::AbstractVector, X::AbstractArray, q::Integer)\n", 945 | " kernel = form_kernel(q)\n", 946 | " z = X.*u\n", 947 | " vbeta = form_hscrc(z, X, kernel, q)\n", 948 | " se_beta = sqrt.(diag(vbeta))\n", 949 | " return vbeta, se_beta\n", 950 | "end" 951 | ] 952 | }, 953 | { 954 | "cell_type": "markdown", 955 | "metadata": {}, 956 | "source": [ 957 | "##### `form_hscrc`\n", 958 | "form hetero-serial correlation robust covariance\n", 959 | "\n", 960 | "###### Arguments\n", 961 | "- `X`: `Txk` matrix\n", 962 | "- `kernel`: weight of length `T`\n", 963 | "\n", 964 | "###### Returns\n", 965 | "- `vbeta`: `kxk` hetero-serial correlation robust covariance" 966 | ] 967 | }, 968 | { 969 | "cell_type": "code", 970 | "execution_count": null, 971 | "metadata": {}, 972 | "outputs": [], 973 | "source": [ 974 | "function form_hscrc(z::AbstractArray, X, kernel, q::Integer)\n", 975 | " k = size(X, 2)\n", 976 | " v = zeros(k, k)\n", 977 | " for i = -q:0\n", 978 | " r1 = 1\n", 979 | " r2 = size(z, 1) + i\n", 980 | " v = v .+ kernel[-i+1]*z[r1:r2, :]'*z[r1-i:r2-i, :]\n", 981 | " end\n", 982 | " for i = 1:q\n", 983 | " r1 = 1+i\n", 984 | " r2 = size(z, 1)\n", 985 | " v = v .+ kernel[i+1]*z[r1:r2, :]'*z[r1-i:r2-i, :]\n", 986 | " end\n", 987 | " XX = X'*X # k x k\n", 988 | " vbeta = XX\\v/(XX') # k x k\n", 989 | " return vbeta\n", 990 | "end" 991 | ] 992 | }, 993 | { 994 | "cell_type": "code", 995 | "execution_count": null, 996 | "metadata": {}, 997 | "outputs": [], 998 | "source": [ 999 | "# form_hscrc(z, X, kernel, kt::White) = \n", 1000 | "# form_hscrc(z, X, kernel, 0)\n", 1001 | "# form_hscrc(z, X, kernel, kt::Union{Triangular, Rectangular}) = \n", 1002 | "# form_hscrc(z, X, kernel, kt.nma)" 1003 | ] 1004 | }, 1005 | { 1006 | "cell_type": "markdown", 1007 | "metadata": {}, 1008 | "source": [ 1009 | "##### `compute_qlr`\n", 1010 | "Compute Quandt Likelihood Ratio (QLR) Statistic\n", 1011 | "\n", 1012 | "###### Arguments\n", 1013 | "- `X1`: regressors with fixed coefficients under both null and alternative. If all variables are varying under alternative, pass `nothing` as `x1`.\n", 1014 | "- `X2`: regressors with fixed coefficients under null and time varying coefficients under alternative\n", 1015 | "- `ccut::AbstractFloat`: must be between 0 and 0.5." 1016 | ] 1017 | }, 1018 | { 1019 | "cell_type": "code", 1020 | "execution_count": null, 1021 | "metadata": {}, 1022 | "outputs": [], 1023 | "source": [ 1024 | "function compute_qlr(y::AbstractVector, X1::Union{Nothing, AbstractArray},\n", 1025 | " X2::AbstractArray,\n", 1026 | " ccut::AbstractFloat, q::Integer)\n", 1027 | " T = length(y)\n", 1028 | " n1t = floor(Int, ccut*T)\n", 1029 | " n2t = T - n1t\n", 1030 | " k = size(X2, 2) #number of coefficients that can vary\n", 1031 | " if X1 == nothing\n", 1032 | " l = 0\n", 1033 | " X = X2\n", 1034 | " else\n", 1035 | " l = length(X1)\n", 1036 | " X = [X1,X2] # T x (l+k) where l=length(X1)\n", 1037 | " end\n", 1038 | " lr = Vector{Float64}(undef, n2t-n1t+1)\n", 1039 | " lrr = Vector{Float64}(undef, n2t-n1t+1)\n", 1040 | " for (i, T_break) in enumerate(n1t:n2t)\n", 1041 | " lr[i] = compute_chow(y, X, 0, T_break)\n", 1042 | " lrr[i] = compute_chow(y, X, q, T_break)\n", 1043 | " end\n", 1044 | " lm = maximum(lr)\n", 1045 | " lmr = maximum(lrr)\n", 1046 | " return lm, lmr\n", 1047 | "end" 1048 | ] 1049 | }, 1050 | { 1051 | "cell_type": "markdown", 1052 | "metadata": {}, 1053 | "source": [ 1054 | "##### Constraint" 1055 | ] 1056 | }, 1057 | { 1058 | "cell_type": "code", 1059 | "execution_count": null, 1060 | "metadata": {}, 1061 | "outputs": [], 1062 | "source": [ 1063 | "struct LambdaConstraint\n", 1064 | " indices::Vector{Int}\n", 1065 | " R::Matrix{Float64}\n", 1066 | " r::Vector{Float64}\n", 1067 | " r_std::Vector{Float64}\n", 1068 | "end" 1069 | ] 1070 | }, 1071 | { 1072 | "cell_type": "markdown", 1073 | "metadata": {}, 1074 | "source": [ 1075 | "##### `construct_constraint`\n", 1076 | "construct constraint on lambda\n", 1077 | "###### Arguments\n", 1078 | "- `varnames`: \n", 1079 | "- `used_varnames`\n", 1080 | "- `R`\n", 1081 | "- `r`" 1082 | ] 1083 | }, 1084 | { 1085 | "cell_type": "code", 1086 | "execution_count": null, 1087 | "metadata": {}, 1088 | "outputs": [], 1089 | "source": [ 1090 | "function construct_constraint(varnames::AbstractVector{String},\n", 1091 | " used_varnames, R, r)\n", 1092 | " n_constr = length(varnames)\n", 1093 | " n_R = size(R, 1)\n", 1094 | " indices = Vector{Int}(undef, length(varnames)*n_R)\n", 1095 | " for (i, varname) in enumerate(varnames)\n", 1096 | " index = findall(used_varnames .== varname)\n", 1097 | " [indices[n_R*(i-1)+j] = index[1] for j = 1:n_R]\n", 1098 | " end\n", 1099 | " R_stack = repeat(R, n_constr)\n", 1100 | " r_stack = repeat(r, n_constr)\n", 1101 | " return LambdaConstraint(indices, R_stack, r_stack, similar(r_stack))\n", 1102 | "end" 1103 | ] 1104 | }, 1105 | { 1106 | "cell_type": "markdown", 1107 | "metadata": {}, 1108 | "source": [ 1109 | "##### `impose_constraint!`\n", 1110 | "###### Arguments\n", 1111 | "- `b`: unconstrained coefficients\n", 1112 | "- `R`: see above\n", 1113 | "- `r`: see above\n", 1114 | "- `lam_col`:\n", 1115 | "- `i::Integer`: index of series\n", 1116 | "- `X` regressors" 1117 | ] 1118 | }, 1119 | { 1120 | "cell_type": "code", 1121 | "execution_count": null, 1122 | "metadata": {}, 1123 | "outputs": [], 1124 | "source": [ 1125 | "function impose_constraint!(b, i::Integer, X, \n", 1126 | " lam_constr::LambdaConstraint,\n", 1127 | " forwhat::Symbol)\n", 1128 | " lam_col= lam_constr.indices\n", 1129 | " used_series = vec(lam_col.==i)\n", 1130 | " R_tmp, r = get_Rr(lam_constr, used_series, Val(forwhat))\n", 1131 | " r_tmp = r[used_series]\n", 1132 | " tmp = (X'*X)\\R_tmp'\n", 1133 | " b .= b .- tmp/(R_tmp*tmp)*(R_tmp*b-r_tmp)\n", 1134 | " return nothing\n", 1135 | "end\n", 1136 | "impose_constraint!(b, i::Integer, X, lam_constr::Nothing, forwhat::Symbol) = nothing\n", 1137 | "\n", 1138 | "get_Rr(lam_constr, used_series, ::Val{:factor}) = \n", 1139 | " (hcat(lam_constr.R[used_series, :]), lam_constr.r_std)\n", 1140 | "get_Rr(lam_constr, used_series, ::Val{:loading}) = \n", 1141 | " (hcat(lam_constr.R[used_series, :], zeros(count(used_series))), lam_constr.r)" 1142 | ] 1143 | }, 1144 | { 1145 | "cell_type": "markdown", 1146 | "metadata": {}, 1147 | "source": [ 1148 | "##### `impose_constraint!`\n", 1149 | "this version updates residual term as well\n", 1150 | "###### Arguments\n", 1151 | "- `b`: unconstrained coefficients\n", 1152 | "- `e`: pre-allocated residual term vector\n", 1153 | "- `y`: dependent variable\n", 1154 | "- `R`: see above\n", 1155 | "- `r`: see above\n", 1156 | "- `lam_col`:\n", 1157 | "- `i::Integer`: index of series\n", 1158 | "- `X` regressors" 1159 | ] 1160 | }, 1161 | { 1162 | "cell_type": "code", 1163 | "execution_count": null, 1164 | "metadata": {}, 1165 | "outputs": [], 1166 | "source": [ 1167 | "function impose_constraint!(b, e, y, i::Integer, X, \n", 1168 | " lam_constr::LambdaConstraint, forwhat::Symbol)\n", 1169 | " impose_constraint!(b, i, X, lam_constr, forwhat)\n", 1170 | " e .= y - X*b\n", 1171 | " return nothing\n", 1172 | "end\n", 1173 | "impose_constraint!(b, e, y, i::Integer, X, lam_constr::Nothing, forwhat::Symbol) = nothing" 1174 | ] 1175 | }, 1176 | { 1177 | "cell_type": "code", 1178 | "execution_count": null, 1179 | "metadata": {}, 1180 | "outputs": [], 1181 | "source": [ 1182 | "function standardize_constraint!(lam_constr::LambdaConstraint, xdatastd)\n", 1183 | " lam_constr.r_std .= lam_constr.r ./ xdatastd[lam_constr.indices]\n", 1184 | " return nothing\n", 1185 | "end\n", 1186 | "standardize_constraint!(lam_constr::Nothing, xdatastd) = nothing" 1187 | ] 1188 | } 1189 | ], 1190 | "metadata": { 1191 | "hide_input": false, 1192 | "kernelspec": { 1193 | "display_name": "Julia 1.0.2", 1194 | "language": "julia", 1195 | "name": "julia-1.0" 1196 | }, 1197 | "language_info": { 1198 | "file_extension": ".jl", 1199 | "mimetype": "application/julia", 1200 | "name": "julia", 1201 | "version": "1.0.2" 1202 | }, 1203 | "toc": { 1204 | "colors": { 1205 | "hover_highlight": "#DAA520", 1206 | "running_highlight": "#FF0000", 1207 | "selected_highlight": "#FFD700" 1208 | }, 1209 | "moveMenuLeft": true, 1210 | "nav_menu": { 1211 | "height": "334.617px", 1212 | "width": "252.233px" 1213 | }, 1214 | "navigate_menu": true, 1215 | "number_sections": true, 1216 | "sideBar": true, 1217 | "threshold": 4, 1218 | "toc_cell": false, 1219 | "toc_section_display": "block", 1220 | "toc_window_display": false 1221 | } 1222 | }, 1223 | "nbformat": 4, 1224 | "nbformat_minor": 2 1225 | } 1226 | -------------------------------------------------------------------------------- /readin_functions.jl: -------------------------------------------------------------------------------- 1 | using ExcelReaders 2 | using Statistics 3 | import Dates 4 | using StatsBase 5 | using LinearAlgebra 6 | 7 | abstract type DataFrequency end 8 | struct MonthlyData <: DataFrequency 9 | nobs::Int # number of observations 10 | ns::Int # number of series 11 | end 12 | 13 | struct QuarterlyData <: DataFrequency 14 | nobs::Int # number of observations 15 | ns::Int # number of series 16 | end 17 | 18 | abstract type DetrendMethod end 19 | struct BiWeight{TR <: Real} <: DetrendMethod 20 | weight::TR 21 | end 22 | struct Mean <: DetrendMethod end 23 | struct NoDetrend <: DetrendMethod end 24 | 25 | get_sample_periods(initvec::Array, lastvec::Array, ::MonthlyData) = 26 | 12*(lastvec[1]-initvec[1]-1)+lastvec[2]+(12-initvec[2]+1) 27 | get_sample_periods(initvec::Array, lastvec::Array, ::QuarterlyData) = 28 | 4*(lastvec[1]-initvec[1]-1)+lastvec[2]+(4-initvec[2]+1) 29 | function MonthlyData(initvec, lastvec, ns) 30 | nobs = 12*(lastvec[1]-initvec[1]-1)+lastvec[2]+(12-initvec[2]+1) 31 | return MonthlyData(nobs, ns) 32 | end 33 | function QuarterlyData(initvec, lastvec, ns) 34 | nobs = 4*(lastvec[1]-initvec[1]-1)+lastvec[2]+(4-initvec[2]+1) 35 | return QuarterlyData(nobs, ns) 36 | end 37 | """ 38 | deflate series by PCE 39 | """ 40 | function deflate_series!(labvec_long::AbstractVector, labvec_short::AbstractVector, 41 | x::AbstractVector, price_def::AbstractVector, 42 | price_def_lfe::AbstractVector, price_def_pgdp, 43 | is::Integer, defcode::Val{1}) 44 | x .= x./price_def 45 | labvec_long[is] = labvec_long[is] * " Defl by PCE Def" 46 | labvec_short[is] = "Real_" * labvec_short[is] 47 | return x 48 | end 49 | """ 50 | deflate series by PCE(LFE) 51 | """ 52 | function deflate_series!(labvec_long::AbstractVector, labvec_short::AbstractVector, 53 | x::AbstractVector, price_def::AbstractVector, 54 | price_def_lfe::AbstractVector, price_def_pgdp, 55 | is::Integer, defcode::Val{2}) 56 | x .= x./price_def_lfe 57 | labvec_long[is] = labvec_long[is] * " Defl by PCE(LFE) Def" 58 | labvec_short[is] = "Real_" * labvec_short[is] 59 | return x 60 | end 61 | """ 62 | deflate series by GDP 63 | """ 64 | function deflate_series!(labvec_long::AbstractVector, labvec_short::AbstractVector, 65 | x::AbstractVector, price_def::AbstractVector, 66 | price_def_lfe::AbstractVector, price_def_pgdp, 67 | is::Integer, defcode::Val{3}) 68 | x .= x./price_def_pgdp 69 | labvec_long[is] = labvec_long[is] * " Defl by GDP Def" 70 | labvec_short[is] = "Real_" * labvec_short[is] 71 | return x 72 | end 73 | deflate_series!(labvec_long::AbstractVector, labvec_short::AbstractVector, 74 | x::AbstractVector, price_def::AbstractVector, 75 | price_def_lfe::AbstractVector, price_def_pgdp, 76 | is::Integer, defcode::Val{0}) = x 77 | """ 78 | transform monthly data to quarterly data by averaging 79 | ##### Arguments 80 | - `data_m::AbstractArray`: monthly data. `size(y, 1) == length(date)` 81 | - `date_m::AbstractVector{Date}`: Vector of Date. 82 | """ 83 | function monthly_to_quarterly(data_m::AbstractArray, date_m::AbstractVector{Dates.Date}, ::MonthlyData) 84 | if size(data_m, 1) != length(date_m) 85 | @error "number of rows of data_m must be same as the length of date" 86 | end 87 | years = Dates.year.(date_m) 88 | date_m_quarter = transform_date.(date_m) 89 | T_q = length(unique(date_m_quarter)) 90 | data_q = Array{Union{Missing, Float64}}(undef, T_q, size(data_m, 2)) 91 | for t = 1:T_q 92 | id = (date_m_quarter .== [unique(date_m_quarter)[t]]) 93 | data_q[t, :] = mean(data_m[id, :], dims=1) 94 | end 95 | return data_q, unique(date_m_quarter) 96 | end 97 | function monthly_to_quarterly(data_q::AbstractArray, date_q::AbstractVector{Dates.Date}, ::QuarterlyData) 98 | date_q_quarter = transform_date.(date_q) 99 | return data_q, date_q_quarter 100 | end 101 | transform_date(date::Dates.Date) = (Dates.year(date), get_quarter(date)) 102 | get_quarter(date::Dates.Date) = cld(Dates.month(date), 3) 103 | 104 | # Level 105 | transform(x::AbstractVector, ::Val{1}) = x 106 | # First Difference 107 | transform(x::AbstractVector, ::Val{2}) = vcat(missing, x[2:end].-x[1:end-1]) 108 | # Second Difference 109 | transform(x::AbstractVector, ::Val{3}) = vcat(missing, missing, x[3:end]-2*x[2:end-1].+x[1:end-2]) 110 | # Log-level 111 | transform(x::AbstractVector, ::Val{4}) = log.(x) 112 | # Log-First-Difference 113 | transform(x::AbstractVector, ::Val{5}) = transform(log.(x), Val(2)) 114 | # Log-Second-Difference 115 | transform(x::AbstractVector, ::Val{6}) = transform(log.(x), Val(3)) 116 | 117 | function transform!(data::AbstractMatrix, tcode::AbstractVector) 118 | if size(data, 2) != length(tcode) 119 | @error "size(data, 2) must equal to length(tcode)" 120 | end 121 | for i = 1:size(data, 2) 122 | data[:, i] = transform(data[:, i], Val(tcode[i])) 123 | end 124 | return nothing 125 | end 126 | # Threshold multiple for IQR is 4.5 127 | adjust_outlier!(x::AbstractVector, ::Val{1}, io_method::Integer) = 128 | adjust_outlier!(x, 4.5, io_method) 129 | # Threshold multiple for IQR is 3 130 | adjust_outlier!(x::AbstractVector, ::Val{2}, io_method::Integer) = 131 | adjust_outlier!(x, 3, io_method) 132 | adjust_outlier!(x::AbstractVector, ::Val{0}, io_method::Integer) = nothing 133 | 134 | function adjust_outlier!(x::AbstractVector, thr::Real, tflag::Integer) 135 | # compute median and IQR 136 | zm = median(skipmissing(x)) 137 | iqr = quantile(skipmissing(x), 0.75) - quantile(skipmissing(x), 0.25) 138 | z = x[.!ismissing.(x)] 139 | # sort!(z) 140 | # tmp = z[ceil.(Int, length(z)*[1/4, 1/2, 3/4])] 141 | # zm = tmp[2] 142 | # iqr = tmp[3]-tmp[1] 143 | (iqr >= 1e-6) || @error "error in adjusting outlier" 144 | 145 | ya = abs.(x .- zm) 146 | i_outlier = ya .> (thr*iqr) 147 | adjust_x!(x, i_outlier, zm, thr, iqr, Val(tflag)) 148 | return nothing 149 | end 150 | """ 151 | replace with missing value 152 | """ 153 | function adjust_x!(x::AbstractVector, i_outlier, zm, thr, iqr, ::Val{0}) 154 | x[i_outlier] = missing 155 | return nothing 156 | end 157 | """ 158 | replace with maximum or minimum value 159 | """ 160 | function adjust_x!(x, i_outlier, zm, thr, iqr, ::Val{1}) 161 | isign = Int.(x[i_outlier] .> 0) - Int.(x[i_outlier] .< 0) 162 | yt = zm .+ isign.*(thr*iqr) 163 | x[i_outlier] = yt 164 | return nothing 165 | end 166 | """ 167 | replace with median value 168 | """ 169 | function adjust_x!(x, i_outlier, zm, thr, iqr, ::Val{2}) 170 | x[i_outlier] = zm 171 | return nothing 172 | end 173 | """ 174 | replace with local median (obs + or - 3 on each side) 175 | """ 176 | function adjust_x!(x, i_outlier, zm, thr, iqr, ::Val{3}) 177 | # Compute rolling median 178 | iwin = 3 # Window on either side 179 | for i in findall(i_outlier) 180 | j1 = max(1, i-iwin) 181 | j2 = min(length(x), i+iwin) 182 | x[i] = median(skipmissing(x[j1:j2])) 183 | end 184 | return nothing 185 | end 186 | """ 187 | replace with one-sided median (5 preceding obs) 188 | """ 189 | function adjust_x!(x, i_outlier, zm, thr, iqr, ::Val{4}) 190 | # Compute rolling median 191 | iwin=5 # Window on one side 192 | for i in findall(i_outlier .& .!ismissing.(i_outlier)) 193 | j1 = max(1, i-iwin) 194 | j2 = i 195 | x[i] = median(skipmissing(x[j1:j2])) 196 | end 197 | return nothing 198 | end 199 | 200 | readin_data(frequency::MonthlyData, datatype) = 201 | readin_monthly_data(true, 4, 2, 6, [1, 2, 3, 5], frequency, datatype) 202 | readin_data(frequency::QuarterlyData, datatype) = 203 | readin_monthly_data(true, 4, 2, 5, [1, 2, 3, 5], frequency, datatype) 204 | readdata(::MonthlyData) = readxlsheet("data/hom_fac_1.xlsx", "Monthly") 205 | readdata(::QuarterlyData) = readxlsheet("data/hom_fac_1.xlsx", "Quarterly") 206 | function readin_monthly_data(correct_outlier::Bool, 207 | io_method::Integer, # Replacement of outliers, 208 | ndesc::Integer, # number of "description" rows in Excel file 209 | ncodes::Integer, # number of rows of "codes" in Excel file 210 | cat_include::AbstractVector, 211 | frequency::DataFrequency, 212 | datatype::Symbol 213 | ) 214 | dnobs = frequency.nobs 215 | ns_m = frequency.ns 216 | ## read monthly data 217 | wholedata = readdata(frequency) 218 | 219 | maindata = wholedata[1:1+ndesc+ncodes+dnobs, 2:ns_m+1] 220 | date = Dates.Date.(wholedata[1+ndesc+ncodes+1:1+ndesc+ncodes+dnobs, 1]) 221 | 222 | namevec, labvec_long, labvec_short, aggcode, tcode, defcode, 223 | outliercode, includecode, catcode = get_headers(maindata, frequency) 224 | datamat_m = maindata[1+ndesc+ncodes+1:end, :] 225 | datamat_m[.!isa.(datamat_m, Float64)] .= missing # set missing values 226 | datamat_m = convert(Array{Union{Missing, Float64}, 2}, datamat_m) 227 | 228 | # Price Deflators 229 | price_def, price_def_lfe, price_def_pgdp = 230 | get_deflators(namevec, datamat_m, frequency) 231 | 232 | # Standardize Killian Activity Index 233 | standardize_killian!(datamat_m, namevec, frequency) 234 | 235 | usedcols_id = (includecode .!=0) .& [in(cat, cat_include) for cat in floor.(catcode)] 236 | usedcols_id = get_usedcols_id(includecode, catcode, cat_include, Val(datatype)) 237 | data_m = datamat_m[:, usedcols_id] 238 | bpdefcode = defcode[usedcols_id] 239 | bpoutlier = outliercode[usedcols_id] 240 | bplab_long = labvec_long[usedcols_id] 241 | bplab_short = labvec_short[usedcols_id] 242 | ns = size(data_m, 2) 243 | 244 | [deflate_series!(bplab_long, bplab_short, view(data_m, :, i), 245 | price_def, price_def_lfe, price_def_pgdp, i, Val(bpdefcode[i])) for i =1:ns] 246 | data_q, date_q = monthly_to_quarterly(data_m, date, frequency) # Temporally aggregated to quarterly 247 | bpdata_raw = copy(data_q) 248 | transform!(data_q, tcode[usedcols_id]) # Transform .. log, first difference, etc. 249 | bpdata_noa = copy(data_q) 250 | !correct_outlier || [adjust_outlier!(view(data_q, :, i), Val(bpoutlier[i]), io_method) for i = 1:ns] 251 | return data_q, bpdata_raw, bpdata_noa, date_q, catcode[usedcols_id], 252 | includecode[usedcols_id], namevec[usedcols_id] 253 | end 254 | get_usedcols_id(includecode, catcode, cat_include, ::Val{:Real}) = 255 | (includecode .!=0) .& [in(cat, cat_include) for cat in floor.(catcode)] 256 | get_usedcols_id(includecode, catcode, cat_include, ::Val{:All}) = includecode .!=0 257 | 258 | function get_headers(maindata, ::MonthlyData) 259 | namevec = uppercase.(convert(Vector{String}, maindata[1, :])) 260 | labvec_long = convert(Vector{String}, maindata[2, :]) # Vector of "long" labels 261 | labvec_short = convert(Vector{String}, maindata[3, :]) # Vector of "short" labels 262 | aggcode = Int.(maindata[4, :]) # Temporal aggregation code 263 | tcode = Int.(maindata[5, :]) # transformation code 264 | defcode = Int.(maindata[6, :]) # code for price deflation (nominal to real) 265 | outliercode = Int.(maindata[7, :]) # code for outlier adjustment 266 | includecode = Int.(maindata[8, :]) # code for use in factor estimation 267 | catcode = maindata[9, :] # category code for ordering variables 268 | return namevec, labvec_long, labvec_short, aggcode, tcode, defcode, 269 | outliercode, includecode, catcode 270 | end 271 | 272 | function get_headers(maindata, ::QuarterlyData) 273 | namevec = uppercase.(convert(Vector{String}, maindata[1, :])) 274 | labvec_long = convert(Vector{String}, maindata[2, :]) # Vector of "long" labels 275 | labvec_short = convert(Vector{String}, maindata[3, :]) # Vector of "short" labels 276 | tcode = Int.(maindata[4, :]) # transformation code 277 | defcode = Int.(maindata[5, :]) # code for price deflation (nominal to real) 278 | outliercode = Int.(maindata[6, :]) # code for outlier adjustment 279 | includecode = Int.(maindata[7, :]) # code for use in factor estimation 280 | catcode = maindata[8, :] # category code for ordering variables 281 | return namevec, labvec_long, labvec_short, nothing, tcode, defcode, 282 | outliercode, includecode, catcode 283 | end 284 | 285 | function get_deflators(namevec, datamat, ::MonthlyData) 286 | j = findall(namevec.=="PCEPI")[1] # PCE Price Deflator 287 | price_def = datamat[:, j] 288 | j = findall(namevec.=="PCEPILFE")[1] # PCE-xFE Price Deflator 289 | price_def_lfe = datamat[:, j] 290 | return price_def, price_def_lfe, NaN 291 | end 292 | 293 | function get_deflators(namevec, datamat, ::QuarterlyData) 294 | j = findall(namevec.=="PCECTPI")[1] # PCE Price Deflator 295 | price_def = datamat[:, j] 296 | j = findall(namevec.=="JCXFE")[1] # PCE Excl. food and energy 297 | price_def_lfe = datamat[:, j] 298 | j = findall(namevec.=="GDPCTPI")[1] # GDP Deflator 299 | price_def_pgdp = datamat[:, j] 300 | return price_def, price_def_lfe, price_def_pgdp 301 | end 302 | 303 | """ 304 | Standardize Killian Activity Index 305 | """ 306 | function standardize_killian!(datamat_m, namevec, ::MonthlyData) 307 | j = findall(namevec.=="GLOBAL_ACT")[1] # Killian Index 308 | tmp = datamat_m[:, j] 309 | tmp1 = tmp[.!ismissing.(tmp)] 310 | tmp2 = (tmp1.-mean(tmp1))./std(tmp1) 311 | datamat_m[.!ismissing.(tmp), j] = tmp2 312 | return nothing 313 | end 314 | 315 | standardize_killian!(datamat_m, namevec, ::QuarterlyData) = nothing 316 | 317 | function detrend_var!(data::AbstractArray, bw::BiWeight) 318 | trend = Array{Union{Missing, Float64}}(undef, size(data)) 319 | for i = 1:size(data, 2) 320 | trend[:, i] = bi_weight_filter(data[:, i], bw.weight) 321 | data[:, i] = data[:, i] - trend[:, i] 322 | end 323 | return trend 324 | end 325 | 326 | 327 | function detrend_var!(data::AbstractArray, ::Mean) 328 | trend = repmat(mean(skipmissing(data), dims=1), size(data, 1), 1) 329 | data .= data .- trend 330 | return trend 331 | end 332 | 333 | detrend_var!(data::AbstractArray, ::NoDetrend) = fill(missing, size(data)) 334 | 335 | function bi_weight_filter(y::AbstractVector, weight::Real) 336 | T = length(y) 337 | ytrend = Array{Union{Missing, Float64}}(undef, T) 338 | trend = 1:T 339 | for t = findall(.!ismissing.(y)) 340 | dt = (trend .- t)/weight 341 | bw_weight = 15/16*((1 .-dt.^2).^2) # bi-weight 342 | bw_weight[abs.(dt).>=1] .= 0 343 | bw_weight_ignore_missing = bw_weight[.!ismissing.(y)] 344 | bw_weight_ignore_missing = bw_weight_ignore_missing ./ sum(bw_weight_ignore_missing) 345 | ytrend[t] = dot(bw_weight_ignore_missing, y[.!ismissing.(y)]) 346 | end 347 | return ytrend 348 | end 349 | 350 | """ 351 | ##### Arguments 352 | - `detrend_method::DetrendMethod`: method for detrending. 353 | - `datatype::Symbol`: type of read data, `:Real` or `:All` 354 | """ 355 | function readin_data(md::MonthlyData, qd::QuarterlyData, 356 | detrend_method::DetrendMethod, datatype::Symbol) 357 | data_m, bpdata_raw_m, bpdata_noa_m, date_m, bpcatcode_m, inclcode_m, namevec_m = 358 | readin_data(md, datatype) 359 | 360 | data_q, bpdata_raw_q, bpdata_noa_q, date_q, bpcatcode_q, inclcode_q, namevec_q = 361 | readin_data(qd, datatype) 362 | 363 | all(date_m .== date_q) || 364 | @error "inconsistent sample size for monthly and quarterly data" 365 | 366 | id = sortperm(vcat(bpcatcode_m, bpcatcode_q)) 367 | bpdata = hcat(data_m, data_q)[:, id] 368 | bpdata_unfiltered = copy(bpdata) 369 | bpdata_trend = detrend_var!(bpdata, detrend_method) 370 | 371 | dataset = (bpdata_raw = hcat(bpdata_raw_m, bpdata_raw_q)[:, id], 372 | bpcatcode =vcat(bpcatcode_m, bpcatcode_q)[id], 373 | bpdata = bpdata, 374 | bpdata_unfiltered =bpdata_unfiltered, 375 | bpdata_noa = hcat(bpdata_noa_m, bpdata_noa_q)[:, id], 376 | bpdata_trend = bpdata_trend, 377 | inclcode = vcat(inclcode_m, inclcode_q)[id], 378 | bpnamevec = vcat(namevec_m, namevec_q)[id], 379 | calvec = calender_value.(date_q), 380 | calds = date_q) 381 | return dataset 382 | end 383 | 384 | calender_value(year::Integer, quarter::Integer) = year+(quarter-1)/4 385 | calender_value(calvec::Tuple{Int, Int}) = calender_value(calvec[1], calvec[2]) 386 | --------------------------------------------------------------------------------