├── .gitignore
├── LICENSE
├── README.md
├── Stock_Watson.ipynb
├── data
    ├── hom_fac_1.xlsx
    └── hpfilter_trend.asc
├── dfm_functions.ipynb
└── readin_functions.jl


/.gitignore:
--------------------------------------------------------------------------------
1 | .ipynb_checkpoints/


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | BSD 3-Clause License
 2 | 
 3 | Copyright (c) 2018, QuantEcon
 4 | All rights reserved.
 5 | 
 6 | Redistribution and use in source and binary forms, with or without
 7 | modification, are permitted provided that the following conditions are met:
 8 | 
 9 | * Redistributions of source code must retain the above copyright notice, this
10 |   list of conditions and the following disclaimer.
11 | 
12 | * Redistributions in binary form must reproduce the above copyright notice,
13 |   this list of conditions and the following disclaimer in the documentation
14 |   and/or other materials provided with the distribution.
15 | 
16 | * Neither the name of the copyright holder nor the names of its
17 |   contributors may be used to endorse or promote products derived from
18 |   this software without specific prior written permission.
19 | 
20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # dynamic_factor_models
 2 | 
 3 | This is a respository for the project to replicate some results of dynamic factor models. Tentatively planned papers are
 4 | - [Stock, J. H., & Watson, M. W. (2016). Dynamic factor models, factor-augmented vector autoregressions, and structural vector autoregressions in macroeconomics. In Handbook of macroeconomics (Vol. 2, pp. 415-525). Elsevier.](https://www.sciencedirect.com/science/article/pii/S1574004816300027)
 5 | - [Barigozzi, M., Conti, A. M., & Luciani, M. (2014). Do euro area countries respond asymmetrically to the common monetary policy?. Oxford bulletin of economics and statistics, 76(5), 693-714.](https://onlinelibrary.wiley.com/doi/abs/10.1111/obes.12038)
 6 | - [Forni, M., & Gambetti, L. (2010). The dynamic effects of monetary policy: A structural factor model approach. Journal of Monetary Economics, 57(2), 203-216.](https://www.sciencedirect.com/science/article/pii/S0304393209001597)
 7 | 
 8 | **Any contribution would be appreciated!!**
 9 | - post issue if you find any error or if you want a new feature
10 | - send pull request if you can contribute to the repository
11 | 


--------------------------------------------------------------------------------
/data/hom_fac_1.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/QuantEcon/dynamic_factor_models/ca9dbde34cdbd70360642ce612e78461c113d1ea/data/hom_fac_1.xlsx


--------------------------------------------------------------------------------
/data/hpfilter_trend.asc:
--------------------------------------------------------------------------------
  1 | 0.000000
  2 | 0.000000
  3 | -0.000000
  4 | -0.000001
  5 | -0.000001
  6 | -0.000001
  7 | -0.000002
  8 | -0.000002
  9 | -0.000003
 10 | -0.000003
 11 | -0.000003
 12 | -0.000004
 13 | -0.000004
 14 | -0.000004
 15 | -0.000004
 16 | -0.000005
 17 | -0.000005
 18 | -0.000005
 19 | -0.000004
 20 | -0.000004
 21 | -0.000003
 22 | -0.000002
 23 | -0.000001
 24 | 0.000000
 25 | 0.000002
 26 | 0.000005
 27 | 0.000007
 28 | 0.000010
 29 | 0.000014
 30 | 0.000018
 31 | 0.000023
 32 | 0.000028
 33 | 0.000034
 34 | 0.000040
 35 | 0.000047
 36 | 0.000054
 37 | 0.000061
 38 | 0.000068
 39 | 0.000076
 40 | 0.000083
 41 | 0.000090
 42 | 0.000095
 43 | 0.000100
 44 | 0.000103
 45 | 0.000103
 46 | 0.000101
 47 | 0.000096
 48 | 0.000086
 49 | 0.000072
 50 | 0.000052
 51 | 0.000026
 52 | -0.000007
 53 | -0.000049
 54 | -0.000100
 55 | -0.000161
 56 | -0.000232
 57 | -0.000315
 58 | -0.000410
 59 | -0.000517
 60 | -0.000637
 61 | -0.000769
 62 | -0.000913
 63 | -0.001067
 64 | -0.001230
 65 | -0.001400
 66 | -0.001573
 67 | -0.001745
 68 | -0.001912
 69 | -0.002067
 70 | -0.002204
 71 | -0.002313
 72 | -0.002384
 73 | -0.002407
 74 | -0.002369
 75 | -0.002254
 76 | -0.002048
 77 | -0.001733
 78 | -0.001290
 79 | -0.000700
 80 | 0.000058
 81 | 0.001004
 82 | 0.002159
 83 | 0.003543
 84 | 0.005176
 85 | 0.007074
 86 | 0.009250
 87 | 0.011714
 88 | 0.014468
 89 | 0.017508
 90 | 0.020821
 91 | 0.024384
 92 | 0.028158
 93 | 0.032092
 94 | 0.036115
 95 | 0.040139
 96 | 0.044049
 97 | 0.047708
 98 | 0.050952
 99 | 0.053584
100 | 0.055379
101 | 0.056076
102 | 0.055379
103 | 0.053584
104 | 0.050952
105 | 0.047708
106 | 0.044049
107 | 0.040139
108 | 0.036115
109 | 0.032092
110 | 0.028158
111 | 0.024384
112 | 0.020821
113 | 0.017508
114 | 0.014468
115 | 0.011714
116 | 0.009250
117 | 0.007074
118 | 0.005176
119 | 0.003543
120 | 0.002159
121 | 0.001004
122 | 0.000058
123 | -0.000700
124 | -0.001290
125 | -0.001733
126 | -0.002048
127 | -0.002254
128 | -0.002369
129 | -0.002407
130 | -0.002384
131 | -0.002313
132 | -0.002204
133 | -0.002067
134 | -0.001912
135 | -0.001745
136 | -0.001573
137 | -0.001400
138 | -0.001230
139 | -0.001067
140 | -0.000913
141 | -0.000769
142 | -0.000637
143 | -0.000517
144 | -0.000410
145 | -0.000315
146 | -0.000232
147 | -0.000161
148 | -0.000100
149 | -0.000049
150 | -0.000007
151 | 0.000026
152 | 0.000052
153 | 0.000072
154 | 0.000086
155 | 0.000096
156 | 0.000101
157 | 0.000103
158 | 0.000103
159 | 0.000100
160 | 0.000095
161 | 0.000090
162 | 0.000083
163 | 0.000076
164 | 0.000068
165 | 0.000061
166 | 0.000054
167 | 0.000047
168 | 0.000040
169 | 0.000034
170 | 0.000028
171 | 0.000023
172 | 0.000018
173 | 0.000014
174 | 0.000010
175 | 0.000007
176 | 0.000005
177 | 0.000002
178 | 0.000000
179 | -0.000001
180 | -0.000002
181 | -0.000003
182 | -0.000004
183 | -0.000004
184 | -0.000005
185 | -0.000005
186 | -0.000005
187 | -0.000004
188 | -0.000004
189 | -0.000004
190 | -0.000004
191 | -0.000003
192 | -0.000003
193 | -0.000003
194 | -0.000002
195 | -0.000002
196 | -0.000001
197 | -0.000001
198 | -0.000001
199 | -0.000000
200 | 0.000000
201 | 0.000000
202 | 


--------------------------------------------------------------------------------
/dfm_functions.ipynb:
--------------------------------------------------------------------------------
   1 | {
   2 |  "cells": [
   3 |   {
   4 |    "cell_type": "code",
   5 |    "execution_count": null,
   6 |    "metadata": {},
   7 |    "outputs": [],
   8 |    "source": [
   9 |     "using Statistics\n",
  10 |     "using DelimitedFiles\n",
  11 |     "using LinearAlgebra\n",
  12 |     "using MultivariateStats"
  13 |    ]
  14 |   },
  15 |   {
  16 |    "cell_type": "code",
  17 |    "execution_count": null,
  18 |    "metadata": {},
  19 |    "outputs": [],
  20 |    "source": [
  21 |     "abstract type EstimationMethod end\n",
  22 |     "struct NonParametric <: EstimationMethod end\n",
  23 |     "struct Parametric <: EstimationMethod end"
  24 |    ]
  25 |   },
  26 |   {
  27 |    "cell_type": "markdown",
  28 |    "metadata": {},
  29 |    "source": [
  30 |     "##### `VARModel`\n",
  31 |     "VAR model can be expressed in state-space form:\n",
  32 |     "$$y_t = Qz_t$$\n",
  33 |     "$$z_t = Mz_{t-1}+Gu_t$$\n",
  34 |     "where $z_t$ has $y_t$ and lagged $y_t$"
  35 |    ]
  36 |   },
  37 |   {
  38 |    "cell_type": "code",
  39 |    "execution_count": null,
  40 |    "metadata": {},
  41 |    "outputs": [],
  42 |    "source": [
  43 |     "struct VARModel{TA <: AbstractArray, TAm <: AbstractArray, TMm <: AbstractMatrix}\n",
  44 |     "    y::TA\n",
  45 |     "    nlag::Int\n",
  46 |     "    withconst::Bool\n",
  47 |     "    initperiod::Int\n",
  48 |     "    lastperiod::Int\n",
  49 |     "    T::Int\n",
  50 |     "    ns::Int\n",
  51 |     "    resid::TAm\n",
  52 |     "    betahat::TAm\n",
  53 |     "    M::TMm\n",
  54 |     "    Q::TMm\n",
  55 |     "    G::TMm\n",
  56 |     "    seps::TMm\n",
  57 |     "end"
  58 |    ]
  59 |   },
  60 |   {
  61 |    "cell_type": "code",
  62 |    "execution_count": null,
  63 |    "metadata": {},
  64 |    "outputs": [],
  65 |    "source": [
  66 |     "mutable struct FactorEstimateStats\n",
  67 |     "    T::Int # number of data periods used for factor estimation\n",
  68 |     "    ns::Int # number of data series\n",
  69 |     "    nobs # total number of observations (=sum_i T_i)\n",
  70 |     "    tss\n",
  71 |     "    ssr\n",
  72 |     "    R2::Vector{Union{Missing, Float64}}\n",
  73 |     "end"
  74 |    ]
  75 |   },
  76 |   {
  77 |    "cell_type": "markdown",
  78 |    "metadata": {},
  79 |    "source": [
  80 |     "Note: `factor` and `factor_var_model.y` are actually using same memory."
  81 |    ]
  82 |   },
  83 |   {
  84 |    "cell_type": "code",
  85 |    "execution_count": null,
  86 |    "metadata": {},
  87 |    "outputs": [],
  88 |    "source": [
  89 |     "struct DFMModel\n",
  90 |     "    data::AbstractArray\n",
  91 |     "    inclcode::Vector{Int}\n",
  92 |     "    T::Int  # number of whole data periods\n",
  93 |     "    ns::Int\n",
  94 |     "    nt_min_factor_estimation::Int\n",
  95 |     "    nt_min_factorloading_estimation::Int\n",
  96 |     "    initperiod::Int\n",
  97 |     "    lastperiod::Int\n",
  98 |     "    nfac_o::Int\n",
  99 |     "    nfac_u::Int\n",
 100 |     "    nfac_t::Int\n",
 101 |     "    tol::Float64\n",
 102 |     "    fes::FactorEstimateStats\n",
 103 |     "    factor::AbstractArray\n",
 104 |     "    lambda::AbstractArray\n",
 105 |     "    uar_coef::AbstractArray\n",
 106 |     "    uar_ser::Vector{Float64}\n",
 107 |     "    n_uarlag::Int\n",
 108 |     "    n_factorlag::Int\n",
 109 |     "    factor_var_model::VARModel\n",
 110 |     "    r2::Vector{Union{Missing, Float64}}\n",
 111 |     "end"
 112 |    ]
 113 |   },
 114 |   {
 115 |    "cell_type": "code",
 116 |    "execution_count": null,
 117 |    "metadata": {},
 118 |    "outputs": [],
 119 |    "source": [
 120 |     "function DFMModel(data, inclcode,\n",
 121 |     "    nt_min_factor_estimation::Integer, nt_min_factorloading_estimation::Integer,\n",
 122 |     "    initperiod::Integer, lastperiod::Integer,\n",
 123 |     "    nfac_o::Integer, nfac_u::Integer, tol, n_uarlag::Integer, n_factorlag::Integer)\n",
 124 |     "    size(data, 2) == length(inclcode) || error(\"length of inclcode must equal to number of data series\")\n",
 125 |     "    initperiod < lastperiod || error(\"initperiod must be smaller than lastperiod\")\n",
 126 |     "    ((n_uarlag > 0) && (n_factorlag > 0)) || error(\"n_uarlag and n_factorlag must be positive\")\n",
 127 |     "\n",
 128 |     "    T, ns = size(data)\n",
 129 |     "    nfac_t = nfac_o+nfac_u\n",
 130 |     "    fes = FactorEstimateStats(lastperiod - initperiod + 1,\n",
 131 |     "                              count(inclcode.==1),\n",
 132 |     "                              missing, missing, missing,\n",
 133 |     "                              Vector{Union{Missing, Float64}}(undef, count(inclcode.==1)))\n",
 134 |     "    factor = Matrix{Union{Missing, Float64}}(missing, T, nfac_t)\n",
 135 |     "    lambda = Matrix{Float64}(undef, ns, nfac_t)\n",
 136 |     "    uar_coef = Matrix{Float64}(undef, ns, n_uarlag)\n",
 137 |     "    uar_ser = Vector{Float64}(undef, ns)\n",
 138 |     "    factor_var_model = VARModel(factor, n_factorlag, initperiod= initperiod,\n",
 139 |     "                                lastperiod=lastperiod)\n",
 140 |     "    return DFMModel(data, vec(inclcode), T, ns,\n",
 141 |     "                    nt_min_factor_estimation, nt_min_factorloading_estimation,\n",
 142 |     "                    initperiod, lastperiod, nfac_o, nfac_u, nfac_t,\n",
 143 |     "                    tol, fes, factor, lambda, uar_coef, uar_ser,\n",
 144 |     "                    n_uarlag, n_factorlag, factor_var_model,\n",
 145 |     "                    Vector{Union{Missing, Float64}}(undef, ns))\n",
 146 |     "end"
 147 |    ]
 148 |   },
 149 |   {
 150 |    "cell_type": "code",
 151 |    "execution_count": null,
 152 |    "metadata": {},
 153 |    "outputs": [],
 154 |    "source": [
 155 |     "function drop_missing_row(A::AbstractMatrix)\n",
 156 |     "    tmp = .!any(ismissing.(A), dims=2)\n",
 157 |     "    return Float64.(A[vec(tmp), :]), tmp\n",
 158 |     "end"
 159 |    ]
 160 |   },
 161 |   {
 162 |    "cell_type": "code",
 163 |    "execution_count": null,
 164 |    "metadata": {},
 165 |    "outputs": [],
 166 |    "source": [
 167 |     "function drop_missing_col(A::AbstractMatrix)\n",
 168 |     "    tmp = drop_missing_row(A')\n",
 169 |     "    return tmp[1]', tmp[2]'\n",
 170 |     "end"
 171 |    ]
 172 |   },
 173 |   {
 174 |    "cell_type": "code",
 175 |    "execution_count": null,
 176 |    "metadata": {},
 177 |    "outputs": [],
 178 |    "source": [
 179 |     "function pca_score(X, nfac_u::Integer)\n",
 180 |     "    _, _, V = svd(X)\n",
 181 |     "    score = (X*V)[:, 1:nfac_u]\n",
 182 |     "    return score\n",
 183 |     "end"
 184 |    ]
 185 |   },
 186 |   {
 187 |    "cell_type": "markdown",
 188 |    "metadata": {},
 189 |    "source": [
 190 |     "##### `ols`\n",
 191 |     "###### Arguments\n",
 192 |     "- `y::AbstractVector`: length `T` Vector or `TxN` Matrix where `T` is sample size and `N` is the number of regressed variables\n",
 193 |     "- `X::AbstractArray`: `TxK` Matrix where `K` is the number of regressors\n",
 194 |     "###### Outputs\n",
 195 |     "- `b`: OLS estimator of the coefficients\n",
 196 |     "- `e`: residual"
 197 |    ]
 198 |   },
 199 |   {
 200 |    "cell_type": "code",
 201 |    "execution_count": null,
 202 |    "metadata": {},
 203 |    "outputs": [],
 204 |    "source": [
 205 |     "function ols(y::AbstractArray, X::AbstractArray)\n",
 206 |     "#     b = llsq(X, y; bias=false)\n",
 207 |     "    b = X\\y\n",
 208 |     "    e = y - X*b\n",
 209 |     "    return b, e\n",
 210 |     "end"
 211 |    ]
 212 |   },
 213 |   {
 214 |    "cell_type": "code",
 215 |    "execution_count": null,
 216 |    "metadata": {},
 217 |    "outputs": [],
 218 |    "source": [
 219 |     "abstract type OLSSkipRule end\n",
 220 |     "struct Balanced <: OLSSkipRule end\n",
 221 |     "struct Unbalanced <: OLSSkipRule end"
 222 |    ]
 223 |   },
 224 |   {
 225 |    "cell_type": "markdown",
 226 |    "metadata": {},
 227 |    "source": [
 228 |     "##### `ols_skipmissing`\n",
 229 |     "do OLS ignoring samples containing missing values\n",
 230 |     "###### Outputs\n",
 231 |     "- `b`: OLS estimator of the coefficients\n",
 232 |     "- `e`: residual\n",
 233 |     "- `numrow`: BitArray inidicating rows used to estimate"
 234 |    ]
 235 |   },
 236 |   {
 237 |    "cell_type": "code",
 238 |    "execution_count": null,
 239 |    "metadata": {},
 240 |    "outputs": [],
 241 |    "source": [
 242 |     "function ols_skipmissing(y::AbstractMatrix, X::AbstractArray, ::Balanced)\n",
 243 |     "    N = size(y, 2)\n",
 244 |     "    tmp, numrow = drop_missing_row([y X])\n",
 245 |     "    y_used, x_used = tmp[:, 1:N], tmp[:, N+1:end]\n",
 246 |     "    b, e = ols(y_used, x_used)\n",
 247 |     "    return b, e, vec(numrow)\n",
 248 |     "end\n",
 249 |     "function ols_skipmissing(y::AbstractVector, X::AbstractArray, method::Balanced)\n",
 250 |     "    b, e, numrow = ols_skipmissing(reshape(y, size(y, 1), size(y, 2)), X, method)\n",
 251 |     "    return b, vec(e), numrow\n",
 252 |     "end"
 253 |    ]
 254 |   },
 255 |   {
 256 |    "cell_type": "markdown",
 257 |    "metadata": {},
 258 |    "source": [
 259 |     "##### `ols_skipmissing`\n",
 260 |     "###### Arguments\n",
 261 |     "- `y::AbstractMatrix`: `TxN`\n",
 262 |     "- `X::AbstractArray`: `TxK` Matrix or `T` Vector"
 263 |    ]
 264 |   },
 265 |   {
 266 |    "cell_type": "code",
 267 |    "execution_count": null,
 268 |    "metadata": {},
 269 |    "outputs": [],
 270 |    "source": [
 271 |     "function ols_skipmissing(y::AbstractMatrix, X::AbstractArray, ::Unbalanced)\n",
 272 |     "    if size(y, 1) != size(X, 1)\n",
 273 |     "        error(\"Sample size must be same\")\n",
 274 |     "    end\n",
 275 |     "    T, N = size(y)\n",
 276 |     "    b = Matrix{Float64}(undef, size(X, 2), N)\n",
 277 |     "    e = Matrix{Union{Missing, Float64}}(missing, T, N)\n",
 278 |     "    numrow = BitArray(undef, T, N)\n",
 279 |     "    for i=1:N\n",
 280 |     "        tmp = ols_skipmissing(y[:, i], X, Balanced())\n",
 281 |     "        b[:, i] = tmp[1]\n",
 282 |     "        e[tmp[3], i] = tmp[2]\n",
 283 |     "        numrow[:, i] = tmp[3]\n",
 284 |     "    end\n",
 285 |     "    return b, e, numrow\n",
 286 |     "end"
 287 |    ]
 288 |   },
 289 |   {
 290 |    "cell_type": "code",
 291 |    "execution_count": null,
 292 |    "metadata": {},
 293 |    "outputs": [],
 294 |    "source": [
 295 |     "function lagmat(X::AbstractArray, lags::AbstractVector)\n",
 296 |     "    nc = size(X, 2)\n",
 297 |     "    Xlag = Matrix{Union{Missing, Float64}}(missing, size(X, 1), nc*length(lags))\n",
 298 |     "    for (i, lag) in enumerate(lags)\n",
 299 |     "        Xlag[lag+1:end, nc*(i-1)+1:nc*i] .= X[1:end-lag, :]\n",
 300 |     "    end\n",
 301 |     "    return Xlag\n",
 302 |     "end\n",
 303 |     "lagmat(X::AbstractArray, lag::Integer) = lagmat(X, [lag])\n",
 304 |     "\n",
 305 |     "function uar(y::AbstractVector, n_lags::Integer)\n",
 306 |     "    x = lagmat(y, 1:n_lags)\n",
 307 |     "    arcoef, ehat, _ = ols_skipmissing(y, x, Balanced())\n",
 308 |     "    ssr = dot(ehat, ehat)\n",
 309 |     "    ser = sqrt(ssr/(size(x, 1)-size(x, 2)))\n",
 310 |     "    return arcoef, ser\n",
 311 |     "end"
 312 |    ]
 313 |   },
 314 |   {
 315 |    "cell_type": "markdown",
 316 |    "metadata": {},
 317 |    "source": [
 318 |     "##### `estimate_factor!`\n",
 319 |     "estimate factor by iteration using balanced data\n"
 320 |    ]
 321 |   },
 322 |   {
 323 |    "cell_type": "code",
 324 |    "execution_count": null,
 325 |    "metadata": {},
 326 |    "outputs": [],
 327 |    "source": [
 328 |     "function estimate_factor!(m::DFMModel, max_iter::Integer=100000000,\n",
 329 |     "                         computeR2::Bool=true; lam_constr=nothing)\n",
 330 |     "    data = m.data\n",
 331 |     "    initperiod, lastperiod, nt_min, nfac_u, nfac_o, tol =\n",
 332 |     "        m.initperiod, m.lastperiod, m.nt_min_factor_estimation,\n",
 333 |     "        m.nfac_u, m.nfac_o, m.tol\n",
 334 |     "    # use part of the data\n",
 335 |     "    est_data = data[:, m.inclcode.==1]\n",
 336 |     "    xdata = est_data[initperiod:lastperiod, :]\n",
 337 |     "\n",
 338 |     "    # preprocess data to have unit standard error\n",
 339 |     "    xdata_standardized, xdatastd = standardize_data(xdata)\n",
 340 |     "    standardize_constraint!(lam_constr, xdatastd)\n",
 341 |     "\n",
 342 |     "    m.fes.tss = sum(skipmissing(xdata_standardized.^2))\n",
 343 |     "    m.fes.nobs = length(xdata_standardized[.!ismissing.(xdata_standardized)])\n",
 344 |     "\n",
 345 |     "    xbal, _ = drop_missing_col(xdata_standardized)\n",
 346 |     "\n",
 347 |     "    # Get initial F_t given Lambda_t using PCA\n",
 348 |     "    f = pca_score(xbal, nfac_u)\n",
 349 |     "    m.fes.ssr = 0\n",
 350 |     "    diff = 1000\n",
 351 |     "    lambda = Array{Union{Missing, Float64}}(undef, m.fes.ns, m.nfac_t)\n",
 352 |     "    for iter = 1:max_iter\n",
 353 |     "        ssr_old = m.fes.ssr\n",
 354 |     "        # given F_t, get Lambda_t\n",
 355 |     "        for i = 1:m.fes.ns\n",
 356 |     "            tmp, usedrows = drop_missing_row([xdata_standardized[:, i] f])\n",
 357 |     "            if count(usedrows) >= nt_min\n",
 358 |     "                lambda[i, :] =\n",
 359 |     "                    ols_skipmissing(xdata_standardized[:, i], f, Balanced())[1]'\n",
 360 |     "                impose_constraint!(view(lambda, i, :), i, f, lam_constr, :factor)\n",
 361 |     "            end\n",
 362 |     "        end\n",
 363 |     "        # given Lambda_t, get F_t by regressing X_t on Lambda_t for each t\n",
 364 |     "        tmp = ols_skipmissing(xdata_standardized', lambda[:, nfac_o+1:end], Unbalanced())\n",
 365 |     "        f, ehat = tmp[1]', tmp[2]\n",
 366 |     "        m.fes.ssr = sum(sum(skipmissing(ehat.^2)))\n",
 367 |     "        diff = abs(ssr_old - m.fes.ssr)\n",
 368 |     "        diff >= tol*m.fes.T*m.fes.ns || break\n",
 369 |     "        # println(\"diff = \", diff)\n",
 370 |     "    end\n",
 371 |     "    m.factor[initperiod:lastperiod,  :] = f\n",
 372 |     "    if computeR2\n",
 373 |     "        for i=1:m.fes.ns\n",
 374 |     "            tmp = drop_missing_row([xdata_standardized[:, i] f])[1]\n",
 375 |     "            if size(tmp, 1) >= nt_min\n",
 376 |     "                bhat, ehat = ols(tmp[:, 1], tmp[:, 2:end])\n",
 377 |     "                m.fes.R2[i] = compute_r2(tmp[:, 1], ehat)[1]\n",
 378 |     "            end\n",
 379 |     "        end\n",
 380 |     "    end\n",
 381 |     "    return nothing\n",
 382 |     "end"
 383 |    ]
 384 |   },
 385 |   {
 386 |    "cell_type": "code",
 387 |    "execution_count": null,
 388 |    "metadata": {},
 389 |    "outputs": [],
 390 |    "source": [
 391 |     "function estimate_factor_loading!(m::DFMModel; lam_constr=nothing)\n",
 392 |     "    data, initperiod, lastperiod, fac, nt_min, nfac_t, n_uarlag =\n",
 393 |     "        m.data, m.initperiod, m.lastperiod, m.factor,\n",
 394 |     "        m.nt_min_factorloading_estimation, m.nfac_t, m.n_uarlag\n",
 395 |     "    n_series = size(data, 2)\n",
 396 |     "    for is = 1:n_series\n",
 397 |     "        tmp, numrow = drop_missing_row([data[initperiod:lastperiod, is] fac[initperiod:lastperiod, :]])\n",
 398 |     "        if count(numrow) >= nt_min # if available sample size is large enough\n",
 399 |     "            X = [tmp[:, 2:end] ones(count(numrow))]\n",
 400 |     "            b, uhat = ols(tmp[:, 1], X)\n",
 401 |     "            impose_constraint!(b, uhat, tmp[:, 1], is, X, lam_constr, :loading)\n",
 402 |     "            y_used = data[initperiod:lastperiod, is][vec(numrow), :]\n",
 403 |     "            m.lambda[is, :] .= b[1:end-1]\n",
 404 |     "            m.r2[is], _, _ = compute_r2(y_used, uhat)\n",
 405 |     "            if m.r2[is] < 0.9999\n",
 406 |     "                arcoef, ser = uar(uhat, n_uarlag)\n",
 407 |     "            else\n",
 408 |     "                arcoef, ser = zeros(n_uarlag, 1), 0.0\n",
 409 |     "            end\n",
 410 |     "        end\n",
 411 |     "        m.uar_coef[is, :] = arcoef'\n",
 412 |     "        m.uar_ser[is] = ser\n",
 413 |     "    end\n",
 414 |     "    return nothing\n",
 415 |     "end"
 416 |    ]
 417 |   },
 418 |   {
 419 |    "cell_type": "code",
 420 |    "execution_count": null,
 421 |    "metadata": {},
 422 |    "outputs": [],
 423 |    "source": [
 424 |     "function VARModel(y::AbstractArray, nlag::Integer=1;\n",
 425 |     "                  withconst::Bool=true,\n",
 426 |     "                  initperiod::Integer=1, lastperiod::Integer=size(y, 1))\n",
 427 |     "    T, ns = size(y, 1), size(y, 2)\n",
 428 |     "    resid = Array{Union{Missing, Float64}}(missing, size(y))\n",
 429 |     "    betahat = Matrix{Union{Missing, Float64}}(missing, ns*nlag+withconst, ns)\n",
 430 |     "    M = Matrix{Union{Missing, Float64}}(missing, ns*nlag, ns*nlag)\n",
 431 |     "    Q = Matrix{Union{Missing, Float64}}(missing, ns, ns*nlag)\n",
 432 |     "    G = Matrix{Union{Missing, Float64}}(missing, ns*nlag, ns)\n",
 433 |     "    seps = Matrix{Union{Missing, Float64}}(missing, ns, ns)\n",
 434 |     "    return VARModel(y, nlag, withconst, initperiod, lastperiod, T, ns, resid, betahat, M, Q, G, seps)\n",
 435 |     "end"
 436 |    ]
 437 |   },
 438 |   {
 439 |    "cell_type": "code",
 440 |    "execution_count": null,
 441 |    "metadata": {},
 442 |    "outputs": [],
 443 |    "source": [
 444 |     "function estimate_var!(varm::VARModel, compute_matrices::Bool=true)\n",
 445 |     "    initperiod, lastperiod = varm.initperiod, varm.lastperiod\n",
 446 |     "    withconst, nlag = varm.withconst, varm.nlag\n",
 447 |     "    resid, seps = varm.resid, varm.seps\n",
 448 |     "\n",
 449 |     "    y_restricted = varm.y[initperiod:lastperiod, :]\n",
 450 |     "\n",
 451 |     "    # regressors\n",
 452 |     "    withconst || (x = lagmat(y_restricted, 1:nlag))\n",
 453 |     "    !withconst || (x = [ones(lastperiod-initperiod+1) lagmat(y_restricted, 1:nlag)])\n",
 454 |     "\n",
 455 |     "    # do OLS ignoring the samples containing NaN\n",
 456 |     "    betahat, ehat, numrows = ols_skipmissing(y_restricted, x, Balanced())\n",
 457 |     "    varm.betahat .= betahat\n",
 458 |     "\n",
 459 |     "    T_used = count(numrows) # used sample size\n",
 460 |     "    K = size(x, 2) # number of regressors\n",
 461 |     "\n",
 462 |     "    ndf = T_used - K # degree of freedom (T-K)\n",
 463 |     "    seps .= ehat'*ehat/ndf # covariance matrix of error term\n",
 464 |     "    resid[initperiod- 1 .+ findall(numrows), :] .= ehat\n",
 465 |     "\n",
 466 |     "    !compute_matrices || fill_matrices!(varm, betahat)\n",
 467 |     "    return nothing\n",
 468 |     "end"
 469 |    ]
 470 |   },
 471 |   {
 472 |    "cell_type": "code",
 473 |    "execution_count": null,
 474 |    "metadata": {},
 475 |    "outputs": [],
 476 |    "source": [
 477 |     "function fill_matrices!(varm::VARModel, betahat::Array)\n",
 478 |     "    ns, nlag = varm.ns, varm.nlag\n",
 479 |     "    M, Q, G = varm.M, varm.Q, varm.G\n",
 480 |     "\n",
 481 |     "    b = betahat[2:end, :]' # now, each row corresponds to each equation\n",
 482 |     "\n",
 483 |     "    M .= zeros(ns*nlag, ns*nlag)\n",
 484 |     "    M[1:ns, :] .= b # coefficients of VAR\n",
 485 |     "    M[ns+1:end, 1:end-ns] .= Matrix{Float64}(I, ns*nlag-ns, ns*nlag-ns)　# lag part\n",
 486 |     "\n",
 487 |     "    Q .= zeros(ns, ns*nlag)\n",
 488 |     "    Q[1:ns, 1:ns] .= Matrix{Float64}(I, ns, ns)\n",
 489 |     "    G .= zeros(ns*nlag, ns)\n",
 490 |     "    G[1:ns, 1:ns] .= ((cholesky(varm.seps)).U)'\n",
 491 |     "    return nothing\n",
 492 |     "end"
 493 |    ]
 494 |   },
 495 |   {
 496 |    "cell_type": "code",
 497 |    "execution_count": null,
 498 |    "metadata": {},
 499 |    "outputs": [],
 500 |    "source": [
 501 |     "function standardize_data(data::AbstractArray)\n",
 502 |     "    datamean = [mean(collect(skipmissing(data[:, i]))) for i = 1:size(data, 2)]'\n",
 503 |     "    # # make correction (which I don't understand why being needed)\n",
 504 |     "    tmp = size(data, 1) .- sum(ismissing.(data), dims = 1)\n",
 505 |     "    tmp = (tmp.-1)./tmp\n",
 506 |     "    datastd = [std(collect(skipmissing(data[:, i]))) for i = 1:size(data, 2)]'.*(tmp.^.5)\n",
 507 |     "    data_standardized = (data .- datamean)./datastd\n",
 508 |     "    return data_standardized, datastd\n",
 509 |     "end"
 510 |    ]
 511 |   },
 512 |   {
 513 |    "cell_type": "markdown",
 514 |    "metadata": {},
 515 |    "source": [
 516 |     "##### `estimate!`\n",
 517 |     "estimate DFM Model non-parametrically\n",
 518 |     "###### Procedure\n",
 519 |     "1. estimate factor $F$ using balanced data\n",
 520 |     "1. estimate factor loading $\\Lambda$ using full sample\n",
 521 |     "1. estimate the equation of factor evolution"
 522 |    ]
 523 |   },
 524 |   {
 525 |    "cell_type": "code",
 526 |    "execution_count": null,
 527 |    "metadata": {},
 528 |    "outputs": [],
 529 |    "source": [
 530 |     "function estimate!(m::DFMModel, ::NonParametric=NonParametric();\n",
 531 |     "                   lam_constr_f=nothing, lam_constr_fl=nothing)\n",
 532 |     "\n",
 533 |     "    # estimate factor using balanced data\n",
 534 |     "    estimate_factor!(m, lam_constr=lam_constr_f)\n",
 535 |     "\n",
 536 |     "    # estimate factor loading using full sample\n",
 537 |     "    estimate_factor_loading!(m, lam_constr=lam_constr_fl)\n",
 538 |     "\n",
 539 |     "    # estimate the equation of factor evolution\n",
 540 |     "    estimate_var!(m.factor_var_model)\n",
 541 |     "\n",
 542 |     "    return nothing\n",
 543 |     "end"
 544 |    ]
 545 |   },
 546 |   {
 547 |    "cell_type": "code",
 548 |    "execution_count": null,
 549 |    "metadata": {},
 550 |    "outputs": [],
 551 |    "source": [
 552 |     "compute_series(dfmm::DFMModel, is::Integer) = dfmm.factor*dfmm.lambda[is, :]\n",
 553 |     "detrended_year_growth(y::AbstractVector) = vec(sum(lagmat(y, 0:3), dims=2))\n",
 554 |     "\n",
 555 |     "find_row_number(date::Tuple{Int, Int}, dates) =\n",
 556 |     "    findall([date == dataset.calds[i] for i=1:length(dataset.calds)])[1]"
 557 |    ]
 558 |   },
 559 |   {
 560 |    "cell_type": "code",
 561 |    "execution_count": null,
 562 |    "metadata": {},
 563 |    "outputs": [],
 564 |    "source": [
 565 |     "function compute_r2(y::AbstractArray, e::AbstractVector)\n",
 566 |     "    ssr = dot(e, e)\n",
 567 |     "    tss = dot(y.-mean(y), y.-mean(y))\n",
 568 |     "    return 1-(ssr/tss), ssr, tss\n",
 569 |     "end"
 570 |    ]
 571 |   },
 572 |   {
 573 |    "cell_type": "markdown",
 574 |    "metadata": {},
 575 |    "source": [
 576 |     "##### `compute_bw_weight`\n",
 577 |     "compute Tukey’s biweight filter $w(L)$:\n",
 578 |     "$$w_j=\\left\\{\\begin{array}{ll}\n",
 579 |     "c\\left(1-\\left(\\frac{j}{B}\\right)^2\\right)^2 &\\text{ for }|j|\\leq B\\\\\n",
 580 |     "0 &\\text{ otherwise }\n",
 581 |     "\\end{array}\\right.$$\n",
 582 |     "where $B$ is the bandwidth (truncation) parameter and $c$ is a normalization constant such that $w(1)=\\sum_{j=-B}^B w_j=1$."
 583 |    ]
 584 |   },
 585 |   {
 586 |    "cell_type": "code",
 587 |    "execution_count": null,
 588 |    "metadata": {},
 589 |    "outputs": [],
 590 |    "source": [
 591 |     "function compute_bw_weight(B::Integer)\n",
 592 |     "    bw_weight = zeros(2B+1)\n",
 593 |     "    for i=0:B\n",
 594 |     "        bw_weight[B+1+i] = bw_weight[B+1-i] = (1-(i/B)^2)^2\n",
 595 |     "    end\n",
 596 |     "    bw_weight = bw_weight./sum(bw_weight) # make normalization\n",
 597 |     "    return bw_weight\n",
 598 |     "end"
 599 |    ]
 600 |   },
 601 |   {
 602 |    "cell_type": "markdown",
 603 |    "metadata": {},
 604 |    "source": [
 605 |     "##### `compute_gain`\n",
 606 |     "Calculate the spectral gain $\\|w(e^{i\\lambda})\\| $ of filter $w(L)$ at frequency $\\lambda$ where $\\|\\cdot\\|$ is complex norm \n",
 607 |     "\n",
 608 |     "$$gain=\\left\\|w_{-B}e^{-Bi\\lambda}+w_{-(B-1)}e^{-(B-1)i\\lambda}+\\dots+w_{0}+\\dots+w_{B-1}e^{(B-1)i\\lambda}+w_{B}e^{Bi\\lambda}\\right\\|$$\n",
 609 |     "###### Detail\n",
 610 |     "\n",
 611 |     "###### Arguments\n",
 612 |     "- `w`: weight \n",
 613 |     "- `lambda`: frequency "
 614 |    ]
 615 |   },
 616 |   {
 617 |    "cell_type": "code",
 618 |    "execution_count": null,
 619 |    "metadata": {},
 620 |    "outputs": [],
 621 |    "source": [
 622 |     "function compute_gain(w::AbstractVector, lambda::Real)\n",
 623 |     "    B = (length(w)-1)/2\n",
 624 |     "    z = exp(lambda*im)\n",
 625 |     "    z1 = exp(-lambda*im*B)\n",
 626 |     "    gain = w[1]*z1\n",
 627 |     "    for i = 2:length(w)\n",
 628 |     "        z1 = z1*z\n",
 629 |     "        gain = gain + w[i]*z1\n",
 630 |     "    end\n",
 631 |     "    return abs(gain)\n",
 632 |     "end"
 633 |    ]
 634 |   },
 635 |   {
 636 |    "cell_type": "markdown",
 637 |    "metadata": {},
 638 |    "source": [
 639 |     "##### `bai_ng_critetion`\n"
 640 |    ]
 641 |   },
 642 |   {
 643 |    "cell_type": "code",
 644 |    "execution_count": null,
 645 |    "metadata": {},
 646 |    "outputs": [],
 647 |    "source": [
 648 |     "function bai_ng_criterion(m::DFMModel)\n",
 649 |     "    fes = m.fes\n",
 650 |     "    nbar = fes.nobs/fes.T # average observation per period\n",
 651 |     "    g = log(min(nbar, fes.T))*(nbar+fes.T)/fes.nobs\n",
 652 |     "    bn_icp = log(fes.ssr/fes.nobs)+ m.nfac_t*g\n",
 653 |     "    return bn_icp\n",
 654 |     "end"
 655 |    ]
 656 |   },
 657 |   {
 658 |    "cell_type": "markdown",
 659 |    "metadata": {},
 660 |    "source": [
 661 |     "##### `FactorNumberEstimateStats`"
 662 |    ]
 663 |   },
 664 |   {
 665 |    "cell_type": "code",
 666 |    "execution_count": null,
 667 |    "metadata": {},
 668 |    "outputs": [],
 669 |    "source": [
 670 |     "struct FactorNumberEstimateStats\n",
 671 |     "    bn_icp\n",
 672 |     "    ssr_static\n",
 673 |     "    R2_static\n",
 674 |     "    aw_icp\n",
 675 |     "    ssr_dynamic\n",
 676 |     "    R2_dynamic\n",
 677 |     "    tss::Float64\n",
 678 |     "    nobs::Int\n",
 679 |     "    T::Int\n",
 680 |     "end"
 681 |    ]
 682 |   },
 683 |   {
 684 |    "cell_type": "markdown",
 685 |    "metadata": {},
 686 |    "source": [
 687 |     "##### `estimate_factor_numbers`\n",
 688 |     "###### Arguments\n",
 689 |     "- `m::DFMModel`: `DFMModel` specifying the model except number of unobservable factors."
 690 |    ]
 691 |   },
 692 |   {
 693 |    "cell_type": "code",
 694 |    "execution_count": null,
 695 |    "metadata": {},
 696 |    "outputs": [],
 697 |    "source": [
 698 |     "function estimate_factor_numbers(m::DFMModel, nfacs::Union{Real, AbstractVector})\n",
 699 |     "    max_nfac = maximum(nfacs)\n",
 700 |     "    bn_icp = Vector{Union{Missing, Float64}}(undef, max_nfac)\n",
 701 |     "    ssr_static = Vector{Float64}(undef, max_nfac)\n",
 702 |     "    R2_static = Matrix{Union{Missing, Float64}}(undef, m.fes.ns, max_nfac)\n",
 703 |     "    aw_icp = Matrix{Union{Missing, Float64}}(undef, max_nfac, max_nfac)\n",
 704 |     "    ssr_dynamic = Matrix{Union{Missing, Float64}}(undef, max_nfac, max_nfac)\n",
 705 |     "    R2_dynamic = Array{Union{Missing, Float64}}(undef, m.fes.ns, max_nfac, max_nfac)\n",
 706 |     "\n",
 707 |     "#     global tss, nobs, T\n",
 708 |     "    for (i, nfac) = enumerate(1:max_nfac)\n",
 709 |     "        dfmm = DFMModel(m.data, m.inclcode,\n",
 710 |     "                m.nt_min_factor_estimation, m.nt_min_factorloading_estimation,\n",
 711 |     "                m.initperiod, m.lastperiod, m.nfac_o, nfac, m.tol, m.n_uarlag, m.n_factorlag)\n",
 712 |     "        estimate_factor!(dfmm)\n",
 713 |     "        bn_icp[i] = bai_ng_criterion(dfmm)\n",
 714 |     "        ssr_static[i] = dfmm.fes.ssr\n",
 715 |     "        R2_static[:, i] = dfmm.fes.R2\n",
 716 |     "        aw_icp[1:nfac, i], ssr_dynamic[1:nfac, i], R2_dynamic[:, 1:nfac, i] =\n",
 717 |     "            amengual_watson_test(dfmm, 4)\n",
 718 |     "        global tss = dfmm.fes.tss\n",
 719 |     "        global nobs = dfmm.fes.nobs\n",
 720 |     "        global T = dfmm.fes.T\n",
 721 |     "    end\n",
 722 |     "    return FactorNumberEstimateStats(bn_icp, ssr_static, R2_static,\n",
 723 |     "                                     aw_icp, ssr_dynamic, R2_dynamic,\n",
 724 |     "                                     tss, nobs, T)\n",
 725 |     "end"
 726 |    ]
 727 |   },
 728 |   {
 729 |    "cell_type": "code",
 730 |    "execution_count": null,
 731 |    "metadata": {},
 732 |    "outputs": [],
 733 |    "source": [
 734 |     "function amengual_watson_test(m::DFMModel, nper::Integer)\n",
 735 |     "    factor = m.factor\n",
 736 |     "    T, ns, nfac_static = m.T, m.fes.ns, m.nfac_t\n",
 737 |     "    nvar_lag = m.factor_var_model.nlag\n",
 738 |     "    est_data = m.data[:, m.inclcode.==1]\n",
 739 |     "\n",
 740 |     "    # Construct lags of factors and residuals for est_data\n",
 741 |     "    x = [ones(T) lagmat(factor, 1:nvar_lag)]\n",
 742 |     "    est_data_res = Array{Union{Missing, Float64}}(undef, T, ns)\n",
 743 |     "    for is = 1:ns\n",
 744 |     "        tmp, usedrows = drop_missing_row([est_data[:, is] x])\n",
 745 |     "        y = tmp[:, 1]\n",
 746 |     "        z = tmp[:, 2:end]\n",
 747 |     "        ndf = size(z, 1)-size(z, 2)\n",
 748 |     "        if ndf >= m.nt_min_factor_estimation  # Minimum degrees of freedom for series\n",
 749 |     "            b, e = ols(y, z)\n",
 750 |     "            est_data_res[findall(vec(usedrows)), is] = e\n",
 751 |     "        end\n",
 752 |     "    end\n",
 753 |     "\n",
 754 |     "    # Carry out calculations for number of dynamic factors\n",
 755 |     "    ssr = Array{Float64}(undef, nfac_static)\n",
 756 |     "    r2  = Array{Union{Missing, Float64}}(undef, ns, nfac_static)\n",
 757 |     "    aw  = Array{Float64}(undef, nfac_static)\n",
 758 |     "    for nfac = 1:nfac_static\n",
 759 |     "        dfmm = DFMModel(est_data_res, ones(count(m.inclcode.==1)),\n",
 760 |     "                m.nt_min_factor_estimation, m.nt_min_factorloading_estimation,\n",
 761 |     "                m.initperiod+4, m.lastperiod, 0, nfac, m.tol, m.n_uarlag, m.n_factorlag)\n",
 762 |     "        estimate_factor!(dfmm)\n",
 763 |     "        aw[nfac] = bai_ng_criterion(dfmm)\n",
 764 |     "        ssr[nfac] = dfmm.fes.ssr\n",
 765 |     "        r2[:, nfac] = dfmm.fes.R2\n",
 766 |     "    end\n",
 767 |     "    return aw, ssr, r2\n",
 768 |     "end"
 769 |    ]
 770 |   },
 771 |   {
 772 |    "cell_type": "markdown",
 773 |    "metadata": {},
 774 |    "source": [
 775 |     "##### `impulse_response`\n",
 776 |     "compute impulse response function of `VARModel` with one standard error shock\n",
 777 |     "$$y_t = Qz_t$$\n",
 778 |     "$$z_t = Mz_{t-1}+Gu_t$$\n",
 779 |     "###### Arguments\n",
 780 |     "- `varm::VARModel`\n",
 781 |     "- `shock_ids`: `<:AbstractVector` or `<:Real` or `:all`.\n",
 782 |     "- `T::Integer`:  horizon of IRF\n",
 783 |     "###### Return\n",
 784 |     "- `irfs::Array` IRFs. It is 2D if `shock_ids<:Real` and 3D otherwise. First dimension is each series, second dimension is time, third dimension is type of shock (if exist)."
 785 |    ]
 786 |   },
 787 |   {
 788 |    "cell_type": "code",
 789 |    "execution_count": null,
 790 |    "metadata": {},
 791 |    "outputs": [],
 792 |    "source": [
 793 |     "function impulse_response(varm::VARModel, shock_ids::AbstractVector, T::Integer)\n",
 794 |     "    irfs = Array{Float64, 3}(undef, size(varm.Q, 1), T, length(shock_ids))\n",
 795 |     "    for (i, shock_id) in enumerate(shock_ids)\n",
 796 |     "        compute_irf_single_shock!(irfs, varm, i, shock_id, T)\n",
 797 |     "    end\n",
 798 |     "    return irfs\n",
 799 |     "end"
 800 |    ]
 801 |   },
 802 |   {
 803 |    "cell_type": "code",
 804 |    "execution_count": null,
 805 |    "metadata": {},
 806 |    "outputs": [],
 807 |    "source": [
 808 |     "function compute_irf_single_shock!(irfs::AbstractArray, varm::VARModel,\n",
 809 |     "                       i::Integer, shock_id::Integer, T::Integer)\n",
 810 |     "    x = varm.G[:, shock_id]\n",
 811 |     "    for t = 1:T\n",
 812 |     "        irfs[:, t, i] = varm.Q * x\n",
 813 |     "        x .= varm.M * x\n",
 814 |     "    end\n",
 815 |     "    return nothing\n",
 816 |     "end\n",
 817 |     "function impulse_response(varm::VARModel, shock_id::Real, T::Integer)\n",
 818 |     "    irfs = Matrix{Float64}(undef, size(varm.Q, 1), T)\n",
 819 |     "    compute_irf_single_shock!(irfs, varm, x, 1, shock_id, T)\n",
 820 |     "    return irfs\n",
 821 |     "end\n",
 822 |     "impulse_response(varm::VARModel, shock_id::Symbol, T::Integer) =\n",
 823 |     "    impulse_response(varm, Val(shock_id), T)\n",
 824 |     "impulse_response(varm::VARModel, shock_ids::Val{:all}, T::Integer) =\n",
 825 |     "    impulse_response(varm, 1:size(varm.G, 2), T)\n"
 826 |    ]
 827 |   },
 828 |   {
 829 |    "cell_type": "markdown",
 830 |    "metadata": {},
 831 |    "source": [
 832 |     "##### `form_kernel`\n",
 833 |     "create kernel $$1-\\frac{v}{q+1}$$ for all $i\\leq q$.\n",
 834 |     "\n",
 835 |     "###### Arguments\n",
 836 |     "- `q::Integer`: bandwidth parameter"
 837 |    ]
 838 |   },
 839 |   {
 840 |    "cell_type": "code",
 841 |    "execution_count": null,
 842 |    "metadata": {},
 843 |    "outputs": [],
 844 |    "source": [
 845 |     "# abstract type KernelType end\n",
 846 |     "# struct White <: KernelType end\n",
 847 |     "# struct Triangular <: KernelType \n",
 848 |     "#     nma::Int\n",
 849 |     "# end\n",
 850 |     "# struct Rectangular <: KernelType\n",
 851 |     "#     nma::Int\n",
 852 |     "# end"
 853 |    ]
 854 |   },
 855 |   {
 856 |    "cell_type": "code",
 857 |    "execution_count": null,
 858 |    "metadata": {},
 859 |    "outputs": [],
 860 |    "source": [
 861 |     "form_kernel(q::Integer) = [1-i/(q+1) for i = 0:q]"
 862 |    ]
 863 |   },
 864 |   {
 865 |    "cell_type": "code",
 866 |    "execution_count": null,
 867 |    "metadata": {},
 868 |    "outputs": [],
 869 |    "source": [
 870 |     "# form_kernel(kt::Triangular) = [1-i/(kt.nma+1) for i = 0:kt.nma]\n",
 871 |     "# form_kernel(kt::Rectangular) = ones(kt.nma+1)\n",
 872 |     "# form_kernel(kt::White) = ones(1, 1)"
 873 |    ]
 874 |   },
 875 |   {
 876 |    "cell_type": "markdown",
 877 |    "metadata": {},
 878 |    "source": [
 879 |     "##### `compute_chow`\n",
 880 |     "Letting $D_t$ take 1 in post sample period ($t> T_{break}$) and 0 otherwise ($t\\leq T_{break}$), regress \n",
 881 |     "$$ y_t = \\beta X_t+\\gamma(X_tD_t)$$\n",
 882 |     "and compute statics for F-test of $\\gamma=0$"
 883 |    ]
 884 |   },
 885 |   {
 886 |    "cell_type": "code",
 887 |    "execution_count": null,
 888 |    "metadata": {},
 889 |    "outputs": [],
 890 |    "source": [
 891 |     "function compute_chow(y::AbstractVector, X::AbstractArray,\n",
 892 |     "                      q::Integer, T_break::Integer)\n",
 893 |     "    k = size(X, 2)\n",
 894 |     "    T = length(y)\n",
 895 |     "    D = [zeros(T_break);\n",
 896 |     "         ones(T-T_break)]\n",
 897 |     "    betahat, vbeta, _ = regress_hac(y, [X X.*D], q)\n",
 898 |     "    gamma = betahat[k+1:end]\n",
 899 |     "    v1 = vbeta[k+1:end, k+1:end]\n",
 900 |     "    chow = gamma'/v1*gamma\n",
 901 |     "    return chow\n",
 902 |     "end"
 903 |    ]
 904 |   },
 905 |   {
 906 |    "cell_type": "markdown",
 907 |    "metadata": {},
 908 |    "source": [
 909 |     "##### `regress_hac`"
 910 |    ]
 911 |   },
 912 |   {
 913 |    "cell_type": "code",
 914 |    "execution_count": null,
 915 |    "metadata": {},
 916 |    "outputs": [],
 917 |    "source": [
 918 |     "function regress_hac(y::AbstractVector, X::AbstractArray, q::Integer)\n",
 919 |     "    betahat = X\\y\n",
 920 |     "    ehat = y - X*betahat\n",
 921 |     "    vbeta, se_beta = hac(ehat, X, q)\n",
 922 |     "    return betahat, vbeta, se_beta\n",
 923 |     "end"
 924 |    ]
 925 |   },
 926 |   {
 927 |    "cell_type": "markdown",
 928 |    "metadata": {},
 929 |    "source": [
 930 |     "##### `hac`\n",
 931 |     "\n",
 932 |     "###### Arguments\n",
 933 |     "- `u::AbstractVector`: `T`\n",
 934 |     "- `X::AbstractArray`: `Txk`\n",
 935 |     "- `q::Integer`: bandwidth parameter"
 936 |    ]
 937 |   },
 938 |   {
 939 |    "cell_type": "code",
 940 |    "execution_count": null,
 941 |    "metadata": {},
 942 |    "outputs": [],
 943 |    "source": [
 944 |     "function hac(u::AbstractVector, X::AbstractArray, q::Integer)\n",
 945 |     "    kernel = form_kernel(q)\n",
 946 |     "    z = X.*u\n",
 947 |     "    vbeta = form_hscrc(z, X, kernel, q)\n",
 948 |     "    se_beta = sqrt.(diag(vbeta))\n",
 949 |     "    return vbeta, se_beta\n",
 950 |     "end"
 951 |    ]
 952 |   },
 953 |   {
 954 |    "cell_type": "markdown",
 955 |    "metadata": {},
 956 |    "source": [
 957 |     "##### `form_hscrc`\n",
 958 |     "form hetero-serial correlation robust covariance\n",
 959 |     "\n",
 960 |     "###### Arguments\n",
 961 |     "- `X`: `Txk` matrix\n",
 962 |     "- `kernel`: weight of length `T`\n",
 963 |     "\n",
 964 |     "###### Returns\n",
 965 |     "- `vbeta`: `kxk` hetero-serial correlation robust covariance"
 966 |    ]
 967 |   },
 968 |   {
 969 |    "cell_type": "code",
 970 |    "execution_count": null,
 971 |    "metadata": {},
 972 |    "outputs": [],
 973 |    "source": [
 974 |     "function form_hscrc(z::AbstractArray, X, kernel, q::Integer)\n",
 975 |     "    k = size(X, 2)\n",
 976 |     "    v = zeros(k, k)\n",
 977 |     "    for i = -q:0\n",
 978 |     "        r1 = 1\n",
 979 |     "        r2 = size(z, 1) + i\n",
 980 |     "        v = v .+ kernel[-i+1]*z[r1:r2, :]'*z[r1-i:r2-i, :]\n",
 981 |     "    end\n",
 982 |     "    for i = 1:q\n",
 983 |     "        r1 = 1+i\n",
 984 |     "        r2 = size(z, 1)\n",
 985 |     "        v = v .+ kernel[i+1]*z[r1:r2, :]'*z[r1-i:r2-i, :]\n",
 986 |     "    end\n",
 987 |     "    XX = X'*X # k x k\n",
 988 |     "    vbeta = XX\\v/(XX') # k x k\n",
 989 |     "    return vbeta\n",
 990 |     "end"
 991 |    ]
 992 |   },
 993 |   {
 994 |    "cell_type": "code",
 995 |    "execution_count": null,
 996 |    "metadata": {},
 997 |    "outputs": [],
 998 |    "source": [
 999 |     "# form_hscrc(z, X, kernel, kt::White) = \n",
1000 |     "#     form_hscrc(z, X, kernel, 0)\n",
1001 |     "# form_hscrc(z, X, kernel, kt::Union{Triangular, Rectangular}) = \n",
1002 |     "#     form_hscrc(z, X, kernel, kt.nma)"
1003 |    ]
1004 |   },
1005 |   {
1006 |    "cell_type": "markdown",
1007 |    "metadata": {},
1008 |    "source": [
1009 |     "##### `compute_qlr`\n",
1010 |     "Compute Quandt Likelihood Ratio (QLR) Statistic\n",
1011 |     "\n",
1012 |     "###### Arguments\n",
1013 |     "- `X1`: regressors with fixed coefficients under both null and alternative. If all variables are varying under alternative, pass `nothing` as `x1`.\n",
1014 |     "- `X2`: regressors with fixed coefficients under null and time varying coefficients under alternative\n",
1015 |     "- `ccut::AbstractFloat`: must be between 0 and 0.5."
1016 |    ]
1017 |   },
1018 |   {
1019 |    "cell_type": "code",
1020 |    "execution_count": null,
1021 |    "metadata": {},
1022 |    "outputs": [],
1023 |    "source": [
1024 |     "function compute_qlr(y::AbstractVector, X1::Union{Nothing, AbstractArray},\n",
1025 |     "                     X2::AbstractArray,\n",
1026 |     "                     ccut::AbstractFloat, q::Integer)\n",
1027 |     "    T = length(y)\n",
1028 |     "    n1t = floor(Int, ccut*T)\n",
1029 |     "    n2t = T - n1t\n",
1030 |     "    k = size(X2, 2) #number of coefficients that can vary\n",
1031 |     "    if X1 == nothing\n",
1032 |     "        l = 0\n",
1033 |     "        X = X2\n",
1034 |     "    else\n",
1035 |     "        l = length(X1)\n",
1036 |     "        X = [X1,X2] # T x (l+k) where l=length(X1)\n",
1037 |     "    end\n",
1038 |     "    lr = Vector{Float64}(undef, n2t-n1t+1)\n",
1039 |     "    lrr = Vector{Float64}(undef, n2t-n1t+1)\n",
1040 |     "    for (i, T_break) in enumerate(n1t:n2t)\n",
1041 |     "        lr[i] = compute_chow(y, X, 0, T_break)\n",
1042 |     "        lrr[i] = compute_chow(y, X, q, T_break)\n",
1043 |     "    end\n",
1044 |     "    lm = maximum(lr)\n",
1045 |     "    lmr = maximum(lrr)\n",
1046 |     "    return lm, lmr\n",
1047 |     "end"
1048 |    ]
1049 |   },
1050 |   {
1051 |    "cell_type": "markdown",
1052 |    "metadata": {},
1053 |    "source": [
1054 |     "##### Constraint"
1055 |    ]
1056 |   },
1057 |   {
1058 |    "cell_type": "code",
1059 |    "execution_count": null,
1060 |    "metadata": {},
1061 |    "outputs": [],
1062 |    "source": [
1063 |     "struct LambdaConstraint\n",
1064 |     "    indices::Vector{Int}\n",
1065 |     "    R::Matrix{Float64}\n",
1066 |     "    r::Vector{Float64}\n",
1067 |     "    r_std::Vector{Float64}\n",
1068 |     "end"
1069 |    ]
1070 |   },
1071 |   {
1072 |    "cell_type": "markdown",
1073 |    "metadata": {},
1074 |    "source": [
1075 |     "##### `construct_constraint`\n",
1076 |     "construct constraint on lambda\n",
1077 |     "###### Arguments\n",
1078 |     "- `varnames`: \n",
1079 |     "- `used_varnames`\n",
1080 |     "- `R`\n",
1081 |     "- `r`"
1082 |    ]
1083 |   },
1084 |   {
1085 |    "cell_type": "code",
1086 |    "execution_count": null,
1087 |    "metadata": {},
1088 |    "outputs": [],
1089 |    "source": [
1090 |     "function construct_constraint(varnames::AbstractVector{String},\n",
1091 |     "                              used_varnames, R, r)\n",
1092 |     "    n_constr = length(varnames)\n",
1093 |     "    n_R = size(R, 1)\n",
1094 |     "    indices = Vector{Int}(undef, length(varnames)*n_R)\n",
1095 |     "    for (i, varname) in enumerate(varnames)\n",
1096 |     "        index = findall(used_varnames .== varname)\n",
1097 |     "        [indices[n_R*(i-1)+j] = index[1] for j = 1:n_R]\n",
1098 |     "    end\n",
1099 |     "    R_stack = repeat(R, n_constr)\n",
1100 |     "    r_stack = repeat(r, n_constr)\n",
1101 |     "    return LambdaConstraint(indices, R_stack, r_stack, similar(r_stack))\n",
1102 |     "end"
1103 |    ]
1104 |   },
1105 |   {
1106 |    "cell_type": "markdown",
1107 |    "metadata": {},
1108 |    "source": [
1109 |     "##### `impose_constraint!`\n",
1110 |     "###### Arguments\n",
1111 |     "- `b`: unconstrained coefficients\n",
1112 |     "- `R`: see above\n",
1113 |     "- `r`: see above\n",
1114 |     "- `lam_col`:\n",
1115 |     "- `i::Integer`: index of series\n",
1116 |     "- `X` regressors"
1117 |    ]
1118 |   },
1119 |   {
1120 |    "cell_type": "code",
1121 |    "execution_count": null,
1122 |    "metadata": {},
1123 |    "outputs": [],
1124 |    "source": [
1125 |     "function impose_constraint!(b, i::Integer, X, \n",
1126 |     "                            lam_constr::LambdaConstraint,\n",
1127 |     "                            forwhat::Symbol)\n",
1128 |     "    lam_col= lam_constr.indices\n",
1129 |     "    used_series = vec(lam_col.==i)\n",
1130 |     "    R_tmp, r = get_Rr(lam_constr, used_series, Val(forwhat))\n",
1131 |     "    r_tmp = r[used_series]\n",
1132 |     "    tmp = (X'*X)\\R_tmp'\n",
1133 |     "    b .= b .- tmp/(R_tmp*tmp)*(R_tmp*b-r_tmp)\n",
1134 |     "    return nothing\n",
1135 |     "end\n",
1136 |     "impose_constraint!(b, i::Integer, X, lam_constr::Nothing, forwhat::Symbol) = nothing\n",
1137 |     "\n",
1138 |     "get_Rr(lam_constr, used_series, ::Val{:factor}) = \n",
1139 |     "    (hcat(lam_constr.R[used_series, :]),  lam_constr.r_std)\n",
1140 |     "get_Rr(lam_constr, used_series, ::Val{:loading}) = \n",
1141 |     "    (hcat(lam_constr.R[used_series, :], zeros(count(used_series))), lam_constr.r)"
1142 |    ]
1143 |   },
1144 |   {
1145 |    "cell_type": "markdown",
1146 |    "metadata": {},
1147 |    "source": [
1148 |     "##### `impose_constraint!`\n",
1149 |     "this version updates residual term as well\n",
1150 |     "###### Arguments\n",
1151 |     "- `b`: unconstrained coefficients\n",
1152 |     "- `e`: pre-allocated residual term vector\n",
1153 |     "- `y`: dependent variable\n",
1154 |     "- `R`: see above\n",
1155 |     "- `r`: see above\n",
1156 |     "- `lam_col`:\n",
1157 |     "- `i::Integer`: index of series\n",
1158 |     "- `X` regressors"
1159 |    ]
1160 |   },
1161 |   {
1162 |    "cell_type": "code",
1163 |    "execution_count": null,
1164 |    "metadata": {},
1165 |    "outputs": [],
1166 |    "source": [
1167 |     "function impose_constraint!(b, e, y, i::Integer, X, \n",
1168 |     "                            lam_constr::LambdaConstraint, forwhat::Symbol)\n",
1169 |     "    impose_constraint!(b, i, X, lam_constr, forwhat)\n",
1170 |     "    e .= y - X*b\n",
1171 |     "    return nothing\n",
1172 |     "end\n",
1173 |     "impose_constraint!(b, e, y, i::Integer, X, lam_constr::Nothing, forwhat::Symbol) = nothing"
1174 |    ]
1175 |   },
1176 |   {
1177 |    "cell_type": "code",
1178 |    "execution_count": null,
1179 |    "metadata": {},
1180 |    "outputs": [],
1181 |    "source": [
1182 |     "function standardize_constraint!(lam_constr::LambdaConstraint, xdatastd)\n",
1183 |     "    lam_constr.r_std .= lam_constr.r ./ xdatastd[lam_constr.indices]\n",
1184 |     "    return nothing\n",
1185 |     "end\n",
1186 |     "standardize_constraint!(lam_constr::Nothing, xdatastd) = nothing"
1187 |    ]
1188 |   }
1189 |  ],
1190 |  "metadata": {
1191 |   "hide_input": false,
1192 |   "kernelspec": {
1193 |    "display_name": "Julia 1.0.2",
1194 |    "language": "julia",
1195 |    "name": "julia-1.0"
1196 |   },
1197 |   "language_info": {
1198 |    "file_extension": ".jl",
1199 |    "mimetype": "application/julia",
1200 |    "name": "julia",
1201 |    "version": "1.0.2"
1202 |   },
1203 |   "toc": {
1204 |    "colors": {
1205 |     "hover_highlight": "#DAA520",
1206 |     "running_highlight": "#FF0000",
1207 |     "selected_highlight": "#FFD700"
1208 |    },
1209 |    "moveMenuLeft": true,
1210 |    "nav_menu": {
1211 |     "height": "334.617px",
1212 |     "width": "252.233px"
1213 |    },
1214 |    "navigate_menu": true,
1215 |    "number_sections": true,
1216 |    "sideBar": true,
1217 |    "threshold": 4,
1218 |    "toc_cell": false,
1219 |    "toc_section_display": "block",
1220 |    "toc_window_display": false
1221 |   }
1222 |  },
1223 |  "nbformat": 4,
1224 |  "nbformat_minor": 2
1225 | }
1226 | 


--------------------------------------------------------------------------------
/readin_functions.jl:
--------------------------------------------------------------------------------
  1 | using ExcelReaders
  2 | using Statistics
  3 | import Dates
  4 | using StatsBase
  5 | using LinearAlgebra
  6 | 
  7 | abstract type DataFrequency end
  8 | struct MonthlyData <: DataFrequency
  9 |     nobs::Int # number of observations
 10 |     ns::Int # number of series
 11 | end
 12 | 
 13 | struct QuarterlyData <: DataFrequency
 14 |     nobs::Int # number of observations
 15 |     ns::Int # number of series
 16 | end
 17 | 
 18 | abstract type DetrendMethod end
 19 | struct BiWeight{TR <: Real} <: DetrendMethod
 20 |     weight::TR
 21 | end
 22 | struct Mean <: DetrendMethod end
 23 | struct NoDetrend <: DetrendMethod end
 24 | 
 25 | get_sample_periods(initvec::Array, lastvec::Array, ::MonthlyData) =
 26 |     12*(lastvec[1]-initvec[1]-1)+lastvec[2]+(12-initvec[2]+1)
 27 | get_sample_periods(initvec::Array, lastvec::Array, ::QuarterlyData) =
 28 |     4*(lastvec[1]-initvec[1]-1)+lastvec[2]+(4-initvec[2]+1)
 29 | function MonthlyData(initvec, lastvec, ns)
 30 |     nobs = 12*(lastvec[1]-initvec[1]-1)+lastvec[2]+(12-initvec[2]+1)
 31 |     return MonthlyData(nobs, ns)
 32 | end
 33 | function QuarterlyData(initvec, lastvec, ns)
 34 |     nobs = 4*(lastvec[1]-initvec[1]-1)+lastvec[2]+(4-initvec[2]+1)
 35 |     return QuarterlyData(nobs, ns)
 36 | end
 37 | """
 38 | deflate series by PCE
 39 | """
 40 | function deflate_series!(labvec_long::AbstractVector, labvec_short::AbstractVector,
 41 |                         x::AbstractVector, price_def::AbstractVector,
 42 |                         price_def_lfe::AbstractVector, price_def_pgdp,
 43 |                         is::Integer, defcode::Val{1})
 44 |     x .= x./price_def
 45 |     labvec_long[is] = labvec_long[is] * " Defl by PCE Def"
 46 |     labvec_short[is] = "Real_" * labvec_short[is]
 47 |     return x
 48 | end
 49 | """
 50 | deflate series by PCE(LFE)
 51 | """
 52 | function deflate_series!(labvec_long::AbstractVector, labvec_short::AbstractVector,
 53 |                          x::AbstractVector, price_def::AbstractVector,
 54 |                          price_def_lfe::AbstractVector, price_def_pgdp,
 55 |                          is::Integer, defcode::Val{2})
 56 |     x .= x./price_def_lfe
 57 |     labvec_long[is] = labvec_long[is] * " Defl by PCE(LFE) Def"
 58 |     labvec_short[is] = "Real_" * labvec_short[is]
 59 |     return x
 60 | end
 61 | """
 62 | deflate series by GDP
 63 | """
 64 | function deflate_series!(labvec_long::AbstractVector, labvec_short::AbstractVector,
 65 |                          x::AbstractVector, price_def::AbstractVector,
 66 |                          price_def_lfe::AbstractVector, price_def_pgdp,
 67 |                          is::Integer, defcode::Val{3})
 68 |     x .= x./price_def_pgdp
 69 |     labvec_long[is] = labvec_long[is] * " Defl by GDP Def"
 70 |     labvec_short[is] = "Real_" * labvec_short[is]
 71 |     return x
 72 | end
 73 | deflate_series!(labvec_long::AbstractVector, labvec_short::AbstractVector,
 74 |                 x::AbstractVector, price_def::AbstractVector,
 75 |                 price_def_lfe::AbstractVector, price_def_pgdp,
 76 |                 is::Integer, defcode::Val{0}) = x
 77 | """
 78 | transform monthly data to quarterly data by averaging
 79 | ##### Arguments
 80 | - `data_m::AbstractArray`: monthly data. `size(y, 1) == length(date)`
 81 | - `date_m::AbstractVector{Date}`: Vector of Date.
 82 | """
 83 | function monthly_to_quarterly(data_m::AbstractArray, date_m::AbstractVector{Dates.Date}, ::MonthlyData)
 84 |     if size(data_m, 1) != length(date_m)
 85 |         @error "number of rows of data_m must be same as the length of date"
 86 |     end
 87 |     years = Dates.year.(date_m)
 88 |     date_m_quarter = transform_date.(date_m)
 89 |     T_q = length(unique(date_m_quarter))
 90 |     data_q = Array{Union{Missing, Float64}}(undef, T_q, size(data_m, 2))
 91 |     for t = 1:T_q
 92 |         id = (date_m_quarter .== [unique(date_m_quarter)[t]])
 93 |         data_q[t, :] = mean(data_m[id, :], dims=1)
 94 |     end
 95 |     return data_q, unique(date_m_quarter)
 96 | end
 97 | function monthly_to_quarterly(data_q::AbstractArray, date_q::AbstractVector{Dates.Date}, ::QuarterlyData)
 98 |     date_q_quarter = transform_date.(date_q)
 99 |     return data_q, date_q_quarter
100 | end
101 | transform_date(date::Dates.Date) = (Dates.year(date), get_quarter(date))
102 | get_quarter(date::Dates.Date) = cld(Dates.month(date), 3)
103 | 
104 | # Level
105 | transform(x::AbstractVector, ::Val{1}) = x
106 | # First Difference
107 | transform(x::AbstractVector, ::Val{2}) = vcat(missing, x[2:end].-x[1:end-1])
108 | # Second Difference
109 | transform(x::AbstractVector, ::Val{3}) = vcat(missing, missing, x[3:end]-2*x[2:end-1].+x[1:end-2])
110 | # Log-level
111 | transform(x::AbstractVector, ::Val{4}) = log.(x)
112 | # Log-First-Difference
113 | transform(x::AbstractVector, ::Val{5}) = transform(log.(x), Val(2))
114 | # Log-Second-Difference
115 | transform(x::AbstractVector, ::Val{6}) = transform(log.(x), Val(3))
116 | 
117 | function transform!(data::AbstractMatrix, tcode::AbstractVector)
118 |     if size(data, 2) != length(tcode)
119 |         @error "size(data, 2) must equal to length(tcode)"
120 |     end
121 |     for i = 1:size(data, 2)
122 |         data[:, i] = transform(data[:, i], Val(tcode[i]))
123 |     end
124 |     return nothing
125 | end
126 | # Threshold multiple for IQR is 4.5
127 | adjust_outlier!(x::AbstractVector, ::Val{1}, io_method::Integer) =
128 |     adjust_outlier!(x, 4.5, io_method)
129 | # Threshold multiple for IQR is 3
130 | adjust_outlier!(x::AbstractVector, ::Val{2}, io_method::Integer) =
131 |     adjust_outlier!(x, 3, io_method)
132 | adjust_outlier!(x::AbstractVector, ::Val{0}, io_method::Integer) = nothing
133 | 
134 | function adjust_outlier!(x::AbstractVector, thr::Real, tflag::Integer)
135 |     # compute median and IQR
136 |     zm = median(skipmissing(x))
137 |     iqr = quantile(skipmissing(x), 0.75) - quantile(skipmissing(x), 0.25)
138 |     z = x[.!ismissing.(x)]
139 |     # sort!(z)
140 |     # tmp = z[ceil.(Int, length(z)*[1/4, 1/2, 3/4])]
141 |     # zm = tmp[2]
142 |     # iqr = tmp[3]-tmp[1]
143 |     (iqr >= 1e-6) || @error "error in adjusting outlier"
144 | 
145 |     ya = abs.(x .- zm)
146 |     i_outlier = ya .> (thr*iqr)
147 |     adjust_x!(x, i_outlier, zm, thr, iqr, Val(tflag))
148 |     return nothing
149 | end
150 | """
151 | replace with missing value
152 | """
153 | function adjust_x!(x::AbstractVector, i_outlier, zm, thr, iqr, ::Val{0})
154 |      x[i_outlier] = missing
155 |      return nothing
156 | end
157 | """
158 | replace with maximum or minimum value
159 | """
160 | function adjust_x!(x, i_outlier, zm, thr, iqr, ::Val{1})
161 |     isign = Int.(x[i_outlier] .> 0) - Int.(x[i_outlier] .< 0)
162 |     yt = zm .+ isign.*(thr*iqr)
163 |     x[i_outlier] = yt
164 |     return nothing
165 | end
166 | """
167 | replace with median value
168 | """
169 | function adjust_x!(x, i_outlier, zm, thr, iqr, ::Val{2})
170 |     x[i_outlier] = zm
171 |     return nothing
172 | end
173 | """
174 | replace with local median (obs + or - 3 on each side)
175 | """
176 | function adjust_x!(x, i_outlier, zm, thr, iqr, ::Val{3})
177 |     # Compute rolling median
178 |     iwin = 3  # Window on either side
179 |     for i in findall(i_outlier)
180 |         j1 = max(1, i-iwin)
181 |         j2 = min(length(x), i+iwin)
182 |         x[i] = median(skipmissing(x[j1:j2]))
183 |     end
184 |     return nothing
185 | end
186 | """
187 | replace with one-sided median (5 preceding obs)
188 | """
189 | function adjust_x!(x, i_outlier, zm, thr, iqr, ::Val{4})
190 |     # Compute rolling median
191 |     iwin=5  # Window on one side
192 |     for i in findall(i_outlier .& .!ismissing.(i_outlier))
193 |         j1 = max(1, i-iwin)
194 |         j2 = i
195 |         x[i] = median(skipmissing(x[j1:j2]))
196 |     end
197 |     return nothing
198 | end
199 | 
200 | readin_data(frequency::MonthlyData, datatype) =
201 |     readin_monthly_data(true, 4, 2, 6, [1, 2, 3, 5], frequency, datatype)
202 | readin_data(frequency::QuarterlyData, datatype) =
203 |     readin_monthly_data(true, 4, 2, 5, [1, 2, 3, 5], frequency, datatype)
204 | readdata(::MonthlyData) = readxlsheet("data/hom_fac_1.xlsx", "Monthly")
205 | readdata(::QuarterlyData) = readxlsheet("data/hom_fac_1.xlsx", "Quarterly")
206 | function readin_monthly_data(correct_outlier::Bool,
207 |                              io_method::Integer, # Replacement of outliers,
208 |                              ndesc::Integer,     # number of "description" rows in Excel file
209 |                              ncodes::Integer,    # number of rows of "codes" in Excel file
210 |                              cat_include::AbstractVector,
211 |                              frequency::DataFrequency,
212 |                              datatype::Symbol
213 |                              )
214 |     dnobs = frequency.nobs
215 |     ns_m = frequency.ns
216 |     ## read monthly data
217 |     wholedata = readdata(frequency)
218 | 
219 |     maindata = wholedata[1:1+ndesc+ncodes+dnobs, 2:ns_m+1]
220 |     date = Dates.Date.(wholedata[1+ndesc+ncodes+1:1+ndesc+ncodes+dnobs, 1])
221 | 
222 |     namevec, labvec_long, labvec_short, aggcode, tcode, defcode,
223 |            outliercode, includecode, catcode = get_headers(maindata, frequency)
224 |     datamat_m = maindata[1+ndesc+ncodes+1:end, :]
225 |     datamat_m[.!isa.(datamat_m, Float64)] .= missing # set missing values
226 |     datamat_m = convert(Array{Union{Missing, Float64}, 2}, datamat_m)
227 | 
228 |     # Price Deflators
229 |     price_def, price_def_lfe, price_def_pgdp =
230 |         get_deflators(namevec, datamat_m, frequency)
231 | 
232 |     # Standardize Killian Activity Index
233 |     standardize_killian!(datamat_m, namevec, frequency)
234 | 
235 |     usedcols_id = (includecode .!=0) .& [in(cat, cat_include) for cat in floor.(catcode)]
236 |     usedcols_id = get_usedcols_id(includecode, catcode, cat_include, Val(datatype))
237 |     data_m = datamat_m[:, usedcols_id]
238 |     bpdefcode = defcode[usedcols_id]
239 |     bpoutlier = outliercode[usedcols_id]
240 |     bplab_long = labvec_long[usedcols_id]
241 |     bplab_short = labvec_short[usedcols_id]
242 |     ns = size(data_m, 2)
243 | 
244 |     [deflate_series!(bplab_long, bplab_short, view(data_m, :, i),
245 |                      price_def, price_def_lfe, price_def_pgdp, i, Val(bpdefcode[i])) for i =1:ns]
246 |     data_q, date_q = monthly_to_quarterly(data_m, date, frequency)  # Temporally aggregated to quarterly
247 |     bpdata_raw = copy(data_q)
248 |     transform!(data_q, tcode[usedcols_id]) # Transform .. log, first difference, etc.
249 |     bpdata_noa = copy(data_q)
250 |     !correct_outlier || [adjust_outlier!(view(data_q, :, i), Val(bpoutlier[i]), io_method) for i = 1:ns]
251 |     return data_q, bpdata_raw, bpdata_noa, date_q, catcode[usedcols_id],
252 |            includecode[usedcols_id], namevec[usedcols_id]
253 | end
254 | get_usedcols_id(includecode, catcode, cat_include, ::Val{:Real}) =
255 |      (includecode .!=0) .& [in(cat, cat_include) for cat in floor.(catcode)]
256 | get_usedcols_id(includecode, catcode, cat_include, ::Val{:All}) = includecode .!=0
257 | 
258 | function get_headers(maindata, ::MonthlyData)
259 |     namevec = uppercase.(convert(Vector{String}, maindata[1, :]))
260 |     labvec_long = convert(Vector{String}, maindata[2, :])   # Vector of "long" labels
261 |     labvec_short = convert(Vector{String}, maindata[3, :])  # Vector of "short" labels
262 |     aggcode = Int.(maindata[4, :]) # Temporal aggregation code
263 |     tcode = Int.(maindata[5, :])   # transformation code
264 |     defcode = Int.(maindata[6, :]) # code for price deflation (nominal to real)
265 |     outliercode = Int.(maindata[7, :]) # code for outlier adjustment
266 |     includecode = Int.(maindata[8, :]) # code for use in factor estimation
267 |     catcode = maindata[9, :]     # category code for ordering variables
268 |     return namevec, labvec_long, labvec_short, aggcode, tcode, defcode,
269 |            outliercode, includecode, catcode
270 | end
271 | 
272 | function get_headers(maindata, ::QuarterlyData)
273 |     namevec = uppercase.(convert(Vector{String}, maindata[1, :]))
274 |     labvec_long = convert(Vector{String}, maindata[2, :])   # Vector of "long" labels
275 |     labvec_short = convert(Vector{String}, maindata[3, :])  # Vector of "short" labels
276 |     tcode = Int.(maindata[4, :])   # transformation code
277 |     defcode = Int.(maindata[5, :]) # code for price deflation (nominal to real)
278 |     outliercode = Int.(maindata[6, :]) # code for outlier adjustment
279 |     includecode = Int.(maindata[7, :]) # code for use in factor estimation
280 |     catcode = maindata[8, :]     # category code for ordering variables
281 |     return namevec, labvec_long, labvec_short, nothing, tcode, defcode,
282 |            outliercode, includecode, catcode
283 | end
284 | 
285 | function get_deflators(namevec, datamat, ::MonthlyData)
286 |     j = findall(namevec.=="PCEPI")[1] # PCE Price Deflator
287 |     price_def = datamat[:, j]
288 |     j = findall(namevec.=="PCEPILFE")[1] # PCE-xFE Price Deflator
289 |     price_def_lfe = datamat[:, j]
290 |     return price_def, price_def_lfe, NaN
291 | end
292 | 
293 | function get_deflators(namevec, datamat, ::QuarterlyData)
294 |     j = findall(namevec.=="PCECTPI")[1] # PCE Price Deflator
295 |     price_def = datamat[:, j]
296 |     j = findall(namevec.=="JCXFE")[1] # PCE Excl. food and energy
297 |     price_def_lfe = datamat[:, j]
298 |     j = findall(namevec.=="GDPCTPI")[1] # GDP Deflator
299 |     price_def_pgdp = datamat[:, j]
300 |     return price_def, price_def_lfe, price_def_pgdp
301 | end
302 | 
303 | """
304 | Standardize Killian Activity Index
305 | """
306 | function standardize_killian!(datamat_m, namevec, ::MonthlyData)
307 |     j = findall(namevec.=="GLOBAL_ACT")[1] # Killian Index
308 |     tmp = datamat_m[:, j]
309 |     tmp1 = tmp[.!ismissing.(tmp)]
310 |     tmp2 = (tmp1.-mean(tmp1))./std(tmp1)
311 |     datamat_m[.!ismissing.(tmp), j] = tmp2
312 |     return nothing
313 | end
314 | 
315 | standardize_killian!(datamat_m, namevec, ::QuarterlyData) = nothing
316 | 
317 | function detrend_var!(data::AbstractArray, bw::BiWeight)
318 |     trend = Array{Union{Missing, Float64}}(undef, size(data))
319 |     for i = 1:size(data, 2)
320 |         trend[:, i] = bi_weight_filter(data[:, i], bw.weight)
321 |         data[:, i] = data[:, i] - trend[:, i]
322 |     end
323 |     return trend
324 | end
325 | 
326 | 
327 | function detrend_var!(data::AbstractArray, ::Mean)
328 |     trend = repmat(mean(skipmissing(data), dims=1), size(data, 1), 1)
329 |     data .= data .- trend
330 |     return trend
331 | end
332 | 
333 | detrend_var!(data::AbstractArray, ::NoDetrend) = fill(missing, size(data))
334 | 
335 | function bi_weight_filter(y::AbstractVector, weight::Real)
336 |     T = length(y)
337 |     ytrend = Array{Union{Missing, Float64}}(undef, T)
338 |     trend = 1:T
339 |     for t = findall(.!ismissing.(y))
340 |         dt = (trend .- t)/weight
341 |         bw_weight = 15/16*((1 .-dt.^2).^2) # bi-weight
342 |         bw_weight[abs.(dt).>=1] .= 0
343 |         bw_weight_ignore_missing = bw_weight[.!ismissing.(y)]
344 |         bw_weight_ignore_missing = bw_weight_ignore_missing ./ sum(bw_weight_ignore_missing)
345 |         ytrend[t] = dot(bw_weight_ignore_missing, y[.!ismissing.(y)])
346 |     end
347 |     return ytrend
348 | end
349 | 
350 | """
351 | ##### Arguments
352 | - `detrend_method::DetrendMethod`: method for detrending.
353 | - `datatype::Symbol`: type of read data, `:Real` or `:All`
354 | """
355 | function readin_data(md::MonthlyData, qd::QuarterlyData,
356 |                      detrend_method::DetrendMethod, datatype::Symbol)
357 |     data_m, bpdata_raw_m, bpdata_noa_m, date_m, bpcatcode_m, inclcode_m, namevec_m =
358 |         readin_data(md, datatype)
359 | 
360 |     data_q, bpdata_raw_q, bpdata_noa_q, date_q, bpcatcode_q, inclcode_q, namevec_q =
361 |         readin_data(qd, datatype)
362 | 
363 |     all(date_m .== date_q) ||
364 |         @error "inconsistent sample size for monthly and quarterly data"
365 | 
366 |     id = sortperm(vcat(bpcatcode_m, bpcatcode_q))
367 |     bpdata = hcat(data_m, data_q)[:, id]
368 |     bpdata_unfiltered = copy(bpdata)
369 |     bpdata_trend = detrend_var!(bpdata, detrend_method)
370 | 
371 |     dataset = (bpdata_raw = hcat(bpdata_raw_m, bpdata_raw_q)[:, id],
372 |                bpcatcode =vcat(bpcatcode_m, bpcatcode_q)[id],
373 |                bpdata = bpdata,
374 |                bpdata_unfiltered =bpdata_unfiltered,
375 |                bpdata_noa = hcat(bpdata_noa_m, bpdata_noa_q)[:, id],
376 |                bpdata_trend = bpdata_trend,
377 |                inclcode = vcat(inclcode_m, inclcode_q)[id],
378 |                bpnamevec = vcat(namevec_m, namevec_q)[id],
379 |                calvec = calender_value.(date_q),
380 |                calds = date_q)
381 |     return dataset
382 | end
383 | 
384 | calender_value(year::Integer, quarter::Integer) = year+(quarter-1)/4
385 | calender_value(calvec::Tuple{Int, Int}) = calender_value(calvec[1], calvec[2])
386 | 


--------------------------------------------------------------------------------