├── .gitignore
├── Example - MAR Hamilton.ipynb
├── README.md
├── __init__.py
├── example.py
├── mar_c
│   ├── __init__.py
│   ├── hamilton_filter.pyx
│   └── setup.py
├── mar_model.py
└── tests
    ├── __init__.py
    ├── results
    │   ├── __init__.py
    │   └── results_mar.py
    └── test_mar.py
/.gitignore:
--------------------------------------------------------------------------------
# Python
*.pyc

# Cython
*.c
*.so

# IPython Notebooks
.ipynb_checkpoints

# Setup.py
build
dist

# Documentation
docs/source/generated

# Egg metadata
./*.egg-info

# Mac OS X
.DS_Store

# LaTeX metadata
*.aux
*.fdb_latexmk
*.fls
*.idx
*.ilg
*.ind
*.log
*.out
*.synctex.gz
--------------------------------------------------------------------------------
/Example - MAR Hamilton.ipynb:
--------------------------------------------------------------------------------
{
 "metadata": {
  "name": "",
  "signature": "sha256:972bef9696f0dad281db904aac4c6759cda5918b1f43d0795b507dac9c927a9c"
 },
 "nbformat": 3,
 "nbformat_minor": 0,
 "worksheets": [
  {
   "cells": [
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "%matplotlib inline"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "import numpy as np\n",
      "import pandas as pd\n",
      "from mar_model import MAR"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# Model Setup\n",
      "order = 4\n",
      "nstates = 2\n",
      "\n",
      "switch_ar = False\n",
      "switch_sd = False\n",
      "switch_mean = True"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# Load the dataset from Kim and Nelson's website\n",
      "import requests\n",
      "f = requests.get('http://econ.korea.ac.kr/~cjkim/MARKOV/data/gdp4795.prn')\n",
      "\n",
      "# Hamilton's 1989 GNP dataset: Quarterly, 1947.1 - 1995.3\n",
      "data = pd.DataFrame(\n",
      "    [float(line) for line in f.text.split('\n')[:-3]],\n",
      "    index=pd.date_range('1947-01-01', '1995-07-01', freq='QS'),\n",
      "    columns=['gnp']\n",
      ")\n",
      "data['dlgnp'] = np.log(data['gnp']).diff()*100\n",
      "data = data['1952-01-01':'1984-10-01']\n",
      "\n",
      "# NBER recessions\n",
      "from pandas.io.data import DataReader\n",
      "from datetime import datetime\n",
      "usrec = DataReader('USREC', 'fred', start=datetime(1952, 1, 1), end=datetime(1984, 12, 1))"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "mod = MAR(data.dlgnp, order, nstates)\n",
      "\n",
      "# Set the initial probability values\n",
      "# see the file's docstring for more information\n",
      "# In this case, we have the following transition matrix:\n",
      "# | p_11 p_21 |\n",
      "# | p_12 p_22 |\n",
      "# where p_ij is the probability of transitioning from\n",
      "# state i to state j\n",
      "# Then the initial transition probabilities should be\n",
      "# *the first row* of that matrix, i.e. it should be\n",
      "# | p_11 p_21 |\n",
      "init_trans_prob = np.r_[0.75, 0.1]\n",
      "# so I am saying that p_11 = 0.75 and p_22 = 0.9\n",
      "# i.e. there is a 75% probability of staying in state 1\n",
      "# if you are already there and a 90% probability of staying\n",
      "# in state 2 if you are already there.\n",
      "\n",
      "# Set the full initial parameters array\n",
      "initial_params = np.array(np.r_[\n",
      "    np.log(init_trans_prob / (1 - init_trans_prob)), # transition probabilities (transformed)\n",
      "    0, 0, 0, 0, # AR parameters\n",
      "    -np.log(0.8), # standard deviation (transformed)\n",
      "    -0.2, 1.0 # Means\n",
      "])\n",
      "\n",
      "# Estimate the parameters via MLE\n",
      "res = mod.fit(initial_params, method='bfgs');\n",
      "params = res.params\n",
      "\n",
      "# Transform the unconstrained parameters to interpretable parameters\n",
      "final_params = params.copy()\n",
      "final_params[0:2] = np.exp(final_params[0:2]) / (1 + np.exp(final_params[0:2])) # transition probabilities\n",
      "final_params[6] = np.exp(-final_params[6]) # Standard deviation"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "print('Parameter Estimates')\n",
      "print('-------------------')\n",
      "print('Transition probabilities:')\n",
      "print(mod.transition_matrix(final_params[0:2]))\n",
      "print('AR Parameters (non-switching, here)')\n",
      "print(final_params[2:6])\n",
      "print('Standard Deviation (non-switching, here)')\n",
      "print(final_params[6])\n",
      "print('Means (switching)')\n",
      "print('- in state 1:', final_params[7])\n",
      "print('- in state 2:', final_params[8])"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# Filter the data\n",
      "# note that this uses the unconstrained parameters\n",
      "(\n",
      "    marginal_densities, filtered_joint_probabilities,\n",
      "    filtered_joint_probabilities_t1\n",
      ") = mod.filter(params);\n",
      "\n",
      "transitions = mod.separate_params(params)[0]\n",
      "\n",
      "# Smooth the data\n",
      "filtered_marginal_probabilities = mod.marginalize_probabilities(filtered_joint_probabilities[1:])\n",
      "smoothed_marginal_probabilities = mod.smooth(filtered_joint_probabilities, filtered_joint_probabilities_t1, transitions)\n",
      "\n",
      "# Save the data\n",
      "data['filtered'] = np.r_[\n",
      "    [np.NaN]*order,\n",
      "    filtered_marginal_probabilities[:,0]\n",
      "]\n",
      "data['smoothed'] = np.r_[\n",
      "    [np.NaN]*order,\n",
      "    smoothed_marginal_probabilities[:,0]\n",
      "]"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "import matplotlib.pyplot as plt\n",
      "from matplotlib import dates\n",
      "fig = plt.figure(figsize=(9,9))\n",
      "\n",
      "ax = fig.add_subplot(211)\n",
      "ax.fill_between(usrec.index, 0, usrec.USREC, color='gray', alpha=0.3)\n",
      "ax.plot(data.index, data.filtered, 'k')\n",
      "ax.set(\n",
      "    xlim=('1952-04-01', '1984-12-01'),\n",
      "    ylim=(0,1),\n",
      "    title='Filtered probability of a recession (GDP: 1952:II - 1984:IV)'\n",
      ");\n",
      "\n",
      "ax = fig.add_subplot(212)\n",
      "ax.fill_between(usrec.index, 0, usrec.USREC, color='gray', alpha=0.3)\n",
      "ax.plot(data.index, data.smoothed, 'k')\n",
      "ax.set(\n",
      "    xlim=('1952-04-01', '1984-12-01'),\n",
      "    ylim=(0,1),\n",
      "    title='Smoothed probability of a recession (GDP: 1952:II - 1984:IV)'\n",
      ");"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    }
   ],
   "metadata": {}
  }
 ]
}
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
Markov Switching Models for Statsmodels
=======================================

**Note**: this code has been superseded by a rewritten Markov switching package implemented in Statsmodels. It will be available in Statsmodels version 0.8, or it can be used by installing the development version of Statsmodels. See the [pull request](https://github.com/statsmodels/statsmodels/pull/2980).

Installation
------------

1. Place in your project folder.
2. Compile the Cython filter using

        python setup.py build_ext --inplace
--------------------------------------------------------------------------------
/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ChadFulton/pymar/4268cbe1b5aeaeede7b3d3dc6e1439a21accfae1/__init__.py
--------------------------------------------------------------------------------
/example.py:
--------------------------------------------------------------------------------
"""
Example - MAR Hamilton
"""
import numpy as np
import pandas as pd
from mar_model import MAR

# Model Setup
order = 4
nstates = 2

switch_ar = False
switch_sd = False
switch_mean = True

# Load the dataset from Kim and Nelson's website
import requests
f = requests.get('http://econ.korea.ac.kr/~cjkim/MARKOV/data/gdp4795.prn')

# Hamilton's 1989 GNP dataset: Quarterly, 1947.1 - 1995.3
data = pd.DataFrame(
    [float(line) for line in f.text.split('\n')[:-3]],
    index=pd.date_range('1947-01-01', '1995-07-01', freq='QS'),
    columns=['gnp']
)
data['dlgnp'] = np.log(data['gnp']).diff()*100
data = data['1952-01-01':'1984-10-01']

# NBER recessions
from pandas.io.data import DataReader
from datetime import datetime
usrec = DataReader('USREC', 'fred', start=datetime(1952, 1, 1), end=datetime(1984, 12, 1))

# Setup the model
mod = MAR(data.dlgnp, order, nstates)

# Set the initial probability values
# see the file's docstring for more information
# In this case, we have the following transition matrix:
# | p_11 p_21 |
# | p_12 p_22 |
# where p_ij is the probability of transitioning from
# state i to state j
# Then the initial transition probabilities should be
# *the first row* of that matrix, i.e. it should be
# | p_11 p_21 |
init_trans_prob = np.r_[0.75, 0.1]
# so I am saying that p_11 = 0.75 and p_22 = 0.9
# i.e. there is a 75% probability of staying in state 1
# if you are already there and a 90% probability of staying
# in state 2 if you are already there.
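
# As a quick sanity check (illustrative only; `P_check` is not used by the
# model), build the implied left-stochastic transition matrix and verify
# that its columns sum to one:
p11, p21 = init_trans_prob
P_check = np.array([[p11,     p21    ],
                    [1 - p11, 1 - p21]])
assert np.allclose(P_check.sum(axis=0), 1)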

# Set the full initial parameters array
initial_params = np.array(np.r_[
    np.log(init_trans_prob / (1 - init_trans_prob)), # transition probabilities (transformed)
    0, 0, 0, 0, # AR parameters
    -np.log(0.8), # Standard Deviation (transformed)
    -0.2, 1.0 # Means
])

# Estimate the parameters via MLE
res = mod.fit(initial_params, method='bfgs');
params = res.params

# Transform the unconstrained parameters to interpretable parameters
final_params = params.copy()
final_params[0:2] = np.exp(final_params[0:2]) / (1 + np.exp(final_params[0:2])) # transition probabilities
final_params[6] = np.exp(-final_params[6]) # Standard deviation

print('Parameter Estimates')
print('-------------------')
print('Transition probabilities:')
print(mod.transition_matrix(final_params[0:2]))
print('AR Parameters (non-switching, here)')
print(final_params[2:6])
print('Standard Deviation (non-switching, here)')
print(final_params[6])
print('Means (switching)')
print('- in state 1:', final_params[7])
print('- in state 2:', final_params[8])

# Filter the data
(
    marginal_densities, filtered_joint_probabilities,
    filtered_joint_probabilities_t1
) = mod.filter(params);

transitions = mod.separate_params(params)[0]

# Smooth the data
filtered_marginal_probabilities = mod.marginalize_probabilities(filtered_joint_probabilities[1:])
smoothed_marginal_probabilities = mod.smooth(filtered_joint_probabilities, filtered_joint_probabilities_t1, transitions)

# Save the data
data['filtered'] = np.r_[
    [np.NaN]*order,
    filtered_marginal_probabilities[:,0]
]
data['smoothed'] = np.r_[
    [np.NaN]*order,
    smoothed_marginal_probabilities[:,0]
]

# Plots
import matplotlib.pyplot as plt
from matplotlib import dates
fig = plt.figure(figsize=(9,9))

ax = fig.add_subplot(211)
ax.fill_between(usrec.index, 0, usrec.USREC, color='gray', alpha=0.3)
ax.plot(data.index, data.filtered, 'k')
ax.set(
    xlim=('1952-04-01', '1984-12-01'),
    ylim=(0,1),
    title='Filtered probability of a recession (GDP: 1952:II - 1984:IV)'
);

ax = fig.add_subplot(212)
ax.fill_between(usrec.index, 0, usrec.USREC, color='gray', alpha=0.3)
ax.plot(data.index, data.smoothed, 'k')
ax.set(
    xlim=('1952-04-01', '1984-12-01'),
    ylim=(0,1),
    title='Smoothed probability of a recession (GDP: 1952:II - 1984:IV)'
);

fig.savefig('mar_hamilton.png')
--------------------------------------------------------------------------------
/mar_c/__init__.py:
--------------------------------------------------------------------------------
from .hamilton_filter import hamilton_filter, tvtp_transition_vectors_left, tvtp_transition_vectors_right, marginal_conditional_densities
--------------------------------------------------------------------------------
/mar_c/hamilton_filter.pyx:
--------------------------------------------------------------------------------
import numpy as np
cimport numpy as cnp
cimport cython
DTYPE = np.float64
ctypedef cnp.float64_t dtype_t

cdef extern from "math.h":
    double log(double x)
cdef extern from "math.h":
    double exp(double x)
cdef extern from "math.h":
    double sqrt(double x)
cdef extern from "math.h":
    double M_PI

@cython.boundscheck(False)
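# A sketch of the recursion implemented below (in the notation of Hamilton
# (1989) / Kim and Nelson (1999); Y_{t-1} denotes data through time t-1):
#
#   Prediction:  P(S_t,...,S_{t-k} | Y_{t-1})
#                  = P(S_t | S_{t-1}) * P(S_{t-1},...,S_{t-k} | Y_{t-1})
#   Likelihood:  f(y_t | Y_{t-1})
#                  = sum over state sequences of
#                    f(y_t | S_t,...,S_{t-k}, Y_{t-1}) * P(S_t,...,S_{t-k} | Y_{t-1})
#   Update:      P(S_t,...,S_{t-k} | Y_t)
#                  = f(y_t | S_t,...,S_{t-k}, Y_{t-1}) * P(S_t,...,S_{t-k} | Y_{t-1})
#                    / f(y_t | Y_{t-1}),
#                and the oldest state is then summed out before the next step.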
@cython.wraparound(False)
def hamilton_filter(int nobs,
                    int nstates,
                    int order,
                    cnp.ndarray[dtype_t, ndim = 2] transition_vectors not None,
                    cnp.ndarray[dtype_t, ndim = 2, mode='c'] joint_probabilities not None,
                    cnp.ndarray[dtype_t, ndim = 2, mode='c'] marginal_conditional_densities not None):

    if order == 0:
        return hamilton_filter_uncorrelated(nobs, nstates, transition_vectors, joint_probabilities, marginal_conditional_densities)

    cdef cnp.ndarray[dtype_t, ndim = 2] joint_probabilities_t1
    cdef cnp.ndarray[dtype_t, ndim = 1] joint_densities, marginal_densities
    #cdef cnp.ndarray[dtype_t, ndim = 1] _joint_probabilities_t1 #, joint_probabilities_t
    #cdef cnp.ndarray[dtype_t, ndim = 1] _joint_probabilities, _marginal_conditional_densities
    cdef int nstatesk_1, nstatesk, nstatesk1, t, i, j, k, idx
    cdef dtype_t transition

    nstatesk_1 = nstates**(order-1)
    nstatesk = nstates**order
    nstatesk1 = nstates**(order+1)

    joint_probabilities_t1 = np.zeros((nobs, nstatesk1))
    joint_densities = np.zeros((nstatesk1,))
    marginal_densities = np.zeros((nobs,))
    #_joint_probabilities_t1 = np.zeros((nstatesk1,))
    #joint_probabilities_t = np.zeros((nstatesk1,))
    #_joint_probabilities = np.zeros((nstatesk,))
    #_marginal_conditional_densities = np.zeros((nstatesk1,))

    for t in range(1, nobs+1):
        #_joint_probabilities = joint_probabilities[t-1]
        #_marginal_conditional_densities = marginal_conditional_densities[t-1]
        idx = 0
        for i in range(nstates):
            for j in range(nstates):
                transition = transition_vectors[t-1, i*nstates + j]
                for k in range(nstatesk_1):
                    joint_probabilities_t1[t-1, idx] = transition * joint_probabilities[t-1, j*nstatesk_1 + k]
                    joint_densities[idx] = joint_probabilities_t1[t-1, idx] * marginal_conditional_densities[t-1, idx]
                    marginal_densities[t-1] += joint_densities[idx]
                    idx += 1
        #joint_probabilities_t1[t-1] = _joint_probabilities_t1

        #joint_probabilities_t1 = (
        #    np.repeat(transition_vectors[t], nstates**(order-1)) *
        #    np.tile(joint_probabilities[t-1], nstates)
        #)

        #joint_densities = np.multiply(
        #    marginal_conditional_densities[t-1], joint_probabilities_t1
        #)

        #for i in range(nstatesk1):
        #    marginal_densities[t-1] += joint_densities[i]
        #marginal_densities[t-1] = np.sum(joint_densities)

        #for i in range(nstatesk1):
        #    joint_probabilities_t[i] = joint_densities[i] / marginal_densities[t-1]

        #joint_probabilities_t = joint_densities / marginal_densities[t-1]

        for i in range(nstatesk):
            #_joint_probabilities[i] = 0
            idx = i*nstates
            for j in range(nstates):
                #joint_probabilities_t[idx+j] = joint_densities[idx+j] / marginal_densities[t-1]
                joint_probabilities[t, i] += joint_densities[idx+j] / marginal_densities[t-1]
        #joint_probabilities[t] = _joint_probabilities
        #joint_probabilities[t] = joint_probabilities_t.reshape(
        #    (nstates**order, nstates)
        #).sum(1)
    return marginal_densities, joint_probabilities, joint_probabilities_t1

@cython.boundscheck(False)
@cython.wraparound(False)
def hamilton_filter_uncorrelated(int nobs,
                                 int nstates,
                                 cnp.ndarray[dtype_t, ndim = 2] transition_vectors not None,
                                 cnp.ndarray[dtype_t, ndim = 2, mode='c'] joint_probabilities not None,
                                 cnp.ndarray[dtype_t, ndim = 2, mode='c'] marginal_conditional_densities not None):

    cdef cnp.ndarray[dtype_t, ndim = 2] joint_probabilities_t1, marginal_probabilities_t1
    cdef cnp.ndarray[dtype_t, ndim = 1] joint_densities, marginal_densities
    cdef int t, i, j, k, idx
    cdef dtype_t transition

    joint_probabilities_t1 = np.zeros((nobs, nstates**2))
    marginal_probabilities_t1 = np.zeros((nobs, nstates))
    joint_densities = np.zeros((nstates,))
    marginal_densities = np.zeros((nobs,))

    for t in range(1, nobs+1):
        for i in range(nstates): # Range over S_t
            for j in range(nstates): # Range over S_{t-1}
                # This step is what dictates whether transition is in left or right stochastic form
                # Here, i represents the row (S_t = the state to which we're moving),
                # and j represents the column (S_{t-1} = the state from which we're moving)
                # Thus the vector needs to be of the form:
                # [P11 P12 ... P1M P21 ... P2M ... PMM ]
                transition = transition_vectors[t-1, i*nstates + j]
                joint_probabilities_t1[t-1, i*nstates + j] = transition * joint_probabilities[t-1, j]
                marginal_probabilities_t1[t-1, i] += joint_probabilities_t1[t-1, i*nstates + j]
            joint_densities[i] = marginal_probabilities_t1[t-1, i] * marginal_conditional_densities[t-1, i]
            marginal_densities[t-1] += joint_densities[i]

        joint_probabilities[t] = joint_densities / marginal_densities[t-1]
    return marginal_densities, joint_probabilities, joint_probabilities_t1

def tvtp_transition_vectors_right(int nobs,
                                  int nstates,
                                  int tvtp_order,
                                  cnp.ndarray[dtype_t, ndim = 2] transitions, # nstates * (nstates-1) x tvtp_order
                                  cnp.ndarray[dtype_t, ndim = 2, mode='c'] exog): # t+1 x tvtp_order
    cdef int n, t, i, j, k, idx
    cpdef dtype_t transition, colsum
    cdef cnp.ndarray[dtype_t, ndim = 2] transition_vectors

    transition_vectors = np.zeros((nobs+1, nstates**2))

    for t in range(nobs+1):
        for i in range(nstates): # iterate over "columns" in the transition matrix
            colsum = 0
            for j in range(nstates-1): # iterate all but last "row" in the transition matrix
                transition = 0
                for k in range(tvtp_order):
                    transition += exog[t,k] * transitions[i*(nstates-1)+j, k]
                transition = exp(transition)
                transition_vectors[t, i + j*nstates] = transition
                colsum += transition
            # iterate over all but the last "row" again, now that we have all
            # of the values
            for j in range(nstates-1):
                transition_vectors[t, i + j*nstates] /= (1 + colsum)
            # Add in last row
            transition_vectors[t,i + (nstates-1)*nstates] = 1 - (colsum / (1 + colsum))

    return transition_vectors

def tvtp_transition_vectors_left(int nobs,
                                 int nstates,
                                 int tvtp_order,
                                 cnp.ndarray[dtype_t, ndim = 2] transitions, # nstates * (nstates-1) x tvtp_order
                                 cnp.ndarray[dtype_t, ndim = 2, mode='c'] exog): # t+1 x tvtp_order
    cdef int n, t, i, j, k, idx
    cpdef dtype_t transition, colsum
    cdef cnp.ndarray[dtype_t, ndim = 2] transition_vectors

    transition_vectors = np.zeros((nobs+1, nstates**2))

    for t in range(nobs+1):
        idx = 0
        for i in range(nstates): # iterate over "columns" in the transition matrix
            colsum = 0
            for j in range(nstates-1): # iterate all but last "row" in the transition matrix
                transition = 0
                for k in range(tvtp_order):
                    transition += exog[t,k] * transitions[i*(nstates-1)+j, k]
                transition = exp(transition)
                transition_vectors[t, idx + j] = transition
                colsum += transition
            # iterate over all but the last "row" again, now that we have all
            # of the values
            for j in range(nstates-1):
                transition_vectors[t, idx + j] /= (1 + colsum)
            idx += nstates - 1
            # Add in last row
            transition_vectors[t,idx] = 1 - (colsum / (1 + colsum))
            idx += 1

    return transition_vectors

def marginal_conditional_densities(int nobs,
                                   int nstates,
                                   int order,
                                   cnp.ndarray[dtype_t, ndim=2] params,
                                   cnp.ndarray[dtype_t, ndim=1] stddevs,
                                   cnp.ndarray[dtype_t, ndim=1] means,
                                   cnp.ndarray[dtype_t, ndim=2] augmented):
    cdef int nstatesk, t, i, j, k, idx, idx2, num, state
    cdef dtype_t var, top
    cdef cnp.ndarray[dtype_t, ndim = 1] state_means, variances
    cdef cnp.ndarray[dtype_t, ndim = 2] marginal_conditional_densities

    nstatesk = nstates**order
    marginal_conditional_densities = np.zeros((nobs, nstates**(order+1)))
    variances = stddevs**2

    state_means = np.zeros((order+1,))
    for t in range(nobs):
        idx = 0
        for i in range(nstates):
            var = variances[i]
            for j in range(nstatesk):
                num = idx
                top = 0
                for k in range(order+1):
                    state = num % nstates
                    top += (augmented[t, -(k+1)] - means[state]) * params[i, -(k+1)]
                    num = num // nstates
                marginal_conditional_densities[t, idx] = (
                    (1 / sqrt(2*np.pi*var)) * exp(
                        -( top**2 / (2*var))
                    )
                )
                idx += 1

    return marginal_conditional_densities
--------------------------------------------------------------------------------
/mar_c/setup.py:
--------------------------------------------------------------------------------
import numpy
from distutils.core import setup
from distutils.extension import Extension
from Cython.Distutils import build_ext
from Cython.Build import cythonize
from numpy.distutils.system_info import get_info

ext_modules = [
    Extension("hamilton_filter", ["hamilton_filter.pyx"])
]

setup(
    name = "Hamilton Filter",
    cmdclass = {'build_ext': build_ext},
    ext_modules = cythonize(ext_modules),
    include_dirs=[numpy.get_include()],
)
--------------------------------------------------------------------------------
/mar_model.py:
--------------------------------------------------------------------------------
"""
Markov Autoregressive Models

Author: Chad Fulton
License: BSD

References
----------

Hamilton, James D. 1989.
"A New Approach to the Economic Analysis of
Nonstationary Time Series and the Business Cycle."
Econometrica 57 (2) (March 1): 357-384.

Hamilton, James D. 1994.
Time Series Analysis.
Princeton, N.J.: Princeton University Press.

Kim, Chang-Jin, and Charles R. Nelson. 1999.
"State-Space Models with Regime Switching:
Classical and Gibbs-Sampling Approaches with Applications".
MIT Press Books. The MIT Press.

Notes
-----

Roadmap:
- Correctly estimate covariance matrix
- Add expected regime duration
- Add results class
- Add plotting capabilities (e.g. for showing probabilities of each regime at
  time t - can do a line plot for M=2, otherwise an area plot)
- Add support for model specification testing (the usual nuisance approach)
- Add support for MS-VAR
- Add support for state-space models
- Add support for the EM algorithm

TODO: Modify tvtp_transition_vectors_left and right to only calculate a single
time if tvtp_order == 1, and then just repeat it nobs+1 times.
Also, refactor to not call it many times for each loglike call.

The MAR model has four types of parameters:
- transition probabilities
- AR parameters
- standard deviation parameters
- mean parameters

The standard case is the assumption of fixed transition probabilities. In this
case, there are nstates * (nstates - 1) parameters to be estimated, and
nstates^2 parameters used in the model. See below for more details.
If the transition probabilities are allowed to change over time, it is called a
Time-Varying Transition Probabilities (TVTP) Markov-Switching Model. In this
case, an additional exogenous matrix must be supplied that is assumed to
determine the transition probabilities at each point in time. With k exogenous
variables (plus an automatically added constant), the number of parameters to
be estimated is (k+1) * nstates * (nstates-1), and nstates^2 transition
probabilities are implied in the model at each point in time.

The AR, standard deviation, and mean parameters may be allowed to differ
across states, or may be restricted to be the same.
If the AR parameters are allowed to differ, there are `order`*`nstates`
parameters to be estimated and used in the model. If they are not, then there
are `order` parameters.
If the standard deviation (or the mean) parameter is allowed to differ,
there are `nstates` standard deviation (or mean) parameters to estimate and use
in the model, otherwise there is only 1.

Parameters are used in two ways:

(1) Optimization: the optimization routine requires a flat array of parameters
    where each parameter can range over (-Inf, Inf), and it adjusts each
    parameter while finding the values that optimize the objective function,
    here the log likelihood. Thus if there are M states with regime
    homoskedasticity, there must be only a single standard deviation parameter
    in the array passed to the optimizer. If there are M states with regime
    heteroskedasticity, there must be M standard deviation parameters in the
    array.
    These are the parameters passed to the MAR.loglike() method.
(2) Initializing the filter: the parameters selected by the optimizer at each
    iteration are then used to calculate the inputs to the filtering routines
    (i.e. joint_probabilities and marginal_conditional_densities). For this,
    they need to be (a) transformed to their actual ranges (e.g. probabilities
    to lie within [0,1]) and (b) expanded to the full state range. In the
    regime homoskedasticity example above, the single standard deviation
    parameter must be expanded so that there is one parameter per regime. In
    this case, each regime's standard deviation parameter would be identical.
    These are the parameters passed to the MAR.filter() and
    MAR.initialize_filter() methods.
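
For a concrete illustration (the Hamilton (1989) setup estimated in
example.py: M=2 states, AR order 4, switching means only, no TVTP), the
optimizer sees 2 + 4 + 1 + 2 = 9 parameters while the model itself uses
2 + 8 + 2 + 2 = 14; an illustrative session:

    >>> mod = MAR(data.dlgnp, 4, 2)  # switching means by default
    >>> mod.nparams        # transition + AR + std. dev. + mean parameters
    9
    >>> mod.nparams_full   # shared AR / std. dev. values tiled per regime
    14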
88 | 89 | To achieve this, several helper methods are employed: 90 | - MAR.expand_params() 91 | - Takes an array of parameters from the optimizer, and returns an expanded 92 | array of parameters suitable for use in the model. 93 | - (If not TVTP) Expands the probability vector into a transition vector 94 | - Expands restrictions (e.g. if parameters are restricted to not change, it 95 | expands the single set of `order` parameters to `nstates`*`order` 96 | parameters). 97 | - Always returns `nparams` parameters. 98 | - MAR.contract_params() 99 | - Takes an array of parameters suitable for use in the model, and returns a 100 | contracted array of parameters to be passed to the optimizer. 101 | - MAR.fuse_params() 102 | - Takes each set of parameters separately and fuses them into a single array 103 | (used to maintain consistent parameter ordering in e.g. the optimization 104 | setting). 105 | - MAR.separate_params() 106 | - Takes an array of parameters and separates it into the component parts. 107 | - MAR.transform_params() 108 | - Takes an array of parameters (either the contracted or expanded set of 109 | parameters) that are unconstrained (as would be used in the optimizer) and 110 | transforms them to a constrained form suitable for use in the model (e.g. 111 | transforms probabilities from (-Inf, Inf) to [0,1]) 112 | - MAR.untransform_params() 113 | - Takes an array of parameters (either the contracted or expanded set of 114 | parameters) that are constrained (as would be used in the model) and 115 | reverses the transformation to make them be unconstrained (e.g. transforms 116 | probabilities from [0,1] to (-Inf, Inf)) 117 | 118 | The flow of parameters through the model looks like: 119 | 120 | (1) MAR.fit() is called, optionally with the start_params argument. 121 | (2) MAR.loglike() is called by the optimizer, and is passed the contracted, 122 | untransformed (i.e. unconstrained) params. 123 | (3) The parameters are 124 | a. Transformed (i.e. constrained to lie within the actual parameter spaces) 125 | b. Expanded 126 | (4) MAR.initialize_filter() is called with the expanded, transformed 127 | parameters. 128 | 129 | The default functionality 130 | 131 | To allow arbitrary specification of regime-switching for the parameters, 132 | 133 | 134 | Internally, the transition matrix is constructed to be left stochastic, and 135 | the transition vector is created from it by stacking its columns. 136 | 137 | Notes about the (internally used, left stochastic) transition matrix: 138 | The nstates x nstates Markov chain transition matrix. 139 | 140 | The [i,j]th element of the matrix corresponds to the 141 | probability of moving to the i-th state given that you are 142 | in the j-th state. This means that it is the columns that 143 | sum to one, aka that the matrix is left stochastic. 144 | 145 | It looks like: 146 | 147 | | p11 p12 ... p1M | 148 | | p21 . . | 149 | | . . . | 150 | | pM1 ... pMM | 151 | 152 | Here the element pM1 is the probability of moving from the first state 153 | to the M-th state. This representation of the matrix: 154 | - is consistent with usual row / column indexing, but 155 | - inconveniently represents the "from" state as the second index and the 156 | "to" state as the first index. 157 | 158 | Kim and Nelson (1999) represent this same matrix (on p.70) as: 159 | 160 | | p11 p21 ... pM1 | 161 | | p12 . . | 162 | | . . . | 163 | | p1M ... pMM | 164 | 165 | This is the same, identical, matrix, just with a different indexing 166 | convention. 
Here, p1M is the probability of moving from the first state to 167 | the M-th state. This representation of the matrix is: 168 | - inconsitent with usual row / column indexing, but 169 | - conveniently represents the "from" and "to" states as the, respectively, 170 | first and second indices 171 | 172 | Constructing the internally used transition vector (from column stacking) 173 | is easily achieved via: 174 | 175 | P.reshape((1, P.size), order='F') 176 | or 177 | P.reshape(-1, order='F') 178 | or 179 | P.ravel(order='F') 180 | or 181 | P.T.ravel() 182 | etc. 183 | 184 | Two convenience functions to assist with transition matrices are: 185 | 186 | - MAR.transition_vector() accepts a transition matrix and converts it to a 187 | transition vector. The default options create the vector via column 188 | stacking. (Note: this function also may accept a probabilities vector, 189 | which is then converted to a transition vector - see below for details) 190 | 191 | - MAR.transition_matrix() accepts a transition vector and converts it to a 192 | transition matrix. The default options create the matrix by setting the 193 | first M elements of the vector to the first column of the matrix, etc. 194 | 195 | Notes about the (internally used, constructed via column stacking) transition 196 | vector: 197 | 198 | The full (column stacked, M^2 length) transition vector is of the form: 199 | 200 | [ p11, p21, ..., pM1, p12, p22, ..., pM2, ..., p1M, p2M, ..., pMM ] 201 | 202 | This is the version that used internally, which means that it is: 203 | - Returned by mar_c.hamilton_filter() and MAR.hamilton_filter() 204 | - Accepted as an argument by MAR.smooth() 205 | 206 | However, it is not the version that is accepted to externally facing 207 | methods, because it contains redundant information. Each column of the 208 | (left stochastic) transition matrix has M entries, but e.g. the M-th entry 209 | could be left out, and instead calculated as the sum of the first M-1 210 | entries. This is significant because the M-th (or whichever arbitrary 211 | entry is left out) is constrained, and so is not estimated separately from 212 | the other M-1 entries. Thus the optimizer will only optimize over M * (M-1) 213 | probability values. 214 | 215 | Some normalization must be specified, and the convention here is that the 216 | last row of the transition matrix will be left off. This means that from 217 | the full transition vector above, each M-th element must be left off (this 218 | corresponds to eliminating the last row of the transition matrix before 219 | creating the vector by stacking the columns). It is of the form: 220 | 221 | [ p11, p21, ..., p(M-1)1, p12, p22, ..., p(M-1)2, ..., p1M, p2M, ..., p(M-1)M ] 222 | 223 | and the last elements are calculated as: 224 | 225 | PM* = 1 - p1* - p2* - ... - p(M-1)* 226 | 227 | To distinguish between these two, the following terminology will be used: 228 | - `transition_vector` refers to the full transition vector 229 | - `probabilities` refers to the version without each M-th value 230 | 231 | There are convenience functions to assist in moving between these two 232 | representations: 233 | 234 | - probabilities() accepts a transition vector and returns the 235 | corresponding probabilities vector by removing each M-th value 236 | - transition_vector() accepts a probabilities vector and returns the 237 | corresponding transition vector by calculating and adding the M-th values 238 | (this is its behavior if its first argument has ndim=1. 
If the first 239 | argument has ndim=2, then it is assumed to be converting a transition 240 | matrix to a transition vector by column stacking) 241 | 242 | """ 243 | 244 | from __future__ import division 245 | from functools import reduce 246 | import numpy as np 247 | import pandas as pd 248 | import statsmodels.tsa.base.tsa_model as tsbase 249 | import statsmodels.base.model as base 250 | from statsmodels.base import data 251 | from statsmodels.tsa.tsatools import add_constant, lagmat 252 | from statsmodels.regression.linear_model import OLS, OLSResults 253 | from statsmodels.tools.numdiff import approx_fprime 254 | from statsmodels.tools.decorators import (cache_readonly, cache_writable, 255 | resettable_cache) 256 | import statsmodels.base.wrapper as wrap 257 | from scipy import stats 258 | from mar_c import (hamilton_filter, tvtp_transition_vectors_left, 259 | tvtp_transition_vectors_right, 260 | marginal_conditional_densities) 261 | import resource 262 | 263 | class MAR(tsbase.TimeSeriesModel): 264 | """ 265 | "An autoregressive model of order k with first-order , M-state 266 | Markov-switching mean and variance" 267 | 268 | Parameters 269 | ---------- 270 | endog : array-like 271 | The endogenous variable. Assumed not to be in deviation-from-mean form. 272 | order : integer 273 | The order of the autoregressive parameters. 274 | nstates : integer 275 | The number of states in the Markov chain. 276 | switch_ar : boolean, optiona 277 | Whether or not AR parameters are allowed to switch with regimes. 278 | switch_var : boolean, optional 279 | Whether or not the variances are allowed to vary across regimes. 280 | (Regime-specific Heteroskedasticity) 281 | switch_means : boolean, optional 282 | Whether or not the means are allowed to vary across regimes. 283 | tvtp_data : array-like, optional 284 | A vector or matrix of exogenous or lagged variables to use in 285 | calculating time varying transition probabilities (TVTP). TVTP is only 286 | used if this variable is provided. 287 | 288 | References 289 | ---------- 290 | Kim, Chang-Jin, and Charles R. Nelson. 1999. 291 | "State-Space Models with Regime Switching: 292 | Classical and Gibbs-Sampling Approaches with Applications". 293 | MIT Press Books. The MIT Press. 294 | 295 | Notes 296 | ----- 297 | States are zero-indexed. 298 | """ 299 | 300 | def __init__(self, endog, order, nstates, 301 | switch_ar=False, switch_var=False, switch_mean=True, 302 | tvtp_exog=None, 303 | dates=None, freq=None, missing='none'): 304 | 305 | # "Immutable" properties 306 | self.nobs_initial = order 307 | self.nobs = endog.shape[0] - order 308 | self.order = order 309 | self.nstates = nstates 310 | 311 | # Determine switching parameters 312 | 313 | # Transition probabilities 314 | if tvtp_exog is None: 315 | self.tvtp_exog = np.ones((self.nobs + self.nobs_initial + 1, 1)) 316 | else: 317 | self.tvtp_exog = add_constant(tvtp_exog) 318 | self.tvtp_order = self.tvtp_exog.shape[1] 319 | if not self.tvtp_exog.shape[0] == self.nobs + self.nobs_initial + 1: 320 | raise ValueError('Length of exogenous data determining the time' 321 | ' varying transition probabilities must have' 322 | ' length equal to %d: the number of observations' 323 | ' plus one. Got length %d.' 
% 324 | (self.nobs + self.nobs_initial + 1, 325 | self.tvtp_exog.shape[0])) 326 | self.nparams_prob = ( 327 | self.nstates * (self.nstates - 1) * self.tvtp_order 328 | ) 329 | 330 | # AR parameters 331 | if switch_ar == True: 332 | self.nparams_ar = self.nstates*self.order 333 | self.switch_ar = True 334 | self.switch_method_ar = 'all' 335 | elif switch_ar == False: 336 | self.nparams_ar = self.order 337 | self.switch_ar = False 338 | self.switch_method_ar = 'none' 339 | elif isinstance(switch_ar, (list, np.ndarray)): 340 | self.nparams_ar = 0 341 | self.switch_ar = np.asarray(switch_ar) 342 | if not self.switch_ar.shape[0] == nstates: 343 | raise ValueError('Fixed switching definitions for AR' 344 | ' parameters must be an array specifying a' 345 | ' fixed value for each state. Expected length' 346 | ' %d, got length %d.' % 347 | (nstates, self.switch_ar.shape[0])) 348 | self.switch_method_ar = 'fixed' 349 | elif isinstance(switch_ar, tuple) and callable(switch_ar[1]): 350 | self.nparams_ar, self.switch_ar = switch_ar 351 | self.switch_method_ar = 'custom' 352 | else: 353 | raise ValueError('Custom switching definitions for AR' 354 | ' parameters must be an array of fixed values or' 355 | ' must be a tuple with the number of parameters' 356 | ' to estimate as the first value and a callback' 357 | ' as the second value.') 358 | 359 | # Variance parameters 360 | if switch_var == True: 361 | self.nparams_var = self.nstates 362 | self.switch_var = True 363 | self.switch_method_var = 'all' 364 | elif switch_var == False: 365 | self.nparams_var = 1 366 | self.switch_var = False 367 | self.switch_method_var = 'none' 368 | elif isinstance(switch_var, (list, np.ndarray)): 369 | self.nparams_var = 0 370 | self.switch_var = np.asarray(switch_var) 371 | if not self.switch_var.shape[0] == nstates: 372 | raise ValueError('Fixed switching definitions for variance' 373 | ' parameters must be an array specifying a' 374 | ' fixed value for each state. Expected length' 375 | ' %d, got length %d.' % 376 | (nstates, self.switch_var.shape[0])) 377 | self.switch_method_var = 'fixed' 378 | elif isinstance(switch_var, tuple) and callable(switch_var[1]): 379 | self.nparams_var, self.switch_var = switch_var 380 | self.switch_method_var = 'custom' 381 | else: 382 | raise ValueError('Custom switching definitions for variance' 383 | ' parameters must be an array of fixed values or' 384 | ' must be a tuple with the number of parameters' 385 | ' to estimate as the first value and a callback' 386 | ' as the second value.') 387 | 388 | # Mean parameters 389 | if switch_mean == True: 390 | self.nparams_mean = self.nstates 391 | self.switch_mean = True 392 | self.switch_method_mean = 'all' 393 | elif switch_mean == False: 394 | self.nparams_mean = 1 395 | self.switch_mean = False 396 | self.switch_method_mean = 'none' 397 | elif isinstance(switch_mean, (list, np.ndarray)): 398 | self.nparams_mean = 0 399 | self.switch_mean = np.asarray(switch_mean) 400 | if not self.switch_mean.shape[0] == nstates: 401 | raise ValueError('Fixed switching definitions for mean' 402 | ' parameters must be an array specifying a' 403 | ' fixed value for each state. Expected length' 404 | ' %d, got length %d.' 
% 405 | (nstates, self.switch_mean.shape[0])) 406 | self.switch_method_mean = 'fixed' 407 | elif isinstance(switch_mean, tuple) and callable(switch_mean[1]): 408 | self.nparams_mean, self.switch_mean = switch_mean 409 | self.switch_method_mean = 'custom' 410 | else: 411 | raise ValueError('Custom switching definitions for mean' 412 | ' parameters must be an array of fixed values or' 413 | ' must be a tuple with the number of parameters' 414 | ' to estimate as the first value and a callback' 415 | ' as the second value.') 416 | 417 | # The number of parameters used by the optimizer 418 | self.nparams = ( 419 | self.nparams_prob + 420 | self.nparams_ar + 421 | self.nparams_var + 422 | self.nparams_mean 423 | ) 424 | # The number of parameters used by the model 425 | # (not quite right for nparams_prob, in case of TVTP) 426 | self.nparams_prob_full = self.nparams_prob 427 | self.nparams_ar_full = self.order * self.nstates 428 | self.nparams_var_full = self.nstates 429 | self.nparams_mean_full = self.nstates 430 | self.nparams_full = ( 431 | self.nparams_prob_full + 432 | self.nparams_ar_full + 433 | self.nparams_var_full + 434 | self.nparams_mean_full 435 | ) 436 | 437 | # If we got custom (callable) switch functions, test them 438 | test_args = self.separate_params(np.ones((self.nparams,))) 439 | if self.switch_method_ar == 'custom': 440 | test_ar = len(self.switch_ar(*test_args)) 441 | if not test_ar == self.nparams_ar_full: 442 | raise ValueError('Invalid custom switching function for AR' 443 | ' parameters. Must return a vector of length' 444 | ' %d. Got a parameter of length %d.' % 445 | (self.nparams_ar_full, test_ar)) 446 | if self.switch_method_var == 'custom': 447 | test_var = len(self.switch_var(*test_args)) 448 | if not test_var == self.nparams_var_full: 449 | raise ValueError('Invalid custom switching function for' 450 | ' variance parameters. Must return a vector' 451 | ' of length %d. Got a parameter of length' 452 | ' %d.' % (self.nparams_ar_full, test_var)) 453 | if self.switch_method_mean == 'custom': 454 | test_mean = len(self.switch_mean(*test_args)) 455 | if not test_mean == self.nparams_mean_full: 456 | raise ValueError('Invalid custom switching function for mean' 457 | ' parameters. Must return a vector of length' 458 | ' %d. Got a parameter of length %d.' % 459 | (self.nparams_mean_full, test_mean)) 460 | 461 | 462 | # Make a copy of original datasets 463 | orig_endog = endog 464 | 465 | # Create datasets / complete initialization 466 | endog = orig_endog[self.nobs_initial:] 467 | 468 | # Handle exogenous data 469 | if order > 0: 470 | orig_exog = lagmat(orig_endog, order) 471 | exog = orig_exog[self.nobs_initial:] 472 | else: 473 | orig_exog = None 474 | exog = None 475 | 476 | super(MAR, self).__init__(endog, exog, missing=missing) 477 | 478 | # Overwrite originals 479 | self.data.orig_endog = orig_endog 480 | self.data.orig_exog = orig_exog 481 | 482 | # Cache 483 | if exog is not None: 484 | self.augmented = np.c_[endog, exog] 485 | else: 486 | self.augmented = endog.values[:, np.newaxis] 487 | 488 | def expand_params(self, params): 489 | params = np.asarray(params) 490 | # Make sure they're not already expanded 491 | if params.shape == (self.nparams_full,): 492 | return params 493 | elif params.shape != (self.nparams,): 494 | raise ValueError('Unexpected parameter vector shape. Expected %s,' 495 | ' got %s.' 
% ((self.nparams,), params.shape)) 496 | 497 | transitions, ar_params, stddevs, means = self.separate_params(params) 498 | 499 | # Transition probabilities 500 | # (these are expanded later, due to possibility of TVTP) 501 | 502 | # AR parameters 503 | if self.switch_method_ar == 'all': 504 | pass 505 | elif self.switch_method_ar == 'none': 506 | ar_params = np.tile(ar_params, self.nstates) 507 | elif self.switch_method_ar == 'fixed': 508 | ar_params = self.switch_ar 509 | else: 510 | ar_params = self.switch_ar(transitions, ar_params, stddevs, means) 511 | 512 | # Variance parameters 513 | if self.switch_method_var == 'all': 514 | pass 515 | elif self.switch_method_var == 'none': 516 | stddevs = np.tile(stddevs, self.nstates) 517 | elif self.switch_method_var == 'fixed': 518 | stddevs = self.switch_var 519 | else: 520 | stddevs = self.switch_var(transitions, ar_params, stddevs, means) 521 | 522 | # Mean parameters 523 | if self.switch_method_mean == 'all': 524 | pass 525 | elif self.switch_method_mean == 'none': 526 | means = np.tile(means, self.nstates) 527 | elif self.switch_method_mean == 'fixed': 528 | means = self.switch_mean 529 | else: 530 | means = self.switch_mean(transitions, ar_params, stddevs, means) 531 | 532 | return self.fuse_params(transitions, ar_params, stddevs, means) 533 | 534 | def contract_params(self, params): 535 | raise NotImplementedError 536 | 537 | def fuse_params(self, transitions, ar_params, stddevs, means): 538 | """ 539 | Combines the component parameters into a single array. 540 | 541 | Parameters 542 | ---------- 543 | transitions : array-like 544 | A vector of transition probabilities 545 | ar_params : array-like 546 | The AR parameters 547 | stddevs : array-like 548 | The standard deviations for each state 549 | means : array-like 550 | The means for each state 551 | 552 | Returns 553 | ------- 554 | params : array-like 555 | An array of parameters 556 | """ 557 | return np.r_[transitions, ar_params, stddevs, means] 558 | 559 | def separate_params(self, params): 560 | """ 561 | Separates a single array of parameters into the component pieces. 562 | 563 | Parameters 564 | ---------- 565 | params : array-like 566 | An array of parameters 567 | 568 | Returns 569 | ------- 570 | transitions : array-like 571 | A vector of transition probabilities 572 | ar_params : array-like 573 | The AR parameters 574 | stddevs : array-like 575 | The standard deviations for each state 576 | means : array-like 577 | The means for each state 578 | """ 579 | params = np.asarray(params) 580 | 581 | # Separate the parameters 582 | if params.shape == (self.nparams,): 583 | nparams = np.cumsum((self.nparams_prob, self.nparams_ar, 584 | self.nparams_var, self.nparams_mean)) 585 | elif params.shape == (self.nparams_full,): 586 | nparams = np.cumsum((self.nparams_prob_full, self.nparams_ar_full, 587 | self.nparams_var_full, self.nparams_mean_full)) 588 | else: 589 | raise ValueError('Invalid number of parameters. Expected %s or %s,' 590 | ' got %s.' % ((self.nparams,), 591 | (self.nparams_full,), params.shape)) 592 | transitions = params[:nparams[0]] 593 | ar_params = params[nparams[0]:nparams[1]] 594 | stddevs = params[nparams[1]:nparams[2]] 595 | means = params[nparams[2]:] 596 | 597 | return transitions, ar_params, stddevs, means 598 | 599 | def transform_params(self, params, method='logit'): 600 | """ 601 | Transforms a set of unconstrained parameters to a set of contrained 602 | parameters. 
603 | 604 | Optimization methods (e.g scipy.optimize) work on sets of unconstrained 605 | parameters, but the model requires e.g. that probability values lie in 606 | the range [0, 1]. This function takes the unconstrained parameters from 607 | the optimizer and transforms them into parameters usable in the model 608 | (e.g to evaluate the likelihood). 609 | 610 | Parameters 611 | ---------- 612 | params : array-like 613 | An array of unconstrained parameters 614 | method : {'logit', 'abs'}, optional 615 | The method used to transform parameters on the entire real line to 616 | parameters in the range (0,1). 617 | 618 | Returns 619 | ------- 620 | params : an array of constrained parameters 621 | """ 622 | transitions, ar_params, stddevs, means = self.separate_params(params) 623 | 624 | # Standard deviations: transform to always be positive 625 | stddevs = np.exp(-stddevs) 626 | 627 | return self.fuse_params(transitions, ar_params, stddevs, means) 628 | 629 | def untransform_params(self, params, method='logit'): 630 | """ 631 | Transforms a set of constrained parameters to a set of uncontrained 632 | parameters. 633 | 634 | Optimization methods (e.g scipy.optimize) work on sets of unconstrained 635 | parameters, but the model requires e.g. that probability values lie in 636 | the range [0, 1]. This function takes the constrained parameters used 637 | in the model and transforms them into parameters usable by the 638 | optimizer (e.g to take step sizes, etc.). 639 | 640 | Parameters 641 | ---------- 642 | params : array-like 643 | An array of constrained parameters 644 | method : {'logit', 'abs'}, optional 645 | The method used to transform parameters on the entire real line to 646 | parameters in the range (0,1). 647 | 648 | Returns 649 | ------- 650 | params : an array of unconstrained parameters 651 | """ 652 | transitions, ar_params, stddevs, means = self.separate_params(params) 653 | 654 | stddevs = -np.log(stddevs) 655 | 656 | return self.fuse_params(transitions, ar_params, stddevs, means) 657 | 658 | def transform_jacobian(self, params): 659 | """ 660 | Evaluates the jacobian of the transformation function. 661 | 662 | Used to calculate standard errors via the delta method (the method of 663 | propagation of errors). 664 | 665 | Parameters 666 | ---------- 667 | params : array-like 668 | An array of parameters 669 | 670 | Returns 671 | ------- 672 | jacobian : array-like 673 | The jacobian matrix of the transformation function, evaluated at 674 | the given parameters. 675 | """ 676 | transitions, ar_params, stddevs, means = self.separate_params(params) 677 | 678 | # If not TVTP, then we want to return the estimated probabilities 679 | # themselves, and not the unconstrainted parameters 680 | # (If TVTP, then we just want to return the unconstrained parameters) 681 | if self.tvtp_order == 1: 682 | transitions = ( 683 | np.exp(transitions) / (1 + np.exp(transitions))**2 684 | ) 685 | 686 | # Standard deviation parameters: 687 | stddevs = -np.exp(-stddevs) 688 | 689 | vector = np.r_[ 690 | transitions, [1]*len(ar_params), stddevs, [1]*len(means) 691 | ] 692 | return np.diag(vector) 693 | 694 | def loglike(self, params): 695 | """ 696 | Calculate the log likelihood. 697 | 698 | Parameters 699 | ---------- 700 | params : array-like 701 | An array of unconstrained, contracted parameters 702 | 703 | Returns 704 | ------- 705 | loglike : float 706 | The log likelihood of the model evaluated at the given parameters. 
707 | 708 | Notes 709 | ----- 710 | Uses unconstrained parameters because it is meant to be called via 711 | the optimization routine, which uses unconstrained parameters. 712 | """ 713 | params = self.transform_params(params) 714 | params = self.expand_params(params) 715 | 716 | (joint_probabilities, 717 | marginal_conditional_densities) = self.initialize_filter(params) 718 | 719 | transitions, _, _, _ = self.separate_params(params) 720 | transition_vectors = self.tvtp_transition_vectors(transitions, 'right') 721 | transition_vectors = transition_vectors[self.nobs_initial:] 722 | 723 | marginal_densities, _, _ = hamilton_filter( 724 | self.nobs, self.nstates, self.order, 725 | transition_vectors, joint_probabilities, 726 | marginal_conditional_densities 727 | ) 728 | 729 | return np.sum(np.log(marginal_densities)) 730 | 731 | def tvtp_transition_vectors(self, transitions, matrix_type='left'): 732 | """ 733 | Create a vector of time varying transition probability vectors 734 | 735 | Each transition vector is the vectorized version of the transition 736 | matrix. 737 | 738 | Parameters 739 | ---------- 740 | transitions : array-like 741 | A vector of transition parameters, with length 742 | self.nstates * (self.nstates - 1) * self.tvtp_order 743 | matrix_type : {'left', 'right'}, optional 744 | The method by which the corresponding transition matrix would be 745 | constructed from the returned transition vector. 746 | - If 'left', the transition matrix would be constructed to be left 747 | stochastic. 748 | - If 'right', the transition matrix would be constructed to be 749 | right stochastic. 750 | See MAR.transition_matrix() or the module docstring for details. 751 | 752 | Returns 753 | ------- 754 | transition_vector : array 755 | An (nobs+1) x (nstates*nstates) matrix (i.e. an nobs+1 vector of 756 | nstates*nstates transition vectors). 757 | """ 758 | transitions = transitions.reshape( 759 | self.nstates*(self.nstates-1), self.tvtp_order 760 | ) 761 | 762 | if matrix_type == 'left': 763 | fn = tvtp_transition_vectors_left 764 | elif matrix_type == 'right': 765 | fn = tvtp_transition_vectors_right 766 | else: 767 | raise ValueError("Invalid matrix type method. Must be one of" 768 | " {'left', 'right'}, corresponding to a left" 769 | " stochastic or right stochastic transition" 770 | " matrix. Got %s." % matrix_type) 771 | 772 | transition_vectors = fn( 773 | self.nobs + self.nobs_initial, self.nstates, self.tvtp_order, 774 | transitions, np.asarray(self.tvtp_exog, order='C') 775 | ) 776 | return transition_vectors 777 | 778 | def probability_vector(self, transitions, matrix_type='left'): 779 | """ 780 | Create a probability vector 781 | 782 | The probability vector is the vectorized version of the transition 783 | matrix, excluding its last row. 784 | 785 | Parameters 786 | ---------- 787 | transitions : array-like 788 | A vector of transition values for the probability vector. It can be 789 | either: 790 | - a transition vector, if it has 1-dimension 791 | - a transition matrix, if it has 2-dimensions 792 | See the module docstring for more information about the difference. 793 | matrix_type : {'left', 'right'}, optional 794 | The method by which the corresponding transition matrix would be 795 | constructed from the returned probability vector. 796 | - If 'left', the transition matrix would be constructed to be left 797 | stochastic. 798 | - If 'right', the transition matrix would be constructed to be 799 | right stochastic. 
800 | See MAR.transition_matrix() or the module docstring for details. 801 | 802 | Returns 803 | ------- 804 | probability_vector : array 805 | A 1-dimensional probability vector 806 | 807 | Notes 808 | ----- 809 | See module docstring for details on the distinction between the terms 810 | `transitions`, `probability_vector`, `transition_vector`, and 811 | `transition_matrix`. 812 | """ 813 | 814 | # Figure out which type of stochastic matrix we have 815 | if matrix_type == 'left': 816 | order = 'F' 817 | elif matrix_type == 'right': 818 | order = 'C' 819 | else: 820 | raise ValueError("Invalid matrix type method. Must be one of" 821 | " {'left', 'right'}, corresponding to a left" 822 | " stochastic or right stochastic transition" 823 | " matrix. Got %s." % matrix_type) 824 | 825 | # Handle transition vector (convert to a transition matrix first) 826 | if transitions.ndim == 1: 827 | transitions = self.transition_matrix(array, order) 828 | if not transitions.ndim == 2: 829 | raise ValueError('Invalid input array. Must be 1-dimensional (a' 830 | ' transition vector) or 2-dimensional (a' 831 | ' transition matrix. Got %d dimensions.' % 832 | transitions.ndim) 833 | 834 | # Transition matrix to probabilities vector 835 | return transitions[:-1,:].ravel(order=order) 836 | 837 | def transition_matrix(self, transitions, matrix_type='left'): 838 | """ 839 | Create a transition matrix from a vector of probability values. 840 | 841 | Parameters 842 | ---------- 843 | transitions : array-like 844 | A vector of probability values for the transition matrix. It can be 845 | either: 846 | - a transition vector, if it has length self.nstates^2) 847 | - a probabilities vector, if it has length 848 | self.nstates*(self.nstates - 1) 849 | See the module docstring for more information about the difference. 850 | matrix_type : {'left', 'right'}, optional 851 | The method by which the transition matrix is constructed. 852 | - If 'left', the transition matrix is constructed to be left 853 | stochastic by converting each set of `self.nstates` values in the 854 | transition vector into columns of the transition matrix. This 855 | corresponds to creating the matrix by unstacking the vector into 856 | columns, and the operation is equivalent to reshaping the vector 857 | into the matrix using Fortran ordering. 858 | - If 'right', the transition matrix is constructed to be right 859 | stochastic by converting each set of `self.nstates` values in the 860 | transition vector into rows of the transition matrix. This 861 | corresponds to creating the matrix by unstacking the vector into 862 | rows, and the operation is equivalent to reshaping the vector 863 | into the matrix using C ordering. 864 | 865 | Returns 866 | ------- 867 | transition_matrix : array 868 | A 2-dimensional transition matrix 869 | 870 | Notes 871 | ----- 872 | See module docstring for details on the distinction between the terms 873 | `transitions`, `probability_vector`, `transition_vector`, and 874 | `transition_matrix`. 875 | """ 876 | transitions = np.asarray(transitions) 877 | 878 | # Figure out which type of stochastic matrix we have 879 | if matrix_type == 'left': 880 | order = 'F' 881 | elif matrix_type == 'right': 882 | order = 'C' 883 | else: 884 | raise ValueError("Invalid matrix type method. Must be one of" 885 | " {'left', 'right'}, corresponding to a left" 886 | " stochastic or right stochastic transition" 887 | " matrix. Got %s." 
% matrix_type) 888 | 889 | # If we already have a transition matrix 890 | if transitions.ndim == 2: 891 | transition_matrix = transitions 892 | elif transitions.ndim == 1: 893 | # Handle a probabilities vector by converting it to a transition 894 | # vector first 895 | if transitions.shape[0] == self.nstates*(self.nstates-1): 896 | transitions = self.transition_vector(transitions, matrix_type) 897 | 898 | if not transitions.shape[0] == self.nstates**2: 899 | raise ValueError('Invalid vector of probability values. Must' 900 | ' have length %d if it is a transition vector' 901 | ' or length %d if it is a probabilities vector' 902 | ' (see module docstring for details). Got' 903 | ' length %d.' % 904 | (self.nstates**2, 905 | self.nstates*(self.nstates-1), 906 | transitions.shape[0])) 907 | transition_matrix = transitions.reshape( 908 | (self.nstates, self.nstates), 909 | order=order 910 | ) 911 | else: 912 | raise ValueError('Invalid input array. Must be 1-dimensional (a' 913 | ' probability or transition vector) or' 914 | ' 2-dimensional (a transition matrix). Got %d' 915 | ' dimensions.' % transitions.ndim) 916 | 917 | # Return the transition matrix 918 | return transition_matrix 919 | 920 | def transition_vector(self, transitions, matrix_type='left'): 921 | """ 922 | Create a transition vector 923 | 924 | The transition vector is the vectorized version of the transition 925 | matrix. 926 | 927 | Parameters 928 | ---------- 929 | transitions : array-like 930 | A vector of transition values for the transition vector. It can be 931 | either: 932 | - a probability vector, if it is 1-dimensional 933 | - a transition matrix, if it is 2-dimensional 934 | See the module docstring for more information about the difference. 935 | matrix_type : {'left', 'right'}, optional 936 | The method by which the corresponding transition matrix would be 937 | constructed from the returned transition vector. 938 | - If 'left', the transition matrix would be constructed to be left 939 | stochastic. 940 | - If 'right', the transition matrix would be constructed to be 941 | right stochastic. 942 | See MAR.transition_matrix() or the module docstring for details. 943 | 944 | Returns 945 | ------- 946 | transition_vector : array 947 | A 1-dimensional transition vector 948 | 949 | Notes 950 | ----- 951 | See module docstring for details on the distinction between the terms 952 | `transitions`, `probability_vector`, `transition_vector`, and 953 | `transition_matrix`. 954 | """ 955 | transitions = np.asarray(transitions) 956 | 957 | if matrix_type == 'left': 958 | order = 'F' 959 | elif matrix_type == 'right': 960 | order = 'C' 961 | else: 962 | raise ValueError("Invalid matrix type method. Must be one of" 963 | " {'left', 'right'}, corresponding to a left" 964 | " stochastic or right stochastic transition" 965 | " matrix. Got %s."
% matrix_type) 966 | 967 | # If we already have a transition vector 968 | if transitions.ndim == 1 and transitions.size == self.nstates**2: 969 | transition_vector = transitions 970 | # Probabilities vector -> transition vector 971 | elif transitions.ndim == 1: 972 | # Get a transition matrix, but missing the last row 973 | transition_matrix = transitions.reshape( 974 | (self.nstates-1, self.nstates), 975 | order=order 976 | ) 977 | # Calculate and append the last row 978 | transitions = np.c_[ 979 | transition_matrix.T, 1-transition_matrix.sum(0) 980 | ].T 981 | # Vectorize 982 | transition_vector = transitions.ravel(order=order) 983 | # Transition matrix -> transition vector 984 | elif transitions.ndim == 2: 985 | transition_vector = transitions.ravel(order=order) 986 | else: 987 | raise ValueError('Invalid input array. Must be 1-dimensional (a' 988 | ' probability vector) or 2-dimensional (a' 989 | ' transition matrix). Got %d dimensions.' % 990 | transitions.ndim) 991 | 992 | return transition_vector 993 | 994 | def unconditional_probabilities(self, transitions): 995 | """ 996 | Calculate the unconditional probabilities ("ergodic probabilities") 997 | from a (left stochastic) transition matrix. 998 | 999 | Parameters 1000 | ---------- 1001 | transitions : array-like 1002 | A probability vector, transition vector, or transition matrix. 1003 | 1004 | Returns 1005 | ------- 1006 | unconditional_probabilities : array 1007 | A 1-dimensional, self.nstates length vector of the unconditional 1008 | probabilities of each state. 1009 | """ 1010 | transition_matrix = self.transition_matrix(transitions, 'right') 1011 | A = np.r_[ 1012 | np.eye(self.nstates) - transition_matrix, 1013 | np.ones((self.nstates, 1)).T 1014 | ] 1015 | return np.linalg.pinv(A)[:,-1] 1016 | 1017 | def marginalize_probabilities(self, joint_probabilities, nremaining=1): 1018 | """ 1019 | Calculate marginal(ized) probabilities from joint probabilities. 1020 | 1021 | This is used in two ways: 1022 | 1. With the output from the filter, to calculate the marginal 1023 | probabilities that the time period t is in each of the possible 1024 | states given time t information 1025 | 2. With the output from the smoother, to calculate the marginal 1026 | probabilities that the time period t is in each of the possible 1027 | states given time T information. 1028 | 1029 | By default it integrates out all but one state. 1030 | 1031 | Parameters 1032 | ---------- 1033 | joint_probabilities : array-like 1034 | A T x M^k array of joint probabilities of state sequences, ordered 1035 | in increasing lexicographic fashion along the second axis. 1036 | nremaining : integer, optional 1037 | The dimension of the state sequences remaining after the 1038 | marginalization is performed. 1039 | 1040 | Returns 1041 | ------- 1042 | marginalized_probabilities : array-like 1043 | A T x M^(nremaining) array of probabilities; marginal 1044 | probabilities if nremaining is 1, otherwise joint probabilities. 1045 | 1046 | Notes 1047 | ----- 1048 | Given joint_probabilities[t] - which is an M^k length vector of the 1049 | joint probabilities of state sequences ordered in increasing 1050 | lexicographic fashion where the 0-th element corresponds to s_t, 1051 | the state at time t - the marginal probability of (S_t = s_t) is 1052 | achieved by integrating out the other k-1 states, 1053 | (S_{t-1}, ..., S_{t-k+1}).
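In vectorized form, this marginalization is just a reshape followed by a sum over the trailing axis; the following is a minimal sketch of what the implementation below does (here `jp` is illustrative shorthand for a T x M^k array of joint probabilities, with M = self.nstates and nremaining = 1):

    marginal = jp.reshape(
        (jp.shape[0], M, jp.shape[1] // M)
    ).sum(-1)

The index arithmetic that makes this reshape valid is the following.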
1054 | 1055 | This can be computed for the i-th 1056 | (zero-indexed) state simply by summing the M^(k-1) elements of the 1057 | joint_probabilities[t] vector corresponding to vector locations 1058 | [i*M^(k-1), (i+1)*M^(k-1)-1]. For example, for i=0, this corresponds to 1059 | array locations [0, M^(k-1)-1], inclusive (in slice notation, 1060 | joint_probabilities[t][0:M^(k-1)]). 1061 | """ 1062 | 1063 | marginalized_probabilities = joint_probabilities.reshape(( 1064 | joint_probabilities.shape[0], 1065 | self.nstates**nremaining, 1066 | joint_probabilities.shape[1] // self.nstates**nremaining 1067 | )).sum(-1) 1068 | return marginalized_probabilities 1069 | 1070 | def smooth(self, joint_probabilities, joint_probabilities_t1, transitions): 1071 | """ 1072 | Calculate smoothed probabilities (using all information in the sample), 1073 | using Kim's smoothing algorithm. 1074 | 1075 | Calculates the marginal probability that time period t is in each 1076 | of the possible states, given time T information 1077 | 1078 | Parameters 1079 | ---------- 1080 | joint_probabilities : array-like 1081 | The nobs+1 x M^k output from the hamilton filter; the t-th row is 1082 | conditional on time t information. 1083 | joint_probabilities_t1 : array-like 1084 | The nobs+1 x M^(k+1) output from the hamilton filter; the t-th row 1085 | is conditional on time t-1 information. 1086 | transitions : array-like 1087 | A probability vector, transition vector, or transition matrix. 1088 | 1089 | Returns 1090 | ------- 1091 | smoothed_marginal_probabilities : array-like 1092 | An nobs x M array of marginal probabilities that time period t is 1093 | in each of the possible states given time T information. 1094 | """ 1095 | transition_vectors = self.tvtp_transition_vectors(transitions, 'right') 1096 | 1097 | marginal_probabilities = self.marginalize_probabilities( 1098 | joint_probabilities[1:] 1099 | ) 1100 | marginal_probabilities_t1 = self.marginalize_probabilities( 1101 | joint_probabilities_t1[1:] 1102 | ) 1103 | 1104 | smoothed_marginal_probabilities = np.zeros((self.nobs, self.nstates)) 1105 | smoothed_marginal_probabilities[self.nobs-1] = marginal_probabilities[self.nobs-1] 1106 | 1107 | for t in range(self.nobs-1, 0, -1): 1108 | transition_matrix = self.transition_matrix( 1109 | transition_vectors[t], 'right' 1110 | ) 1111 | smoothed_marginal_probabilities[t-1] = ( 1112 | marginal_probabilities[t-1] * np.dot( 1113 | transition_matrix.T, 1114 | (smoothed_marginal_probabilities[t] / 1115 | marginal_probabilities_t1[t-1]) 1116 | ) 1117 | ) 1118 | 1119 | return smoothed_marginal_probabilities 1120 | 1121 | def filter(self, params): 1122 | """ 1123 | Filter the data via the Hamilton Filter 1124 | 1125 | Parameters 1126 | ---------- 1127 | params : array-like 1128 | An array of unconstrained parameters, as used by the 1129 | optimization routine; they are transformed to constrained 1130 | parameters and expanded internally before filtering 1131 | (see transform_params and expand_params). 1132 | 1133 | 1134 | Returns 1135 | ------- 1136 | marginal_densities : array-like 1137 | The marginal densities of endog at each time t; byproduct of the 1138 | hamilton filter. 1139 | joint_probabilities : array-like 1140 | The nobs+1 x M^k output from the hamilton filter; the t-th row is 1141 | conditional on time t information.
1142 | joint_probabilities_t1 : array-like 1143 | The nobs+1 x M^(k+1) output from the hamilton filter; the t-th row 1144 | is conditional on time t-1 information. 1145 | 1146 | """ 1147 | params = self.transform_params(params) 1148 | params = self.expand_params(params) 1149 | transitions, _, _, _ = self.separate_params(params) 1150 | transition_vectors = self.tvtp_transition_vectors(transitions, 'right') 1151 | transition_vectors = transition_vectors[self.nobs_initial:] 1152 | 1153 | (joint_probabilities, 1154 | marginal_conditional_densities) = self.initialize_filter(params) 1155 | 1156 | args = (self.nobs, self.nstates, self.order, transition_vectors, 1157 | joint_probabilities, marginal_conditional_densities) 1158 | 1159 | marginal_densities, joint_probabilities, joint_probabilities_t1 = hamilton_filter(*args) 1160 | 1161 | return ( 1162 | marginal_densities, joint_probabilities, joint_probabilities_t1 1163 | ) 1164 | 1165 | def initial_joint_probabilities(self, transitions): 1166 | # The initialized values for the joint probabilities of states are 1167 | # calculated from the unconditional probabilities 1168 | # Note: considering k states 1169 | # The order of the states is lexicographic, increasing 1170 | """ 1171 | The initialized values for the joint probabilities of each length k 1172 | sequence of states are calculated from the unconditional probabilities 1173 | 1174 | At this stage, we are interested in calculating this for each of the 1175 | possible k-permutations (with replacement) of the states (a set with M 1176 | elements), so there are M^k values. Any particular sequence looks like: 1177 | (s_0, s_{-1}, ..., s_{-k+1}) 1178 | where the lowercase s denotes a particular realization of one of the 1179 | random state variables S. 1180 | 1181 | The sequences of states are ordered in increasing lexicographic order: 1182 | 1183 | (0, 0, ..., 0), 1184 | (0, 0, ..., 1), 1185 | ... 1186 | (M-1, M-1, ..., M-2), 1187 | (M-1, M-1, ..., M-1) 1188 | 1189 | (this is also equivalent to the sequences of digits in left-zero-padded 1190 | base-M counting from 0 up to M^k - 1) 1191 | 1192 | For each sequence of states, the order is descending in time, so that 1193 | (0, 0, 2) corresponds to (s_{0} = 0, s_{-1} = 0, s_{-2} = 2) 1194 | 1195 | The joint probability of each of the M^k possible state combinations 1196 | (s_0, s_{-1}, ..., s_{-k+1}), is computed using two elements: 1197 | - The unconditional probability of state s_{-k+1}: P[S_{-k+1}=s_{-k+1}] 1198 | This is just the s_{-k+1}-th element of the \pi vector 1199 | - The Markov transition probabilities (there are k-1 of these) 1200 | P[S_{-k+2}=s_{-k+2} | S_{-k+1}=s_{-k+1}] 1201 | P[S_{-k+3}=s_{-k+3} | S_{-k+2}=s_{-k+2}] 1202 | ... 1203 | P[S_{-k+k}=s_{-k+k} | S_{-k+(k-1)}=s_{-k+(k-1)}] 1204 | 1205 | Example (k=2, M=2): 1206 | 1207 | In general, each joint probability will be of the form: 1208 | P[S_{-2+2}=s_{-2+2} | S_{-2+1}=s_{-2+1}] * P[S_{-2+1}=s_{-2+1}] 1209 | or 1210 | P[S_{0}=s_{0} | S_{-1}=s_{-1}] * P[S_{-1}=s_{-1}] 1211 | 1212 | The 2^2=4 sequences of states, with their joint probabilities are: 1213 | (0, 0) => P[S_{0} = 0 | S_{-1} = 0] * P[S_{-1} = 0] 1214 | (0, 1) => P[S_{0} = 0 | S_{-1} = 1] * P[S_{-1} = 1] 1215 | (1, 0) => P[S_{0} = 1 | S_{-1} = 0] * P[S_{-1} = 0] 1216 | (1, 1) => P[S_{0} = 1 | S_{-1} = 1] * P[S_{-1} = 1] 1217 | 1218 | The result is an M^k length vector giving the resultant joint 1219 | probabilities. It could be reshaped into an (M x M x ... x M) (k times) 1220 | dimensional matrix.
In the example above, if you construe each sequence 1221 | of states as the coordinates into a matrix, the corresponding matrix 1222 | would be: 1223 | 1224 | | (0,0), (0,1) | 1225 | | (1,0), (1,1) | 1226 | 1227 | Given the vector, it is trivial to reshape into a matrix: 1228 | 1229 | joint_probabilities.reshape([self.nstates] * self.order) 1230 | 1231 | Note that the conditional probabilities are elements of the transition 1232 | matrix: in a (i,j) segment of a state sequence (since sequences are 1233 | ordered descending in time, this is a move from the jth to the ith 1234 | state), the required element is P[i|j]. Because the transition matrix 1235 | is in left stochastic form, the row corresponds to the state being 1236 | moved to, so this element sits at matrix indices (i,j), matching the 1237 | segment directly. It will be convenient to vectorize this 1238 | matrix (i.e. convert it to a vector by "stacking" the rows). This is a 1239 | simple reshaping of the matrix. 1240 | 1241 | transition_vector = transition_matrix.reshape( 1242 | (1, transition_matrix.size) 1243 | ) 1244 | 1245 | In the k=2, M=2 case, the transition vector is: 1246 | 1247 | | P[S_{t} = 0 | S_{t-1} = 0] | 1248 | | P[S_{t} = 0 | S_{t-1} = 1] | 1249 | | P[S_{t} = 1 | S_{t-1} = 0] | 1250 | | P[S_{t} = 1 | S_{t-1} = 1] | 1251 | 1252 | Or represented more compactly: 1253 | 1254 | | P[0|0] | 1255 | | P[0|1] | 1256 | | P[1|0] | 1257 | | P[1|1] | 1258 | 1259 | The vector is constructed using vectorized operations rather than 1260 | iteration. As may be seen even in the example above, this requires 1261 | multiplying the following vectors: 1262 | 1263 | | P[S_{0} = 0 | S_{-1} = 0] | | P[S_{-1} = 0] | 1264 | | P[S_{0} = 0 | S_{-1} = 1] | | P[S_{-1} = 1] | 1265 | | P[S_{0} = 1 | S_{-1} = 0] | * | P[S_{-1} = 0] | 1266 | | P[S_{0} = 1 | S_{-1} = 1] | | P[S_{-1} = 1] | 1267 | 1268 | We can represent the above vector operation more compactly: 1269 | 1270 | | P[0|0] | | P[0] | 1271 | | P[0|1] | | P[1] | 1272 | | P[1|0] | * | P[0] | 1273 | | P[1|1] | | P[1] | 1274 | 1275 | Notice that: 1276 | - The last vector is just the \pi vector tiled 2^1 = 2 times 1277 | - The first vector is just the transition vector repeated 2^0 = 1 times 1278 | and tiled 2^0 = 1 times. 1279 | 1280 | Consider increasing the order to k=3. 1281 | Now the 2^3 = 8 sequences of states are: 1282 | 1283 | (0,0,0) 1284 | (0,0,1) 1285 | (0,1,0) 1286 | (0,1,1) 1287 | (1,0,0) 1288 | (1,0,1) 1289 | (1,1,0) 1290 | (1,1,1) 1291 | 1292 | And the vector operation to create the joint probabilities is: 1293 | 1294 | | P[0|0] | | P[0|0] | | P[0] | 1295 | | P[0|0] | | P[0|1] | | P[1] | 1296 | | P[0|1] | | P[1|0] | | P[0] | 1297 | | P[0|1] | | P[1|1] | | P[1] | 1298 | | P[1|0] | * | P[0|0] | * | P[0] | 1299 | | P[1|0] | | P[0|1] | | P[1] | 1300 | | P[1|1] | | P[1|0] | | P[0] | 1301 | | P[1|1] | | P[1|1] | | P[1] | 1302 | 1303 | Notice that: 1304 | - The total length of the vectors is M^k = 2^3 = 8 1305 | - The last vector is the \pi vector (length M=2) tiled M^2 = 4 times 1306 | - The middle vector is the transition vector (length M^2=4) 1307 | repeated M^0 = 1 time and tiled M^1 = 2 times 1308 | - The first vector is the transition vector (length M^2=4) 1309 | repeated M^1 = 2 times and tiled M^0 = 1 time 1310 | 1311 | In general, given an order k and number of states M: 1312 | 1313 | 1. The joint probabilities will have M^k elements 1314 | 2.
The joint probabilities will be calculated as the result of k-1 1315 | multiplication operations on k-1 conditional probability vectors 1316 | constructed from the transition vector and 1 unconditional 1317 | probability vector constructed from the \pi vector. 1318 | 3. Tiling and repeating each create a new vector the length of which is 1319 | the length of the original vector multiplied by the number of times 1320 | it is tiled or repeated. 1321 | 4. The transition vector has length M^2. To achieve a length of M^k, it 1322 | must be tiled or repeated a total of M^(k-2) times. Tiling and 1323 | repeating are multiplicative (tiling n times and repeating m times 1324 | multiplies the length by n*m), so we must have n * m = M^(k-2). In 1325 | general, n and m will be powers of M, say n = M^a and m = M^b, so 1326 | that M^a * M^b = M^(a+b) = M^(k-2), or a + b = k-2. 1327 | 5. The rightmost conditional probability is constructed by repeating 1328 | the transition vector M^0 = 1 time and tiling it M^(k-2-0) = M^(k-2) 1329 | times. 1330 | 6. The next left conditional probability is constructed by repeating 1331 | the transition vector M^1 = M times and tiling it M^(k-2-1) = M^(k-3) 1332 | times. 1333 | 7. The leftmost conditional probability is constructed by repeating 1334 | the transition vector M^(k-2) times and tiling it 1335 | M^(k-2-(k-2)) = M^0 = 1 time. 1336 | 8. In general, there are k-1 conditional probability vectors: the i-th 1337 | vector (zero-indexed, counting from the leftmost) is constructed by: 1338 | - Repeating the transition vector M^(k-2-i) times 1339 | - Tiling the transition vector M^i times 1340 | Note that (k-2-i) + i = k-2, the total number of times required. 1341 | 9. Note that k >= 1. If k == 1 (so that k-2-i < 0) then the joint 1342 | probabilities are only the marginal unconditional probabilities 1343 | (i.e. there are no conditional probability vectors at all). 1344 | 10. The unconditional probabilities vector is constructed from only 1345 | tiling the \pi vector, with no repeating. Since the \pi vector 1346 | has length M and the resultant vector needs to have length M^k, it 1347 | must be tiled M^(k-1) times. (See the sketch below.)
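As a summary of the rules above, the following is a minimal sketch of the construction (illustrative only: it assumes a single time-invariant transition vector `tv` of length M**2 and an unconditional probability vector `pi` of length M, whereas the implementation below uses one transition vector per initial time period; `reduce` is the Python 2 builtin, functools.reduce in Python 3):

    conditionals = [
        np.tile(tv.repeat(M**(k-2-i)), M**i)  # i=0 is the leftmost vector
        for i in range(k-1)
    ]
    unconditional = np.tile(pi, M**(k-1))
    # elementwise product of the k-1 conditional vectors and the
    # unconditional vector; for k == 1 the joint probabilities are
    # just `pi` itself
    joint = reduce(np.multiply, conditionals + [unconditional])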
1348 | """ 1349 | transition_vectors = self.tvtp_transition_vectors(transitions, 'right') 1350 | 1351 | # Get the unconditional probabilities of the states, given a set of 1352 | # transition probabilities 1353 | unconditional_probabilities = self.unconditional_probabilities( 1354 | transition_vectors[0] 1355 | ) 1356 | 1357 | if self.order > 1: 1358 | conditional_probabilities = [ 1359 | np.tile( 1360 | transition_vectors[self.order-i-1].repeat(self.nstates**(self.order-2-i)), 1361 | self.nstates**i 1362 | )[:,None] # need to add the second dimension to concatenate 1363 | for i in range(self.order-1) # k-1 values; 0=first, k-2=last 1364 | ] 1365 | 1366 | unconditional_probabilities = np.tile( 1367 | unconditional_probabilities, self.nstates**(self.order-1) 1368 | )[:,None] 1369 | 1370 | joint_probabilities = reduce(np.multiply, 1371 | conditional_probabilities + [unconditional_probabilities] 1372 | ).squeeze() 1373 | else: 1374 | joint_probabilities = unconditional_probabilities 1375 | 1376 | return joint_probabilities 1377 | 1378 | def marginal_conditional_densities(self, params, stddevs, means): 1379 | return marginal_conditional_densities( 1380 | self.nobs, self.nstates, self.order, 1381 | params, stddevs, means, self.augmented 1382 | ) 1383 | 1384 | def initialize_filter(self, params): 1385 | """ 1386 | Calculate the initial joint probabilities of the state sequences, 1387 | and the conditional densities of the data, as inputs to the filter 1388 | 1389 | Parameters 1390 | ---------- 1391 | params : array-like 1392 | The parameters of the model. In order, they are (with the expected 1393 | lengths of the components in parentheses): 1394 | - transition probabilities (nstates^2 or nstates*(nstates-1)) 1395 | - AR parameters (order) 1396 | - stddevs (nstates) 1397 | - means (nstates) 1398 | TODO specify the lengths of stddevs and means in the constructor, so 1399 | that they can be different (e.g. constrain some or all regimes to 1400 | have the same values, constrain certain regimes to have certain 1401 | values, etc.) 1402 | 1403 | Returns 1404 | ------- 1405 | joint_probabilities : array 1406 | An (nobs+1) x nstates^k array (k = max(self.order, 1)), where the 1407 | 0-th row holds the initial joint probabilities of the length-k 1408 | state sequences (calculated from the unconditional probabilities) 1409 | and the remaining rows are zeros, to be filled in by the filter. 1410 | Within a row, state sequences are ordered in increasing 1411 | lexicographic fashion; if a row is reshaped to (M x M x ... x M), 1412 | lower-numbered axes correspond to the more recent time periods 1413 | (i.e. the zero-index is time t) 1414 | :math:`Pr(S_t, \dots, S_{t-k+1}|\psi_{t-1})` 1415 | marginal_conditional_densities : array 1416 | The conditional densities of each observation of endog under each 1417 | possible combination of states, as computed by 1418 | marginal_conditional_densities(); these are used at each step of 1419 | the filter. 1420 | 1421 | Notes 1422 | ----- 1423 | See initial_joint_probabilities() for details of the construction. 1424 | """ 1425 | transitions, ar_params, stddevs, means = self.separate_params(params) 1426 | 1427 | # Joint probabilities (of states): (nobs+1) x (M x ...
x M), ndim = k+1 1428 | # Its time dimension is nobs+1 because the 0th joint probability is 1429 | # the input (calculated from the unconditional probabilities) for the 1430 | # first iteration of the algorithm, which starts at time t=1 1431 | order = max(self.order, 1) 1432 | joint_probabilities = np.zeros((self.nobs+1, self.nstates**order)) 1433 | joint_probabilities[0] = self.initial_joint_probabilities(transitions) 1434 | 1435 | # Marginal conditional densities 1436 | params = np.c_[ 1437 | [1]*self.nstates, 1438 | -ar_params.reshape((self.nstates, self.order)) 1439 | ] 1440 | mcds = self.marginal_conditional_densities( 1441 | params, stddevs, means 1442 | ) 1443 | 1444 | return joint_probabilities, mcds 1445 | 1446 | def score(self, params): 1447 | ''' 1448 | Gradient of log-likelihood evaluated at params 1449 | ''' 1450 | kwds = {} 1451 | kwds.setdefault('centered', True) 1452 | return approx_fprime(params, self.loglike, **kwds).ravel() 1453 | 1454 | def jac(self, params, **kwds): 1455 | ''' 1456 | Jacobian/Gradient of log-likelihood evaluated at params for each 1457 | observation. 1458 | ''' 1459 | #kwds.setdefault('epsilon', 1e-4) 1460 | kwds.setdefault('centered', True) 1461 | return approx_fprime(params, self.loglikeobs, **kwds) 1462 | 1463 | def hessian(self, params): 1464 | ''' 1465 | Hessian of log-likelihood evaluated at params 1466 | ''' 1467 | from statsmodels.tools.numdiff import approx_hess 1468 | # need options for hess (epsilon) 1469 | return approx_hess(params, self.loglike) 1470 | 1471 | class MARResults(tsbase.TimeSeriesModelResults): 1472 | """ 1473 | Class to hold results from fitting a MAR model. 1474 | 1475 | Parameters 1476 | ---------- 1477 | model : MAR instance 1478 | The fitted model instance 1479 | params : array 1480 | Fitted parameters 1481 | normalized_cov_params : array, optional 1482 | The normalized variance covariance matrix 1483 | scale : float, optional 1484 | Optional argument to scale the variance covariance matrix. 1485 | """ 1486 | 1487 | _cache = {} 1488 | 1489 | def __init__(self, model, params, normalized_cov_params, scale=1.): 1490 | super(MARResults, self).__init__(model, params, 1491 | normalized_cov_params, scale) 1492 | 1493 | def summary(self, yname=None, title=None, alpha=.05): 1494 | """ 1495 | Summarize the MAR Results 1496 | 1497 | Parameters 1498 | ---------- 1499 | yname : string, optional 1500 | Default is `y` 1501 | title : string, optional 1502 | Title for the top table. If not None, then this replaces the 1503 | default title 1504 | alpha : float 1505 | significance level for the confidence intervals 1506 | 1507 | Returns 1508 | ------- 1509 | smry : Summary instance 1510 | This holds the summary tables and text, which can be printed or 1511 | converted to various output formats.
1512 | 1513 | See Also 1514 | -------- 1515 | statsmodels.iolib.summary.Summary : class to hold summary 1516 | results 1517 | 1518 | """ 1519 | 1520 | xname = self._make_exog_names() 1521 | 1522 | model = ( 1523 | self.model.__class__.__name__ + '(' 1524 | + repr(self.model.order) + ';' 1525 | + ','.join([repr(self.model.ar_order), repr(self.model.delay)]) 1526 | + ')' 1527 | ) 1528 | 1529 | try: 1530 | dates = self.data.dates 1531 | sample = [('Sample:', [dates[0].strftime('%m-%d-%Y')])] 1532 | sample += [('', [' - ' + dates[-1].strftime('%m-%d-%Y')])] 1533 | except: 1534 | start = self.model.nobs_initial + 1 1535 | end = repr(self.model.data.orig_endog.shape[0]) 1536 | sample = [('Sample:', [repr(start) + ' - ' + end])] 1537 | 1538 | top_left = [('Dep. Variable:', None), 1539 | ('Model:', [model]), 1540 | ('Method:', ['Least Squares']), 1541 | ('Date:', None), 1542 | ('Time:', None) 1543 | ] + sample 1544 | 1545 | top_right = [('No. Observations:', None), 1546 | ('Df Residuals:', None), 1547 | ('Df Model:', None), 1548 | ('Log-Likelihood:', None), 1549 | ('AIC:', ["%#8.4g" % self.aic]), 1550 | ('BIC:', ["%#8.4g" % self.bic]) 1551 | ] 1552 | 1553 | if title is None: 1554 | title = self.model.__class__.__name__ + ' ' + "Regression Results" 1555 | 1556 | # Create summary table instance 1557 | from statsmodels.iolib.summary import Summary, summary_params, forg 1558 | from statsmodels.iolib.table import SimpleTable 1559 | from statsmodels.iolib.tableformatting import fmt_params 1560 | smry = Summary() 1561 | warnings = [] 1562 | 1563 | # Add model information 1564 | smry.add_table_2cols(self, gleft=top_left, gright=top_right, 1565 | yname=yname, xname=xname, title=title) 1566 | 1567 | # Add hyperparameters summary table 1568 | if (1 - alpha) not in self.model.threshold_crits: 1569 | warnings.append("Critical value for threshold estimates is" 1570 | " unavailable at the %d%% level. Using 95%%" 1571 | " instead." % ((1-alpha)*100)) 1572 | alpha = 0.05 1573 | alp = str((1-alpha)*100)+'%' 1574 | conf_int = self.conf_int_thresholds(alpha) 1575 | 1576 | # (see summary_params()) 1577 | confint = [ 1578 | "%s %s" % tuple(map(forg, conf_int[i])) 1579 | for i in range(len(conf_int)) 1580 | ] 1581 | confint.insert(0, '') 1582 | len_ci = map(len, confint) 1583 | max_ci = max(len_ci) 1584 | min_ci = min(len_ci) 1585 | 1586 | if min_ci < max_ci: 1587 | confint = [ci.center(max_ci) for ci in confint] 1588 | 1589 | thresholds = list(self.model.thresholds) 1590 | param_header = ['coef', '[' + alp + ' Conf. 
Int.]'] 1591 | param_stubs = ['Delay'] + ['\gamma_%d' % (threshold_idx + 1) 1592 | for threshold_idx in range(len(thresholds))] 1593 | param_data = zip([self.model.delay] + map(forg, thresholds), confint) 1594 | 1595 | parameter_table = SimpleTable(param_data, 1596 | param_header, 1597 | param_stubs, 1598 | title=None, 1599 | txt_fmt=fmt_params) 1600 | smry.tables.append(parameter_table) 1601 | 1602 | # Add parameter tables for each regime 1603 | results = np.c_[ 1604 | self.params, self.bse, self.tvalues, self.pvalues, 1605 | ].T 1606 | conf = self.conf_int(alpha) 1607 | k = self.model.ar_order + self.model.k_trend 1608 | regime_desc = self._make_regime_descriptions() 1609 | max_len = max(map(len, regime_desc)) 1610 | for regime in range(1, self.model.order + 1): 1611 | res = (self,) 1612 | res += tuple(results[:, k*(regime - 1):k*regime]) 1613 | res += (conf[k*(regime - 1):k*regime],) 1614 | table = summary_params(res, yname=yname, 1615 | xname=xname[k*regime:k*(regime+1)], 1616 | alpha=alpha, use_t=True) 1617 | 1618 | # Add regime descriptives, if multiple regimes 1619 | if self.model.order > 1: 1620 | # Replace the header row 1621 | header = ["\n" + str(cell) for cell in table.pop(0)] 1622 | title = ("Regime %d" % regime).center(max_len) 1623 | desc = regime_desc[regime - 1].center(max_len) 1624 | header[0] = "%s \n %s" % (title, desc) 1625 | table.insert_header_row(0, header) 1626 | # Add diagnostic information 1627 | nobs = [ 1628 | 'nobs_%d' % regime, self.model.nobs_regimes[regime - 1], 1629 | '', '', '', '' 1630 | ] 1631 | table.insert(len(table), nobs, 'header') 1632 | 1633 | smry.tables.append(table) 1634 | 1635 | # Add notes / warnings, added to text format only 1636 | warnings.append("Reported parameter standard errors are White's (1980)" 1637 | " heteroskedasticity robust standard errors.") 1638 | warnings.append("Threshold confidence intervals calculated as" 1639 | " Hansen's (1997) conservative (non-disjoint)" 1640 | " intervals") 1641 | 1642 | if self.model.exog.shape[0] < self.model.exog.shape[1]: 1643 | wstr = "The input rank is higher than the number of observations." 
1644 | warnings.append(wstr) 1645 | 1646 | if warnings: 1647 | etext = [ 1648 | "[{0}] {1}".format(i + 1, text) 1649 | for i, text in enumerate(warnings) 1650 | ] 1651 | etext.insert(0, "Notes / Warnings:") 1652 | smry.add_extra_txt(etext) 1653 | 1654 | return smry 1655 | 1656 | 1657 | class MARResultsWrapper(tsbase.TimeSeriesResultsWrapper): 1658 | _attrs = {} 1659 | _wrap_attrs = wrap.union_dicts(tsbase.TimeSeriesResultsWrapper._wrap_attrs, 1660 | _attrs) 1661 | _methods = {} 1662 | _wrap_methods = wrap.union_dicts( 1663 | tsbase.TimeSeriesResultsWrapper._wrap_methods, 1664 | _methods 1665 | ) 1666 | wrap.populate_wrapper(MARResultsWrapper, MARResults) -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ChadFulton/pymar/4268cbe1b5aeaeede7b3d3dc6e1439a21accfae1/tests/__init__.py -------------------------------------------------------------------------------- /tests/results/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ChadFulton/pymar/4268cbe1b5aeaeede7b3d3dc6e1439a21accfae1/tests/results/__init__.py -------------------------------------------------------------------------------- /tests/test_mar.py: -------------------------------------------------------------------------------- 1 | """ 2 | Tests for Markov Autoregression 3 | 4 | References 5 | ---------- 6 | 7 | Kim, Chang-Jin, and Charles R. Nelson. 1999. 8 | "State-Space Models with Regime Switching: 9 | Classical and Gibbs-Sampling Approaches with Applications". 10 | MIT Press Books. The MIT Press. 11 | """ 12 | 13 | import os 14 | import numpy as np 15 | import pandas as pd 16 | from .results import results_mar 17 | from statsmodels.tsa.base.datetools import dates_from_range 18 | from ..mar_model import MAR 19 | from numpy.testing import assert_almost_equal, assert_equal, assert_raises 20 | 21 | DECIMAL_8 = 8 22 | DECIMAL_6 = 6 23 | DECIMAL_5 = 5 24 | DECIMAL_4 = 4 25 | DECIMAL_3 = 3 26 | DECIMAL_2 = 2 27 | DECIMAL_1 = 1 28 | 29 | 30 | class TestHamilton1989(object): 31 | """ 32 | Hamilton's (1989) Markov Switching Model of GNP (as presented in Kim and 33 | Nelson (1999)) 34 | 35 | Test data produced using GAUSS code described in Kim and Nelson (1999) and 36 | found at http://econ.korea.ac.kr/~cjkim/SSMARKOV.htm 37 | 38 | See `statsmodels.tsa.tests.results.results_mar` for more details. 
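Note that the parameters below are given in the model's transformed (unconstrained) parameter space; for example, the standard deviation enters as -log(sigma). A minimal sketch of recovering the interpretable value from the `params` array defined in __init__ below:

    sigma = np.exp(-params[6])  # inverts -np.log(0.79619), giving 0.79619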
39 | """ 40 | 41 | def __init__(self): 42 | self.true = results_mar.htm4_kim 43 | 44 | # Hamilton's 1989 GNP dataset, updated: Quarterly, 1947.1 - 1995.3 45 | data = pd.DataFrame( 46 | self.true['data'], 47 | index=pd.date_range('1947-01-01', '1995-07-01', freq='QS'), 48 | columns=['gnp'] 49 | ) 50 | data['dlgnp'] = np.log(data['gnp']).diff()*100 51 | data = data['1952-01-01':'1984-10-01'] 52 | 53 | # Two-state Markov-switching process, where GNP is an AR(4) 54 | mod = MAR(data.dlgnp, order=4, nstates=2) 55 | 56 | # Parameters from Table 4.1, Kim and Nelson (1999) 57 | params = np.array([ 58 | 1.15590, -2.20657, 59 | 0.08983, -0.01861, -0.17434, -0.08392, 60 | -np.log(0.79619), # necessary due to transformation 61 | -0.21320, 1.12828 62 | ]) 63 | 64 | # Log Likelihood 65 | self.loglike = mod.loglike(params) 66 | 67 | # Filtered probabilities 68 | ( 69 | marginal_densities, filtered_joint_probabilities, 70 | filtered_joint_probabilities_t1 71 | ) = mod.filter(params) 72 | filtered_marginal_probabilities = mod.marginalize_probabilities( 73 | filtered_joint_probabilities[1:] 74 | ) 75 | self.filtered = filtered_marginal_probabilities 76 | 77 | # Smoothed probabilities 78 | transitions = mod.separate_params(params)[0] 79 | smoothed_marginal_probabilities = mod.smooth( 80 | filtered_joint_probabilities, filtered_joint_probabilities_t1, 81 | transitions 82 | ) 83 | self.smoothed = smoothed_marginal_probabilities 84 | 85 | def test_loglike(self): 86 | assert_almost_equal( 87 | self.loglike, self.true['-1*fout'], DECIMAL_5 88 | ) 89 | 90 | def test_filtered_recession_probabilities(self): 91 | assert_almost_equal( 92 | self.filtered[:, 0], self.true['pr_tt0'], DECIMAL_5 93 | ) 94 | 95 | def test_smoothed_recession_probabilities(self): 96 | assert_almost_equal( 97 | self.smoothed[:, 0], self.true['smooth0'], DECIMAL_5 98 | ) 99 | 100 | 101 | class TestFilardo1994TVTP(object): 102 | """ 103 | Filardo's (1994) Markov-Switching Model with Time-Varying Transition 104 | Probabilities (as presented in Kim and Nelson (1999)) 105 | 106 | Test data produced using GAUSS code described in Kim and Nelson (1999) and 107 | found at http://econ.korea.ac.kr/~cjkim/SSMARKOV.htm 108 | 109 | See `statsmodels.tsa.tests.results.results_mar` for more details. 110 | """ 111 | 112 | def __init__(self): 113 | self.true = results_mar.htm_tvp 114 | 115 | # Filardo's Industrial Production dataset, 1948.1 - 1991.4 116 | data = pd.DataFrame( 117 | self.true['data'], 118 | index=pd.date_range('1948-01-01', '1991-04-01', freq='MS'), 119 | columns=['month', 'ip', 'idx'] 120 | ) 121 | data['dlip'] = np.log(data['ip']).diff()*100 122 | 123 | # Deflated pre-1960 observations by ratio of std. devs. 124 | # See hmt_tvp.opt or Filardo (1994) p. 302 125 | std_ratio = data['dlip']['1960-01-01':].std() / data['dlip'][:'1959-12-01'].std() 126 | data['dlip'][:'1959-12-01'] = data['dlip'][:'1959-12-01'] * std_ratio 127 | data['dlidx'] = np.log(data['idx']).diff()*100 128 | data['dmdlidx'] = data['dlidx'] - data['dlidx'].mean() 129 | 130 | # Two-state Markov-switching process, where IP is an AR(4) 131 | mod = MAR(data.dlip[2:], order=4, nstates=2, 132 | switch_ar=False, switch_var=False, switch_mean=True, 133 | tvtp_exog=data.dmdlidx[1:]) 134 | 135 | 136 | # Parameters conform to hmt_tvp.opt, after transformation
143 | params = np.array(np.r_[ 144 | [1.64982, -0.99472, -4.35966, -1.77043], # TVTP parameters 145 | [0.18947, 0.07933, 0.11094, 0.12226], # AR parameters 146 | [-np.log(0.69596)], # Std. Dev 147 | [-0.86585, 0.51733] # Mean 148 | ]) 149 | 150 | # Log Likelihood 151 | self.loglike = mod.loglike(params) 152 | 153 | # Filtered probabilities 154 | ( 155 | marginal_densities, filtered_joint_probabilities, 156 | filtered_joint_probabilities_t1 157 | ) = mod.filter(params) 158 | filtered_marginal_probabilities = mod.marginalize_probabilities( 159 | filtered_joint_probabilities[1:] 160 | ) 161 | self.filtered = filtered_marginal_probabilities 162 | 163 | # Smoothed probabilities 164 | transitions = mod.separate_params(params)[0] 165 | smoothed_marginal_probabilities = mod.smooth( 166 | filtered_joint_probabilities, filtered_joint_probabilities_t1, 167 | transitions 168 | ) 169 | self.smoothed = smoothed_marginal_probabilities 170 | 171 | def test_loglike(self): 172 | assert_almost_equal( 173 | self.loglike, self.true['-1*fout'], DECIMAL_5 174 | ) 175 | 176 | def test_filtered_recession_probabilities(self): 177 | # Have to use self.true['pr_tt0'][4:] because Kim and Nelson include 178 | # 0's in the first 4 spots (which correspond to the initial 179 | # observations required for the AR(4) model to be fit.) 180 | assert_almost_equal( 181 | self.filtered[:, 0], self.true['pr_tt0'][4:], DECIMAL_5 182 | ) 183 | 184 | def test_smoothed_recession_probabilities(self): 185 | # Kim and Nelson do not provide smoothed estimates here, possibly 186 | # because the smoothed estimate is zero for almost all periods. 187 | #assert_almost_equal( 188 | # self.smoothed[:, 0], self.true['smooth0'], DECIMAL_5 189 | #) 190 | pass 191 | 192 | 193 | class TestKimNelsonStartz1998(object): 194 | """ 195 | Kim, Nelson, and Startz's (1998) "application of a three-state 196 | Markov-switching variance model to monthly stock returns for the period 197 | 1926:1 - 1986:12" 198 | 199 | Test data produced using GAUSS code described in Kim and Nelson (1999) and 200 | found at http://econ.korea.ac.kr/~cjkim/SSMARKOV.htm 201 | 202 | See `statsmodels.tsa.tests.results.results_mar` for more details. 203 | """ 204 | 205 | def __init__(self): 206 | self.true = results_mar.stck_v3 207 | 208 | # Equal-Weighted Excess Returns 209 | data = pd.DataFrame( 210 | self.true['data'], 211 | # Note: it's not clear that these are the correct dates, but it 212 | # doesn't matter for the test. 213 | index=pd.date_range('1926-01-01', '1995-12-01', freq='MS'), 214 | columns=['ewer'] 215 | ) 216 | data = data[0:732] 217 | data['dmewer'] = data['ewer'] - data['ewer'].mean() 218 | 219 | # Three-state Markov-switching variance model of the demeaned returns 220 | mod = MAR(data.dmewer, order=0, nstates=3, 221 | switch_ar=False, switch_var=True, switch_mean=[0,0,0]) 222 | 223 | # Parameters from stck_v3.opt 224 | # Also correspond to Kim and Nelson (1999) Table 4.3, after 225 | # transformations.
226 | params = np.array([ 227 | 16.399767, 12.791361, 0.522758, 4.417225, -5.845336, -3.028234, 228 | # Division by 2 because in stck_v3.opt the transformed parameters are 229 | # log variances, and here they are log standard deviations 230 | 6.704260/2, 5.520378/2, 3.473059/2 231 | ]) 232 | 233 | # Log Likelihood 234 | self.loglike = mod.loglike(params) 235 | 236 | # Filtered probabilities 237 | ( 238 | marginal_densities, filtered_joint_probabilities, 239 | filtered_joint_probabilities_t1 240 | ) = mod.filter(params) 241 | filtered_marginal_probabilities = mod.marginalize_probabilities( 242 | filtered_joint_probabilities[1:] 243 | ) 244 | self.filtered = filtered_marginal_probabilities 245 | 246 | # Smoothed probabilities 247 | transitions = mod.separate_params(params)[0] 248 | smoothed_marginal_probabilities = mod.smooth( 249 | filtered_joint_probabilities, filtered_joint_probabilities_t1, 250 | transitions 251 | ) 252 | self.smoothed = smoothed_marginal_probabilities 253 | 254 | def test_loglike(self): 255 | assert_almost_equal( 256 | self.loglike, self.true['fout'], DECIMAL_5 257 | ) 258 | 259 | def test_filtered_recession_probabilities(self): 260 | assert_almost_equal( 261 | self.filtered, self.true['prtt'], DECIMAL_5 262 | ) 263 | 264 | def test_smoothed_recession_probabilities(self): 265 | assert_almost_equal( 266 | self.smoothed, self.true['sm0'], DECIMAL_5 267 | ) --------------------------------------------------------------------------------