├── README.md ├── 0. Updating The Dataset.ipynb ├── 11. Auto ARIMA.ipynb ├── 10. The GARCH model.ipynb └── 4. Analyzing prices using the AR model.ipynb /README.md: -------------------------------------------------------------------------------- 1 | # Time-series-analysis-in-Python 2 | I perform time series analysis of data from scratch. I also implement The Autoregressive (AR) Model, The Moving Average (MA) Model, The Autoregressive Moving Average (ARMA) Model, The Autoregressive Integrated Moving Average (ARIMA) Model, The ARCH Model, The GARCH model, Auto ARIMA, forecasting and exploring a business case. 3 | 4 | Index2018.csv is the dataset used for time series analysis in this project 5 | -------------------------------------------------------------------------------- /0. Updating The Dataset.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "# Importing the necessary package \n", 10 | "import yfinance " 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": 2, 16 | "metadata": {}, 17 | "outputs": [], 18 | "source": [ 19 | "# Ignoring warning messages\n", 20 | "import warnings \n", 21 | "warnings.filterwarnings(\"ignore\")" 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": 3, 27 | "metadata": {}, 28 | "outputs": [ 29 | { 30 | "name": "stdout", 31 | "output_type": "stream", 32 | "text": [ 33 | "[*********************100%***********************] 4 of 4 downloaded\n" 34 | ] 35 | } 36 | ], 37 | "source": [ 38 | "# Using the .download() method to get our data\n", 39 | "\n", 40 | "raw_data = yfinance.download (tickers = \"^GSPC ^FTSE ^N225 ^GDAXI\", start = \"1994-01-07\", end = \"2019-09-27\", interval = \"1d\", group_by = 'ticker', auto_adjust = True, treads = True)\n", 41 | "\n", 42 | "# tickers -> The time series we are interested in - (in our case, 
these are the S&P, FTSE, NIKKEI and DAX)\n", 43 | "# start -> The starting date of our data set\n", 44 | "# end -> The ending date of our data set (at the time of upload, this is the current date)\n", 45 | "# interval -> The distance in time between two recorded observations. Since we're using daily closing prices, we set it equal to \"1d\", which indicates 1 day. \n", 46 | "# group_by -> The way we want to group the scraped data. Usually we want it to be \"ticker\", so that we have all the information about a time series in 1 variable.\n", 47 | "# auto_adjust -> Automatically adjust the closing prices for each period. \n", 48 | "# treads -> Whether to use threads for mass downloading (note: yfinance documents this option under the name 'threads'). " 49 | ] 50 | }, 51 | { 52 | "cell_type": "code", 53 | "execution_count": 4, 54 | "metadata": {}, 55 | "outputs": [], 56 | "source": [ 57 | "# Creating a backup copy in case we remove/alter elements of the data by mistake\n", 58 | "df_comp = raw_data.copy()" 59 | ] 60 | }, 61 | { 62 | "cell_type": "code", 63 | "execution_count": 5, 64 | "metadata": {}, 65 | "outputs": [], 66 | "source": [ 67 | "# Adding new columns to the data set\n", 68 | "df_comp['spx'] = df_comp['^GSPC'].Close\n", 69 | "df_comp['dax'] = df_comp['^GDAXI'].Close\n", 70 | "df_comp['ftse'] = df_comp['^FTSE'].Close\n", 71 | "df_comp['nikkei'] = df_comp['^N225'].Close" 72 | ] 73 | }, 74 | { 75 | "cell_type": "code", 76 | "execution_count": 6, 77 | "metadata": { 78 | "scrolled": true 79 | }, 80 | "outputs": [], 81 | "source": [ 82 | "df_comp = df_comp.iloc[1:] # Removing the first elements, since we always start 1 period before the first, due to time zone differences of closing prices\n", 83 | "del df_comp['^N225'] # Removing the original tickers of the data set\n", 84 | "del df_comp['^GSPC']\n", 85 | "del df_comp['^GDAXI']\n", 86 | "del df_comp['^FTSE']\n", 87 | "df_comp=df_comp.asfreq('b') # Setting the frequency of the data\n", 88 | "df_comp=df_comp.fillna(method='ffill') # Filling any missing values" 89 | ] 90 | }, 
91 | { 92 | "cell_type": "code", 93 | "execution_count": 7, 94 | "metadata": {}, 95 | "outputs": [ 96 | { 97 | "name": "stdout", 98 | "output_type": "stream", 99 | "text": [ 100 | " spx dax ftse nikkei\n", 101 | " \n", 102 | "Date \n", 103 | "1994-01-07 469.90 2224.95 3446.0 18124.01\n", 104 | "1994-01-10 475.27 2225.00 3440.6 18443.44\n", 105 | "1994-01-11 474.13 2228.10 3413.8 18485.25\n", 106 | "1994-01-12 474.17 2182.06 3372.0 18793.88\n", 107 | "1994-01-13 472.47 2142.37 3360.0 18577.26\n", 108 | " spx dax ftse nikkei\n", 109 | " \n", 110 | "Date \n", 111 | "2019-09-20 2992.07 12468.01 7344.9 22079.09\n", 112 | "2019-09-23 2991.78 12342.33 7326.1 22079.09\n", 113 | "2019-09-24 2966.60 12307.15 7291.4 22098.84\n", 114 | "2019-09-25 2984.87 12234.18 7290.0 22020.15\n", 115 | "2019-09-26 2977.62 12288.54 7351.1 22048.24\n" 116 | ] 117 | } 118 | ], 119 | "source": [ 120 | "print (df_comp.head()) # Displaying the first 5 elements to make sure the data was scraped correctly\n", 121 | "print (df_comp.tail()) # Making sure the last day we're including in the series are correct" 122 | ] 123 | } 124 | ], 125 | "metadata": { 126 | "kernelspec": { 127 | "display_name": "Python 3", 128 | "language": "python", 129 | "name": "python3" 130 | }, 131 | "language_info": { 132 | "codemirror_mode": { 133 | "name": "ipython", 134 | "version": 3 135 | }, 136 | "file_extension": ".py", 137 | "mimetype": "text/x-python", 138 | "name": "python", 139 | "nbconvert_exporter": "python", 140 | "pygments_lexer": "ipython3", 141 | "version": "3.7.4" 142 | } 143 | }, 144 | "nbformat": 4, 145 | "nbformat_minor": 2 146 | } 147 | -------------------------------------------------------------------------------- /11. 
Auto ARIMA.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "### Packages" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "import numpy as np\n", 17 | "import pandas as pd\n", 18 | "import scipy\n", 19 | "import statsmodels.api as sm\n", 20 | "import matplotlib.pyplot as plt\n", 21 | "import seaborn as sns\n", 22 | "import sklearn\n", 23 | "from statsmodels.tsa.arima_model import ARIMA\n", 24 | "from arch import arch_model\n", 25 | "import seaborn as sns\n", 26 | "import yfinance\n", 27 | "import warnings\n", 28 | "warnings.filterwarnings(\"ignore\")\n", 29 | "sns.set()" 30 | ] 31 | }, 32 | { 33 | "cell_type": "markdown", 34 | "metadata": {}, 35 | "source": [ 36 | "### Loading the data" 37 | ] 38 | }, 39 | { 40 | "cell_type": "code", 41 | "execution_count": 2, 42 | "metadata": {}, 43 | "outputs": [ 44 | { 45 | "name": "stdout", 46 | "output_type": "stream", 47 | "text": [ 48 | "[*********************100%***********************] 4 of 4 downloaded\n" 49 | ] 50 | } 51 | ], 52 | "source": [ 53 | "raw_data = yfinance.download (tickers = \"^GSPC ^FTSE ^N225 ^GDAXI\", start = \"1994-01-07\", end = \"2018-01-29\", \n", 54 | " interval = \"1d\", group_by = 'ticker', auto_adjust = True, treads = True)" 55 | ] 56 | }, 57 | { 58 | "cell_type": "code", 59 | "execution_count": 3, 60 | "metadata": {}, 61 | "outputs": [], 62 | "source": [ 63 | "df_comp = raw_data.copy()" 64 | ] 65 | }, 66 | { 67 | "cell_type": "code", 68 | "execution_count": 4, 69 | "metadata": {}, 70 | "outputs": [], 71 | "source": [ 72 | "df_comp['spx'] = df_comp['^GSPC'].Close[:]\n", 73 | "df_comp['dax'] = df_comp['^GDAXI'].Close[:]\n", 74 | "df_comp['ftse'] = df_comp['^FTSE'].Close[:]\n", 75 | "df_comp['nikkei'] = df_comp['^N225'].Close[:]" 76 | ] 77 | }, 78 | { 79 | "cell_type": "code", 80 
| "execution_count": 5, 81 | "metadata": { 82 | "scrolled": true 83 | }, 84 | "outputs": [], 85 | "source": [ 86 | "df_comp = df_comp.iloc[1:]\n", 87 | "del df_comp['^N225']\n", 88 | "del df_comp['^GSPC']\n", 89 | "del df_comp['^GDAXI']\n", 90 | "del df_comp['^FTSE']\n", 91 | "df_comp=df_comp.asfreq('b')\n", 92 | "df_comp=df_comp.fillna(method='ffill')" 93 | ] 94 | }, 95 | { 96 | "cell_type": "markdown", 97 | "metadata": {}, 98 | "source": [ 99 | "### Creating Returns" 100 | ] 101 | }, 102 | { 103 | "cell_type": "code", 104 | "execution_count": 6, 105 | "metadata": {}, 106 | "outputs": [], 107 | "source": [ 108 | "df_comp['ret_spx'] = df_comp.spx.pct_change(1)*100\n", 109 | "df_comp['ret_ftse'] = df_comp.ftse.pct_change(1)*100\n", 110 | "df_comp['ret_dax'] = df_comp.dax.pct_change(1)*100\n", 111 | "df_comp['ret_nikkei'] = df_comp.nikkei.pct_change(1)*100" 112 | ] 113 | }, 114 | { 115 | "cell_type": "markdown", 116 | "metadata": {}, 117 | "source": [ 118 | "### Splitting the Data" 119 | ] 120 | }, 121 | { 122 | "cell_type": "code", 123 | "execution_count": 7, 124 | "metadata": {}, 125 | "outputs": [], 126 | "source": [ 127 | "size = int(len(df_comp)*0.8)\n", 128 | "df, df_test = df_comp.iloc[:size], df_comp.iloc[size:]" 129 | ] 130 | }, 131 | { 132 | "cell_type": "markdown", 133 | "metadata": {}, 134 | "source": [ 135 | "### Fitting a Model" 136 | ] 137 | }, 138 | { 139 | "cell_type": "code", 140 | "execution_count": 8, 141 | "metadata": {}, 142 | "outputs": [], 143 | "source": [ 144 | "from pmdarima.arima import auto_arima" 145 | ] 146 | }, 147 | { 148 | "cell_type": "code", 149 | "execution_count": 9, 150 | "metadata": {}, 151 | "outputs": [], 152 | "source": [ 153 | "model_auto = auto_arima(df.ret_ftse[1:])" 154 | ] 155 | }, 156 | { 157 | "cell_type": "code", 158 | "execution_count": 10, 159 | "metadata": {}, 160 | "outputs": [ 161 | { 162 | "data": { 163 | "text/plain": [ 164 | "ARIMA(maxiter=50, method='lbfgs', order=(2, 0, 5), out_of_sample_size=0,\n", 165 | " 
scoring='mse', scoring_args=None, seasonal_order=(0, 0, 0, 1),\n", 166 | " start_params=None, suppress_warnings=False, trend=None,\n", 167 | " with_intercept=True)" 168 | ] 169 | }, 170 | "execution_count": 10, 171 | "metadata": {}, 172 | "output_type": "execute_result" 173 | } 174 | ], 175 | "source": [ 176 | "model_auto" 177 | ] 178 | }, 179 | { 180 | "cell_type": "code", 181 | "execution_count": 11, 182 | "metadata": {}, 183 | "outputs": [ 184 | { 185 | "data": { 186 | "text/html": [ 187 | "\n", 188 | "\n", 189 | "\n", 190 | " \n", 191 | "\n", 192 | "\n", 193 | " \n", 194 | "\n", 195 | "\n", 196 | " \n", 197 | "\n", 198 | "\n", 199 | " \n", 200 | "\n", 201 | "\n", 202 | " \n", 203 | "\n", 204 | "\n", 205 | " \n", 206 | "\n", 207 | "\n", 208 | " \n", 209 | "\n", 210 | "
Statespace Model Results
Dep. Variable: y No. Observations: 5020
Model: SARIMAX(2, 0, 5) Log Likelihood -7885.690
Date: Fri, 03 Jan 2020 AIC 15789.380
Time: 10:19:03 BIC 15848.070
Sample: 0 HQIC 15809.946
- 5020
Covariance Type: opg
\n", 211 | "\n", 212 | "\n", 213 | " \n", 214 | "\n", 215 | "\n", 216 | " \n", 217 | "\n", 218 | "\n", 219 | " \n", 220 | "\n", 221 | "\n", 222 | " \n", 223 | "\n", 224 | "\n", 225 | " \n", 226 | "\n", 227 | "\n", 228 | " \n", 229 | "\n", 230 | "\n", 231 | " \n", 232 | "\n", 233 | "\n", 234 | " \n", 235 | "\n", 236 | "\n", 237 | " \n", 238 | "\n", 239 | "\n", 240 | " \n", 241 | "\n", 242 | "
coef std err z P>|z| [0.025 0.975]
intercept 0.0309 0.024 1.289 0.197 -0.016 0.078
ar.L1 0.1766 0.039 4.544 0.000 0.100 0.253
ar.L2 -0.8128 0.035 -22.984 0.000 -0.882 -0.743
ma.L1 -0.2005 0.038 -5.239 0.000 -0.275 -0.125
ma.L2 0.7654 0.037 20.436 0.000 0.692 0.839
ma.L3 -0.0953 0.012 -8.246 0.000 -0.118 -0.073
ma.L4 0.0112 0.009 1.229 0.219 -0.007 0.029
ma.L5 -0.1113 0.009 -12.960 0.000 -0.128 -0.094
sigma2 1.3550 0.014 94.014 0.000 1.327 1.383
\n", 243 | "\n", 244 | "\n", 245 | " \n", 246 | "\n", 247 | "\n", 248 | " \n", 249 | "\n", 250 | "\n", 251 | " \n", 252 | "\n", 253 | "\n", 254 | " \n", 255 | "\n", 256 | "
Ljung-Box (Q): 69.64 Jarque-Bera (JB): 6575.67
Prob(Q): 0.00 Prob(JB): 0.00
Heteroskedasticity (H): 2.00 Skew: -0.18
Prob(H) (two-sided): 0.00 Kurtosis: 8.60


Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step)." 257 | ], 258 | "text/plain": [ 259 | "\n", 260 | "\"\"\"\n", 261 | " Statespace Model Results \n", 262 | "==============================================================================\n", 263 | "Dep. Variable: y No. Observations: 5020\n", 264 | "Model: SARIMAX(2, 0, 5) Log Likelihood -7885.690\n", 265 | "Date: Fri, 03 Jan 2020 AIC 15789.380\n", 266 | "Time: 10:19:03 BIC 15848.070\n", 267 | "Sample: 0 HQIC 15809.946\n", 268 | " - 5020 \n", 269 | "Covariance Type: opg \n", 270 | "==============================================================================\n", 271 | " coef std err z P>|z| [0.025 0.975]\n", 272 | "------------------------------------------------------------------------------\n", 273 | "intercept 0.0309 0.024 1.289 0.197 -0.016 0.078\n", 274 | "ar.L1 0.1766 0.039 4.544 0.000 0.100 0.253\n", 275 | "ar.L2 -0.8128 0.035 -22.984 0.000 -0.882 -0.743\n", 276 | "ma.L1 -0.2005 0.038 -5.239 0.000 -0.275 -0.125\n", 277 | "ma.L2 0.7654 0.037 20.436 0.000 0.692 0.839\n", 278 | "ma.L3 -0.0953 0.012 -8.246 0.000 -0.118 -0.073\n", 279 | "ma.L4 0.0112 0.009 1.229 0.219 -0.007 0.029\n", 280 | "ma.L5 -0.1113 0.009 -12.960 0.000 -0.128 -0.094\n", 281 | "sigma2 1.3550 0.014 94.014 0.000 1.327 1.383\n", 282 | "===================================================================================\n", 283 | "Ljung-Box (Q): 69.64 Jarque-Bera (JB): 6575.67\n", 284 | "Prob(Q): 0.00 Prob(JB): 0.00\n", 285 | "Heteroskedasticity (H): 2.00 Skew: -0.18\n", 286 | "Prob(H) (two-sided): 0.00 Kurtosis: 8.60\n", 287 | "===================================================================================\n", 288 | "\n", 289 | "Warnings:\n", 290 | "[1] Covariance matrix calculated using the outer product of gradients (complex-step).\n", 291 | "\"\"\"" 292 | ] 293 | }, 294 | "execution_count": 11, 295 | "metadata": {}, 296 | "output_type": "execute_result" 297 | } 298 | ], 299 | "source": [ 300 | 
"model_auto.summary()" 301 | ] 302 | }, 303 | { 304 | "cell_type": "markdown", 305 | "metadata": {}, 306 | "source": [ 307 | "### Important Arguments" 308 | ] 309 | }, 310 | { 311 | "cell_type": "code", 312 | "execution_count": 14, 313 | "metadata": { 314 | "scrolled": true 315 | }, 316 | "outputs": [], 317 | "source": [ 318 | "model_auto = auto_arima(df_comp.ret_ftse[1:], exogenous = df_comp[['ret_spx', 'ret_dax', 'ret_nikkei']][1:], m = 5,\n", 319 | " max_order = None, max_p = 7, max_q = 7, max_d = 2, max_P = 4, max_Q = 4, max_D = 2,\n", 320 | " maxiter = 50, alpha = 0.05, n_jobs = -1, trend = 'ct', information_criterion = 'oob',\n", 321 | " out_of_sample = int(len(df_comp)*0.2))\n", 322 | "\n", 323 | "\n", 324 | "# !!! Important Note: In pmdarima v1.5.2, out_of_sample_size is replaced with out_of_sample, so make sure to use the latter!\n", 325 | "\n", 326 | "\n", 327 | "# exogenous -> outside factors (e.g. other time series)\n", 328 | "# m -> seasonal cycle length\n", 329 | "# max_order -> maximum number of variables to be used in the regression (p + q)\n", 330 | "# max_p -> maximum AR components\n", 331 | "# max_q -> maximum MA components\n", 332 | "# max_d -> maximum order of integration\n", 333 | "# maxiter -> maximum iterations we're giving the model to converge the coefficients (becomes harder as the order increases)\n", 334 | "# alpha -> level of significance, default is 5%, which we should be using most of the time\n", 335 | "# n_jobs -> how many models to fit at a time (-1 indicates \"as many as possible\")\n", 336 | "# trend -> \"ct\" usually\n", 337 | "# information_criterion -> 'aic', 'aicc', 'bic', 'hqic', 'oob' \n", 338 | "# (Akaike Information Criterion, Corrected Akaike Information Criterion,\n", 339 | "# Bayesian Information Criterion, Hannan-Quinn Information Criterion, or\n", 340 | "# \"out of bag\"--for validation scoring--respectively)\n", 341 | "# out_of_sample -> validates the model selection (pass the entire dataset, and set 20% to be the 
out_of_sample_size)" 342 | ] 343 | }, 344 | { 345 | "cell_type": "code", 346 | "execution_count": 15, 347 | "metadata": {}, 348 | "outputs": [ 349 | { 350 | "data": { 351 | "text/html": [ 352 | "\n", 353 | "\n", 354 | "\n", 355 | " \n", 356 | "\n", 357 | "\n", 358 | " \n", 359 | "\n", 360 | "\n", 361 | " \n", 362 | "\n", 363 | "\n", 364 | " \n", 365 | "\n", 366 | "\n", 367 | " \n", 368 | "\n", 369 | "\n", 370 | " \n", 371 | "\n", 372 | "\n", 373 | " \n", 374 | "\n", 375 | "
Statespace Model Results
Dep. Variable: y No. Observations: 6276
Model: SARIMAX(2, 0, 2)x(1, 0, 1, 5) Log Likelihood -6340.661
Date: Fri, 03 Jan 2020 AIC 12705.322
Time: 10:34:50 BIC 12786.256
Sample: 01-10-1994 HQIC 12733.364
- 01-29-2018
Covariance Type: opg
\n", 376 | "\n", 377 | "\n", 378 | " \n", 379 | "\n", 380 | "\n", 381 | " \n", 382 | "\n", 383 | "\n", 384 | " \n", 385 | "\n", 386 | "\n", 387 | " \n", 388 | "\n", 389 | "\n", 390 | " \n", 391 | "\n", 392 | "\n", 393 | " \n", 394 | "\n", 395 | "\n", 396 | " \n", 397 | "\n", 398 | "\n", 399 | " \n", 400 | "\n", 401 | "\n", 402 | " \n", 403 | "\n", 404 | "\n", 405 | " \n", 406 | "\n", 407 | "\n", 408 | " \n", 409 | "\n", 410 | "\n", 411 | " \n", 412 | "\n", 413 | "\n", 414 | " \n", 415 | "\n", 416 | "
coef std err z P>|z| [0.025 0.975]
intercept -0.0043 0.009 -0.500 0.617 -0.021 0.013
drift -1.45e-06 2.72e-06 -0.533 0.594 -6.78e-06 3.88e-06
ret_spx 0.0958 0.006 16.947 0.000 0.085 0.107
ret_dax 0.5582 0.005 113.729 0.000 0.549 0.568
ret_nikkei 0.0702 0.004 16.547 0.000 0.062 0.079
ar.L1 -0.1932 0.101 -1.918 0.055 -0.390 0.004
ar.L2 0.5083 0.059 8.643 0.000 0.393 0.624
ma.L1 0.0863 0.101 0.855 0.393 -0.112 0.284
ma.L2 -0.5369 0.056 -9.535 0.000 -0.647 -0.427
ar.S.L5 0.0705 0.339 0.208 0.836 -0.595 0.736
ma.S.L5 -0.0987 0.339 -0.291 0.771 -0.764 0.567
sigma2 0.4404 0.004 99.131 0.000 0.432 0.449
\n", 417 | "\n", 418 | "\n", 419 | " \n", 420 | "\n", 421 | "\n", 422 | " \n", 423 | "\n", 424 | "\n", 425 | " \n", 426 | "\n", 427 | "\n", 428 | " \n", 429 | "\n", 430 | "
Ljung-Box (Q): 84.27 Jarque-Bera (JB): 15325.76
Prob(Q): 0.00 Prob(JB): 0.00
Heteroskedasticity (H): 0.54 Skew: 0.25
Prob(H) (two-sided): 0.00 Kurtosis: 10.64


Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step)." 431 | ], 432 | "text/plain": [ 433 | "\n", 434 | "\"\"\"\n", 435 | " Statespace Model Results \n", 436 | "=========================================================================================\n", 437 | "Dep. Variable: y No. Observations: 6276\n", 438 | "Model: SARIMAX(2, 0, 2)x(1, 0, 1, 5) Log Likelihood -6340.661\n", 439 | "Date: Fri, 03 Jan 2020 AIC 12705.322\n", 440 | "Time: 10:34:50 BIC 12786.256\n", 441 | "Sample: 01-10-1994 HQIC 12733.364\n", 442 | " - 01-29-2018 \n", 443 | "Covariance Type: opg \n", 444 | "==============================================================================\n", 445 | " coef std err z P>|z| [0.025 0.975]\n", 446 | "------------------------------------------------------------------------------\n", 447 | "intercept -0.0043 0.009 -0.500 0.617 -0.021 0.013\n", 448 | "drift -1.45e-06 2.72e-06 -0.533 0.594 -6.78e-06 3.88e-06\n", 449 | "ret_spx 0.0958 0.006 16.947 0.000 0.085 0.107\n", 450 | "ret_dax 0.5582 0.005 113.729 0.000 0.549 0.568\n", 451 | "ret_nikkei 0.0702 0.004 16.547 0.000 0.062 0.079\n", 452 | "ar.L1 -0.1932 0.101 -1.918 0.055 -0.390 0.004\n", 453 | "ar.L2 0.5083 0.059 8.643 0.000 0.393 0.624\n", 454 | "ma.L1 0.0863 0.101 0.855 0.393 -0.112 0.284\n", 455 | "ma.L2 -0.5369 0.056 -9.535 0.000 -0.647 -0.427\n", 456 | "ar.S.L5 0.0705 0.339 0.208 0.836 -0.595 0.736\n", 457 | "ma.S.L5 -0.0987 0.339 -0.291 0.771 -0.764 0.567\n", 458 | "sigma2 0.4404 0.004 99.131 0.000 0.432 0.449\n", 459 | "===================================================================================\n", 460 | "Ljung-Box (Q): 84.27 Jarque-Bera (JB): 15325.76\n", 461 | "Prob(Q): 0.00 Prob(JB): 0.00\n", 462 | "Heteroskedasticity (H): 0.54 Skew: 0.25\n", 463 | "Prob(H) (two-sided): 0.00 Kurtosis: 10.64\n", 464 | "===================================================================================\n", 465 | "\n", 466 | "Warnings:\n", 467 | "[1] Covariance matrix calculated 
using the outer product of gradients (complex-step).\n", 468 | "\"\"\"" 469 | ] 470 | }, 471 | "execution_count": 15, 472 | "metadata": {}, 473 | "output_type": "execute_result" 474 | } 475 | ], 476 | "source": [ 477 | "model_auto.summary()" 478 | ] 479 | } 480 | ], 481 | "metadata": { 482 | "kernelspec": { 483 | "display_name": "Python 3", 484 | "language": "python", 485 | "name": "python3" 486 | }, 487 | "language_info": { 488 | "codemirror_mode": { 489 | "name": "ipython", 490 | "version": 3 491 | }, 492 | "file_extension": ".py", 493 | "mimetype": "text/x-python", 494 | "name": "python", 495 | "nbconvert_exporter": "python", 496 | "pygments_lexer": "ipython3", 497 | "version": "3.6.7" 498 | } 499 | }, 500 | "nbformat": 4, 501 | "nbformat_minor": 2 502 | } 503 | -------------------------------------------------------------------------------- /10. The GARCH model.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Importing the relevant packages" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "import pandas as pd\n", 17 | "import numpy as np\n", 18 | "import matplotlib.pyplot as plt\n", 19 | "import statsmodels.graphics.tsaplots as sgt\n", 20 | "import statsmodels.tsa.stattools as sts\n", 21 | "from statsmodels.tsa.arima_model import ARIMA\n", 22 | "from scipy.stats.distributions import chi2 \n", 23 | "from arch import arch_model\n", 24 | "from math import sqrt\n", 25 | "import seaborn as sns\n", 26 | "sns.set()" 27 | ] 28 | }, 29 | { 30 | "cell_type": "markdown", 31 | "metadata": {}, 32 | "source": [ 33 | " " 34 | ] 35 | }, 36 | { 37 | "cell_type": "markdown", 38 | "metadata": {}, 39 | "source": [ 40 | "## Importing the Data and Pre-processing " 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": 2, 46 | "metadata": {}, 
47 | "outputs": [], 48 | "source": [ 49 | "raw_csv_data = pd.read_csv(\"Index2018.csv\") \n", 50 | "df_comp=raw_csv_data.copy()\n", 51 | "df_comp.date = pd.to_datetime(df_comp.date, dayfirst = True)\n", 52 | "df_comp.set_index(\"date\", inplace=True)\n", 53 | "df_comp=df_comp.asfreq('b')\n", 54 | "df_comp=df_comp.fillna(method='ffill')" 55 | ] 56 | }, 57 | { 58 | "cell_type": "code", 59 | "execution_count": 3, 60 | "metadata": {}, 61 | "outputs": [], 62 | "source": [ 63 | "df_comp['market_value']=df_comp.ftse" 64 | ] 65 | }, 66 | { 67 | "cell_type": "code", 68 | "execution_count": 4, 69 | "metadata": {}, 70 | "outputs": [], 71 | "source": [ 72 | "del df_comp['spx']\n", 73 | "del df_comp['dax']\n", 74 | "del df_comp['ftse']\n", 75 | "del df_comp['nikkei']\n", 76 | "size = int(len(df_comp)*0.8)\n", 77 | "df, df_test = df_comp.iloc[:size], df_comp.iloc[size:]" 78 | ] 79 | }, 80 | { 81 | "cell_type": "code", 82 | "execution_count": 5, 83 | "metadata": {}, 84 | "outputs": [], 85 | "source": [ 86 | "import warnings\n", 87 | "warnings.filterwarnings(\"ignore\")" 88 | ] 89 | }, 90 | { 91 | "cell_type": "markdown", 92 | "metadata": {}, 93 | "source": [ 94 | " " 95 | ] 96 | }, 97 | { 98 | "cell_type": "markdown", 99 | "metadata": {}, 100 | "source": [ 101 | "## The LLR Test" 102 | ] 103 | }, 104 | { 105 | "cell_type": "code", 106 | "execution_count": 6, 107 | "metadata": {}, 108 | "outputs": [], 109 | "source": [ 110 | "def LLR_test(mod_1, mod_2, DF = 1):\n", 111 | " L1 = mod_1.fit(start_ar_lags = 11).llf\n", 112 | " L2 = mod_2.fit(start_ar_lags = 11).llf\n", 113 | " LR = (2*(L2-L1)) \n", 114 | " p = chi2.sf(LR, DF).round(3)\n", 115 | " return p" 116 | ] 117 | }, 118 | { 119 | "cell_type": "markdown", 120 | "metadata": {}, 121 | "source": [ 122 | "## Creating Returns" 123 | ] 124 | }, 125 | { 126 | "cell_type": "code", 127 | "execution_count": 7, 128 | "metadata": { 129 | "scrolled": true 130 | }, 131 | "outputs": [], 132 | "source": [ 133 | "df['returns'] = 
df.market_value.pct_change(1)*100" 134 | ] 135 | }, 136 | { 137 | "cell_type": "markdown", 138 | "metadata": {}, 139 | "source": [ 140 | "## The Simple GARCH Model" 141 | ] 142 | }, 143 | { 144 | "cell_type": "code", 145 | "execution_count": 8, 146 | "metadata": {}, 147 | "outputs": [ 148 | { 149 | "name": "stdout", 150 | "output_type": "stream", 151 | "text": [ 152 | "Iteration: 5, Func. Count: 39, Neg. LLF: 6972.734752397977\n", 153 | "Iteration: 10, Func. Count: 73, Neg. LLF: 6970.088043671654\n", 154 | "Optimization terminated successfully. (Exit mode 0)\n", 155 | " Current function value: 6970.058366189876\n", 156 | " Iterations: 13\n", 157 | " Function evaluations: 91\n", 158 | " Gradient evaluations: 13\n" 159 | ] 160 | }, 161 | { 162 | "data": { 163 | "text/html": [ 164 | "\n", 165 | "\n", 166 | "\n", 167 | " \n", 168 | "\n", 169 | "\n", 170 | " \n", 171 | "\n", 172 | "\n", 173 | " \n", 174 | "\n", 175 | "\n", 176 | " \n", 177 | "\n", 178 | "\n", 179 | " \n", 180 | "\n", 181 | "\n", 182 | " \n", 183 | "\n", 184 | "\n", 185 | " \n", 186 | "\n", 187 | "\n", 188 | " \n", 189 | "\n", 190 | "
Constant Mean - GARCH Model Results
Dep. Variable: returns R-squared: -0.001
Mean Model: Constant Mean Adj. R-squared: -0.001
Vol Model: GARCH Log-Likelihood: -6970.06
Distribution: Normal AIC: 13948.1
Method: Maximum Likelihood BIC: 13974.2
No. Observations: 5020
Date: Thu, Sep 19 2019 Df Residuals: 5016
Time: 17:53:26 Df Model: 4
\n", 191 | "\n", 192 | "\n", 193 | "\n", 194 | " \n", 195 | "\n", 196 | "\n", 197 | " \n", 198 | "\n", 199 | "
Mean Model
coef std err t P>|t| 95.0% Conf. Int.
mu 0.0466 1.183e-02 3.939 8.187e-05 [2.342e-02,6.981e-02]
\n", 200 | "\n", 201 | "\n", 202 | "\n", 203 | " \n", 204 | "\n", 205 | "\n", 206 | " \n", 207 | "\n", 208 | "\n", 209 | " \n", 210 | "\n", 211 | "\n", 212 | " \n", 213 | "\n", 214 | "
Volatility Model
coef std err t P>|t| 95.0% Conf. Int.
omega 0.0109 3.004e-03 3.640 2.724e-04 [5.048e-03,1.682e-02]
alpha[1] 0.0835 1.071e-02 7.794 6.476e-15 [6.249e-02, 0.104]
beta[1] 0.9089 1.148e-02 79.168 0.000 [ 0.886, 0.931]


Covariance estimator: robust" 215 | ], 216 | "text/plain": [ 217 | "\n", 218 | "\"\"\"\n", 219 | " Constant Mean - GARCH Model Results \n", 220 | "==============================================================================\n", 221 | "Dep. Variable: returns R-squared: -0.001\n", 222 | "Mean Model: Constant Mean Adj. R-squared: -0.001\n", 223 | "Vol Model: GARCH Log-Likelihood: -6970.06\n", 224 | "Distribution: Normal AIC: 13948.1\n", 225 | "Method: Maximum Likelihood BIC: 13974.2\n", 226 | " No. Observations: 5020\n", 227 | "Date: Thu, Sep 19 2019 Df Residuals: 5016\n", 228 | "Time: 17:53:26 Df Model: 4\n", 229 | " Mean Model \n", 230 | "============================================================================\n", 231 | " coef std err t P>|t| 95.0% Conf. Int.\n", 232 | "----------------------------------------------------------------------------\n", 233 | "mu 0.0466 1.183e-02 3.939 8.187e-05 [2.342e-02,6.981e-02]\n", 234 | " Volatility Model \n", 235 | "============================================================================\n", 236 | " coef std err t P>|t| 95.0% Conf. 
Int.\n", 237 | "----------------------------------------------------------------------------\n", 238 | "omega 0.0109 3.004e-03 3.640 2.724e-04 [5.048e-03,1.682e-02]\n", 239 | "alpha[1] 0.0835 1.071e-02 7.794 6.476e-15 [6.249e-02, 0.104]\n", 240 | "beta[1] 0.9089 1.148e-02 79.168 0.000 [ 0.886, 0.931]\n", 241 | "============================================================================\n", 242 | "\n", 243 | "Covariance estimator: robust\n", 244 | "\"\"\"" 245 | ] 246 | }, 247 | "execution_count": 8, 248 | "metadata": {}, 249 | "output_type": "execute_result" 250 | } 251 | ], 252 | "source": [ 253 | "model_garch_1_1 = arch_model(df.returns[1:], mean = \"Constant\", vol = \"GARCH\", p = 1, q = 1)\n", 254 | "results_garch_1_1 = model_garch_1_1.fit(update_freq = 5)\n", 255 | "results_garch_1_1.summary()" 256 | ] 257 | }, 258 | { 259 | "cell_type": "markdown", 260 | "metadata": {}, 261 | "source": [ 262 | "## Higher-Lag GARCH Models" 263 | ] 264 | }, 265 | { 266 | "cell_type": "code", 267 | "execution_count": 9, 268 | "metadata": {}, 269 | "outputs": [ 270 | { 271 | "name": "stdout", 272 | "output_type": "stream", 273 | "text": [ 274 | "Iteration: 5, Func. Count: 44, Neg. LLF: 6978.593988486672\n", 275 | "Iteration: 10, Func. Count: 80, Neg. LLF: 6970.063553697997\n", 276 | "Optimization terminated successfully. (Exit mode 0)\n", 277 | " Current function value: 6970.058366227228\n", 278 | " Iterations: 12\n", 279 | " Function evaluations: 95\n", 280 | " Gradient evaluations: 12\n" 281 | ] 282 | }, 283 | { 284 | "data": { 285 | "text/html": [ 286 | "\n", 287 | "\n", 288 | "\n", 289 | " \n", 290 | "\n", 291 | "\n", 292 | " \n", 293 | "\n", 294 | "\n", 295 | " \n", 296 | "\n", 297 | "\n", 298 | " \n", 299 | "\n", 300 | "\n", 301 | " \n", 302 | "\n", 303 | "\n", 304 | " \n", 305 | "\n", 306 | "\n", 307 | " \n", 308 | "\n", 309 | "\n", 310 | " \n", 311 | "\n", 312 | "
Constant Mean - GARCH Model Results
Dep. Variable: returns R-squared: -0.001
Mean Model: Constant Mean Adj. R-squared: -0.001
Vol Model: GARCH Log-Likelihood: -6970.06
Distribution: Normal AIC: 13950.1
Method: Maximum Likelihood BIC: 13982.7
No. Observations: 5020
Date: Thu, Sep 19 2019 Df Residuals: 5015
Time: 17:54:06 Df Model: 5
\n", 313 | "\n", 314 | "\n", 315 | "\n", 316 | " \n", 317 | "\n", 318 | "\n", 319 | " \n", 320 | "\n", 321 | "
Mean Model
coef std err t P>|t| 95.0% Conf. Int.
mu 0.0466 1.184e-02 3.938 8.219e-05 [2.341e-02,6.982e-02]
\n", 322 | "\n", 323 | "\n", 324 | "\n", 325 | " \n", 326 | "\n", 327 | "\n", 328 | " \n", 329 | "\n", 330 | "\n", 331 | " \n", 332 | "\n", 333 | "\n", 334 | " \n", 335 | "\n", 336 | "\n", 337 | " \n", 338 | "\n", 339 | "
Volatility Model
coef std err t P>|t| 95.0% Conf. Int.
omega 0.0109 2.908e-03 3.761 1.696e-04 [5.236e-03,1.663e-02]
alpha[1] 0.0835 1.189e-02 7.019 2.231e-12 [6.017e-02, 0.107]
beta[1] 0.9089 0.188 4.845 1.268e-06 [ 0.541, 1.277]
beta[2] 0.0000 0.180 0.000 1.000 [ -0.352, 0.352]


Covariance estimator: robust" 340 | ], 341 | "text/plain": [ 342 | "\n", 343 | "\"\"\"\n", 344 | " Constant Mean - GARCH Model Results \n", 345 | "==============================================================================\n", 346 | "Dep. Variable: returns R-squared: -0.001\n", 347 | "Mean Model: Constant Mean Adj. R-squared: -0.001\n", 348 | "Vol Model: GARCH Log-Likelihood: -6970.06\n", 349 | "Distribution: Normal AIC: 13950.1\n", 350 | "Method: Maximum Likelihood BIC: 13982.7\n", 351 | " No. Observations: 5020\n", 352 | "Date: Thu, Sep 19 2019 Df Residuals: 5015\n", 353 | "Time: 17:54:06 Df Model: 5\n", 354 | " Mean Model \n", 355 | "============================================================================\n", 356 | " coef std err t P>|t| 95.0% Conf. Int.\n", 357 | "----------------------------------------------------------------------------\n", 358 | "mu 0.0466 1.184e-02 3.938 8.219e-05 [2.341e-02,6.982e-02]\n", 359 | " Volatility Model \n", 360 | "============================================================================\n", 361 | " coef std err t P>|t| 95.0% Conf. 
Int.\n", 362 | "----------------------------------------------------------------------------\n", 363 | "omega 0.0109 2.908e-03 3.761 1.696e-04 [5.236e-03,1.663e-02]\n", 364 | "alpha[1] 0.0835 1.189e-02 7.019 2.231e-12 [6.017e-02, 0.107]\n", 365 | "beta[1] 0.9089 0.188 4.845 1.268e-06 [ 0.541, 1.277]\n", 366 | "beta[2] 0.0000 0.180 0.000 1.000 [ -0.352, 0.352]\n", 367 | "============================================================================\n", 368 | "\n", 369 | "Covariance estimator: robust\n", 370 | "\"\"\"" 371 | ] 372 | }, 373 | "execution_count": 9, 374 | "metadata": {}, 375 | "output_type": "execute_result" 376 | } 377 | ], 378 | "source": [ 379 | "model_garch_1_2 = arch_model(df.returns[1:], mean = \"Constant\", vol = \"GARCH\", p = 1, q = 2)\n", 380 | "results_garch_1_2 = model_garch_1_2.fit(update_freq = 5)\n", 381 | "results_garch_1_2.summary()" 382 | ] 383 | }, 384 | { 385 | "cell_type": "code", 386 | "execution_count": 10, 387 | "metadata": {}, 388 | "outputs": [ 389 | { 390 | "name": "stdout", 391 | "output_type": "stream", 392 | "text": [ 393 | "Iteration: 5, Func. Count: 51, Neg. LLF: 6993.438671672955\n", 394 | "Iteration: 10, Func. Count: 97, Neg. LLF: 6972.43119644999\n", 395 | "Iteration: 15, Func. Count: 138, Neg. LLF: 6970.058671960955\n", 396 | "Optimization terminated successfully. (Exit mode 0)\n", 397 | " Current function value: 6970.05836623016\n", 398 | " Iterations: 17\n", 399 | " Function evaluations: 154\n", 400 | " Gradient evaluations: 17\n" 401 | ] 402 | }, 403 | { 404 | "data": { 405 | "text/html": [ 406 | "\n", 407 | "\n", 408 | "\n", 409 | " \n", 410 | "\n", 411 | "\n", 412 | " \n", 413 | "\n", 414 | "\n", 415 | " \n", 416 | "\n", 417 | "\n", 418 | " \n", 419 | "\n", 420 | "\n", 421 | " \n", 422 | "\n", 423 | "\n", 424 | " \n", 425 | "\n", 426 | "\n", 427 | " \n", 428 | "\n", 429 | "\n", 430 | " \n", 431 | "\n", 432 | "
Constant Mean - GARCH Model Results
Dep. Variable: returns R-squared: -0.001
Mean Model: Constant Mean Adj. R-squared: -0.001
Vol Model: GARCH Log-Likelihood: -6970.06
Distribution: Normal AIC: 13952.1
Method: Maximum Likelihood BIC: 13991.2
No. Observations: 5020
Date: Thu, Sep 19 2019 Df Residuals: 5014
Time: 17:54:23 Df Model: 6
\n", 433 | "\n", 434 | "\n", 435 | "\n", 436 | " \n", 437 | "\n", 438 | "\n", 439 | " \n", 440 | "\n", 441 | "
Mean Model
coef std err t P>|t| 95.0% Conf. Int.
mu 0.0466 1.179e-02 3.954 7.683e-05 [2.351e-02,6.972e-02]
\n", 442 | "\n", 443 | "\n", 444 | "\n", 445 | " \n", 446 | "\n", 447 | "\n", 448 | " \n", 449 | "\n", 450 | "\n", 451 | " \n", 452 | "\n", 453 | "\n", 454 | " \n", 455 | "\n", 456 | "\n", 457 | " \n", 458 | "\n", 459 | "\n", 460 | " \n", 461 | "\n", 462 | "
Volatility Model
coef std err t P>|t| 95.0% Conf. Int.
omega 0.0109 8.157e-03 1.341 0.180 [-5.052e-03,2.692e-02]
alpha[1] 0.0835 6.059e-02 1.378 0.168 [-3.528e-02, 0.202]
beta[1] 0.9089 2.148 0.423 0.672 [ -3.301, 5.119]
beta[2] 0.0000 3.375 0.000 1.000 [ -6.614, 6.614]
beta[3] 3.9718e-13 1.294 3.070e-13 1.000 [ -2.536, 2.536]


Covariance estimator: robust" 463 | ], 464 | "text/plain": [ 465 | "\n", 466 | "\"\"\"\n", 467 | " Constant Mean - GARCH Model Results \n", 468 | "==============================================================================\n", 469 | "Dep. Variable: returns R-squared: -0.001\n", 470 | "Mean Model: Constant Mean Adj. R-squared: -0.001\n", 471 | "Vol Model: GARCH Log-Likelihood: -6970.06\n", 472 | "Distribution: Normal AIC: 13952.1\n", 473 | "Method: Maximum Likelihood BIC: 13991.2\n", 474 | " No. Observations: 5020\n", 475 | "Date: Thu, Sep 19 2019 Df Residuals: 5014\n", 476 | "Time: 17:54:23 Df Model: 6\n", 477 | " Mean Model \n", 478 | "============================================================================\n", 479 | " coef std err t P>|t| 95.0% Conf. Int.\n", 480 | "----------------------------------------------------------------------------\n", 481 | "mu 0.0466 1.179e-02 3.954 7.683e-05 [2.351e-02,6.972e-02]\n", 482 | " Volatility Model \n", 483 | "=============================================================================\n", 484 | " coef std err t P>|t| 95.0% Conf. 
Int.\n", 485 | "-----------------------------------------------------------------------------\n", 486 | "omega 0.0109 8.157e-03 1.341 0.180 [-5.052e-03,2.692e-02]\n", 487 | "alpha[1] 0.0835 6.059e-02 1.378 0.168 [-3.528e-02, 0.202]\n", 488 | "beta[1] 0.9089 2.148 0.423 0.672 [ -3.301, 5.119]\n", 489 | "beta[2] 0.0000 3.375 0.000 1.000 [ -6.614, 6.614]\n", 490 | "beta[3] 3.9718e-13 1.294 3.070e-13 1.000 [ -2.536, 2.536]\n", 491 | "=============================================================================\n", 492 | "\n", 493 | "Covariance estimator: robust\n", 494 | "\"\"\"" 495 | ] 496 | }, 497 | "execution_count": 10, 498 | "metadata": {}, 499 | "output_type": "execute_result" 500 | } 501 | ], 502 | "source": [ 503 | "model_garch_1_3 = arch_model(df.returns[1:], mean = \"Constant\", vol = \"GARCH\", p = 1, q = 3)\n", 504 | "results_garch_1_3 = model_garch_1_3.fit(update_freq = 5)\n", 505 | "results_garch_1_3.summary()" 506 | ] 507 | }, 508 | { 509 | "cell_type": "code", 510 | "execution_count": 11, 511 | "metadata": {}, 512 | "outputs": [ 513 | { 514 | "name": "stdout", 515 | "output_type": "stream", 516 | "text": [ 517 | "Iteration: 5, Func. Count: 44, Neg. LLF: 6973.212556117614\n", 518 | "Iteration: 10, Func. Count: 85, Neg. LLF: 6967.735886702618\n", 519 | "Optimization terminated successfully. (Exit mode 0)\n", 520 | " Current function value: 6967.731020076142\n", 521 | " Iterations: 12\n", 522 | " Function evaluations: 99\n", 523 | " Gradient evaluations: 12\n" 524 | ] 525 | }, 526 | { 527 | "data": { 528 | "text/html": [ 529 | "\n", 530 | "\n", 531 | "\n", 532 | " \n", 533 | "\n", 534 | "\n", 535 | " \n", 536 | "\n", 537 | "\n", 538 | " \n", 539 | "\n", 540 | "\n", 541 | " \n", 542 | "\n", 543 | "\n", 544 | " \n", 545 | "\n", 546 | "\n", 547 | " \n", 548 | "\n", 549 | "\n", 550 | " \n", 551 | "\n", 552 | "\n", 553 | " \n", 554 | "\n", 555 | "
Constant Mean - GARCH Model Results
Dep. Variable: returns R-squared: -0.001
Mean Model: Constant Mean Adj. R-squared: -0.001
Vol Model: GARCH Log-Likelihood: -6967.73
Distribution: Normal AIC: 13945.5
Method: Maximum Likelihood BIC: 13978.1
No. Observations: 5020
Date: Thu, Sep 19 2019 Df Residuals: 5015
Time: 17:54:44 Df Model: 5
\n", 556 | "\n", 557 | "\n", 558 | "\n", 559 | " \n", 560 | "\n", 561 | "\n", 562 | " \n", 563 | "\n", 564 | "
Mean Model
coef std err t P>|t| 95.0% Conf. Int.
mu 0.0466 1.187e-02 3.922 8.780e-05 [2.329e-02,6.982e-02]
\n", 565 | "\n", 566 | "\n", 567 | "\n", 568 | " \n", 569 | "\n", 570 | "\n", 571 | " \n", 572 | "\n", 573 | "\n", 574 | " \n", 575 | "\n", 576 | "\n", 577 | " \n", 578 | "\n", 579 | "\n", 580 | " \n", 581 | "\n", 582 | "
Volatility Model
coef std err t P>|t| 95.0% Conf. Int.
omega 0.0129 4.097e-03 3.158 1.589e-03 [4.908e-03,2.097e-02]
alpha[1] 0.0547 1.665e-02 3.286 1.017e-03 [2.208e-02,8.735e-02]
alpha[2] 0.0389 2.345e-02 1.659 9.709e-02 [-7.056e-03,8.488e-02]
beta[1] 0.8974 1.712e-02 52.415 0.000 [ 0.864, 0.931]


Covariance estimator: robust" 583 | ], 584 | "text/plain": [ 585 | "\n", 586 | "\"\"\"\n", 587 | " Constant Mean - GARCH Model Results \n", 588 | "==============================================================================\n", 589 | "Dep. Variable: returns R-squared: -0.001\n", 590 | "Mean Model: Constant Mean Adj. R-squared: -0.001\n", 591 | "Vol Model: GARCH Log-Likelihood: -6967.73\n", 592 | "Distribution: Normal AIC: 13945.5\n", 593 | "Method: Maximum Likelihood BIC: 13978.1\n", 594 | " No. Observations: 5020\n", 595 | "Date: Thu, Sep 19 2019 Df Residuals: 5015\n", 596 | "Time: 17:54:44 Df Model: 5\n", 597 | " Mean Model \n", 598 | "============================================================================\n", 599 | " coef std err t P>|t| 95.0% Conf. Int.\n", 600 | "----------------------------------------------------------------------------\n", 601 | "mu 0.0466 1.187e-02 3.922 8.780e-05 [2.329e-02,6.982e-02]\n", 602 | " Volatility Model \n", 603 | "=============================================================================\n", 604 | " coef std err t P>|t| 95.0% Conf. 
Int.\n", 605 | "-----------------------------------------------------------------------------\n", 606 | "omega 0.0129 4.097e-03 3.158 1.589e-03 [4.908e-03,2.097e-02]\n", 607 | "alpha[1] 0.0547 1.665e-02 3.286 1.017e-03 [2.208e-02,8.735e-02]\n", 608 | "alpha[2] 0.0389 2.345e-02 1.659 9.709e-02 [-7.056e-03,8.488e-02]\n", 609 | "beta[1] 0.8974 1.712e-02 52.415 0.000 [ 0.864, 0.931]\n", 610 | "=============================================================================\n", 611 | "\n", 612 | "Covariance estimator: robust\n", 613 | "\"\"\"" 614 | ] 615 | }, 616 | "execution_count": 11, 617 | "metadata": {}, 618 | "output_type": "execute_result" 619 | } 620 | ], 621 | "source": [ 622 | "model_garch_2_1 = arch_model(df.returns[1:], mean = \"Constant\", vol = \"GARCH\", p = 2, q = 1)\n", 623 | "results_garch_2_1 = model_garch_2_1.fit(update_freq = 5)\n", 624 | "results_garch_2_1.summary()" 625 | ] 626 | }, 627 | { 628 | "cell_type": "code", 629 | "execution_count": 12, 630 | "metadata": {}, 631 | "outputs": [ 632 | { 633 | "name": "stdout", 634 | "output_type": "stream", 635 | "text": [ 636 | "Iteration: 5, Func. Count: 49, Neg. LLF: 6974.886815445026\n", 637 | "Iteration: 10, Func. Count: 94, Neg. LLF: 6968.538209285089\n", 638 | "Iteration: 15, Func. Count: 134, Neg. LLF: 6967.731020049032\n", 639 | "Optimization terminated successfully. (Exit mode 0)\n", 640 | " Current function value: 6967.731020049671\n", 641 | " Iterations: 15\n", 642 | " Function evaluations: 134\n", 643 | " Gradient evaluations: 15\n" 644 | ] 645 | }, 646 | { 647 | "data": { 648 | "text/html": [ 649 | "\n", 650 | "\n", 651 | "\n", 652 | " \n", 653 | "\n", 654 | "\n", 655 | " \n", 656 | "\n", 657 | "\n", 658 | " \n", 659 | "\n", 660 | "\n", 661 | " \n", 662 | "\n", 663 | "\n", 664 | " \n", 665 | "\n", 666 | "\n", 667 | " \n", 668 | "\n", 669 | "\n", 670 | " \n", 671 | "\n", 672 | "\n", 673 | " \n", 674 | "\n", 675 | "
Constant Mean - GARCH Model Results
Dep. Variable: returns R-squared: -0.001
Mean Model: Constant Mean Adj. R-squared: -0.001
Vol Model: GARCH Log-Likelihood: -6967.73
Distribution: Normal AIC: 13947.5
Method: Maximum Likelihood BIC: 13986.6
No. Observations: 5020
Date: Thu, Sep 19 2019 Df Residuals: 5014
Time: 17:55:01 Df Model: 6
\n", 676 | "\n", 677 | "\n", 678 | "\n", 679 | " \n", 680 | "\n", 681 | "\n", 682 | " \n", 683 | "\n", 684 | "
Mean Model
coef std err t P>|t| 95.0% Conf. Int.
mu 0.0466 1.187e-02 3.924 8.721e-05 [2.330e-02,6.982e-02]
\n", 685 | "\n", 686 | "\n", 687 | "\n", 688 | " \n", 689 | "\n", 690 | "\n", 691 | " \n", 692 | "\n", 693 | "\n", 694 | " \n", 695 | "\n", 696 | "\n", 697 | " \n", 698 | "\n", 699 | "\n", 700 | " \n", 701 | "\n", 702 | "\n", 703 | " \n", 704 | "\n", 705 | "
Volatility Model
coef std err t P>|t| 95.0% Conf. Int.
omega 0.0129 4.816e-03 2.687 7.217e-03 [3.500e-03,2.238e-02]
alpha[1] 0.0547 1.665e-02 3.285 1.019e-03 [2.207e-02,8.736e-02]
alpha[2] 0.0389 2.505e-02 1.553 0.120 [-1.018e-02,8.800e-02]
alpha[3] 0.0000 2.572e-02 0.000 1.000 [-5.041e-02,5.041e-02]
beta[1] 0.8974 2.245e-02 39.978 0.000 [ 0.853, 0.941]


Covariance estimator: robust" 706 | ], 707 | "text/plain": [ 708 | "\n", 709 | "\"\"\"\n", 710 | " Constant Mean - GARCH Model Results \n", 711 | "==============================================================================\n", 712 | "Dep. Variable: returns R-squared: -0.001\n", 713 | "Mean Model: Constant Mean Adj. R-squared: -0.001\n", 714 | "Vol Model: GARCH Log-Likelihood: -6967.73\n", 715 | "Distribution: Normal AIC: 13947.5\n", 716 | "Method: Maximum Likelihood BIC: 13986.6\n", 717 | " No. Observations: 5020\n", 718 | "Date: Thu, Sep 19 2019 Df Residuals: 5014\n", 719 | "Time: 17:55:01 Df Model: 6\n", 720 | " Mean Model \n", 721 | "============================================================================\n", 722 | " coef std err t P>|t| 95.0% Conf. Int.\n", 723 | "----------------------------------------------------------------------------\n", 724 | "mu 0.0466 1.187e-02 3.924 8.721e-05 [2.330e-02,6.982e-02]\n", 725 | " Volatility Model \n", 726 | "=============================================================================\n", 727 | " coef std err t P>|t| 95.0% Conf. 
Int.\n", 728 | "-----------------------------------------------------------------------------\n", 729 | "omega 0.0129 4.816e-03 2.687 7.217e-03 [3.500e-03,2.238e-02]\n", 730 | "alpha[1] 0.0547 1.665e-02 3.285 1.019e-03 [2.207e-02,8.736e-02]\n", 731 | "alpha[2] 0.0389 2.505e-02 1.553 0.120 [-1.018e-02,8.800e-02]\n", 732 | "alpha[3] 0.0000 2.572e-02 0.000 1.000 [-5.041e-02,5.041e-02]\n", 733 | "beta[1] 0.8974 2.245e-02 39.978 0.000 [ 0.853, 0.941]\n", 734 | "=============================================================================\n", 735 | "\n", 736 | "Covariance estimator: robust\n", 737 | "\"\"\"" 738 | ] 739 | }, 740 | "execution_count": 12, 741 | "metadata": {}, 742 | "output_type": "execute_result" 743 | } 744 | ], 745 | "source": [ 746 | "model_garch_3_1 = arch_model(df.returns[1:], mean = \"Constant\", vol = \"GARCH\", p = 3, q = 1)\n", 747 | "results_garch_3_1 = model_garch_3_1.fit(update_freq = 5)\n", 748 | "results_garch_3_1.summary()" 749 | ] 750 | } 751 | ], 752 | "metadata": { 753 | "kernelspec": { 754 | "display_name": "Python 3", 755 | "language": "python", 756 | "name": "python3" 757 | }, 758 | "language_info": { 759 | "codemirror_mode": { 760 | "name": "ipython", 761 | "version": 3 762 | }, 763 | "file_extension": ".py", 764 | "mimetype": "text/x-python", 765 | "name": "python", 766 | "nbconvert_exporter": "python", 767 | "pygments_lexer": "ipython3", 768 | "version": "3.6.7" 769 | } 770 | }, 771 | "nbformat": 4, 772 | "nbformat_minor": 2 773 | } 774 | -------------------------------------------------------------------------------- /4. 
Analyzing prices using the AR model.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Importing the relevant packages\n" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "import pandas as pd\n", 17 | "import numpy as np\n", 18 | "import matplotlib.pyplot as plt\n", 19 | "import statsmodels.graphics.tsaplots as sgt\n", 20 | "from statsmodels.tsa.arima_model import ARMA\n", 21 | "from scipy.stats.distributions import chi2\n", 22 | "import statsmodels.tsa.stattools as sts \n", 23 | "import seaborn as sns\n", 24 | "sns.set()" 25 | ] 26 | }, 27 | { 28 | "cell_type": "markdown", 29 | "metadata": {}, 30 | "source": [ 31 | "## Importing the Data and Pre-processing " 32 | ] 33 | }, 34 | { 35 | "cell_type": "code", 36 | "execution_count": 2, 37 | "metadata": {}, 38 | "outputs": [], 39 | "source": [ 40 | "raw_csv_data = pd.read_csv(\"Index2018.csv\") \n", 41 | "df_comp=raw_csv_data.copy()\n", 42 | "df_comp.date = pd.to_datetime(df_comp.date, dayfirst = True)\n", 43 | "df_comp.set_index(\"date\", inplace=True)\n", 44 | "df_comp=df_comp.asfreq('b')\n", 45 | "df_comp=df_comp.fillna(method='ffill')" 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": 3, 51 | "metadata": {}, 52 | "outputs": [], 53 | "source": [ 54 | "df_comp['market_value']=df_comp.ftse" 55 | ] 56 | }, 57 | { 58 | "cell_type": "code", 59 | "execution_count": 4, 60 | "metadata": {}, 61 | "outputs": [], 62 | "source": [ 63 | "del df_comp['spx']\n", 64 | "del df_comp['dax']\n", 65 | "del df_comp['ftse']\n", 66 | "del df_comp['nikkei']\n", 67 | "size = int(len(df_comp)*0.8)\n", 68 | "df, df_test = df_comp.iloc[:size], df_comp.iloc[size:]" 69 | ] 70 | }, 71 | { 72 | "cell_type": "markdown", 73 | "metadata": {}, 74 | "source": [ 75 | "## The ACF" 76 | ] 77 | }, 78 | { 79 | 
"cell_type": "code", 80 | "execution_count": 5, 81 | "metadata": { 82 | "scrolled": true 83 | }, 84 | "outputs": [ 85 | { 86 | "data": { 87 | "image/png": "\n", 88 | "text/plain": [ 89 | "
" 90 | ] 91 | }, 92 | "metadata": { 93 | "needs_background": "light" 94 | }, 95 | "output_type": "display_data" 96 | } 97 | ], 98 | "source": [ 99 | "sgt.plot_acf(df.market_value, zero = False, lags = 40)\n", 100 | "plt.title(\"ACF for Prices\", size = 20)\n", 101 | "plt.show()" 102 | ] 103 | }, 104 | { 105 | "cell_type": "code", 106 | "execution_count": 6, 107 | "metadata": {}, 108 | "outputs": [], 109 | "source": [ 110 | "import warnings\n", 111 | "warnings.filterwarnings(\"ignore\")" 112 | ] 113 | }, 114 | { 115 | "cell_type": "markdown", 116 | "metadata": {}, 117 | "source": [ 118 | "## The PACF" 119 | ] 120 | }, 121 | { 122 | "cell_type": "code", 123 | "execution_count": 7, 124 | "metadata": { 125 | "scrolled": true 126 | }, 127 | "outputs": [ 128 | { 129 | "data": { 130 | "image/png": "\n", 131 | "text/plain": [ 132 | "
" 133 | ] 134 | }, 135 | "metadata": { 136 | "needs_background": "light" 137 | }, 138 | "output_type": "display_data" 139 | } 140 | ], 141 | "source": [ 142 | "sgt.plot_pacf(df.market_value, lags = 40, alpha = 0.05, zero = False, method = ('ols'))\n", 143 | "plt.title(\"PACF for Prices\", size = 20)\n", 144 | "plt.show()" 145 | ] 146 | }, 147 | { 148 | "cell_type": "markdown", 149 | "metadata": {}, 150 | "source": [ 151 | "## The AR(1) Model" 152 | ] 153 | }, 154 | { 155 | "cell_type": "code", 156 | "execution_count": 8, 157 | "metadata": {}, 158 | "outputs": [], 159 | "source": [ 160 | "model_ar = ARMA(df.market_value, order=(1,0))" 161 | ] 162 | }, 163 | { 164 | "cell_type": "code", 165 | "execution_count": 9, 166 | "metadata": {}, 167 | "outputs": [], 168 | "source": [ 169 | "results_ar = model_ar.fit()" 170 | ] 171 | }, 172 | { 173 | "cell_type": "code", 174 | "execution_count": 10, 175 | "metadata": {}, 176 | "outputs": [ 177 | { 178 | "data": { 179 | "text/html": [ 180 | "\n", 181 | "\n", 182 | "\n", 183 | " \n", 184 | "\n", 185 | "\n", 186 | " \n", 187 | "\n", 188 | "\n", 189 | " \n", 190 | "\n", 191 | "\n", 192 | " \n", 193 | "\n", 194 | "\n", 195 | " \n", 196 | "\n", 197 | "\n", 198 | " \n", 199 | "\n", 200 | "\n", 201 | " \n", 202 | "\n", 203 | "
ARMA Model Results
Dep. Variable: market_value No. Observations: 5021
Model: ARMA(1, 0) Log Likelihood -27622.598
Method: css-mle S.D. of innovations 59.258
Date: Fri, 27 Sep 2019 AIC 55251.196
Time: 10:03:35 BIC 55270.760
Sample: 01-07-1994 HQIC 55258.052
- 04-05-2013
\n", 204 | "\n", 205 | "\n", 206 | " \n", 207 | "\n", 208 | "\n", 209 | " \n", 210 | "\n", 211 | "\n", 212 | " \n", 213 | "\n", 214 | "
coef std err z P>|z| [0.025 0.975]
const 5057.0038 486.461 10.395 0.000 4103.558 6010.450
ar.L1.market_value 0.9985 0.001 1298.863 0.000 0.997 1.000
\n", 215 | "\n", 216 | "\n", 217 | "\n", 218 | " \n", 219 | "\n", 220 | "\n", 221 | " \n", 222 | "\n", 223 | "
Roots
Real Imaginary Modulus Frequency
AR.1 1.0015 +0.0000j 1.0015 0.0000
" 224 | ], 225 | "text/plain": [ 226 | "\n", 227 | "\"\"\"\n", 228 | " ARMA Model Results \n", 229 | "==============================================================================\n", 230 | "Dep. Variable: market_value No. Observations: 5021\n", 231 | "Model: ARMA(1, 0) Log Likelihood -27622.598\n", 232 | "Method: css-mle S.D. of innovations 59.258\n", 233 | "Date: Fri, 27 Sep 2019 AIC 55251.196\n", 234 | "Time: 10:03:35 BIC 55270.760\n", 235 | "Sample: 01-07-1994 HQIC 55258.052\n", 236 | " - 04-05-2013 \n", 237 | "======================================================================================\n", 238 | " coef std err z P>|z| [0.025 0.975]\n", 239 | "--------------------------------------------------------------------------------------\n", 240 | "const 5057.0038 486.461 10.395 0.000 4103.558 6010.450\n", 241 | "ar.L1.market_value 0.9985 0.001 1298.863 0.000 0.997 1.000\n", 242 | " Roots \n", 243 | "=============================================================================\n", 244 | " Real Imaginary Modulus Frequency\n", 245 | "-----------------------------------------------------------------------------\n", 246 | "AR.1 1.0015 +0.0000j 1.0015 0.0000\n", 247 | "-----------------------------------------------------------------------------\n", 248 | "\"\"\"" 249 | ] 250 | }, 251 | "execution_count": 10, 252 | "metadata": {}, 253 | "output_type": "execute_result" 254 | } 255 | ], 256 | "source": [ 257 | "results_ar.summary()" 258 | ] 259 | }, 260 | { 261 | "cell_type": "markdown", 262 | "metadata": {}, 263 | "source": [ 264 | "## Higher-Lag AR Models" 265 | ] 266 | }, 267 | { 268 | "cell_type": "code", 269 | "execution_count": 11, 270 | "metadata": {}, 271 | "outputs": [ 272 | { 273 | "data": { 274 | "text/html": [ 275 | "\n", 276 | "\n", 277 | "\n", 278 | " \n", 279 | "\n", 280 | "\n", 281 | " \n", 282 | "\n", 283 | "\n", 284 | " \n", 285 | "\n", 286 | "\n", 287 | " \n", 288 | "\n", 289 | "\n", 290 | " \n", 291 | "\n", 292 | "\n", 293 | " \n", 294 | "\n", 
295 | "\n", 296 | " \n", 297 | "\n", 298 | "
ARMA Model Results
Dep. Variable: market_value No. Observations: 5021
Model: ARMA(2, 0) Log Likelihood -27621.866
Method: css-mle S.D. of innovations 59.249
Date: Fri, 27 Sep 2019 AIC 55251.731
Time: 10:03:37 BIC 55277.817
Sample: 01-07-1994 HQIC 55260.872
- 04-05-2013
\n", 299 | "\n", 300 | "\n", 301 | " \n", 302 | "\n", 303 | "\n", 304 | " \n", 305 | "\n", 306 | "\n", 307 | " \n", 308 | "\n", 309 | "\n", 310 | " \n", 311 | "\n", 312 | "
coef std err z P>|z| [0.025 0.975]
const 5036.7744 495.444 10.166 0.000 4065.721 6007.828
ar.L1.market_value 0.9814 0.014 69.595 0.000 0.954 1.009
ar.L2.market_value 0.0171 0.014 1.211 0.226 -0.011 0.045
\n", 313 | "\n", 314 | "\n", 315 | "\n", 316 | " \n", 317 | "\n", 318 | "\n", 319 | " \n", 320 | "\n", 321 | "\n", 322 | " \n", 323 | "\n", 324 | "
Roots
Real Imaginary Modulus Frequency
AR.1 1.0015 +0.0000j 1.0015 0.0000
AR.2 -58.4573 +0.0000j 58.4573 0.5000
" 325 | ], 326 | "text/plain": [ 327 | "\n", 328 | "\"\"\"\n", 329 | " ARMA Model Results \n", 330 | "==============================================================================\n", 331 | "Dep. Variable: market_value No. Observations: 5021\n", 332 | "Model: ARMA(2, 0) Log Likelihood -27621.866\n", 333 | "Method: css-mle S.D. of innovations 59.249\n", 334 | "Date: Fri, 27 Sep 2019 AIC 55251.731\n", 335 | "Time: 10:03:37 BIC 55277.817\n", 336 | "Sample: 01-07-1994 HQIC 55260.872\n", 337 | " - 04-05-2013 \n", 338 | "======================================================================================\n", 339 | " coef std err z P>|z| [0.025 0.975]\n", 340 | "--------------------------------------------------------------------------------------\n", 341 | "const 5036.7744 495.444 10.166 0.000 4065.721 6007.828\n", 342 | "ar.L1.market_value 0.9814 0.014 69.595 0.000 0.954 1.009\n", 343 | "ar.L2.market_value 0.0171 0.014 1.211 0.226 -0.011 0.045\n", 344 | " Roots \n", 345 | "=============================================================================\n", 346 | " Real Imaginary Modulus Frequency\n", 347 | "-----------------------------------------------------------------------------\n", 348 | "AR.1 1.0015 +0.0000j 1.0015 0.0000\n", 349 | "AR.2 -58.4573 +0.0000j 58.4573 0.5000\n", 350 | "-----------------------------------------------------------------------------\n", 351 | "\"\"\"" 352 | ] 353 | }, 354 | "execution_count": 11, 355 | "metadata": {}, 356 | "output_type": "execute_result" 357 | } 358 | ], 359 | "source": [ 360 | "model_ar_2 = ARMA(df.market_value, order=(2,0))\n", 361 | "results_ar_2 = model_ar_2.fit()\n", 362 | "results_ar_2.summary()" 363 | ] 364 | }, 365 | { 366 | "cell_type": "code", 367 | "execution_count": 12, 368 | "metadata": {}, 369 | "outputs": [ 370 | { 371 | "data": { 372 | "text/html": [ 373 | "\n", 374 | "\n", 375 | "\n", 376 | " \n", 377 | "\n", 378 | "\n", 379 | " \n", 380 | "\n", 381 | "\n", 382 | " \n", 383 | "\n", 384 | "\n", 385 | " 
\n", 386 | "\n", 387 | "\n", 388 | " \n", 389 | "\n", 390 | "\n", 391 | " \n", 392 | "\n", 393 | "\n", 394 | " \n", 395 | "\n", 396 | "
ARMA Model Results
Dep. Variable: market_value No. Observations: 5021
Model: ARMA(3, 0) Log Likelihood -27615.871
Method: css-mle S.D. of innovations 59.178
Date: Fri, 27 Sep 2019 AIC 55241.742
Time: 10:03:37 BIC 55274.349
Sample: 01-07-1994 HQIC 55253.168
- 04-05-2013
\n", 397 | "\n", 398 | "\n", 399 | " \n", 400 | "\n", 401 | "\n", 402 | " \n", 403 | "\n", 404 | "\n", 405 | " \n", 406 | "\n", 407 | "\n", 408 | " \n", 409 | "\n", 410 | "\n", 411 | " \n", 412 | "\n", 413 | "
coef std err z P>|z| [0.025 0.975]
const 5089.6412 520.376 9.781 0.000 4069.723 6109.560
ar.L1.market_value 0.9806 0.014 69.619 0.000 0.953 1.008
ar.L2.market_value -0.0309 0.020 -1.563 0.118 -0.070 0.008
ar.L3.market_value 0.0489 0.014 3.466 0.001 0.021 0.077
\n", 414 | "\n", 415 | "\n", 416 | "\n", 417 | " \n", 418 | "\n", 419 | "\n", 420 | " \n", 421 | "\n", 422 | "\n", 423 | " \n", 424 | "\n", 425 | "\n", 426 | " \n", 427 | "\n", 428 | "
Roots
Real Imaginary Modulus Frequency
AR.1 1.0013 -0.0000j 1.0013 -0.0000
AR.2 -0.1850 -4.5161j 4.5199 -0.2565
AR.3 -0.1850 +4.5161j 4.5199 0.2565
" 429 | ], 430 | "text/plain": [ 431 | "\n", 432 | "\"\"\"\n", 433 | " ARMA Model Results \n", 434 | "==============================================================================\n", 435 | "Dep. Variable: market_value No. Observations: 5021\n", 436 | "Model: ARMA(3, 0) Log Likelihood -27615.871\n", 437 | "Method: css-mle S.D. of innovations 59.178\n", 438 | "Date: Fri, 27 Sep 2019 AIC 55241.742\n", 439 | "Time: 10:03:37 BIC 55274.349\n", 440 | "Sample: 01-07-1994 HQIC 55253.168\n", 441 | " - 04-05-2013 \n", 442 | "======================================================================================\n", 443 | " coef std err z P>|z| [0.025 0.975]\n", 444 | "--------------------------------------------------------------------------------------\n", 445 | "const 5089.6412 520.376 9.781 0.000 4069.723 6109.560\n", 446 | "ar.L1.market_value 0.9806 0.014 69.619 0.000 0.953 1.008\n", 447 | "ar.L2.market_value -0.0309 0.020 -1.563 0.118 -0.070 0.008\n", 448 | "ar.L3.market_value 0.0489 0.014 3.466 0.001 0.021 0.077\n", 449 | " Roots \n", 450 | "=============================================================================\n", 451 | " Real Imaginary Modulus Frequency\n", 452 | "-----------------------------------------------------------------------------\n", 453 | "AR.1 1.0013 -0.0000j 1.0013 -0.0000\n", 454 | "AR.2 -0.1850 -4.5161j 4.5199 -0.2565\n", 455 | "AR.3 -0.1850 +4.5161j 4.5199 0.2565\n", 456 | "-----------------------------------------------------------------------------\n", 457 | "\"\"\"" 458 | ] 459 | }, 460 | "execution_count": 12, 461 | "metadata": {}, 462 | "output_type": "execute_result" 463 | } 464 | ], 465 | "source": [ 466 | "model_ar_3 = ARMA(df.market_value, order=(3,0))\n", 467 | "results_ar_3 = model_ar_3.fit()\n", 468 | "results_ar_3.summary()" 469 | ] 470 | }, 471 | { 472 | "cell_type": "code", 473 | "execution_count": 13, 474 | "metadata": {}, 475 | "outputs": [ 476 | { 477 | "data": { 478 | "text/html": [ 479 | "\n", 480 | "\n", 481 | "\n", 482 | 
" \n", 483 | "\n", 484 | "\n", 485 | " \n", 486 | "\n", 487 | "\n", 488 | " \n", 489 | "\n", 490 | "\n", 491 | " \n", 492 | "\n", 493 | "\n", 494 | " \n", 495 | "\n", 496 | "\n", 497 | " \n", 498 | "\n", 499 | "\n", 500 | " \n", 501 | "\n", 502 | "
ARMA Model Results
Dep. Variable: market_value No. Observations: 5021
Model: ARMA(4, 0) Log Likelihood -27602.863
Method: css-mle S.D. of innovations 59.024
Date: Fri, 27 Sep 2019 AIC 55217.726
Time: 10:03:38 BIC 55256.854
Sample: 01-07-1994 HQIC 55231.437
- 04-05-2013
\n", 503 | "\n", 504 | "\n", 505 | " \n", 506 | "\n", 507 | "\n", 508 | " \n", 509 | "\n", 510 | "\n", 511 | " \n", 512 | "\n", 513 | "\n", 514 | " \n", 515 | "\n", 516 | "\n", 517 | " \n", 518 | "\n", 519 | "\n", 520 | " \n", 521 | "\n", 522 | "
coef std err z P>|z| [0.025 0.975]
const 5493.1989 959.292 5.726 0.000 3613.022 7373.376
ar.L1.market_value 0.9771 0.014 69.567 0.000 0.950 1.005
ar.L2.market_value -0.0286 0.020 -1.455 0.146 -0.067 0.010
ar.L3.market_value -0.0224 0.020 -1.135 0.256 -0.061 0.016
ar.L4.market_value 0.0729 0.014 5.171 0.000 0.045 0.100
\n", 523 | "\n", 524 | "\n", 525 | "\n", 526 | " \n", 527 | "\n", 528 | "\n", 529 | " \n", 530 | "\n", 531 | "\n", 532 | " \n", 533 | "\n", 534 | "\n", 535 | " \n", 536 | "\n", 537 | "\n", 538 | " \n", 539 | "\n", 540 | "
Roots
Real Imaginary Modulus Frequency
AR.1 1.0009 -0.0000j 1.0009 -0.0000
AR.2 0.9543 -2.0876j 2.2954 -0.1818
AR.3 0.9543 +2.0876j 2.2954 0.1818
AR.4 -2.6026 -0.0000j 2.6026 -0.5000
" 541 | ], 542 | "text/plain": [ 543 | "\n", 544 | "\"\"\"\n", 545 | " ARMA Model Results \n", 546 | "==============================================================================\n", 547 | "Dep. Variable: market_value No. Observations: 5021\n", 548 | "Model: ARMA(4, 0) Log Likelihood -27602.863\n", 549 | "Method: css-mle S.D. of innovations 59.024\n", 550 | "Date: Fri, 27 Sep 2019 AIC 55217.726\n", 551 | "Time: 10:03:38 BIC 55256.854\n", 552 | "Sample: 01-07-1994 HQIC 55231.437\n", 553 | " - 04-05-2013 \n", 554 | "======================================================================================\n", 555 | " coef std err z P>|z| [0.025 0.975]\n", 556 | "--------------------------------------------------------------------------------------\n", 557 | "const 5493.1989 959.292 5.726 0.000 3613.022 7373.376\n", 558 | "ar.L1.market_value 0.9771 0.014 69.567 0.000 0.950 1.005\n", 559 | "ar.L2.market_value -0.0286 0.020 -1.455 0.146 -0.067 0.010\n", 560 | "ar.L3.market_value -0.0224 0.020 -1.135 0.256 -0.061 0.016\n", 561 | "ar.L4.market_value 0.0729 0.014 5.171 0.000 0.045 0.100\n", 562 | " Roots \n", 563 | "=============================================================================\n", 564 | " Real Imaginary Modulus Frequency\n", 565 | "-----------------------------------------------------------------------------\n", 566 | "AR.1 1.0009 -0.0000j 1.0009 -0.0000\n", 567 | "AR.2 0.9543 -2.0876j 2.2954 -0.1818\n", 568 | "AR.3 0.9543 +2.0876j 2.2954 0.1818\n", 569 | "AR.4 -2.6026 -0.0000j 2.6026 -0.5000\n", 570 | "-----------------------------------------------------------------------------\n", 571 | "\"\"\"" 572 | ] 573 | }, 574 | "execution_count": 13, 575 | "metadata": {}, 576 | "output_type": "execute_result" 577 | } 578 | ], 579 | "source": [ 580 | "model_ar_4 = ARMA(df.market_value, order=[4,0])\n", 581 | "results_ar_4 = model_ar_4.fit()\n", 582 | "results_ar_4.summary()" 583 | ] 584 | }, 585 | { 586 | "cell_type": "markdown", 587 | "metadata": {}, 588 | "source": 
[ 589 | "## LLR Test" 590 | ] 591 | }, 592 | { 593 | "cell_type": "code", 594 | "execution_count": 16, 595 | "metadata": {}, 596 | "outputs": [], 597 | "source": [ 598 | "def LLR_test(mod_1, mod_2, DF=1):\n", 599 | " L1 = mod_1.fit().llf\n", 600 | " L2 = mod_2.fit().llf\n", 601 | " LR = (2*(L2-L1))\n", 602 | " p = chi2.sf(LR, DF).round(3)\n", 603 | " return p" 604 | ] 605 | }, 606 | { 607 | "cell_type": "markdown", 608 | "metadata": {}, 609 | "source": [ 610 | "## Comparing Higher-Lag AR Models" 611 | ] 612 | }, 613 | { 614 | "cell_type": "code", 615 | "execution_count": 17, 616 | "metadata": {}, 617 | "outputs": [ 618 | { 619 | "data": { 620 | "text/plain": [ 621 | "0.001" 622 | ] 623 | }, 624 | "execution_count": 17, 625 | "metadata": {}, 626 | "output_type": "execute_result" 627 | } 628 | ], 629 | "source": [ 630 | "LLR_test(model_ar_2, model_ar_3)" 631 | ] 632 | }, 633 | { 634 | "cell_type": "code", 635 | "execution_count": null, 636 | "metadata": {}, 637 | "outputs": [], 638 | "source": [ 639 | "LLR_test(model_ar_3, model_ar_4)" 640 | ] 641 | }, 642 | { 643 | "cell_type": "code", 644 | "execution_count": null, 645 | "metadata": {}, 646 | "outputs": [], 647 | "source": [ 648 | "model_ar_4 = ARMA(df.market_value, order=[4,0])\n", 649 | "results_ar_4 = model_ar_4.fit()\n", 650 | "print(results_ar_4.summary()) \n", 651 | "print (\"LLR test: \" + str(LLR_test(model_ar_3, model_ar_4)))" 652 | ] 653 | }, 654 | { 655 | "cell_type": "code", 656 | "execution_count": null, 657 | "metadata": {}, 658 | "outputs": [], 659 | "source": [ 660 | "model_ar_5 = ARMA(df.market_value, order=(5,0))\n", 661 | "results_ar_5 = model_ar_5.fit()\n", 662 | "print(results_ar_5.summary())\n", 663 | "print(\"\\nLLR test p-value = \" + str(LLR_test(model_ar_4, model_ar_5)))" 664 | ] 665 | }, 666 | { 667 | "cell_type": "code", 668 | "execution_count": null, 669 | "metadata": {}, 670 | "outputs": [], 671 | "source": [ 672 | "model_ar_6 = ARMA(df.market_value, order=(6,0))\n", 673 | 
"results_ar_6 = model_ar_6.fit()\n", 674 | "print(results_ar_6.summary())\n", 675 | "print(\"\\nLLR test p-value = \" + str(LLR_test(model_ar_5, model_ar_6)))" 676 | ] 677 | }, 678 | { 679 | "cell_type": "code", 680 | "execution_count": null, 681 | "metadata": {}, 682 | "outputs": [], 683 | "source": [ 684 | "model_ar_7 = ARMA(df.market_value, order=(7,0))\n", 685 | "results_ar_7 = model_ar_7.fit()\n", 686 | "print(results_ar_7.summary())\n", 687 | "print(\"\\nLLR test p-value = \" + str(LLR_test(model_ar_6, model_ar_7)))" 688 | ] 689 | }, 690 | { 691 | "cell_type": "code", 692 | "execution_count": null, 693 | "metadata": {}, 694 | "outputs": [], 695 | "source": [ 696 | "model_ar_8 = ARMA(df.market_value, order=(8,0))\n", 697 | "results_ar_8 = model_ar_8.fit()\n", 698 | "print(results_ar_8.summary())\n", 699 | "print(\"\\nLLR test p-value = \" + str(LLR_test(model_ar_7, model_ar_8)))" 700 | ] 701 | }, 702 | { 703 | "cell_type": "code", 704 | "execution_count": null, 705 | "metadata": {}, 706 | "outputs": [], 707 | "source": [ 708 | "print(\"LLR test: \" + str(LLR_test(model_ar, model_ar_7, DF = 6)))" 709 | ] 710 | }, 711 | { 712 | "cell_type": "markdown", 713 | "metadata": {}, 714 | "source": [ 715 | "## Analysing the Residuals" 716 | ] 717 | }, 718 | { 719 | "cell_type": "code", 720 | "execution_count": null, 721 | "metadata": {}, 722 | "outputs": [], 723 | "source": [ 724 | "df['res_price'] = results_ar_7.resid" 725 | ] 726 | }, 727 | { 728 | "cell_type": "code", 729 | "execution_count": null, 730 | "metadata": {}, 731 | "outputs": [], 732 | "source": [ 733 | "df.res_price.mean()" 734 | ] 735 | }, 736 | { 737 | "cell_type": "code", 738 | "execution_count": null, 739 | "metadata": {}, 740 | "outputs": [], 741 | "source": [ 742 | "df.res_price.var()" 743 | ] 744 | }, 745 | { 746 | "cell_type": "code", 747 | "execution_count": null, 748 | "metadata": {}, 749 | "outputs": [], 750 | "source": [ 751 | "sts.adfuller(df.res_price)" 752 | ] 753 | }, 754 | { 755 | 
"cell_type": "code", 756 | "execution_count": null, 757 | "metadata": {}, 758 | "outputs": [], 759 | "source": [ 760 | "sgt.plot_acf(df.res_price, zero = False, lags = 40)\n", 761 | "plt.title(\"ACF Of Residuals for Prices\",size=24)\n", 762 | "plt.show()" 763 | ] 764 | }, 765 | { 766 | "cell_type": "code", 767 | "execution_count": null, 768 | "metadata": {}, 769 | "outputs": [], 770 | "source": [ 771 | "df.res_price[1:].plot(figsize=(20,5))\n", 772 | "plt.title(\"Residuals of Prices\",size=24)\n", 773 | "plt.show()" 774 | ] 775 | } 776 | ], 777 | "metadata": { 778 | "kernelspec": { 779 | "display_name": "Python 3", 780 | "language": "python", 781 | "name": "python3" 782 | }, 783 | "language_info": { 784 | "codemirror_mode": { 785 | "name": "ipython", 786 | "version": 3 787 | }, 788 | "file_extension": ".py", 789 | "mimetype": "text/x-python", 790 | "name": "python", 791 | "nbconvert_exporter": "python", 792 | "pygments_lexer": "ipython3", 793 | "version": "3.7.4" 794 | } 795 | }, 796 | "nbformat": 4, 797 | "nbformat_minor": 2 798 | } 799 | --------------------------------------------------------------------------------