├── .gitignore ├── LICENSE ├── README.md ├── data ├── HWG.csv ├── OTZ.csv ├── RLI.csv ├── RMCC.csv ├── price_options.csv └── price_stock.csv ├── imgs ├── all_pnl.png ├── all_pnl_after.png ├── pnl_AA_II.png ├── pnl_BB_JJ.png ├── pnl_DD_HH.png ├── pnl_FF_MM.png ├── pos_AA_II.png ├── pos_BB_JJ.png ├── pos_DD_HH.png ├── pos_FF_MM.png ├── res_AA_II.png ├── res_BB_DD.png ├── res_BB_HH.png ├── res_BB_JJ.png ├── res_DD_HH.png ├── res_DD_JJ.png ├── res_FF_MM.png ├── res_FF_NN.png ├── res_HH_JJ.png ├── res_MM_NN.png ├── res_thre_AA_II.png ├── res_thre_BB_DD.png ├── res_thre_BB_HH.png ├── res_thre_BB_JJ.png ├── res_thre_DD_HH.png ├── res_thre_DD_JJ.png ├── res_thre_FF_MM.png ├── res_thre_FF_NN.png ├── res_thre_HH_JJ.png └── res_thre_MM_NN.png ├── models ├── BM.py ├── BS_model.py └── Vasicek.py ├── report ├── figures │ ├── all_pnl_after.png │ ├── all_pnl_before.png │ ├── arb1.png │ ├── arb2.png │ ├── pnl_AA_II.png │ ├── pos_BB_JJ.png │ ├── pos_DD_HH.png │ ├── res_HH_JJ.png │ └── res_thre_BB_HH.png └── report.pdf ├── requirements.txt ├── res ├── pnl_all.csv ├── pnl_best_thresholds.csv ├── pnl_final_portfolio_results.csv ├── pos_thresholds_count.csv └── positions_all.csv ├── statistical_arbitrage.ipynb └── utils └── ArbUtils.py /.gitignore: -------------------------------------------------------------------------------- 1 | # ignore some folders 2 | arb.md 3 | 4 | 5 | papers/ 6 | test/ 7 | arbUtils/ 8 | 9 | *.tex 10 | *.txt 11 | *.bib 12 | *.bbl 13 | *.blg 14 | *.gz 15 | *.sty 16 | 17 | *.aux 18 | *.lof 19 | *.log 20 | *.lot 21 | *.fls 22 | *.out 23 | *.toc 24 | *.fmt 25 | *.fot 26 | *.cb 27 | *.cb2 28 | .*.lb 29 | 30 | # Byte-compiled / optimized / DLL files 31 | __pycache__/ 32 | *.py[cod] 33 | *$py.class 34 | 35 | # C extensions 36 | *.so 37 | 38 | # Distribution / packaging 39 | .Python 40 | build/ 41 | develop-eggs/ 42 | dist/ 43 | downloads/ 44 | eggs/ 45 | .eggs/ 46 | lib/ 47 | lib64/ 48 | parts/ 49 | sdist/ 50 | var/ 51 | wheels/ 52 | share/python-wheels/ 53 | *.egg-info/ 54 | .installed.cfg 55 | *.egg 56 | MANIFEST 57 | 58 | # PyInstaller 59 | # Usually these files are written by a python script from a template 60 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 61 | *.manifest 62 | *.spec 63 | 64 | # Installer logs 65 | pip-log.txt 66 | pip-delete-this-directory.txt 67 | 68 | # Unit test / coverage reports 69 | htmlcov/ 70 | .tox/ 71 | .nox/ 72 | .coverage 73 | .coverage.* 74 | .cache 75 | nosetests.xml 76 | coverage.xml 77 | *.cover 78 | *.py,cover 79 | .hypothesis/ 80 | .pytest_cache/ 81 | cover/ 82 | 83 | # Translations 84 | *.mo 85 | *.pot 86 | 87 | # Django stuff: 88 | *.log 89 | local_settings.py 90 | db.sqlite3 91 | db.sqlite3-journal 92 | 93 | # Flask stuff: 94 | instance/ 95 | .webassets-cache 96 | 97 | # Scrapy stuff: 98 | .scrapy 99 | 100 | # Sphinx documentation 101 | docs/_build/ 102 | 103 | # PyBuilder 104 | .pybuilder/ 105 | target/ 106 | 107 | # Jupyter Notebook 108 | .ipynb_checkpoints 109 | 110 | # IPython 111 | profile_default/ 112 | ipython_config.py 113 | 114 | # pyenv 115 | # For a library or package, you might want to ignore these files since the code is 116 | # intended to run in multiple environments; otherwise, check them in: 117 | # .python-version 118 | 119 | # pipenv 120 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 121 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 122 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 123 | # install all needed dependencies. 124 | #Pipfile.lock 125 | 126 | # poetry 127 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 128 | # This is especially recommended for binary packages to ensure reproducibility, and is more 129 | # commonly ignored for libraries. 130 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 131 | #poetry.lock 132 | 133 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 134 | __pypackages__/ 135 | 136 | # Celery stuff 137 | celerybeat-schedule 138 | celerybeat.pid 139 | 140 | # SageMath parsed files 141 | *.sage.py 142 | 143 | # Environments 144 | .env 145 | .venv 146 | env/ 147 | venv/ 148 | ENV/ 149 | env.bak/ 150 | venv.bak/ 151 | 152 | # Spyder project settings 153 | .spyderproject 154 | .spyproject 155 | 156 | # Rope project settings 157 | .ropeproject 158 | 159 | # mkdocs documentation 160 | /site 161 | 162 | # mypy 163 | .mypy_cache/ 164 | .dmypy.json 165 | dmypy.json 166 | 167 | # Pyre type checker 168 | .pyre/ 169 | 170 | # pytype static type analyzer 171 | .pytype/ 172 | 173 | # Cython debug symbols 174 | cython_debug/ 175 | 176 | # PyCharm 177 | # JetBrains specific template is maintainted in a separate JetBrains.gitignore that can 178 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 179 | # and can be added to the global gitignore or merged into this file. For a more nuclear 180 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 181 | #.idea/ 182 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 bradleyboyuyang 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Statistical-Arbitrage 2 | In this project we provide a backtesting pipeline for intraday statistical arbitrage. Both traditional spread models (i.e. pairs trading with cointegration tests, time series analysis) and continuous time trading models (i.e. Ornstein-Uhlenbeck process) are used to model the spread portfolios. 3 | 4 | ## Scripts 5 | - `data`: intraday data files, including stocks, options, and dual listing stocks 6 | - `utils`: arbitrage tool functions including cointegration tests and regression analysis 7 | - `models`: simulations and parameter estimations for stochastic models and option greeks 8 | - `BM.py`: brownian motion related functions 9 | - `Vasicek.py`: OU-process related functions 10 | - `BS_model.py`: Black-Scholes model and option greeks 11 | - `statistical_arbitrage`: notebook for realizing pair trading based on limit orderbook stock data 12 | - `res`: results for positions, thresholds, and PnLs 13 | 14 | 15 | ## Backtesting 16 | #### Spread Portfolios 17 | 18 | 19 | #### Threshold Analysis 20 | 21 | 22 | #### Position Analysis 23 | 24 | 25 | 26 | 27 | 28 | 29 | #### PnL Visualization 30 | 31 | 32 | 33 | ## Note 34 | - Higher the transaction costs, larger the optimal entry points for arbitrage, lower the trading frequency. 35 | - Sensitivity analysis needs to be conducted with respect to the level of transaction costs (price impact, bid-ask spread, and commission fees). 36 | -------------------------------------------------------------------------------- /imgs/all_pnl.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bradleyboyuyang/Statistical-Arbitrage/edd2f1234be4211d99c3a84f2bf7430a1d3fde8e/imgs/all_pnl.png -------------------------------------------------------------------------------- /imgs/all_pnl_after.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bradleyboyuyang/Statistical-Arbitrage/edd2f1234be4211d99c3a84f2bf7430a1d3fde8e/imgs/all_pnl_after.png -------------------------------------------------------------------------------- /imgs/pnl_AA_II.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bradleyboyuyang/Statistical-Arbitrage/edd2f1234be4211d99c3a84f2bf7430a1d3fde8e/imgs/pnl_AA_II.png -------------------------------------------------------------------------------- /imgs/pnl_BB_JJ.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bradleyboyuyang/Statistical-Arbitrage/edd2f1234be4211d99c3a84f2bf7430a1d3fde8e/imgs/pnl_BB_JJ.png -------------------------------------------------------------------------------- /imgs/pnl_DD_HH.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bradleyboyuyang/Statistical-Arbitrage/edd2f1234be4211d99c3a84f2bf7430a1d3fde8e/imgs/pnl_DD_HH.png -------------------------------------------------------------------------------- /imgs/pnl_FF_MM.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bradleyboyuyang/Statistical-Arbitrage/edd2f1234be4211d99c3a84f2bf7430a1d3fde8e/imgs/pnl_FF_MM.png -------------------------------------------------------------------------------- /imgs/pos_AA_II.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bradleyboyuyang/Statistical-Arbitrage/edd2f1234be4211d99c3a84f2bf7430a1d3fde8e/imgs/pos_AA_II.png -------------------------------------------------------------------------------- /imgs/pos_BB_JJ.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bradleyboyuyang/Statistical-Arbitrage/edd2f1234be4211d99c3a84f2bf7430a1d3fde8e/imgs/pos_BB_JJ.png -------------------------------------------------------------------------------- /imgs/pos_DD_HH.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bradleyboyuyang/Statistical-Arbitrage/edd2f1234be4211d99c3a84f2bf7430a1d3fde8e/imgs/pos_DD_HH.png -------------------------------------------------------------------------------- /imgs/pos_FF_MM.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bradleyboyuyang/Statistical-Arbitrage/edd2f1234be4211d99c3a84f2bf7430a1d3fde8e/imgs/pos_FF_MM.png -------------------------------------------------------------------------------- /imgs/res_AA_II.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bradleyboyuyang/Statistical-Arbitrage/edd2f1234be4211d99c3a84f2bf7430a1d3fde8e/imgs/res_AA_II.png -------------------------------------------------------------------------------- /imgs/res_BB_DD.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bradleyboyuyang/Statistical-Arbitrage/edd2f1234be4211d99c3a84f2bf7430a1d3fde8e/imgs/res_BB_DD.png -------------------------------------------------------------------------------- /imgs/res_BB_HH.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bradleyboyuyang/Statistical-Arbitrage/edd2f1234be4211d99c3a84f2bf7430a1d3fde8e/imgs/res_BB_HH.png -------------------------------------------------------------------------------- /imgs/res_BB_JJ.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bradleyboyuyang/Statistical-Arbitrage/edd2f1234be4211d99c3a84f2bf7430a1d3fde8e/imgs/res_BB_JJ.png -------------------------------------------------------------------------------- /imgs/res_DD_HH.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bradleyboyuyang/Statistical-Arbitrage/edd2f1234be4211d99c3a84f2bf7430a1d3fde8e/imgs/res_DD_HH.png -------------------------------------------------------------------------------- /imgs/res_DD_JJ.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bradleyboyuyang/Statistical-Arbitrage/edd2f1234be4211d99c3a84f2bf7430a1d3fde8e/imgs/res_DD_JJ.png -------------------------------------------------------------------------------- /imgs/res_FF_MM.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bradleyboyuyang/Statistical-Arbitrage/edd2f1234be4211d99c3a84f2bf7430a1d3fde8e/imgs/res_FF_MM.png -------------------------------------------------------------------------------- /imgs/res_FF_NN.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bradleyboyuyang/Statistical-Arbitrage/edd2f1234be4211d99c3a84f2bf7430a1d3fde8e/imgs/res_FF_NN.png -------------------------------------------------------------------------------- /imgs/res_HH_JJ.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bradleyboyuyang/Statistical-Arbitrage/edd2f1234be4211d99c3a84f2bf7430a1d3fde8e/imgs/res_HH_JJ.png -------------------------------------------------------------------------------- /imgs/res_MM_NN.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bradleyboyuyang/Statistical-Arbitrage/edd2f1234be4211d99c3a84f2bf7430a1d3fde8e/imgs/res_MM_NN.png -------------------------------------------------------------------------------- /imgs/res_thre_AA_II.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bradleyboyuyang/Statistical-Arbitrage/edd2f1234be4211d99c3a84f2bf7430a1d3fde8e/imgs/res_thre_AA_II.png -------------------------------------------------------------------------------- /imgs/res_thre_BB_DD.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bradleyboyuyang/Statistical-Arbitrage/edd2f1234be4211d99c3a84f2bf7430a1d3fde8e/imgs/res_thre_BB_DD.png -------------------------------------------------------------------------------- /imgs/res_thre_BB_HH.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bradleyboyuyang/Statistical-Arbitrage/edd2f1234be4211d99c3a84f2bf7430a1d3fde8e/imgs/res_thre_BB_HH.png -------------------------------------------------------------------------------- /imgs/res_thre_BB_JJ.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bradleyboyuyang/Statistical-Arbitrage/edd2f1234be4211d99c3a84f2bf7430a1d3fde8e/imgs/res_thre_BB_JJ.png -------------------------------------------------------------------------------- /imgs/res_thre_DD_HH.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bradleyboyuyang/Statistical-Arbitrage/edd2f1234be4211d99c3a84f2bf7430a1d3fde8e/imgs/res_thre_DD_HH.png -------------------------------------------------------------------------------- /imgs/res_thre_DD_JJ.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bradleyboyuyang/Statistical-Arbitrage/edd2f1234be4211d99c3a84f2bf7430a1d3fde8e/imgs/res_thre_DD_JJ.png -------------------------------------------------------------------------------- /imgs/res_thre_FF_MM.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bradleyboyuyang/Statistical-Arbitrage/edd2f1234be4211d99c3a84f2bf7430a1d3fde8e/imgs/res_thre_FF_MM.png -------------------------------------------------------------------------------- /imgs/res_thre_FF_NN.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bradleyboyuyang/Statistical-Arbitrage/edd2f1234be4211d99c3a84f2bf7430a1d3fde8e/imgs/res_thre_FF_NN.png -------------------------------------------------------------------------------- /imgs/res_thre_HH_JJ.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bradleyboyuyang/Statistical-Arbitrage/edd2f1234be4211d99c3a84f2bf7430a1d3fde8e/imgs/res_thre_HH_JJ.png -------------------------------------------------------------------------------- /imgs/res_thre_MM_NN.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bradleyboyuyang/Statistical-Arbitrage/edd2f1234be4211d99c3a84f2bf7430a1d3fde8e/imgs/res_thre_MM_NN.png -------------------------------------------------------------------------------- /models/BM.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | import matplotlib.pyplot as plt 3 | import seaborn as sns 4 | import numpy as np 5 | 6 | 7 | def get_dW(T: int, random_state: Optional[int] = None) -> np.ndarray: 8 | """ 9 | Sample T times from a normal distribution, 10 | to simulate discrete increments (dW) of a Brownian Motion. 11 | Optional random_state to reproduce results. 12 | """ 13 | np.random.seed(random_state) 14 | return np.random.normal(0.0, 1.0, T) 15 | 16 | 17 | def get_W(T: int, random_state: Optional[int] = None) -> np.ndarray: 18 | """ 19 | Simulate a Brownian motion discretely samplet at unit time increments. 20 | Returns the cumulative sum 21 | """ 22 | dW = get_dW(T, random_state) 23 | # cumulative sum and then make the first index 0. 24 | dW_cs = dW.cumsum() 25 | return np.insert(dW_cs, 0, 0)[:-1] 26 | 27 | 28 | if __name__ == "__main__": 29 | dW = get_dW(T=1_000) 30 | W = get_W(T=1_000) 31 | fig = plt.figure(figsize=(15, 5)) 32 | 33 | title = "Brownian motion increments" 34 | plt.subplot(1, 2, 1) 35 | plt.plot(dW) 36 | plt.gca().set_title(title, fontsize=15) 37 | plt.xticks(fontsize=15) 38 | plt.yticks(fontsize=15) 39 | 40 | title = "Brownian motion path" 41 | plt.subplot(1, 2, 2) 42 | plt.plot(W) 43 | plt.gca().set_title(title, fontsize=15) 44 | plt.xticks(fontsize=15) 45 | plt.yticks(fontsize=15) -------------------------------------------------------------------------------- /models/BS_model.py: -------------------------------------------------------------------------------- 1 | from scipy import stats 2 | import numpy as np 3 | 4 | _norm_cdf = stats.norm(0, 1).cdf 5 | _norm_pdf = stats.norm(0, 1).pdf 6 | 7 | def gbm(S, T, r, sigma): 8 | return S*np.exp((r-0.5*sigma**2)*T + sigma*np.sqrt(T)*np.random.normal()) 9 | 10 | 11 | def _d1(S, K, T, r, sigma): 12 | return (np.log(S / K) + (r + 0.5 * sigma ** 2) * T) / (sigma * np.sqrt(T)) 13 | 14 | 15 | def _d2(S, K, T, r, sigma): 16 | return _d1(S, K, T, r, sigma) - sigma * np.sqrt(T) 17 | 18 | 19 | def call_value(S, K, T, r, sigma): 20 | ''' 21 | The fair value of a call option paying max(S_T-K, 0) at expiry, under the Black-scholes model, 22 | for an option with strike , expiring in years, under a fixed interest rate , 23 | a stock volatility , and when the current price of the underlying stock is . 24 | 25 | Parameters 26 | ---------- 27 | S : float 28 | The current value of the underlying stock (S0). 29 | 30 | K : float 31 | The strike price of the option. 32 | 33 | T : float 34 | Time to expiry in years. 35 | 36 | r : float 37 | The fixed interest rate valid between now and expiry. 38 | 39 | sigma : float 40 | The volatility of the underlying stock process. 41 | 42 | Returns 43 | ------- 44 | call_value : float 45 | The fair present value of the option. 46 | 47 | ''' 48 | 49 | return S * _norm_cdf(_d1(S, K, T, r, sigma)) - K * np.exp(-r * T) * _norm_cdf(_d2(S, K, T, r, sigma)) 50 | 51 | 52 | def put_value(S, K, T, r, sigma): 53 | ''' 54 | The fair value of a put option paying max(K-S_T, 0) at expiry, under the Black-scholes model, 55 | for an option with strike , expiring in years, under a fixed interest rate , 56 | a stock volatility , and when the current price of the underlying stock is . 57 | 58 | Parameters 59 | ---------- 60 | S : float 61 | The value of the underlying stock (S0). 62 | 63 | K : float 64 | The strike price of the option. 65 | 66 | T : float 67 | Time to expiry in years. 68 | 69 | r : float 70 | The fixed interest rate valid between now and expiry. 71 | 72 | sigma : float 73 | The volatility of the underlying stock process. 74 | 75 | Returns 76 | ------- 77 | put_value : float 78 | The fair present value of the option. 79 | ''' 80 | 81 | return np.exp(-r * T) * K * _norm_cdf(-_d2(S, K, T, r, sigma)) - S * _norm_cdf(-_d1(S, K, T, r, sigma)) 82 | 83 | 84 | def call_delta(S, K, T, r, sigma): 85 | ''' 86 | The delta, i.e. the first derivative of the option value with respect to the underlying, 87 | of a call option paying max(S_T-K, 0) at expiry, under the Black-scholes model, for an option 88 | with strike , expiring in years, under a fixed interest rate , a stock 89 | volatility , and when the current price of the underlying stock is . 90 | 91 | Parameters 92 | ---------- 93 | S : float 94 | The value of the underlying stock. 95 | 96 | K : float 97 | The strike price of the option. 98 | 99 | T : float 100 | Time to expiry in years. 101 | 102 | r : float 103 | The fixed interest rate valid between now and expiry. 104 | 105 | sigma : float 106 | The volatility of the underlying stock process. 107 | 108 | Returns 109 | ------- 110 | call_delta : float 111 | The fair present value of the option. 112 | ''' 113 | 114 | # return _norm_cdf(_d1(S, K, T, r, sigma)) 115 | S_T = gbm(S, T, r, sigma) 116 | return max(S_T-K, 0)*np.exp(-r*T)*S_T/S 117 | 118 | 119 | def put_delta(S, K, T, r, sigma): 120 | ''' 121 | The delta, i.e. the first derivative of the option value with respect to the underlying, 122 | of a put option paying max(K-S_T, 0) at expiry, under the Black-scholes model, for an option 123 | with strike , expiring in years, under a fixed interest rate , a stock 124 | volatility , and when the current price of the underlying stock is . 125 | 126 | Parameters 127 | ---------- 128 | S : float 129 | The value of the underlying stock. 130 | 131 | K : float 132 | The strike price of the option. 133 | 134 | T : float 135 | Time to expiry in years. 136 | 137 | r : float 138 | The fixed interest rate valid between now and expiry. 139 | 140 | sigma : float 141 | The volatility of the underlying stock process. 142 | 143 | Returns 144 | ------- 145 | put_delta : float 146 | The fair present value of the option. 147 | ''' 148 | 149 | # return call_delta(S, K, T, r, sigma) - 1 150 | S_T = gbm(S, T, r, sigma) 151 | return max(K-S_T, 0)*np.exp(-r*T)*S_T/S 152 | 153 | 154 | def call_vega(S, K, T, r, sigma): 155 | ''' 156 | The vega, i.e. the derivative of the option value with respect to the volatility, 157 | of a call option paying max(S-K, 0) at expiry, under the Black-scholes model, for an option 158 | with strike , expiring in years, under a fixed interest rate , a stock 159 | volatility , and when the current price of the underlying stock is . 160 | 161 | Parameters 162 | ---------- 163 | S : float 164 | The value of the underlying stock. 165 | 166 | K : float 167 | The strike price of the option. 168 | 169 | T : float 170 | Time to expiry in years. 171 | 172 | r : float 173 | The fixed interest rate valid between now and expiry. 174 | 175 | sigma : float 176 | The volatility of the underlying stock process. 177 | 178 | Returns 179 | ------- 180 | call_delta : float 181 | The fair present value of the option. 182 | ''' 183 | 184 | # return S * _norm_pdf(_d1(S, K, T, r, sigma)) * np.sqrt(T) 185 | S_T = gbm(S, T, r, sigma) 186 | return max(S_T-K, 0)*np.exp(-r*T)*S_T*((np.log(S_T/S)-(r+0.5*sigma**2)*T)/sigma) 187 | 188 | 189 | def put_vega(S, K, T, r, sigma): 190 | ''' 191 | The vega, i.e. the derivative of the option value with respect to the volatility, 192 | of a put option paying max(K-S, 0) at expiry, under the Black-scholes model, for an option 193 | with strike , expiring in years, under a fixed interest rate , a stock 194 | volatility , and when the current price of the underlying stock is . 195 | 196 | Parameters 197 | ---------- 198 | S : float 199 | The value of the underlying stock. 200 | 201 | K : float 202 | The strike price of the option. 203 | 204 | T : float 205 | Time to expiry in years. 206 | 207 | r : float 208 | The fixed interest rate valid between now and expiry. 209 | 210 | sigma : float 211 | The volatility of the underlying stock process. 212 | 213 | Returns 214 | ------- 215 | call_delta : float 216 | The fair present value of the option. 217 | ''' 218 | 219 | # return call_vega(S, K, T, r, sigma) 220 | S_T = gbm(S, T, r, sigma) 221 | return max(K-S_T, 0)*np.exp(-r*T)*S_T*((np.log(S_T/S)-(r+0.5*sigma**2)*T)/sigma) -------------------------------------------------------------------------------- /models/Vasicek.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | from typing import Optional 3 | import numpy as np 4 | from .BM import get_dW 5 | from sklearn.linear_model import LinearRegression 6 | import matplotlib.pyplot as plt 7 | 8 | @dataclass 9 | class OUParams: 10 | alpha: float # mean reversion parameter 11 | gamma: float # asymptotic mean 12 | beta: float # Brownian motion scale (standard deviation) 13 | 14 | def get_OU_process( 15 | T: int, 16 | OU_params: OUParams, 17 | X_0: Optional[float] = None, 18 | random_state: Optional[int] = None, 19 | ) -> np.ndarray: 20 | """ 21 | - T is the sample size. 22 | - Ou_params is an instance of OUParams dataclass. 23 | - X_0 the initial value for the process, if None, then X_0 is taken 24 | to be gamma (the asymptotic mean). 25 | Returns a 1D array. 26 | """ 27 | t = np.arange(T, dtype=np.float64) # float to avoid np.exp overflow 28 | exp_alpha_t = np.exp(-OU_params.alpha * t) 29 | dW = get_dW(T, random_state) 30 | integral_W = _get_integal_W(t, dW, OU_params) 31 | _X_0 = _select_X_0(X_0, OU_params) 32 | return ( 33 | _X_0 * exp_alpha_t 34 | + OU_params.gamma * (1 - exp_alpha_t) 35 | + OU_params.beta * exp_alpha_t * integral_W 36 | ) 37 | 38 | 39 | def _select_X_0(X_0_in: Optional[float], OU_params: OUParams) -> float: 40 | """Returns X_0 input if not none, else gamma (the long term mean).""" 41 | if X_0_in is not None: 42 | return X_0_in 43 | return OU_params.gamma 44 | 45 | 46 | def _get_integal_W( 47 | t: np.ndarray, dW: np.ndarray, OU_params: OUParams 48 | ) -> np.ndarray: 49 | """Integral with respect to Brownian Motion (W), ∫...dW.""" 50 | exp_alpha_s = np.exp(OU_params.alpha * t) 51 | integral_W = np.cumsum(exp_alpha_s * dW) 52 | return np.insert(integral_W, 0, 0)[:-1] 53 | 54 | def estimate_OU_params(X_t: np.ndarray) -> OUParams: 55 | """ 56 | Estimate OU params from OLS regression. 57 | - X_t is a 1D array. 58 | Returns instance of OUParams. 59 | """ 60 | y = np.diff(X_t) 61 | X = X_t[:-1].reshape(-1, 1) 62 | reg = LinearRegression(fit_intercept=True) 63 | reg.fit(X, y) 64 | # regression coeficient and constant 65 | alpha = -reg.coef_[0] 66 | gamma = reg.intercept_ / alpha 67 | # residuals and their standard deviation 68 | y_hat = reg.predict(X) 69 | beta = np.std(y - y_hat) 70 | return OUParams(alpha, gamma, beta) 71 | 72 | 73 | if __name__ == '__main__': 74 | OU_params = OUParams(alpha=0.07, gamma=0.0, beta=0.001) 75 | OU_proc = get_OU_process(1000, OU_params) 76 | fig = plt.figure(figsize=(15, 7)) 77 | 78 | title = "Ornstein-Uhlenbeck process, " 79 | title += r"$\alpha=0.07$, $\gamma = 0$, $\beta = 0.001$" 80 | plt.plot(OU_proc) 81 | plt.gca().set_title(title, fontsize=15) 82 | plt.xticks(fontsize=15) 83 | plt.yticks(fontsize=15) 84 | plt.show() 85 | 86 | # generate process with random_state to reproduce results 87 | OU_params = OUParams(alpha=0.07, gamma=0.0, beta=0.001) 88 | OU_proc = get_OU_process(10000, OU_params, random_state=7) 89 | 90 | OU_params_hat = estimate_OU_params(OU_proc) 91 | print(OU_params_hat) -------------------------------------------------------------------------------- /report/figures/all_pnl_after.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bradleyboyuyang/Statistical-Arbitrage/edd2f1234be4211d99c3a84f2bf7430a1d3fde8e/report/figures/all_pnl_after.png -------------------------------------------------------------------------------- /report/figures/all_pnl_before.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bradleyboyuyang/Statistical-Arbitrage/edd2f1234be4211d99c3a84f2bf7430a1d3fde8e/report/figures/all_pnl_before.png -------------------------------------------------------------------------------- /report/figures/arb1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bradleyboyuyang/Statistical-Arbitrage/edd2f1234be4211d99c3a84f2bf7430a1d3fde8e/report/figures/arb1.png -------------------------------------------------------------------------------- /report/figures/arb2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bradleyboyuyang/Statistical-Arbitrage/edd2f1234be4211d99c3a84f2bf7430a1d3fde8e/report/figures/arb2.png -------------------------------------------------------------------------------- /report/figures/pnl_AA_II.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bradleyboyuyang/Statistical-Arbitrage/edd2f1234be4211d99c3a84f2bf7430a1d3fde8e/report/figures/pnl_AA_II.png -------------------------------------------------------------------------------- /report/figures/pos_BB_JJ.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bradleyboyuyang/Statistical-Arbitrage/edd2f1234be4211d99c3a84f2bf7430a1d3fde8e/report/figures/pos_BB_JJ.png -------------------------------------------------------------------------------- /report/figures/pos_DD_HH.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bradleyboyuyang/Statistical-Arbitrage/edd2f1234be4211d99c3a84f2bf7430a1d3fde8e/report/figures/pos_DD_HH.png -------------------------------------------------------------------------------- /report/figures/res_HH_JJ.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bradleyboyuyang/Statistical-Arbitrage/edd2f1234be4211d99c3a84f2bf7430a1d3fde8e/report/figures/res_HH_JJ.png -------------------------------------------------------------------------------- /report/figures/res_thre_BB_HH.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bradleyboyuyang/Statistical-Arbitrage/edd2f1234be4211d99c3a84f2bf7430a1d3fde8e/report/figures/res_thre_BB_HH.png -------------------------------------------------------------------------------- /report/report.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bradleyboyuyang/Statistical-Arbitrage/edd2f1234be4211d99c3a84f2bf7430a1d3fde8e/report/report.pdf -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy==1.20.0 2 | pandas==1.4.2 3 | scipy==1.7.1 4 | statsmodels==0.12.2 5 | -------------------------------------------------------------------------------- /res/pnl_best_thresholds.csv: -------------------------------------------------------------------------------- 1 | Pairs,Thresholds,PnLs,rank 2 | "('BB', 'DD')",0.0005,25935.799999999952,1.0 3 | "('BB', 'DD')",0.0015555555555555557,16501.04999999998,2.0 4 | "('BB', 'DD')",0.0026111111111111114,12406.19999999997,3.0 5 | "('BB', 'DD')",0.0036666666666666666,9018.149999999985,4.0 6 | "('BB', 'DD')",0.004722222222222222,7457.8000000000175,5.0 7 | "('BB', 'DD')",0.005777777777777778,6556.800000000007,6.0 8 | "('BB', 'DD')",0.006833333333333334,5757.399999999998,7.0 9 | "('BB', 'DD')",0.00788888888888889,4732.19999999999,8.0 10 | "('BB', 'DD')",0.008944444444444446,4414.899999999992,10.0 11 | "('BB', 'DD')",0.01,4505.35000000002,9.0 12 | "('BB', 'JJ')",0.00022000000000000003,67810.25000000006,1.0 13 | "('BB', 'JJ')",0.0006844444444444444,55427.350000000064,2.0 14 | "('BB', 'JJ')",0.001148888888888889,45365.10000000001,3.0 15 | "('BB', 'JJ')",0.0016133333333333334,38873.15000000001,4.0 16 | "('BB', 'JJ')",0.0020777777777777778,34452.94999999998,5.0 17 | "('BB', 'JJ')",0.0025422222222222226,29942.69999999993,6.0 18 | "('BB', 'JJ')",0.003006666666666667,29722.24999999997,7.0 19 | "('BB', 'JJ')",0.003471111111111112,27583.29999999996,8.0 20 | "('BB', 'JJ')",0.003935555555555556,24979.79999999997,9.0 21 | "('BB', 'JJ')",0.0044,22907.39999999994,10.0 22 | "('DD', 'HH')",0.000485,13098.55,1.0 23 | "('DD', 'HH')",0.001508888888888889,7741.050000000005,2.0 24 | "('DD', 'HH')",0.002532777777777778,5910.549999999996,3.0 25 | "('DD', 'HH')",0.0035566666666666663,4758.399999999989,4.0 26 | "('DD', 'HH')",0.004580555555555556,4416.9,5.0 27 | "('DD', 'HH')",0.005604444444444445,3973.2000000000116,6.0 28 | "('DD', 'HH')",0.006628333333333333,3345.299999999992,7.0 29 | "('DD', 'HH')",0.0076522222222222235,2946.549999999992,8.0 30 | "('DD', 'HH')",0.008676111111111112,2622.30000000001,9.0 31 | "('DD', 'HH')",0.0097,2345.300000000012,10.0 32 | "('DD', 'JJ')",0.00036,27720.250000000022,1.0 33 | "('DD', 'JJ')",0.00112,18763.800000000017,2.0 34 | "('DD', 'JJ')",0.0018800000000000002,14338.30000000002,3.0 35 | "('DD', 'JJ')",0.0026399999999999996,13283.949999999995,4.0 36 | "('DD', 'JJ')",0.0034,11836.95000000002,5.0 37 | "('DD', 'JJ')",0.0041600000000000005,10601.850000000048,6.0 38 | "('DD', 'JJ')",0.00492,9583.100000000017,7.0 39 | "('DD', 'JJ')",0.00568,8666.30000000001,8.0 40 | "('DD', 'JJ')",0.00644,8096.4,9.0 41 | "('DD', 'JJ')",0.0072,7553.250000000018,10.0 42 | "('FF', 'MM')",0.000155,79082.70000000006,1.0 43 | "('FF', 'MM')",0.0004822222222222222,63170.449999999975,2.0 44 | "('FF', 'MM')",0.0008094444444444444,54144.55000000003,3.0 45 | "('FF', 'MM')",0.0011366666666666665,46575.60000000002,4.0 46 | "('FF', 'MM')",0.0014638888888888887,41351.09999999996,5.0 47 | "('FF', 'MM')",0.0017911111111111112,36311.399999999994,6.0 48 | "('FF', 'MM')",0.002118333333333333,33438.399999999936,7.0 49 | "('FF', 'MM')",0.002445555555555556,30954.599999999944,8.0 50 | "('FF', 'MM')",0.0027727777777777777,28452.19999999996,9.0 51 | "('FF', 'MM')",0.0031,26322.099999999984,10.0 52 | "('FF', 'NN')",0.00030500000000000004,26567.49999999997,1.0 53 | "('FF', 'NN')",0.000948888888888889,17358.300000000036,2.0 54 | "('FF', 'NN')",0.001592777777777778,12685.250000000022,4.0 55 | "('FF', 'NN')",0.0022366666666666668,13133.89999999999,3.0 56 | "('FF', 'NN')",0.0028805555555555555,11462.849999999995,5.0 57 | "('FF', 'NN')",0.003524444444444445,10005.400000000016,6.0 58 | "('FF', 'NN')",0.004168333333333334,9166.399999999996,7.0 59 | "('FF', 'NN')",0.004812222222222223,7924.550000000011,8.0 60 | "('FF', 'NN')",0.005456111111111112,7317.550000000004,9.0 61 | "('FF', 'NN')",0.0061,6487.900000000012,10.0 62 | "('MM', 'NN')",0.00034,21631.400000000038,1.0 63 | "('MM', 'NN')",0.0010577777777777777,14150.399999999987,2.0 64 | "('MM', 'NN')",0.0017755555555555556,10763.800000000008,3.0 65 | "('MM', 'NN')",0.002493333333333333,8504.399999999989,4.0 66 | "('MM', 'NN')",0.0032111111111111108,6993.599999999988,5.0 67 | "('MM', 'NN')",0.003928888888888889,6374.400000000009,6.0 68 | "('MM', 'NN')",0.004646666666666666,4626.199999999973,7.0 69 | "('MM', 'NN')",0.005364444444444445,3640.7999999999956,8.0 70 | "('MM', 'NN')",0.006082222222222222,3486.799999999983,9.0 71 | "('MM', 'NN')",0.0068,2967.1999999999807,10.0 72 | "('BB', 'HH')",0.0009050000000000001,6505.80000000001,1.0 73 | "('BB', 'HH')",0.002815555555555556,2856.0000000000073,2.0 74 | "('BB', 'HH')",0.0047261111111111115,1297.9999999999782,3.0 75 | "('BB', 'HH')",0.006636666666666667,527.5999999999894,4.0 76 | "('BB', 'HH')",0.008547222222222223,-210.0,5.0 77 | "('BB', 'HH')",0.01045777777777778,-800.8000000000084,7.0 78 | "('BB', 'HH')",0.012368333333333335,-855.600000000004,8.0 79 | "('BB', 'HH')",0.014278888888888891,-665.7999999999975,6.0 80 | "('BB', 'HH')",0.016189444444444447,-1105.199999999987,9.0 81 | "('BB', 'HH')",0.0181,-1216.9999999999955,10.0 82 | "('HH', 'JJ')",0.00045,9973.300000000003,1.0 83 | "('HH', 'JJ')",0.0014,5711.1,2.0 84 | "('HH', 'JJ')",0.00235,3792.1500000000233,3.0 85 | "('HH', 'JJ')",0.0032999999999999995,2606.000000000018,4.0 86 | "('HH', 'JJ')",0.0042499999999999994,2285.5999999999967,5.0 87 | "('HH', 'JJ')",0.0052,2262.8499999999985,6.0 88 | "('HH', 'JJ')",0.00615,2022.3999999999996,7.0 89 | "('HH', 'JJ')",0.0071,1476.9500000000062,8.0 90 | "('HH', 'JJ')",0.00805,916.2000000000025,9.0 91 | "('HH', 'JJ')",0.009,886.0499999999993,10.0 92 | "('AA', 'II')",0.00107,18215.249999999935,1.0 93 | "('AA', 'II')",0.0033288888888888886,10419.649999999983,2.0 94 | "('AA', 'II')",0.0055877777777777774,6042.70000000001,3.0 95 | "('AA', 'II')",0.007846666666666665,5491.199999999993,4.0 96 | "('AA', 'II')",0.010105555555555555,4634.700000000001,5.0 97 | "('AA', 'II')",0.012364444444444445,3626.7999999999975,6.0 98 | "('AA', 'II')",0.014623333333333334,3260.500000000002,7.0 99 | "('AA', 'II')",0.016882222222222222,1600.3999999999978,10.0 100 | "('AA', 'II')",0.019141111111111112,1730.0,8.0 101 | "('AA', 'II')",0.0214,1682.3999999999978,9.0 102 | -------------------------------------------------------------------------------- /res/pos_thresholds_count.csv: -------------------------------------------------------------------------------- 1 | ,0 2 | BB Pos - Thres: 0.0005,760 3 | DD Pos - Thres: 0.0005,760 4 | BB Pos - Thres: 0.0015555555555555557,624 5 | DD Pos - Thres: 0.0015555555555555557,624 6 | BB Pos - Thres: 0.0026111111111111114,463 7 | DD Pos - Thres: 0.0026111111111111114,463 8 | BB Pos - Thres: 0.0036666666666666666,329 9 | DD Pos - Thres: 0.0036666666666666666,329 10 | BB Pos - Thres: 0.004722222222222222,263 11 | DD Pos - Thres: 0.004722222222222222,263 12 | BB Pos - Thres: 0.005777777777777778,225 13 | DD Pos - Thres: 0.005777777777777778,225 14 | BB Pos - Thres: 0.006833333333333334,193 15 | DD Pos - Thres: 0.006833333333333334,193 16 | BB Pos - Thres: 0.00788888888888889,161 17 | DD Pos - Thres: 0.00788888888888889,161 18 | BB Pos - Thres: 0.008944444444444446,145 19 | DD Pos - Thres: 0.008944444444444446,145 20 | BB Pos - Thres: 0.01,137 21 | DD Pos - Thres: 0.01,137 22 | BB Pos - Thres: 0.00022000000000000003,1605 23 | JJ Pos - Thres: 0.00022000000000000003,1605 24 | BB Pos - Thres: 0.0006844444444444444,1697 25 | JJ Pos - Thres: 0.0006844444444444444,1697 26 | BB Pos - Thres: 0.001148888888888889,1527 27 | JJ Pos - Thres: 0.001148888888888889,1527 28 | BB Pos - Thres: 0.0016133333333333334,1333 29 | JJ Pos - Thres: 0.0016133333333333334,1333 30 | BB Pos - Thres: 0.0020777777777777778,1176 31 | JJ Pos - Thres: 0.0020777777777777778,1176 32 | BB Pos - Thres: 0.0025422222222222226,1011 33 | JJ Pos - Thres: 0.0025422222222222226,1011 34 | BB Pos - Thres: 0.003006666666666667,860 35 | JJ Pos - Thres: 0.003006666666666667,860 36 | BB Pos - Thres: 0.003471111111111112,768 37 | JJ Pos - Thres: 0.003471111111111112,768 38 | BB Pos - Thres: 0.003935555555555556,672 39 | JJ Pos - Thres: 0.003935555555555556,672 40 | BB Pos - Thres: 0.0044,594 41 | JJ Pos - Thres: 0.0044,594 42 | DD Pos - Thres: 0.000485,440 43 | HH Pos - Thres: 0.000485,440 44 | DD Pos - Thres: 0.001508888888888889,308 45 | HH Pos - Thres: 0.001508888888888889,308 46 | DD Pos - Thres: 0.002532777777777778,227 47 | HH Pos - Thres: 0.002532777777777778,227 48 | DD Pos - Thres: 0.0035566666666666663,177 49 | HH Pos - Thres: 0.0035566666666666663,177 50 | DD Pos - Thres: 0.004580555555555556,153 51 | HH Pos - Thres: 0.004580555555555556,153 52 | DD Pos - Thres: 0.005604444444444445,131 53 | HH Pos - Thres: 0.005604444444444445,131 54 | DD Pos - Thres: 0.006628333333333333,109 55 | HH Pos - Thres: 0.006628333333333333,109 56 | DD Pos - Thres: 0.0076522222222222235,95 57 | HH Pos - Thres: 0.0076522222222222235,95 58 | DD Pos - Thres: 0.008676111111111112,83 59 | HH Pos - Thres: 0.008676111111111112,83 60 | DD Pos - Thres: 0.0097,73 61 | HH Pos - Thres: 0.0097,73 62 | DD Pos - Thres: 0.00036,650 63 | JJ Pos - Thres: 0.00036,650 64 | DD Pos - Thres: 0.00112,507 65 | JJ Pos - Thres: 0.00112,507 66 | DD Pos - Thres: 0.0018800000000000002,359 67 | JJ Pos - Thres: 0.0018800000000000002,359 68 | DD Pos - Thres: 0.0026399999999999996,305 69 | JJ Pos - Thres: 0.0026399999999999996,305 70 | DD Pos - Thres: 0.0034,251 71 | JJ Pos - Thres: 0.0034,251 72 | DD Pos - Thres: 0.0041600000000000005,209 73 | JJ Pos - Thres: 0.0041600000000000005,209 74 | DD Pos - Thres: 0.00492,184 75 | JJ Pos - Thres: 0.00492,184 76 | DD Pos - Thres: 0.00568,158 77 | JJ Pos - Thres: 0.00568,158 78 | DD Pos - Thres: 0.00644,138 79 | JJ Pos - Thres: 0.00644,138 80 | DD Pos - Thres: 0.0072,122 81 | JJ Pos - Thres: 0.0072,122 82 | FF Pos - Thres: 0.000155,1543 83 | MM Pos - Thres: 0.000155,1543 84 | FF Pos - Thres: 0.0004822222222222222,1583 85 | MM Pos - Thres: 0.0004822222222222222,1583 86 | FF Pos - Thres: 0.0008094444444444444,1463 87 | MM Pos - Thres: 0.0008094444444444444,1463 88 | FF Pos - Thres: 0.0011366666666666665,1257 89 | MM Pos - Thres: 0.0011366666666666665,1257 90 | FF Pos - Thres: 0.0014638888888888887,1105 91 | MM Pos - Thres: 0.0014638888888888887,1105 92 | FF Pos - Thres: 0.0017911111111111112,938 93 | MM Pos - Thres: 0.0017911111111111112,938 94 | FF Pos - Thres: 0.002118333333333333,831 95 | MM Pos - Thres: 0.002118333333333333,831 96 | FF Pos - Thres: 0.002445555555555556,743 97 | MM Pos - Thres: 0.002445555555555556,743 98 | FF Pos - Thres: 0.0027727777777777777,659 99 | MM Pos - Thres: 0.0027727777777777777,659 100 | FF Pos - Thres: 0.0031,591 101 | MM Pos - Thres: 0.0031,591 102 | FF Pos - Thres: 0.00030500000000000004,809 103 | NN Pos - Thres: 0.00030500000000000004,809 104 | FF Pos - Thres: 0.000948888888888889,690 105 | NN Pos - Thres: 0.000948888888888889,690 106 | FF Pos - Thres: 0.001592777777777778,527 107 | NN Pos - Thres: 0.001592777777777778,527 108 | FF Pos - Thres: 0.0022366666666666668,406 109 | NN Pos - Thres: 0.0022366666666666668,406 110 | FF Pos - Thres: 0.0028805555555555555,336 111 | NN Pos - Thres: 0.0028805555555555555,336 112 | FF Pos - Thres: 0.003524444444444445,276 113 | NN Pos - Thres: 0.003524444444444445,276 114 | FF Pos - Thres: 0.004168333333333334,238 115 | NN Pos - Thres: 0.004168333333333334,238 116 | FF Pos - Thres: 0.004812222222222223,196 117 | NN Pos - Thres: 0.004812222222222223,196 118 | FF Pos - Thres: 0.005456111111111112,172 119 | NN Pos - Thres: 0.005456111111111112,172 120 | FF Pos - Thres: 0.0061,146 121 | NN Pos - Thres: 0.0061,146 122 | MM Pos - Thres: 0.00034,695 123 | NN Pos - Thres: 0.00034,695 124 | MM Pos - Thres: 0.0010577777777777777,588 125 | NN Pos - Thres: 0.0010577777777777777,588 126 | MM Pos - Thres: 0.0017755555555555556,460 127 | NN Pos - Thres: 0.0017755555555555556,460 128 | MM Pos - Thres: 0.002493333333333333,361 129 | NN Pos - Thres: 0.002493333333333333,361 130 | MM Pos - Thres: 0.0032111111111111108,295 131 | NN Pos - Thres: 0.0032111111111111108,295 132 | MM Pos - Thres: 0.003928888888888889,261 133 | NN Pos - Thres: 0.003928888888888889,261 134 | MM Pos - Thres: 0.004646666666666666,201 135 | NN Pos - Thres: 0.004646666666666666,201 136 | MM Pos - Thres: 0.005364444444444445,167 137 | NN Pos - Thres: 0.005364444444444445,167 138 | MM Pos - Thres: 0.006082222222222222,155 139 | NN Pos - Thres: 0.006082222222222222,155 140 | MM Pos - Thres: 0.0068,137 141 | NN Pos - Thres: 0.0068,137 142 | BB Pos - Thres: 0.0009050000000000001,373 143 | HH Pos - Thres: 0.0009050000000000001,373 144 | BB Pos - Thres: 0.002815555555555556,253 145 | HH Pos - Thres: 0.002815555555555556,253 146 | BB Pos - Thres: 0.0047261111111111115,173 147 | HH Pos - Thres: 0.0047261111111111115,173 148 | BB Pos - Thres: 0.006636666666666667,131 149 | HH Pos - Thres: 0.006636666666666667,131 150 | BB Pos - Thres: 0.008547222222222223,101 151 | HH Pos - Thres: 0.008547222222222223,101 152 | BB Pos - Thres: 0.01045777777777778,79 153 | HH Pos - Thres: 0.01045777777777778,79 154 | BB Pos - Thres: 0.012368333333333335,71 155 | HH Pos - Thres: 0.012368333333333335,71 156 | BB Pos - Thres: 0.014278888888888891,69 157 | HH Pos - Thres: 0.014278888888888891,69 158 | BB Pos - Thres: 0.016189444444444447,57 159 | HH Pos - Thres: 0.016189444444444447,57 160 | BB Pos - Thres: 0.0181,51 161 | HH Pos - Thres: 0.0181,51 162 | HH Pos - Thres: 0.00045,395 163 | JJ Pos - Thres: 0.00045,395 164 | HH Pos - Thres: 0.0014,261 165 | JJ Pos - Thres: 0.0014,261 166 | HH Pos - Thres: 0.00235,177 167 | JJ Pos - Thres: 0.00235,177 168 | HH Pos - Thres: 0.0032999999999999995,129 169 | JJ Pos - Thres: 0.0032999999999999995,129 170 | HH Pos - Thres: 0.0042499999999999994,109 171 | JJ Pos - Thres: 0.0042499999999999994,109 172 | HH Pos - Thres: 0.0052,99 173 | JJ Pos - Thres: 0.0052,99 174 | HH Pos - Thres: 0.00615,87 175 | JJ Pos - Thres: 0.00615,87 176 | HH Pos - Thres: 0.0071,71 177 | JJ Pos - Thres: 0.0071,71 178 | HH Pos - Thres: 0.00805,57 179 | JJ Pos - Thres: 0.00805,57 180 | HH Pos - Thres: 0.009,53 181 | JJ Pos - Thres: 0.009,53 182 | AA Pos - Thres: 0.00107,390 183 | II Pos - Thres: 0.00107,390 184 | AA Pos - Thres: 0.0033288888888888886,179 185 | II Pos - Thres: 0.0033288888888888886,179 186 | AA Pos - Thres: 0.0055877777777777774,79 187 | II Pos - Thres: 0.0055877777777777774,79 188 | AA Pos - Thres: 0.007846666666666665,59 189 | II Pos - Thres: 0.007846666666666665,59 190 | AA Pos - Thres: 0.010105555555555555,41 191 | II Pos - Thres: 0.010105555555555555,41 192 | AA Pos - Thres: 0.012364444444444445,27 193 | II Pos - Thres: 0.012364444444444445,27 194 | AA Pos - Thres: 0.014623333333333334,19 195 | II Pos - Thres: 0.014623333333333334,19 196 | AA Pos - Thres: 0.016882222222222222,10 197 | II Pos - Thres: 0.016882222222222222,10 198 | AA Pos - Thres: 0.019141111111111112,10 199 | II Pos - Thres: 0.019141111111111112,10 200 | AA Pos - Thres: 0.0214,10 201 | II Pos - Thres: 0.0214,10 202 | -------------------------------------------------------------------------------- /utils/ArbUtils.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | 3 | from statsmodels.api import OLS, add_constant 4 | from statsmodels.tsa.stattools import adfuller 5 | 6 | def fit_ols(y, x): 7 | """Estimates long-run and short-run cointegration relationship for series y and x. 8 | 9 | Uses a 2-step process to first estimate coefficients for the long-run relationship 10 | y_t = c + gamma * x_t + z_t 11 | 12 | and then the short-term relationship, 13 | y_t - y_(t-1) = alpha * z_(t-1) + epsilon_t, 14 | 15 | with z the found residuals of the first equation. 16 | 17 | Parameters 18 | ---------- 19 | y : pd.Series 20 | The first time series of the pair to analyse. 21 | 22 | x : pd.Series 23 | The second time series of the pair to analyse. 24 | 25 | Returns 26 | ------- 27 | c : float 28 | The constant term in the long-run relationship y_t = c + gamma * x_t + z_t. This 29 | describes the static shift of y with respect to gamma * x. 30 | 31 | gamma : float 32 | The gamma term in the long-run relationship y_t = c + gamma * x_t + z_t. This 33 | describes the ratio between the const-shifted y and x. 34 | 35 | alpha : float 36 | The alpha term in the short-run relationship y_t - y_(t-1) = alpha * z_(t-1) + epsilon. This 37 | gives an indication of the strength of the error correction toward the long-run mean. 38 | 39 | z : pd.Series 40 | Series of residuals z_t from the long-run relationship y_t = c + gamma * x_t + z_t, representing 41 | the value of the error correction term. 42 | 43 | """ 44 | 45 | assert isinstance(y, pd.Series), 'Input series y should be of type pd.Series' 46 | assert isinstance(x, pd.Series), 'Input series x should be of type pd.Series' 47 | assert sum(y.isnull()) == 0, 'Input series y has nan-values. Unhandled case.' 48 | assert sum(x.isnull()) == 0, 'Input series x has nan-values. Unhandled case.' 49 | assert y.index.equals(x.index), 'The two input series y and x do not have the same index.' 50 | 51 | long_run_ols = OLS(y, add_constant(x), has_const=True) 52 | long_run_ols_fit = long_run_ols.fit() 53 | 54 | c, gamma = long_run_ols_fit.params 55 | z = long_run_ols_fit.resid 56 | 57 | short_run_ols = OLS(y.diff().iloc[1:], (z.shift().iloc[1:])) 58 | short_run_ols_fit = short_run_ols.fit() 59 | 60 | alpha = short_run_ols_fit.params[0] 61 | 62 | return c, gamma, alpha, z 63 | 64 | 65 | def granger_cointegration_test(y, x): 66 | """Applies the two-step Engle & Granger test for cointegration. 67 | 68 | First fits the long-run relationship 69 | y_t = c + gamma * x_t + z_t 70 | 71 | and then tests, by Dickey-Fuller phi=1 vs phi < 1 in 72 | z_t = phi * z_(t-1) + eta_t 73 | 74 | If this implies phi < 1, the z series is stationary is concluded to be 75 | stationary, and thus the series y and x are concluded to be cointegrated. 76 | 77 | Parameters 78 | ---------- 79 | y : pd.Series 80 | the first time series of the pair to analyse 81 | 82 | x : pd.Series 83 | the second time series of the pair to analyse 84 | 85 | Returns 86 | ------- 87 | dfstat : float 88 | The Dickey Fuller test-statistic for phi = 1 vs phi < 1 in the second equation. A more 89 | negative value implies the existence of stronger cointegration. 90 | 91 | pvalue : float 92 | The p-value corresponding to the Dickey Fuller test-statistic. A lower value implies 93 | stronger rejection of no-cointegration, thus stronger evidence of cointegration. 94 | 95 | """ 96 | 97 | assert isinstance(y, pd.Series), 'Input series y should be of type pd.Series' 98 | assert isinstance(x, pd.Series), 'Input series x should be of type pd.Series' 99 | assert sum(y.isnull()) == 0, 'Input series y has nan-values. Unhandled case.' 100 | assert sum(x.isnull()) == 0, 'Input series x has nan-values. Unhandled case.' 101 | assert y.index.equals(x.index), 'The two input series y and x do not have the same index.' 102 | 103 | c, gamma, alpha, z = fit_ols(y, x) 104 | 105 | # NOTE: The p-value returned by the adfuller function assumes we do not estimate z first, but test 106 | # stationarity of an unestimated series directly. This assumption should have limited effect for high N, 107 | # so for the purposes of this course this p-value can be used for the EG-test. Critical values taking 108 | # this into account more accurately are provided in e.g. McKinnon (1990) and Engle & Yoo (1987). 109 | 110 | adfstat, pvalue, usedlag, nobs, crit_values = adfuller(z, maxlag=1, autolag=None) 111 | 112 | return adfstat, pvalue 113 | 114 | --------------------------------------------------------------------------------