├── .gitignore ├── .idea ├── .gitignore ├── inspectionProfiles │ └── profiles_settings.xml ├── misc.xml ├── modules.xml ├── statarb.iml └── vcs.xml ├── LICENSE ├── Pairs_Trading_Strategy_Backtest.ipynb ├── Pairs_Trading_Strategy_Notes.ipynb ├── README.md ├── data └── data.csv.gz ├── ou_lsq_est.py ├── ou_seq_ols.py ├── ou_strategy_simulation.py ├── ref └── RLS.pdf ├── src ├── estimation │ ├── coint_johansen.py │ ├── kalman_filter.py │ ├── linear_algebra_tools.py │ ├── ou_parameter_estimation.py │ └── rls.py ├── optimal_controls │ ├── ou_params.py │ ├── ou_spread_model.py │ ├── ou_spread_model_output.py │ └── ou_spread_model_parameters.py ├── portfolio │ ├── contract.py │ ├── portfolio.py │ ├── position.py │ ├── position_info.py │ ├── trade.py │ └── utilities.py └── simulation │ ├── geom_brown_motion.py │ ├── ornstein_uhlenbeck.py │ ├── simulate_cointegrated_assets.py │ └── simulate_pairs_trading.py ├── test └── test_main.py └── utils └── plot_utils.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | db.sqlite3 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # Environments 85 | .env 86 | .venv 87 | env/ 88 | venv/ 89 | ENV/ 90 | env.bak/ 91 | venv.bak/ 92 | 93 | # Spyder project settings 94 | .spyderproject 95 | .spyproject 96 | 97 | # Rope project settings 98 | .ropeproject 99 | 100 | # mkdocs documentation 101 | /site 102 | 103 | # mypy 104 | .mypy_cache/ 105 | -------------------------------------------------------------------------------- /.idea/.gitignore: -------------------------------------------------------------------------------- 1 | # Default ignored files 2 | /workspace.xml 3 | -------------------------------------------------------------------------------- /.idea/inspectionProfiles/profiles_settings.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 6 | -------------------------------------------------------------------------------- /.idea/misc.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 7 | 8 | 10 | -------------------------------------------------------------------------------- /.idea/modules.xml: -------------------------------------------------------------------------------- 1 | 2 | 
3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /.idea/statarb.iml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 29 | -------------------------------------------------------------------------------- /.idea/vcs.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Juha Hellén 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
# /ou_lsq_est.py
"""Monte-Carlo study of the small-sample bias of the least-squares OU estimator."""

import matplotlib.pyplot as plt
import numpy as np

from src.simulation.ornstein_uhlenbeck import (
    sim_ou
)

from src.estimation.ou_parameter_estimation import (
    estimate_ou_parameters_using_lsq
)


def sample_estimation_error(kappa_true, sigma, dt, n_grid):
    """
    Estimate the mean kappa estimate and its bias for several path lengths.

    For every entry of ``n_grid`` 50 paths are simulated with ``sim_ou`` and
    kappa is re-estimated from each path with the least-squares estimator.

    :param kappa_true: mean-reversion speed used for the simulation
    :param sigma: volatility passed to ``sim_ou``
    :param dt: time step passed to both the simulator and the estimator
    :param n_grid: sequence of path lengths passed to ``sim_ou``
    :return: (kappa_n, bias_n) arrays with one entry per element of n_grid;
        an entry is NaN when no sample produced a usable estimate.
    """
    n_samples = 50
    bias_n = np.zeros(len(n_grid))
    kappa_n = np.zeros(len(n_grid))
    for i, n_steps in enumerate(n_grid):
        estimates = []
        for _ in range(n_samples):
            # Simulate ou process
            x = sim_ou(0, kappa_true, 0, sigma, dt, n_steps)
            # The estimator returns kappa first; the star-unpack keeps this
            # call working regardless of how many extra values follow.
            kappa_est, *_ = estimate_ou_parameters_using_lsq(x, dt)
            # kappa is None when the fitted AR(1) slope is not positive.
            if kappa_est is not None:
                estimates.append(kappa_est)

        # Average over the samples that actually produced an estimate;
        # dividing by n_samples would drag the mean towards zero.
        if estimates:
            kappa_n[i] = np.mean(estimates)
            bias_n[i] = kappa_n[i] - kappa_true
        else:
            kappa_n[i] = np.nan
            bias_n[i] = np.nan

    return kappa_n, bias_n


def ou_bias(n, dt):
    """
    Fitted bias curve as a function of the sample size n.

    :param n: sample size (any Python or NumPy integer)
    :param dt: unused, kept for interface compatibility
    :raises ValueError: if n is not an integer
    """
    if not isinstance(n, (int, np.integer)):
        raise ValueError(f'n has to be integer. {type(n)}')

    return 1372.96281*(1 + 1.0/n) - 1373.2467


def ou_bias2(n):
    """
    Alternative fitted bias curve, proportional to 1/n.

    :param n: sample size (any Python or NumPy integer)
    :raises ValueError: if n is not an integer
    """
    if not isinstance(n, (int, np.integer)):
        raise ValueError(f'n has to be integer. {type(n)}')

    return 29.70381061*(1/(n*0.0189243637761786))


def main():
    """Sample the estimation error for two parameter sets and plot it."""
    n_grid = np.arange(50, 1500, 50)

    # Sample estimation error with different kappa parameters
    dt = 1.0/250.0
    k_1, e_1 = sample_estimation_error(1.5, 0.25, dt, n_grid)
    k_2, e_2 = sample_estimation_error(0.5, 0.5, dt, n_grid)

    correction = np.array([ou_bias2(n) for n in n_grid])
    k_1_unbiased = k_1 - correction
    k_2_unbiased = k_2 - correction

    fig, ax = plt.subplots(1, 3, figsize=(12, 4))

    # Sampled bias against the fitted bias curve
    ax[0].plot(n_grid, correction, color='black')
    ax[0].scatter(n_grid, e_1, color='blue')
    ax[0].scatter(n_grid, e_2, color='red')

    # Raw mean estimates
    ax[1].scatter(n_grid, k_1, color='blue')
    ax[1].scatter(n_grid, k_2, color='red')

    # Bias-corrected mean estimates and their averages
    ax[2].scatter(n_grid, k_1_unbiased, color='blue')
    ax[2].axhline(y=np.mean(k_1_unbiased), color='blue')
    ax[2].scatter(n_grid, k_2_unbiased, color='red')
    ax[2].axhline(y=np.mean(k_2_unbiased), color='red')

    plt.show()


if __name__ == '__main__':
    main()
# /ou_seq_ols.py
"""Compare the project's RLSFilter with a naive recursive least squares."""

import numpy as np


def RLS(x, d, y, n, mu, s):
    """
    Naive recursive least squares with exponential forgetting.

    The first ``s`` rows are used for a batch least-squares fit that seeds
    the recursion; the recursion itself then runs over rows 1..n-1.

    :param x: (n, d) array of regressors
    :param d: number of regressors (columns of x)
    :param y: n targets, either 1-D or an (n, 1) column
    :param n: number of rows to process
    :param mu: forgetting factor
    :param s: number of leading rows used for the initial batch fit
    :return: (n, d) array whose row t holds the weights after step t
    """
    x = np.asarray(x, dtype=float)
    # Flatten so each residual is a plain scalar instead of a (1, 1) array.
    y = np.asarray(y, dtype=float).reshape(-1)

    weights = np.zeros([n, d])
    P = np.zeros([n, d, d])

    # Batch initialisation: invert the Gram matrix once and reuse it both
    # for the initial weights and for the initial inverse-covariance P[0].
    x_init = x[0:s, :]
    P[0] = np.linalg.inv(np.dot(x_init.T, x_init))
    weights[0] = np.dot(P[0], np.dot(x_init.T, y[0:s]))

    for t in range(1, n):
        xt = x[t].reshape(1, d)
        residual = y[t] - np.dot(xt, weights[t - 1])[0]
        Px = np.dot(P[t - 1], xt.T)
        gain = Px / (mu + np.dot(xt, Px))
        weights[t] = weights[t - 1] + gain.ravel() * residual
        P[t] = (1 / mu) * (P[t - 1] - np.linalg.multi_dot([gain, xt, P[t - 1]]))
    return weights


def main():
    """Simulate an AR(1) series and plot both filters' weight paths."""
    # Demo-only dependencies stay local so that RLS can be imported with
    # nothing but numpy installed.
    import matplotlib.pyplot as plt
    import statsmodels.api as sm

    from src.estimation.rls import (
        RLSFilter
    )

    n_steps = 1500
    x = np.zeros((n_steps, 1))
    for i in range(1, n_steps):
        x[i] = 0.1 + 0.75 * x[i - 1] + 0.05 * np.random.normal(0, 1)

    # Regress x_t on a constant and x_{t-1}.
    x_ = sm.add_constant(x[:-1])
    y_ = x[1:]

    # Padasip filter
    w_0 = np.array([1, 0.1])
    F_1 = RLSFilter(2, mu=0.997, eps=0.99999, w_0=w_0.reshape(-1, 1))
    w_1 = np.zeros((len(y_), 2))
    w_1[0, :] = w_0
    for i in range(1, len(y_)):
        F_1.update(y_[i][0], x_[i])
        w_1[i, :] = F_1.w.flatten()

    # Naive implementation
    w_3 = RLS(x_, 2, y_, len(x_), 0.997, 10)

    fig, ax = plt.subplots(1, 2, figsize=(8, 3))
    for col in (0, 1):
        ax[col].plot(w_1[10:, col], color='blue', ls='-', lw=2, label='IG-RLS')
        ax[col].plot(w_3[10:, col], color='red', ls='-', lw=2, label='NIG-RLS')
        # The labels were set but never displayed.
        ax[col].legend()
    plt.show()


if __name__ == '__main__':
    main()
# /ou_strategy_simulation.py
# -*- coding: utf-8 -*-

import numpy as np
import matplotlib.pyplot as plt
from src.optimal_controls.ou_params import OrnsteinUhlenbeckProcessParameters
from src.optimal_controls.ou_spread_model_parameters import OUSpreadModelStrategyParameters
from src.simulation.simulate_cointegrated_assets import simulate_ou_spread
from src.simulation.simulate_pairs_trading import simulate_strategy


def main():
    """Simulate the OU pairs-trading strategy and plot prices, positions and P&L."""

    # --- Trading strategy parameters ---
    nominal = 1000000
    symbol_a = 'A'
    symbol_b = 'B'
    horizon = 1
    risk_tol = -float(500)  # risk penalty parameter
    max_leverage = 1
    strategy_parameters = OUSpreadModelStrategyParameters(
        nominal, symbol_a, symbol_b,
        horizon, risk_tol, max_leverage)

    # --- OU process parameters ---
    n_sim = 20
    n_steps = 500
    b_0 = 100
    mu_b = 0.05  # drift of the asset b

    x_0 = 0.0
    kappa = 5.5  # spread mean-reversion speed
    theta = 0.0  # average spread level
    eta = 0.05  # spread (normal) volatility
    sigma_b = 0.20  # asset b annual volatility
    rho = 0.0  # correlation dW_x*dW_b = rho*dt, TODO: implement in simulation, curr. not supported.
    # dt is implied by n_steps and horizon
    model_parameters = OrnsteinUhlenbeckProcessParameters(
        kappa, theta, eta, sigma_b, rho, mu_b, x_0, b_0)

    # --- Run strategy simulation ---
    a_prices, b_prices, portfolios = simulate_strategy(
        model_parameters, strategy_parameters, n_steps, n_sim)

    # --- Detailed plots for the first simulated path ---
    first_portfolio = portfolios[0]
    report_a = first_portfolio.get_position('A').generate_report_frame()
    report_b = first_portfolio.get_position('B').generate_report_frame()

    fig, ax = plt.subplots(3, 1, figsize=(8, 6))
    price_ax, position_ax, pnl_ax = ax[0], ax[1], ax[2]

    # Asset prices
    price_ax.plot(a_prices[0], color='red', label='A price')
    price_ax.plot(b_prices[0], color='blue', label='B price')
    price_ax.legend(loc=2)

    # Logarithmic spread on a secondary axis
    spread_ax = price_ax.twinx()
    log_spread = np.log(a_prices[0]) - np.log(b_prices[0])
    spread_ax.plot(log_spread, color='black', label='ln(A)-ln(B)')
    spread_ax.legend(loc=1)

    # Positions
    position_ax.plot(report_a['NET_POSITION'], color='red', label='A')
    position_ax.plot(report_b['NET_POSITION'], color='blue', label='B')
    position_ax.set_ylabel('Positions')

    # Profit and loss per leg, combined P&L on a secondary axis
    pnl_ax.plot(report_a['TOTAL_PNL'], color='red', label='A')
    pnl_ax.plot(report_b['TOTAL_PNL'], color='blue', label='B')
    pnl_ax.legend(loc=2)

    total_ax = pnl_ax.twinx()
    total_ax.plot(report_a['TOTAL_PNL'] + report_b['TOTAL_PNL'], color='black', label='A+B')
    total_ax.legend(loc=1)

    plt.show()

    # --- Combined P&L of every simulated path ---
    fig, ax = plt.subplots(figsize=(8, 6))
    for portfolio in portfolios.values():
        leg_a = portfolio.get_position('A').generate_report_frame()
        leg_b = portfolio.get_position('B').generate_report_frame()
        ax.plot(leg_a['TOTAL_PNL'] + leg_b['TOTAL_PNL'],
                color='blue', label='A+B', alpha=0.1)

    plt.show()

    print(" ")


if __name__ == '__main__':
    main()
| 98 | 99 | -------------------------------------------------------------------------------- /ref/RLS.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jshellen/statarb/24d369c262e043c774a8e793d61064d63095f25e/ref/RLS.pdf -------------------------------------------------------------------------------- /src/estimation/coint_johansen.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from statsmodels.tsa.tsatools import lagmat 3 | 4 | MAX_EVAL_0 = """2.98 4.13 6.94 5 | 9.47 11.22 15.09 6 | 15.72 17.80 22.25 7 | 21.84 24.16 29.06 8 | 27.92 30.44 35.72 9 | 33.93 36.63 42.23 10 | 39.91 42.77 48.66 11 | 45.89 48.88 55.04 12 | 51.85 54.97 61.35 13 | 57.80 61.03 67.65 14 | 63.73 67.08 73.89 15 | 69.65 73.09 80.12""" 16 | 17 | TRACE_0 = """2.98 4.13 6.94 18 | 10.47 12.32 16.36 19 | 21.78 24.28 29.51 20 | 37.03 40.17 46.57 21 | 56.28 60.06 67.64 22 | 79.53 83.94 92.71 23 | 106.74 111.78 121.74 24 | 138.00 143.67 154.80 25 | 173.23 179.52 191.83 26 | 212.47 219.41 232.84 27 | 255.68 263.26 278.00 28 | 302.90 311.13 326.96""" 29 | 30 | MAX_EVAL_1 = """7.56 9.16 12.76 31 | 13.91 15.89 20.16 32 | 20.05 22.30 27.07 33 | 26.12 28.59 33.73 34 | 32.17 34.81 40.29 35 | 38.16 40.96 46.75 36 | 44.13 47.07 53.12 37 | 50.11 53.19 59.51 38 | 56.05 59.24 65.79 39 | 61.99 65.30 72.10 40 | 67.93 71.33 78.29 41 | 73.85 77.38 84.51""" 42 | 43 | TRACE_1 = """7.56 9.16 12.76 44 | 17.98 20.26 25.08 45 | 32.27 35.19 41.20 46 | 50.53 54.08 61.27 47 | 72.77 76.97 85.34 48 | 99.02 103.84 113.42 49 | 129.23 134.68 145.40 50 | 163.50 169.61 181.51 51 | 201.69 208.45 221.45 52 | 243.96 251.27 265.53 53 | 290.17 298.17 313.75 54 | 340.38 348.99 365.64""" 55 | 56 | MAX_EVAL_2 = """2.71 3.84 6.63 57 | 12.30 14.26 18.52 58 | 18.89 21.13 25.86 59 | 25.12 27.58 32.71 60 | 31.24 33.88 39.37 61 | 37.28 40.08 45.87 62 | 43.23 46.23 52.31 63 | 49.29 52.36 58.67 64 | 55.24 58.43 64.99 
65 | 61.20 64.51 71.26 66 | 67.13 70.53 77.49 67 | 73.06 76.58 83.70""" 68 | 69 | TRACE_2 = """2.71 3.84 6.63 70 | 13.43 15.50 19.94 71 | 27.07 29.80 35.46 72 | 44.49 47.86 54.68 73 | 65.82 69.82 77.82 74 | 91.11 95.75 104.96 75 | 120.37 125.61 135.97 76 | 153.63 159.53 171.09 77 | 190.88 197.37 210.06 78 | 232.11 239.25 253.24 79 | 277.38 285.14 300.29 80 | 326.53 334.98 351.25""" 81 | 82 | MAX_EVAL_3 = """10.67 12.52 16.55 83 | 17.23 19.39 23.97 84 | 23.44 25.82 30.83 85 | 29.54 32.12 37.49 86 | 35.58 38.33 44.02 87 | 41.60 44.50 50.47 88 | 47.56 50.59 56.85 89 | 53.55 56.71 63.17 90 | 59.49 62.75 69.44 91 | 65.44 68.81 75.69 92 | 71.36 74.84 81.94 93 | 77.30 80.87 88.11""" 94 | 95 | TRACE_3 = """10.67 12.52 16.55 96 | 23.34 25.87 31.16 97 | 39.75 42.91 49.36 98 | 60.09 63.88 71.47 99 | 84.38 88.80 97.60 100 | 112.65 117.71 127.71 101 | 144.87 150.56 161.72 102 | 181.16 187.47 199.81 103 | 221.36 228.31 241.74 104 | 265.63 273.19 287.87 105 | 313.86 322.06 337.97 106 | 366.11 374.91 392.01""" 107 | 108 | MAX_EVAL_4 = """2.71 3.84 6.63 109 | 15.00 17.15 21.74 110 | 21.87 24.25 29.26 111 | 28.24 30.82 36.19 112 | 34.42 37.16 42.86 113 | 40.53 43.42 49.41 114 | 46.56 49.58 55.81 115 | 52.58 55.73 62.17 116 | 58.53 61.81 68.50 117 | 64.53 67.90 74.74 118 | 70.46 73.94 81.07 119 | 76.41 79.97 87.23""" 120 | 121 | TRACE_4 = """2.71 3.84 6.63 122 | 16.16 18.40 23.15 123 | 32.06 35.01 41.08 124 | 51.65 55.24 62.52 125 | 75.10 79.34 87.78 126 | 102.47 107.34 116.99 127 | 133.79 139.28 150.08 128 | 169.07 175.16 187.20 129 | 208.36 215.12 228.23 130 | 251.63 259.02 273.37 131 | 298.89 306.90 322.41 132 | 350.12 358.72 375.30""" 133 | 134 | mapping = { 135 | "MAX_EVAL_0": MAX_EVAL_0, 136 | "TRACE_0": TRACE_0, 137 | "MAX_EVAL_1": MAX_EVAL_1, 138 | "TRACE_1": TRACE_1, 139 | "MAX_EVAL_2": MAX_EVAL_2, 140 | "TRACE_2": TRACE_2, 141 | "MAX_EVAL_3": MAX_EVAL_3, 142 | "TRACE_3": TRACE_3, 143 | "MAX_EVAL_4": MAX_EVAL_4, 144 | "TRACE_4": TRACE_4 145 | } 146 | 147 | 148 | class 
class Johansen(object):
    """
    Implementation of the Johansen test for cointegration.

    References:

        - Hamilton, J. D. (1994) 'Time Series Analysis', Princeton Univ. Press.
        - MacKinnon, Haug, Michelis (1996) 'Numerical distribution functions of
        likelihood ratio tests for cointegration', Queen's University Institute
        for Economic Research Discussion paper.
    """

    def __init__(self, x, model, k=1, trace=True, significance_level=1):
        """
        :param x: (nobs, m) array of time series. nobs is the number of
        observations, or time stamps, and m is the number of series.
        :param model: Which of the five cases in Osterwald-Lenum 1992 (or
        MacKinnon 1996) to use.
            - 0: case 0. No deterministic terms in the input series and all
            cointegrating relations are expected to have zero mean.
            - 1: case 1*. No linear or quadratic trend in the input series,
            but possibly a constant term; cointegrating relations may have
            nonzero means.
            - 2: case 1. Linear trends in the input series, but none expected
            in the cointegrating relations.
            - 3: case 2*. No quadratic trends in the input series; both the
            series and the cointegrating relations may have linear trends.
            - 4: case 2. Quadratic trends in the input series; cointegrating
            relations are expected to have only linear trends.
        :param k: The number of lags to use when regressing on the first
        difference of x.
        :param trace: Whether to use the trace or max eigenvalue statistic for
        the hypothesis testing. If False the latter is used.
        :param significance_level: Column of the critical value table to use:
        0 for 90%, 1 for 95%, 2 for 99%.
        :raises KeyError: if no critical value table exists for ``model``.
        """

        self.x = x
        self.k = k
        self.trace = trace
        self.model = model
        self.significance_level = significance_level

        key = "{}_{}".format("TRACE" if trace else "MAX_EVAL", model)
        critical_values_str = mapping[key]

        # Each table row holds the 90%, 95% and 99% critical values.
        select_critical_values = np.array(
            critical_values_str.split(),
            float).reshape(-1, 3)

        self.critical_values = select_critical_values[:, significance_level]

    def mle(self):
        """Obtain the cointegrating vectors and corresponding eigenvalues.
        Maximum likelihood estimation and reduced rank regression are used to
        obtain the cointegrating vectors and corresponding eigenvalues, as
        outlined in Hamilton 1994.
        :return: (eigenvectors, eigenvalues) ordered from the largest to the
        smallest eigenvalue, or None when one of the matrix inversions fails.
        """

        # Regressions on diffs and levels of x. Get regression residuals.

        # First differences of x.
        x_diff = np.diff(self.x, axis=0)

        # Lags of x_diff.
        x_diff_lags = lagmat(x_diff, self.k, trim='both')

        # First lag of x.
        x_lag = lagmat(self.x, 1, trim='both')

        # Trim x_diff and x_lag so they line up with x_diff_lags.
        x_diff = x_diff[self.k:]
        x_lag = x_lag[self.k:]

        # Include intercept in the regressions if self.model != 0.
        if self.model != 0:
            ones = np.ones((x_diff_lags.shape[0], 1))
            x_diff_lags = np.append(x_diff_lags, ones, axis=1)

        # Include time trend in the regression if self.model = 3 or 4.
        if self.model in (3, 4):
            times = np.arange(x_diff_lags.shape[0]).reshape((-1, 1))
            x_diff_lags = np.append(x_diff_lags, times, axis=1)

        # Residuals of the regressions of x_diff and x_lag on x_diff_lags.
        try:
            inverse = np.linalg.pinv(x_diff_lags)
        except np.linalg.LinAlgError:
            print("Unable to take inverse of x_diff_lags.")
            return None

        u = x_diff - np.dot(x_diff_lags, np.dot(inverse, x_diff))
        v = x_lag - np.dot(x_diff_lags, np.dot(inverse, x_lag))

        # Covariance matrices of the residuals.
        t = x_diff_lags.shape[0]
        Svv = np.dot(v.T, v) / t
        Suu = np.dot(u.T, u) / t
        Suv = np.dot(u.T, v) / t
        Svu = Suv.T

        try:
            Svv_inv = np.linalg.inv(Svv)
        except np.linalg.LinAlgError:
            print("Unable to take inverse of Svv.")
            return None
        try:
            Suu_inv = np.linalg.inv(Suu)
        except np.linalg.LinAlgError:
            print("Unable to take inverse of Suu.")
            return None

        # Eigenvalues and eigenvectors of the product of covariances.
        cov_prod = np.dot(Svv_inv, np.dot(Svu, np.dot(Suu_inv, Suv)))
        eigenvalues, eigenvectors = np.linalg.eig(cov_prod)

        # Normalize the eigenvectors using Cholesky decomposition.
        # A failing factorisation raises here, as it always did.
        evec_Svv_evec = np.dot(eigenvectors.T, np.dot(Svv, eigenvectors))
        cholesky_factor = np.linalg.cholesky(evec_Svv_evec)
        try:
            eigenvectors = np.dot(eigenvectors,
                                  np.linalg.inv(cholesky_factor.T))
        except np.linalg.LinAlgError:
            print("Unable to take the inverse of the Cholesky factor.")
            return None

        # Ordering the eigenvalues and eigenvectors from largest to smallest.
        indices_ordered = np.argsort(eigenvalues)[::-1]
        eigenvalues = eigenvalues[indices_ordered]
        eigenvectors = eigenvectors[:, indices_ordered]

        return eigenvectors, eigenvalues

    def h_test(self, eigenvalues, r):
        """Carry out hypothesis test.
        The null hypothesis is that there are at most r cointegrating vectors.
        The alternative hypothesis is that there are at most m cointegrating
        vectors, where m is the number of input time series.
        :param eigenvalues: The list of eigenvalues returned from the mle
        function.
        :param r: The number of cointegrating vectors to use in the null
        hypothesis.
        :return: True if the null hypothesis is rejected, False otherwise.
        """

        nobs, m = self.x.shape
        t = nobs - self.k - 1
        eigenvalues = np.asarray(eigenvalues)

        if self.trace:
            # The trace statistic sums over all eigenvalues from r onwards
            # and sizes the critical-value lookup by the eigenvalue count.
            m = len(eigenvalues)
            statistic = -t * np.sum(np.log(1.0 - eigenvalues[r:]))
        else:
            statistic = -t * np.log(1.0 - eigenvalues[r])

        critical_value = self.critical_values[m - r - 1]

        return bool(statistic > critical_value)

    def johansen(self):
        """Obtain the possible cointegrating relations and numbers of them.
        See the documentation for methods mle and h_test.
        :return: The possible cointegrating relations, i.e. the eigenvectors
        obtained from maximum likelihood estimation, and the numbers of
        cointegrating relations for which the null hypothesis is rejected.
        None is returned when the estimation fails.
        """

        nobs, m = self.x.shape

        # Best-effort boundary: any estimation error is reported and turned
        # into None, but KeyboardInterrupt/SystemExit are no longer swallowed.
        try:
            result = self.mle()
        except Exception:
            result = None

        if result is None:
            print("Unable to obtain possible cointegrating relations.")
            return None

        eigenvectors, eigenvalues = result
        rejected_r_values = [r for r in range(m)
                             if self.h_test(eigenvalues, r)]

        return eigenvectors, rejected_r_values
# /src/estimation/kalman_filter.py

import numpy as np

from .linear_algebra_tools import (
    givens_rotation
)


def kalman_filter_predict(X, P, A, Q, B, U):
    """
    Kalman filter time update.

    Propagates the state as A*X + B*U and the covariance as A*P*A' + Q.

    :return: tuple (predicted state, predicted covariance)
    """
    state_pred = np.dot(A, X) + np.dot(B, U)
    cov_pred = np.dot(A, np.dot(P, A.T)) + Q
    return (state_pred, cov_pred)


def kalman_filter_update(X, P, Y, H, R):
    """
    Kalman filter measurement update.

    :param X: predicted state
    :param P: predicted state covariance
    :param Y: measurement
    :param H: measurement matrix
    :param R: measurement noise covariance
    :return: tuple (state, covariance, gain, predicted measurement,
        innovation covariance, result of gauss_pdf for the measurement)
    """
    measurement_pred = np.dot(H, X)
    innovation_cov = R + np.dot(H, np.dot(P, H.T))

    # Invert the innovation covariance through its QR factorisation:
    # IS = Q*R  =>  IS^-1 = R^-1 * Q'
    q_factor, r_factor = givens_rotation(innovation_cov)
    innovation_cov_inv = np.matmul(np.linalg.inv(r_factor), q_factor.T)

    gain = np.dot(P, np.dot(H.T, innovation_cov_inv))
    state_new = X + np.dot(gain, (Y - measurement_pred))
    cov_new = P - np.dot(gain, np.dot(innovation_cov, gain.T))
    likelihood = gauss_pdf(Y, measurement_pred, innovation_cov)
    return (state_new, cov_new, gain, measurement_pred, innovation_cov, likelihood)
def gauss_pdf(X, M, S):
    """
    Evaluate a multivariate Gaussian density column by column.

    X and M are 2-D arrays whose columns are points and means. Either one
    may have a single column, which is then paired with every column of
    the other; otherwise both must have the same shape.

    :param X: (d, n) or (d, 1) array of points
    :param M: (d, n) or (d, 1) array of means
    :param S: (d, d) covariance matrix
    :return: tuple (P, E) for the first column, where E is the negative
        log-density and P = exp(-E)
    """
    # Broadcasting covers the single-column cases that were previously
    # handled with np.tile. The equal-shape branch could never run before
    # (it called the non-existent np.inv / np.det), so it is now evaluated
    # per column like the other two.
    DX = X - M
    E = 0.5 * np.sum(DX * np.linalg.solve(S, DX), axis=0)
    E = E + 0.5 * M.shape[0] * np.log(2 * np.pi) + 0.5 * np.log(np.linalg.det(S))
    P = np.exp(-E)

    return (P[0], E[0])


if __name__ == '__main__':

    import matplotlib.pyplot as plt

    n_step = 500
    y_1 = np.cumprod(np.exp(0.05*np.sqrt(1.0/250)*np.random.normal(0, 1, n_step)))
    y_2 = 1.2*y_1 + 0.005*np.random.normal(0, 1, n_step)

    fig, ax = plt.subplots(figsize=(6, 6))
    ax.plot(y_1)
    ax.plot(y_2)
    plt.show()

    # Initialization of state matrices
    X = np.array([[1.0, 1.0]]).reshape(-1, 1)
    delta = 1e-5
    P = delta / (1 - delta) * np.eye(2)
    A = np.array([[1, 0],
                  [0, 1]])
    # Zero process noise and zero control input, with shapes that match
    # A*P*A' and A*X instead of relying on broadcasting.
    Q = np.zeros((X.shape[0], X.shape[0]))
    B = np.zeros((X.shape[0], 1))
    U = np.zeros((1, 1))

    # Measurement matrices

    R = 0.000000000001

    X_t = np.zeros((n_step, 2))

    for i in np.arange(0, n_step):
        Y = np.array([[y_2[i]]])
        H = np.array([y_1[i], 1]).reshape(1, -1)
        (X, P) = kalman_filter_predict(X, P, A, Q, B, U)
        (X, P, K, IM, IS, LH) = kalman_filter_update(X, P, Y, H, R)
        X_t[i, :] = X.flatten()

    fig, ax = plt.subplots(figsize=(6, 6))
    ax.plot(X_t)
    plt.show()
# /src/estimation/linear_algebra_tools.py
import numpy as np

from math import copysign, hypot


def gram_schmidt_process(A):
    """
    Perform QR decomposition of matrix A using Gram-Schmidt process.

    :param A: (rows, cols) array with linearly independent columns
    :return: (Q, R) with Q of shape (rows, cols) having orthonormal columns
        and R = Q' * A upper triangular, so that Q * R = A
    """
    # Work in floating point: integer input would otherwise fail when the
    # projections are subtracted.
    A = np.asarray(A, dtype=float)
    (num_rows, num_cols) = np.shape(A)

    # One orthonormal column per column of A, so Q never contains
    # uninitialised entries.
    Q = np.zeros([num_rows, num_cols])

    for cnt in range(num_cols):
        a = A[:, cnt]
        # Remove the components along the columns found so far.
        basis = Q[:, :cnt]
        u = a - np.dot(basis, np.dot(basis.T, a))
        Q[:, cnt] = u / np.linalg.norm(u)

    # Compute upper triangular matrix R.
    R = np.dot(Q.T, A)

    return (Q, R)


def householder_reflection(A):
    """
    Perform QR decomposition of matrix A using Householder reflection.

    :param A: (rows, cols) array
    :return: (Q, R) with Q orthogonal of shape (rows, rows) and R upper
        triangular, so that Q * R = A
    """
    # Float copy: the reflector would be truncated for integer input.
    R = np.array(A, dtype=float)
    (num_rows, num_cols) = np.shape(R)
    Q = np.identity(num_rows)

    # Iterate over column sub-vectors and compute the Householder matrix
    # that zeroes the entries below the diagonal.
    for cnt in range(min(num_rows - 1, num_cols)):
        x = R[cnt:, cnt]
        norm_x = np.linalg.norm(x)
        if norm_x == 0.0:
            # Nothing to eliminate in this column.
            continue

        # Give the shift the sign of the current pivot so the two terms add
        # up. The opposite sign cancels and yields a zero vector (NaN after
        # normalisation) for inputs such as the identity matrix.
        u = x.copy()
        u[0] += copysign(norm_x, x[0])
        v = u / np.linalg.norm(u)

        Q_cnt = np.identity(num_rows)
        Q_cnt[cnt:, cnt:] -= 2.0 * np.outer(v, v)

        R = np.dot(Q_cnt, R)
        Q = np.dot(Q, Q_cnt.T)

    return Q, R


def givens_rotation(A):
    """
    Perform QR decomposition of matrix A using Givens rotation.

    :param A: (rows, cols) array
    :return: (Q, R) with Q orthogonal of shape (rows, rows) and R upper
        triangular, so that Q * R = A
    """
    (num_rows, num_cols) = np.shape(A)

    # Initialize orthogonal matrix Q and upper triangular matrix R.
    Q = np.identity(num_rows)
    R = np.copy(A)

    # Iterate over lower triangular matrix.
    (rows, cols) = np.tril_indices(num_rows, -1, num_cols)
    for (row, col) in zip(rows, cols):
        # Entries that are already zero need no rotation.
        if R[row, col] == 0:
            continue

        # Rotation in the (col, row) plane that zeroes R[row, col].
        (c, s) = _givens_rotation_matrix_entries(R[col, col], R[row, col])

        G = np.identity(num_rows)
        G[[col, row], [col, row]] = c
        G[row, col] = s
        G[col, row] = -s

        R = np.dot(G, R)
        Q = np.dot(Q, G.T)

    return Q, R


def _givens_rotation_matrix_entries(a, b):
    """
    Compute matrix entries for Givens rotation.

    :return: (c, s) with c = a/r and s = -b/r, where r = hypot(a, b)
    """
    r = hypot(a, b)
    c = a/r
    s = -b/r

    return c, s
66 | """ 67 | (num_rows, num_cols) = np.shape(A) 68 | 69 | # Initialize orthogonal matrix Q and upper triangular matrix R. 70 | Q = np.identity(num_rows) 71 | R = np.copy(A) 72 | 73 | # Iterate over lower triangular matrix. 74 | (rows, cols) = np.tril_indices(num_rows, -1, num_cols) 75 | for (row, col) in zip(rows, cols): 76 | 77 | # Compute Givens rotation matrix and 78 | # zero-out lower triangular matrix entries. 79 | if R[row, col] != 0: 80 | (c, s) = _givens_rotation_matrix_entries(R[col, col], R[row, col]) 81 | 82 | G = np.identity(num_rows) 83 | G[[col, row], [col, row]] = c 84 | G[row, col] = s 85 | G[col, row] = -s 86 | 87 | R = np.dot(G, R) 88 | Q = np.dot(Q, G.T) 89 | 90 | return Q, R 91 | 92 | 93 | def _givens_rotation_matrix_entries(a, b): 94 | """ 95 | Compute matrix entries for Givens rotation. 96 | """ 97 | r = hypot(a, b) 98 | c = a/r 99 | s = -b/r 100 | 101 | return c, s 102 | -------------------------------------------------------------------------------- /src/estimation/ou_parameter_estimation.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import statsmodels.api as sm 3 | 4 | from .coint_johansen import Johansen 5 | 6 | 7 | def ou_bias_correction(n): 8 | 9 | if not isinstance(n, (int, np.int32, np.int64)): 10 | raise ValueError(f'n has to be integer. {type(n)}') 11 | 12 | return 29.70381061*(1/(n*0.0189243637761786)) 13 | 14 | 15 | def estimate_ou_parameters_using_lsq(x, dt, bias_corretion=False): 16 | """ 17 | Estimates parameters of Ornstein-Uhlenbeck style stochastic process: 18 | 19 | dX_t = kappa*(theta - X_t)*dt + sigma*dW_t 20 | 21 | NOTE: The standard least squares estimation is very upward biased. Therefore, we need to adjust it down. 
def estimate_z_model_params(x, y, dt):
    """
    Estimates log-price sensitivity "delta" w.r.t cointegrating factor:

        dln(S_t) = (mu - 1/2*sigma^2 + delta * Z_t)*dt + sigma*dW_t

    Reference: "Dynamic Optimal Portfolios for Multiple Co-Integrated Assets".

    The cointegration coefficient beta is taken from the first Johansen
    eigenvector (normalised by its first entry), the intercept a is chosen so
    that Z_t = a + x + beta*y has zero sample mean, and Ornstein-Uhlenbeck
    parameters are then fitted to Z_t with bias correction enabled.

    :param x: numpy.ndarray; presumably the log-price series of the first
        asset (TODO confirm against callers).
    :param y: numpy.ndarray; presumably the log-price series of the second
        asset (TODO confirm against callers).
    :param dt: time step between observations, float.
    :return: tuple (delta, beta, kappa, a). delta is None when the
        Ornstein-Uhlenbeck fit does not produce parameters.
    :raises TypeError: if x or y is not a numpy.ndarray or dt is not a float.
    """
    if not isinstance(x, np.ndarray):
        raise TypeError(f'x needs to be type of numpy.ndarray, it was {type(x)}')

    if not isinstance(y, np.ndarray):
        raise TypeError(f'y needs to be type of numpy.ndarray, it was {type(y)}')

    if not isinstance(dt, float):
        raise TypeError('dt needs to be type of float!')

    # Johansen expects a 2-D matrix with one column per series.
    if len(x.shape) != 2:
        x = x.reshape(-1, 1)

    if len(y.shape) != 2:
        y = y.reshape(-1, 1)

    # Estimate cointegration factor beta_i
    estimator = Johansen(np.concatenate([x, y], axis=1), model=2, significance_level=0)
    e_, r = estimator.johansen()
    e = e_[:, 0] / e_[0, 0]
    beta = e[1]

    # Compute Z_t - a_i = x + beta_i * y
    z_minus_a = x + beta * y

    # Estimate a_i
    a = -np.mean(z_minus_a)

    # Recompute Z_t
    z = a + x + beta * y

    # Estimate Ornstein-Uhlenbeck parameters. The estimator returns
    # (kappa, theta, sigma, a, b); only the first three are needed here, and
    # unpacking the whole tuple into three names raised ValueError.
    kappa, theta, sigma = estimate_ou_parameters_using_lsq(z, dt, True)[:3]

    delta = None
    if (kappa is not None) and (theta is not None) and (sigma is not None):

        # Compute delta from mean-reversion speed and beta_i
        delta = kappa/(-beta)

    return delta, beta, kappa, a
class OrnsteinUhlenbeckProcessParameters:
    """
    Read-only container for the parameters of an Ornstein-Uhlenbeck spread
    together with the parameters of asset "B" (GBM).
    """

    def __init__(self, kappa, theta, eta, sigma_b, rho, mu_b, x_0, b_0):
        """
        :param kappa: mean-reversion speed of the spread.
        :param theta: long-term average level of the spread.
        :param eta: volatility of the spread.
        :param sigma_b: volatility of asset "B".
        :param rho: correlation between the spread and asset "B".
        :param mu_b: drift of asset "B".
        :param x_0: initial value of the OU process.
        :param b_0: initial value of asset "B".
        :raises TypeError: if any argument is not an int or a float.
        """

        def check_numeric(arg, arg_name):
            if not isinstance(arg, (int, float)):
                raise TypeError('{} has to be type of int or float.'.format(arg_name))

        check_numeric(kappa, 'kappa')
        check_numeric(theta, 'theta')
        check_numeric(eta, 'eta')
        check_numeric(sigma_b, 'sigma_b')
        check_numeric(rho, 'rho')
        check_numeric(mu_b, 'mu_b')
        check_numeric(x_0, 'x_0')
        check_numeric(b_0, 'b_0')

        # Store the validated values.
        self.m_eta = eta
        self.m_sigma_b = sigma_b
        self.m_rho = rho
        self.m_theta = theta
        self.m_kappa = kappa
        self.m_mu_b = mu_b
        self.m_x_0 = x_0
        self.m_b_0 = b_0

    @property
    def kappa(self):
        """
        Mean-reversion speed of the process.
        """
        return self.m_kappa

    @property
    def theta(self):
        """
        Long-term average level of the spread
        """
        return self.m_theta

    @property
    def rho(self):
        """
        Correlation coefficient between the spread and the price series
        """
        return self.m_rho

    @property
    def eta(self):
        """
        Volatility of the spread.
        """
        return self.m_eta

    @property
    def sigma_b(self):
        """
        Volatility of asset "B" (GBM)
        """
        return self.m_sigma_b

    @property
    def mu_b(self):
        """
        Drift of asset "B" (GBM)
        """
        # Previously returned m_x_0, i.e. the initial spread value.
        return self.m_mu_b

    @property
    def x_0(self):
        """
        Initial OU Process value
        """
        return self.m_x_0

    @property
    def b_0(self):
        """
        Initial value of asset "B" (GBM)
        """
        return self.m_b_0

    @classmethod
    def ols_parameter_estimation(cls, a_data, b_data, dt):
        """
        Build a parameter set from two price series using least squares.

        The spread is log(a_data) - log(b_data); its OU parameters come from
        estimate_ou_parameters_using_lsq.

        :param a_data: prices of asset "A" (DataFrame, Series or ndarray).
        :param b_data: prices of asset "B" (DataFrame, Series or ndarray).
        :param dt: time step between observations, positive int or float.
        :return: a new OrnsteinUhlenbeckProcessParameters instance.
        :raises TypeError: on unsupported input types.
        :raises ValueError: if dt is not positive.
        """
        if not isinstance(a_data, (DataFrame, Series, ndarray)):
            raise TypeError('a_data has invalid data type')

        if not isinstance(b_data, (DataFrame, Series, ndarray)):
            raise TypeError('b_data has invalid data type')

        if not isinstance(dt, (int, float)):
            raise TypeError('dt has to be type of float.')

        if dt <= 0:
            raise ValueError('Delta time has to be positive and non-zero!')

        # Normalise every accepted input type to a single-column DataFrame.
        if isinstance(a_data, Series):
            a_data = a_data.to_frame(name=0)

        if isinstance(b_data, Series):
            b_data = b_data.to_frame(name=0)

        if isinstance(a_data, ndarray):
            a_data = DataFrame(data=a_data)

        if isinstance(b_data, ndarray):
            b_data = DataFrame(data=b_data)

        # Compute logarithmic spread level
        x = log(a_data) - log(b_data)

        # Estimate OU parameters. The estimator only accepts a float dt,
        # while this method also accepts int, hence the conversion.
        pars = estimate_ou_parameters_using_lsq(x.values, float(dt))
        kappa_est = pars[0]
        theta_est = pars[1]
        eta_est = pars[2]

        # Compute correlation between returns of asset b and spread changes
        a = b_data.pct_change(1)
        b = x.diff(1)
        c = concat([a, b], axis=1).dropna()
        rho_est = pearsonr(c.iloc[:, 0], c.iloc[:, 1])[0]

        # Compute scaled volatility for asset b
        sigma_est = a.std().values[0]*sqrt(1.0/dt)

        # NOTE(review): x_0 is documented as the initial OU process value but
        # is set to the first price of asset A rather than the first spread
        # value x.iloc[0, 0]. Left unchanged; confirm against callers.
        x_0 = a_data.iloc[0, 0]
        b_0 = b_data.iloc[0, 0]

        N = b_data.shape[0]
        mu_est = log(b_data.iloc[-1, 0] / b_data.iloc[0, 0]) / (dt * N) + 0.5 * sigma_est**2

        return cls(kappa_est, theta_est, eta_est, sigma_est, rho_est, mu_est, x_0, b_0)

    def __str__(self):
        """
        Summarise kappa, theta, rho and eta rounded to two decimals; values
        that are not floats are shown as empty strings.
        """

        def fmt(value):
            return round(value, 2) if isinstance(value, float) else ''

        kappa = fmt(self.kappa)
        theta = fmt(self.theta)
        rho = fmt(self.rho)
        eta = fmt(self.eta)

        return f"Ornstein-Uhlenbeck Parameters: Kappa = {kappa}, Theta = {theta}, Rho = {rho}, Eta = {eta}"
@staticmethod
def solve_beta(gamma, kappa, theta, eta, rho, sigma, tau):
    """
    Evaluate the beta term of the optimal allocation for the OU spread model.

    :param gamma: risk tolerance, strictly below 1.0.
    :param kappa: mean-reversion speed of the spread.
    :param theta: long-term level of the spread.
    :param eta: volatility of the spread.
    :param rho: correlation between spread and asset.
    :param sigma: asset volatility (not type-checked here).
    :param tau: remaining time, non-negative.
    :return: value of beta for the given inputs.
    :raises TypeError: if a checked argument is neither float nor int.
    :raises ValueError: if gamma >= 1.0 or tau < 0.0.
    """
    checked = (
        (gamma, 'Gamma'),
        (kappa, 'Kappa'),
        (theta, 'Theta'),
        (eta, 'Eta'),
        (rho, 'Rho'),
        (tau, 'Tau'),
    )
    for value, label in checked:
        if not isinstance(value, (float, int)):
            raise TypeError(f'{label} needs to be type of float')

    if gamma >= 1.0:
        raise ValueError('Gamma has to be strictly lower than 1.0!')

    if tau < 0.0:
        raise ValueError('Tau cannot be negative.')

    root = sqrt(1.0-gamma)

    # Machine epsilon is added to the divisor to prevent zero division.
    divisor = root + finfo(float).eps
    growth = exp(2*kappa*tau/divisor)

    # Sub-expression shared by both numerator terms.
    cross = eta**2 + 2*rho*sigma*eta

    scale = 1.0/((2*eta**2)*((1-root)-(1+root)*growth))
    quadratic = gamma*root*cross*((1-growth)**2)
    linear = -gamma*(cross + 2*kappa*theta)*(1-growth)

    return scale*(quadratic+linear)
TypeError('Eta needs to be type of float!') 95 | 96 | if not isinstance(sigma_b, (float, int)): 97 | raise TypeError('sigma_b needs to be type of float!') 98 | 99 | if not isinstance(rho, (float, int)): 100 | raise TypeError('Rho needs to be type of float!') 101 | 102 | if not isinstance(tau, (float, int)): 103 | raise TypeError('Tau needs to be type of float!') 104 | 105 | if not isinstance(x, (float, int)): 106 | raise TypeError('X needs to be type of float!') 107 | 108 | if gamma > 1.0: 109 | raise ValueError('Gamma has to be strictly lower than 1!') 110 | 111 | if tau < 0.0: 112 | raise ValueError('Tau cannot be negative!') 113 | 114 | if eta < 0.0: 115 | raise ValueError('Eta cannot be negative!') 116 | 117 | if sigma_b < 0.0: 118 | raise ValueError('Sigma_B cannot be negative!') 119 | 120 | # Solve alpha 121 | a = OUSpreadModelSolver.solve_alpha(gamma, kappa, eta, tau) 122 | 123 | # Solve beta 124 | b = OUSpreadModelSolver.solve_beta(gamma, kappa, theta, eta, rho, sigma_b, tau) 125 | 126 | # Solve optimal solution "h" 127 | 128 | # Machine epsilon to prevent division by zero 129 | eps = finfo(float).eps 130 | 131 | h = (1.0/(1.0-gamma + eps))*(b + 2*x*a - (kappa*(x-theta))/(eta**2 + eps) + (rho*sigma_b)/(eta+eps) + 0.5) 132 | 133 | return h 134 | 135 | @staticmethod 136 | def solve_asset_weights(model_params, strategy_params, spread_level, time_left): 137 | """ 138 | 139 | """ 140 | if not isinstance(model_params, OrnsteinUhlenbeckProcessParameters): 141 | raise TypeError('OU parameters have to be type of .') 142 | 143 | if not isinstance(strategy_params, OUSpreadModelStrategyParameters): 144 | raise TypeError('Model parameters have to be type of .') 145 | 146 | if not isinstance(spread_level, (float, int)): 147 | raise TypeError('X has to be type of float!') 148 | 149 | if not isinstance(time_left, (float, int)): 150 | raise TypeError('Tau has to be type of float!') 151 | 152 | solution = OUSpreadModelSolver.solve_h_prime(strategy_params.risk_tolerance, 
model_params.kappa, 153 | model_params.theta, model_params.eta, 154 | model_params.sigma_b, model_params.rho, time_left, spread_level) 155 | 156 | out = OUSpreadModelOutput(solution, model_params, strategy_params, spread_level, time_left) 157 | 158 | return out 159 | 160 | 161 | 162 | 163 | -------------------------------------------------------------------------------- /src/optimal_controls/ou_spread_model_output.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from copy import deepcopy 4 | 5 | from .ou_params import OrnsteinUhlenbeckProcessParameters 6 | from .ou_spread_model_parameters import OUSpreadModelStrategyParameters 7 | 8 | 9 | class OUSpreadModelOutput: 10 | 11 | def __init__(self, opt_alloc, model_params, strategy_params, x_ref, tau_ref): 12 | 13 | if not isinstance(opt_alloc, (int, float)): 14 | raise TypeError('Opt_alloc has to be or ') 15 | 16 | if not isinstance(model_params, OrnsteinUhlenbeckProcessParameters): 17 | raise TypeError('OU parameters have to be .') 18 | 19 | if not isinstance(strategy_params, OUSpreadModelStrategyParameters): 20 | raise TypeError('Model parameters have to be type of OU_Spread_Model_Parameters!') 21 | 22 | if not isinstance(x_ref, (int, float)): 23 | raise TypeError('X has to be or ') 24 | 25 | if not isinstance(tau_ref, (int, float)): 26 | raise TypeError('Tau has to be or .') 27 | 28 | self.m_opt_alloc = opt_alloc 29 | self.m_model_params = model_params 30 | self.m_strategy_params = strategy_params 31 | self.m_x_ref = x_ref 32 | self.m_tau_ref = tau_ref 33 | 34 | @property 35 | def optimal_allocation(self): 36 | """ 37 | Returns the optimal allocation. 38 | 39 | """ 40 | return self.m_opt_alloc 41 | 42 | @property 43 | def model_parameters(self): 44 | """ 45 | Returns a deep copied instance of the Ornstein-Uhlenbec parameters 46 | used to arrive at the optimal solution. 
47 | """ 48 | return deepcopy(self.m_model_params) 49 | 50 | @property 51 | def strategy_parameters(self): 52 | """ 53 | Returns a deep copied instance of the model parameters used to 54 | arrive at the optimal solution. 55 | 56 | """ 57 | return deepcopy(self.m_strategy_params) 58 | 59 | @property 60 | def alloc_a(self): 61 | """ 62 | 63 | Dollar allocation for asset A given nominal. 64 | 65 | TODO: Check for None before multiplying 66 | 67 | """ 68 | return self.strategy_parameters.nominal * self.alloc_a_pct 69 | 70 | @property 71 | def alloc_b(self): 72 | """ 73 | 74 | Dollar allocation for asset B given nominal. 75 | 76 | TODO: Check for None before multiplying 77 | 78 | """ 79 | return self.strategy_parameters.nominal * self.alloc_b_pct 80 | 81 | @property 82 | def alloc_a_trunc(self): 83 | """ 84 | 85 | Dollar allocation for asset A - truncated to maximum leverage % times 86 | nominal allocation. 87 | 88 | TODO: Check for None before multiplying 89 | 90 | """ 91 | return self.strategy_parameters.nominal * self.alloc_a_pct_trunc 92 | 93 | @property 94 | def alloc_b_trunc(self): 95 | """ 96 | 97 | Dollar allocation for asset B - truncated to maximum leverage % times 98 | nominal allocation. 
class OUSpreadModelStrategyParameters:
    """
    Read-only container for the strategy settings of the OU spread model:
    nominal, the two asset symbols, trading horizon, risk tolerance and
    maximum leverage.
    """

    def __init__(self, nominal, asset_a_symbol, asset_b_symbol, trading_horizon, risk_tolerance, maximum_leverage=1.0):
        """
        :param nominal: nominal amount, int or float.
        :param asset_a_symbol: symbol of asset A, str.
        :param asset_b_symbol: symbol of asset B, str.
        :param trading_horizon: trading horizon, int or float.
        :param risk_tolerance: risk tolerance, float strictly below 1.0.
        :param maximum_leverage: maximum leverage, int or float.
        :raises TypeError: if an argument has an unsupported type.
        :raises ValueError: if risk_tolerance is not smaller than 1.0.
        """
        if not isinstance(nominal, (int, float)):
            raise TypeError('nominal has to be type of int or float.')

        if not isinstance(asset_a_symbol, str):
            raise TypeError('asset_a_symbol has to be type of str.')

        if not isinstance(asset_b_symbol, str):
            raise TypeError('asset_b_symbol has to be type of str.')

        if not isinstance(trading_horizon, (int, float)):
            raise TypeError('trading_horizon has to be type of int or float.')

        if not isinstance(risk_tolerance, float):
            raise TypeError('risk_tolerance has to be type of float.')

        # Only the exact value 1.0 used to be rejected, so values above 1.0
        # slipped through even though the message requires "smaller than".
        if risk_tolerance >= 1.0:
            raise ValueError('risk_tolerance has to be smaller than 1.0 .')

        if not isinstance(maximum_leverage, (int, float)):
            raise TypeError('maximum_leverage has to be type of int or float.')

        self.m_nominal = nominal
        self.m_asset_a_symbol = asset_a_symbol
        self.m_asset_b_symbol = asset_b_symbol
        self.m_trading_horizon = trading_horizon
        self.m_risk_tolerance = risk_tolerance
        self.m_maximum_leverage = maximum_leverage

    @property
    def nominal(self):
        """Nominal amount passed to the constructor."""
        return self.m_nominal

    @property
    def risk_tolerance(self):
        """Risk tolerance passed to the constructor."""
        return self.m_risk_tolerance

    @property
    def trading_horizon(self):
        """Trading horizon passed to the constructor."""
        return self.m_trading_horizon

    @property
    def maximum_leverage(self):
        """Maximum leverage passed to the constructor."""
        return self.m_maximum_leverage

    @property
    def symbol_a(self):
        """Symbol of asset A."""
        return self.m_asset_a_symbol

    @property
    def symbol_b(self):
        """Symbol of asset B."""
        return self.m_asset_b_symbol
class Portfolio:
    """
    Convenience class for storing individual positions and for generating
    various reports.

    Positions are kept in a dictionary keyed by contract symbol.
    """
    def __init__(self, name):
        """
        :param name: name of the portfolio.
        """
        self.m_name = name
        self.m_positions = {}

    @property
    def name(self):
        """Name of the portfolio."""
        return self.m_name

    def add_position(self, position):
        """
        Add a new position to portfolio. Raises value error if the input position
        has a symbol that already exist in the portfolio.
        """
        symbol = position.contract.symbol
        if symbol in self.m_positions:
            # The message used to be a plain string, so the placeholder was
            # printed literally instead of the symbol.
            raise ValueError(f'Position already exists for symbol: {symbol}!')

        self.m_positions[symbol] = position

    def add_trade(self, trade):
        """
        Add a new trade to a position in the portfolio.

        Raises value error if no position exists for the trade's symbol.
        """
        if trade.symbol not in self.m_positions:
            raise ValueError(f'Cannot add trade. No position exists for symbol: {trade.symbol}!')

        self.m_positions[trade.symbol].add_trade(trade)

    def get_position(self, symbol):
        """
        Get current position in a symbol.

        Prints a notice and returns None when the symbol is unknown.
        """
        if symbol in self.m_positions:
            return self.m_positions[symbol]

        print(f'Position with symbol: {symbol} not found from portfolio.')
        return None

    def update_market_value(self, symbol, bid, ask, time=None):
        """
        Update market value of a position given symbol and corresponding
        bid and ask prices.

        Raises value error if no position exists for the symbol.
        """
        if symbol not in self.m_positions:
            # There is no trade in this scope; report the requested symbol.
            raise ValueError(f'Cannot update market value. No position exists for symbol: {symbol}!')

        self.m_positions[symbol].update_market_value(bid, ask, time)
trade.quantity 55 | 56 | # buy: positive position, sell: negative position 57 | signed_quantity = traded_quantity if action == 1 else (-1) * traded_quantity 58 | 59 | # Check if the trade will revert the direction of the position 60 | is_still_open = (self.m_net_position * signed_quantity) >= 0 61 | 62 | # Update Realizd and Total PnL 63 | if not is_still_open: 64 | 65 | # Remember to keep the sign as the net position 66 | self.m_realized_pnl += self.m_multiplier *( traded_price - self.m_avg_open_price ) \ 67 | * min(abs(signed_quantity), 68 | abs(self.m_net_position) ) \ 69 | * ( abs(self.m_net_position) / 70 | self.m_net_position ) 71 | 72 | # total pnl 73 | self.m_total_pnl = self.m_realized_pnl + self.m_unrealized_pnl 74 | 75 | # Commissions 76 | self.m_commissions += commission 77 | 78 | # Update Average Openin Price 79 | if is_still_open: 80 | #print("Still open") 81 | # Update average open price 82 | self.m_avg_open_price = ( ( self.m_avg_open_price * self.m_net_position ) 83 | + ( traded_price * signed_quantity ) ) \ 84 | / ( self.m_net_position + signed_quantity ) 85 | 86 | else: 87 | #print("Not open") 88 | # Check if it is close-and-open 89 | if traded_quantity > abs(self.m_net_position): 90 | self.m_avg_open_price = traded_price 91 | 92 | # Update net position 93 | self.m_net_position += signed_quantity 94 | 95 | # net investment 96 | # self.m_net_investment = max( self.m_net_investment, 97 | # abs( self.m_multiplier * self.m_net_position * self.m_avg_open_price ) ) 98 | 99 | self.m_net_investment = self.m_multiplier * self.m_net_position * self.m_avg_open_price 100 | 101 | # Update Unrealized and Total PnL 102 | #self.update_market_value(bid,ask) 103 | 104 | def update_market_value(self, bid, ask, time=None): 105 | 106 | if (bid > 0) and (ask > 0): 107 | mid = 0.5*(bid + ask) 108 | self.m_unrealized_pnl = self.m_multiplier * (mid - self.m_avg_open_price) * self.m_net_position 109 | self.m_total_pnl = self.m_realized_pnl + self.m_unrealized_pnl - 
class PositionInfo:
    """
    Wraps a position and keeps a log of its trades and of status snapshots.
    """

    def __init__(self, position):
        """
        :param position: the position object being tracked.
        """
        self.m_position = position
        self.m_trades = {}
        self.m_records = {}

    def add_trade(self, trade):
        """
        Forward the trade to the wrapped position and store it under
        trade.time (log for TCA).
        """
        self.m_position.add_trade(trade)
        self.m_trades[trade.time] = trade

    def log_position_status(self, time):
        """
        Store a snapshot of the position's state under the given time
        (log for PNL-analysis).
        """
        tracked = self.m_position
        self.m_records[time] = {
            'NET_POSITION': tracked.m_net_position,
            'NET_INVESTMENT': tracked.m_net_investment,
            'REALIZED_PNL': tracked.m_realized_pnl,
            'UNREALIZED_PNL': tracked.m_unrealized_pnl,
            'TOTAL_PNL': tracked.m_total_pnl,
            'TOTAL_COM': tracked.m_commissions,
        }

    def generate_pnl_report(self, formate='frame'):
        """
        Return the logged snapshots as a Pandas DataFrame ('frame') or as the
        underlying dictionary ('dict').

        Raises value error for any other format.
        """
        if formate == 'frame':
            return pandas.DataFrame.from_dict(self.m_records, orient='index')

        if formate == 'dict':
            return self.m_records

        raise ValueError('Formate has to be either "frame" or "dict"!')
6 | """ 7 | def __init__(self, symbol, action, price, quantity, sec_type, commission): 8 | 9 | if not isinstance(symbol, str): 10 | raise ValueError('Symbol needs to be !') 11 | 12 | if not isinstance(action, str): 13 | raise ValueError('Action needs to be !') 14 | else: 15 | if action not in ['BOT', 'SLD']: 16 | raise ValueError('Action needs to be either "BOT" or "SLD"!') 17 | 18 | if not isinstance(price, (float, int)): 19 | raise ValueError('Price needs to be either or !') 20 | 21 | if not isinstance(quantity, int): 22 | raise ValueError('Quantity needs to be !') 23 | 24 | if not isinstance(sec_type, str): 25 | raise ValueError('sec_type needs to be !') 26 | 27 | if not isinstance(commission, (float, int)): 28 | raise ValueError('Commission needs to be either or !') 29 | 30 | self.m_symbol = symbol 31 | self.m_action = action 32 | self.m_price = price 33 | self.m_quantity = quantity 34 | self.m_sec_type = sec_type 35 | self.m_commission = commission 36 | 37 | @property 38 | def symbol(self): 39 | return self.m_symbol 40 | 41 | @property 42 | def action(self): 43 | return self.m_action 44 | 45 | @property 46 | def price(self): 47 | return self.m_price 48 | 49 | @property 50 | def quantity(self): 51 | return self.m_quantity 52 | 53 | @property 54 | def sec_type(self): 55 | return self.m_sec_type 56 | 57 | @property 58 | def commission(self): 59 | return self.m_commission 60 | 61 | def __str__(self): 62 | 63 | return f"Trade: Action: {self.m_action} Symbol: {self.symbol} Sec. 
def geometrix_brownian_motion(S0, mu, sigma, dt, N_steps, N_paths):
    """
    Simulate geometric Brownian motion paths.

    Each log-increment is mu*dt plus a normal draw with standard deviation
    sigma*sqrt(dt); mu is applied directly to the log-price (no
    -0.5*sigma^2 correction is made here).

    :param S0: initial value; every path is scaled by it.
    :param mu: drift of the log-price per unit of time.
    :param sigma: volatility per square root of unit time.
    :param dt: length of one time step.
    :param N_steps: number of simulated steps.
    :param N_paths: number of simulated paths.
    :return: array of shape (N_steps, N_paths); row i holds the value after
        i + 1 steps (the initial value itself is not included).
    """
    size = (N_steps, N_paths)
    s = np.sqrt(dt) * np.random.normal(0, sigma, size) + mu * dt * np.ones(size)

    # S0 used to be ignored, so every path started from 1 regardless of the
    # requested initial value.
    return S0 * np.exp(np.cumsum(s, axis=0))
# ---- src/simulation/ornstein_uhlenbeck.py ----

import numpy as np


def simulate_one_ornstein_uhlenbeck_path(x_0, k, theta, sigma, dt, n_steps):
    """
    Simulate one Ornstein-Uhlenbeck path with an explicit Euler scheme.

    The update is
    ``x[i] = x[i-1] + k*(theta - x[i-1])*dt + sigma*sqrt(dt)*Z`` with
    ``Z ~ N(0, 1)`` drawn independently at every step.

    Parameters
    ----------
    x_0 : float
        Initial value, stored in ``x[0]``.
    k : float
        Mean-reversion speed.
    theta : float
        Level the process reverts to.
    sigma : float
        Diffusion coefficient.
    dt : float
        Time step.
    n_steps : int
        Number of points in the returned path (including ``x_0``).

    Returns
    -------
    numpy.ndarray
        1-D array of length ``n_steps``; empty when ``n_steps`` is 0.
    """
    x = np.zeros(n_steps)
    if n_steps < 1:
        # Nothing to fill; writing x[0] on an empty array would raise.
        return x
    x[0] = x_0

    # Loop-invariant diffusion scale.
    step_vol = sigma * np.sqrt(dt)
    for i in range(1, n_steps):
        x[i] = x[i-1] + k*(theta - x[i-1])*dt + step_vol*np.random.normal(0, 1)

    return x


def simulate_ornstein_uhlenbeck_paths(X_0, k, theta, sigma, dt, n_steps, n_paths):
    """
    Simulate several independent Ornstein-Uhlenbeck paths.

    Each column is produced by one call to
    ``simulate_one_ornstein_uhlenbeck_path`` with the same parameters.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(n_steps, n_paths)``; column ``j`` is path ``j``.
    """
    x = np.zeros((n_steps, n_paths))

    for j in range(n_paths):
        x[:, j] = simulate_one_ornstein_uhlenbeck_path(X_0, k, theta, sigma, dt, n_steps)

    return x


# ---- src/simulation/simulate_cointegrated_assets.py ----

from scipy.linalg import cholesky  # noqa: F401  (file-level import kept as in the original)


def simulate_b(N_sim, N_steps, B_0, mu, sigma_B, dt):
    """
    Simulate price paths of asset B as a geometric Brownian motion.

    Uses the exact log-normal step ``B(k+1) = B(k) * exp(dM + dW)`` with
    ``dM = (mu - 0.5*sigma_B**2)*dt`` and ``dW = sigma_B*sqrt(dt)*Z``,
    ``Z ~ N(0, 1)``.

    Parameters
    ----------
    N_sim : int
        Number of simulated paths (columns).
    N_steps : int
        Number of time steps after inception.
    B_0 : float
        Price at inception (t = 0).
    mu : float
        Drift of the price process.
    sigma_B : float
        Volatility of the price process.
    dt : float
        Time step.

    Returns
    -------
    B : numpy.ndarray
        Array of shape ``(N_steps + 1, N_sim)`` whose first row is ``B_0``.
    """
    size = (N_steps, N_sim)

    # B(k+1) = B(k) * e^{dM + dW}
    dM = (mu - 0.5 * sigma_B**2) * dt
    dW = sigma_B * np.sqrt(dt) * np.random.normal(0, 1, size)
    B = B_0 * np.exp(np.cumsum(dM + dW, axis=0))

    # Shift and include inception value (t=0).
    B = np.insert(B, 0, B_0, axis=0)

    return B


def simulate_ou_spread(N_sim, N_steps, B_0, X_0, kappa, theta, eta, mu, sigma_B, dt):
    """
    Simulate a pair of assets whose log-spread is Ornstein-Uhlenbeck.

    Asset B is simulated with ``simulate_b``. The spread ``X`` follows the
    Euler scheme ``X[i+1] = X[i] + kappa*(theta - X[i])*dt +
    eta*sqrt(dt)*Z`` and asset A is built as ``A = B * exp(X)``, i.e.
    ``X = ln(A) - ln(B)``.

    Parameters
    ----------
    N_sim : int
        Number of simulated paths (columns).
    N_steps : int
        Number of time steps after inception.
    B_0 : float
        Initial price of asset B.
    X_0 : float
        Initial spread value.
    kappa : float
        Mean-reversion speed of the spread.
    theta : float
        Level the spread reverts to.
    eta : float
        Diffusion coefficient of the spread.
    mu : float
        Drift of asset B.
    sigma_B : float
        Volatility of asset B.
    dt : float
        Time step.

    Returns
    -------
    A : numpy.ndarray
        Prices of asset A, shape ``(N_steps + 1, N_sim)``.
    B : numpy.ndarray
        Prices of asset B, shape ``(N_steps + 1, N_sim)``.
    X : numpy.ndarray
        Spread paths, shape ``(N_steps + 1, N_sim)``.
    """
    size = (N_steps + 1, N_sim)

    # Simulate asset b
    B = simulate_b(N_sim, N_steps, B_0, mu, sigma_B, dt)

    # Simulate spread. The noise matrix keeps its original shape (one
    # unused trailing row) so seeded runs draw exactly the same numbers.
    X = np.empty(size)
    X[0, :] = X_0
    randn = np.random.normal(0, 1, size)
    step_vol = eta * np.sqrt(dt)
    for i in range(N_steps):
        # All paths advance together; only the time recursion is sequential.
        dX = kappa*(theta - X[i, :])*dt + step_vol * randn[i, :]
        X[i+1, :] = X[i, :] + dX

    # Simulate price path for A
    A = B * np.exp(X)

    return A, B, X
def compute_rebalancing_amount(target_nominal, price, contract, portfolio):
    """
    Compute the number of units to trade to reach a target nominal.

    The target holding is ``int(target_nominal / (price * contract.multiplier))``
    (truncated toward zero); the result is that target minus the current
    ``net_position`` of the contract's position in ``portfolio``.

    Raises
    ------
    ValueError
        If ``portfolio`` holds no position for ``contract.symbol``.
    """
    n_new = int(target_nominal / float(price * contract.multiplier))
    position = portfolio.get_position(contract.symbol)
    if position is None:
        raise ValueError(f'Cannot compute rebalancing amount. Position for symbol: {contract.symbol} not found.')

    return n_new - position.net_position


def create_trade(price, amount, contract):
    """
    Create a ``Trade`` for a signed ``amount`` of ``contract`` at ``price``.

    Parameters
    ----------
    price : float or int
        Execution price.
    amount : int
        Signed number of units; positive becomes a ``'BOT'`` trade,
        negative a ``'SLD'`` trade.
    contract : Contract
        Contract being traded.

    Returns
    -------
    Trade or None
        ``None`` when ``amount`` is zero, otherwise the new trade with an
        unsigned quantity and the commission computed below.

    Raises
    ------
    ValueError
        On wrongly typed arguments or an unsupported ``contract.sec_type``.
    """
    if not isinstance(price, (float, int)):
        raise ValueError('price has to be a float or an int.')

    if not isinstance(amount, int):
        raise ValueError('amount has to be an int.')

    if not isinstance(contract, Contract):
        raise ValueError('contract has to be a Contract.')

    if amount == 0:
        return None

    action = 'BOT' if amount > 0 else 'SLD'

    # Compute trading commissions: 4 basis points of traded value for
    # stocks, a flat 1.0 per trade for futures.
    if contract.sec_type == 'STK':
        commission = (4.0 / 10000) * abs(amount) * price
    elif contract.sec_type == 'F':
        commission = 1.0
    else:
        # Previously fell through and failed with UnboundLocalError.
        raise ValueError(f'Unsupported sec_type: {contract.sec_type}.')

    return Trade(contract.symbol, action, price, abs(amount),
                 contract.sec_type, commission)


def _price_at(prices, i):
    """Return the price at step ``i`` as a plain float (first path if 2-D)."""
    return float(np.asarray(prices[i]).reshape(-1)[0])


def _rebalance(portfolio, contract, target_nominal, price):
    """Trade ``contract`` toward ``target_nominal`` at ``price``, then re-mark it."""
    amount = compute_rebalancing_amount(target_nominal, price, contract, portfolio)

    trade = create_trade(price, amount, contract)
    if trade is not None:
        portfolio.add_trade(trade)

    # The same price is passed for both price arguments, as in the
    # original call. NOTE(review): their exact meaning is defined by
    # Portfolio.update_market_value - confirm there.
    portfolio.update_market_value(contract.symbol, price, price)


def simulate_pairs_trading(
        model_parameters,
        strategy_parameters,
        a, b, s, T, dt,
        contract_a, contract_b,
        n_steps,
        simulation_number):
    """
    Run the pairs-trading strategy along one simulated price path.

    At every step the log-spread ``ln(a) - ln(b)`` and the remaining
    time ``T - dt*i`` are passed to
    ``OUSpreadModelSolver.solve_asset_weights``; both legs are then
    rebalanced to the returned ``alloc_a_trunc`` / ``alloc_b_trunc``
    nominals (leg A first, then leg B).

    Parameters
    ----------
    model_parameters, strategy_parameters
        Forwarded unchanged to ``OUSpreadModelSolver.solve_asset_weights``.
    a, b : array-like
        Price paths of asset A and asset B, indexed by time step. If an
        entry is itself an array (e.g. a ``(n_steps + 1, 1)`` simulation
        output) its first element is used.
    s : array-like
        Simulated spread; accepted for interface compatibility, the
        spread is recomputed from ``a`` and ``b``.
    T : float
        Trading horizon.
    dt : float
        Time step.
    contract_a, contract_b : Contract
        Contracts traded for each leg.
    n_steps : int
        Number of rebalancing steps.
    simulation_number : int
        Identifier used in the portfolio name.

    Returns
    -------
    Portfolio
        Portfolio holding both positions after the last step.
    """
    # Create portfolio with one (initially empty) position per leg
    portfolio = Portfolio(f'Portfolio #{simulation_number}')
    portfolio.add_position(Position2(contract_a))
    portfolio.add_position(Position2(contract_b))

    for i in range(n_steps):
        # Scalar prices: create_trade only accepts float/int prices, so
        # array-valued entries would be rejected.
        price_a = _price_at(a, i)
        price_b = _price_at(b, i)

        # Update trading model: ln-spread and optimal allocations
        spread = np.log(price_a) - np.log(price_b)
        time_left = T - dt*i
        optimal_decisions = OUSpreadModelSolver.solve_asset_weights(
            model_parameters, strategy_parameters, spread, time_left)

        # Rebalance position in A, then in B
        _rebalance(portfolio, contract_a, optimal_decisions.alloc_a_trunc, price_a)
        _rebalance(portfolio, contract_b, optimal_decisions.alloc_b_trunc, price_b)

    return portfolio


def simulate_strategy(model_parameters, strategy_parameters, n_steps, n_sim):
    """
    Simulate the pairs-trading strategy ``n_sim`` times.

    Every simulation draws a fresh single price path for both assets and
    runs ``simulate_pairs_trading`` on it.

    Returns
    -------
    a_prices : dict
        Simulation index -> simulated prices of asset A, shape
        ``(n_steps + 1, 1)``.
    b_prices : dict
        Simulation index -> simulated prices of asset B, same shape.
    portfolios : dict
        Simulation index -> deep copy of the resulting portfolio.
    """
    contract_a = Contract(strategy_parameters.symbol_a, 'F', 50)
    contract_b = Contract(strategy_parameters.symbol_b, 'F', 20)

    horizon = strategy_parameters.trading_horizon
    dt = horizon / float(n_steps)

    portfolios = {}
    a_prices = {}
    b_prices = {}

    for i in range(n_sim):

        # Simulate prices and spread. One path per call: the simulations
        # are produced by repeated calls, so asking for n_sim paths here
        # generated n_sim**2 paths and handed array-valued prices on.
        A_t, B_t, X_t = simulate_ou_spread(
            1, n_steps, model_parameters.b_0, model_parameters.x_0,
            model_parameters.kappa, model_parameters.theta,
            model_parameters.eta, model_parameters.mu_b,
            model_parameters.sigma_b,
            dt)

        # Simulate pairs trading strategy
        portfolio = simulate_pairs_trading(
            model_parameters,
            strategy_parameters,
            A_t, B_t, X_t,
            horizon,
            dt,
            contract_a, contract_b,
            n_steps,
            i)

        portfolios[i] = deepcopy(portfolio)
        a_prices[i] = A_t
        b_prices[i] = B_t

    return a_prices, b_prices, portfolios
class Test_OU_Spread_Model(TestCase):
    """Regression test for the Ornstein-Uhlenbeck parameter estimator."""

    def test_parameter_fit(self):
        """Estimates on a fixed 21-point sample match reference values to 2 places."""
        from src.estimation.ou_parameter_estimation import estimate_ou_parameters
        import numpy as np

        observations = np.array([
            3.0000, 1.7600, 1.2693, 1.1960, 0.9468, 0.9532, 0.6252,
            0.8604, 1.0984, 1.4310, 1.3019, 1.4005, 1.2686, 0.7147,
            0.9237, 0.7297, 0.7105, 0.8683, 0.7406, 0.7314, 0.6232,
        ])

        # NOTE(review): 0.25 is presumably the sampling interval - confirm
        # against estimate_ou_parameters.
        estimates = estimate_ou_parameters(observations, 0.25)

        # Result layout used by this test: (kappa, theta, sigma).
        fitted = (estimates[0], estimates[1], estimates[2])
        reference = (3.1288, 0.9075, 0.5531)

        for value, target in zip(fitted, reference):
            self.assertAlmostEqual(value, target, places=2)
def find_nearest(array, value):
    """
    Locate the element of ``array`` closest to ``value``.

    Returns
    -------
    tuple
        ``(index, element)`` of the entry with the smallest absolute
        difference to ``value`` (the first one on ties).
    """
    values = np.asarray(array)
    idx = np.abs(values - value).argmin()

    return idx, values[idx]


def plot_optimal_solution(X, ou_params, model_params, N_points=200):
    """
    Plot the optimal allocation surface with a spread path on top.

    The allocation ``alloc_a_pct_trunc`` returned by
    ``OUSpreadModelSolver.solve_asset_weights`` is evaluated on an
    ``N_points x N_points`` grid of remaining time (1 down to 0.001) and
    spread level (``theta +/- 1.5*eta``), shown as a heatmap with contour
    lines. The path ``X`` is snapped to the nearest grid cells and drawn
    as a black line.

    Parameters
    ----------
    X : sequence of float
        Spread path, assumed evenly spaced over the same time range as
        the grid (remaining time 1 down to 0.001).
    ou_params
        Object exposing ``eta`` and ``theta``; also forwarded to the solver.
    model_params
        Forwarded to the solver.
    N_points : int, optional
        Grid resolution along each axis.
    """
    from matplotlib import rcParams
    rcParams['axes.titlepad'] = 20

    eta = ou_params.eta
    theta = ou_params.theta

    # Compute optimal solution over a (t,X_t) grid
    taus = np.linspace(1, 0.001, N_points)
    xs = np.linspace(theta - 1.5*eta, theta + 1.5*eta, N_points)
    hs = np.zeros((len(taus), len(xs)))
    for i, tau in enumerate(taus):
        for j, x in enumerate(xs):
            opt_sol = OUSpreadModelSolver.solve_asset_weights(ou_params, model_params, x, tau)
            hs[i, j] = opt_sol.alloc_a_pct_trunc

    # Snap the spread path onto grid indices. Keyed by the time index, so
    # several observations landing on the same row keep only the last one.
    t_x = np.linspace(1, 0.001, len(X))
    data = {}
    for i in range(0, len(X)):
        ix_y, _ = find_nearest(taus, t_x[i])
        ix_x, _ = find_nearest(xs, X[i])
        data[ix_y] = ix_x
    fig, ax = plt.subplots(figsize=(7, 7))

    # Plot spread path as a black line
    ax.plot(list(data.values()), list(data.keys()), color='black', lw=3)

    # Plot heatmap of the optimal solution
    im_1 = ax.imshow(hs, cmap=plt.cm.winter)

    # Roughly four ticks per axis (50 for the default 200-point grid);
    # never zero, so small grids still work.
    tick_step = max(1, N_points // 4)

    # Set y-labels
    y_rng = np.arange(0, len(taus), tick_step)
    ax.set_ylabel(r'$Trading \ time \ remaining \ (T-t)$', fontsize=14)
    ax.set_yticks(y_rng)
    ax.set_yticklabels([round(taus[t], 2) for t in y_rng])

    # Set x-labels (tick positions must come from the x range, not y_rng)
    x_rng = np.arange(0, len(xs), tick_step)
    ax.set_xlabel(r'$Spread \ level \ (X_t)$', fontsize=14)
    ax.set_xticks(x_rng)
    ax.set_xticklabels([round(xs[i], 2) for i in x_rng])

    # Plot contour lines
    cset = ax.contour(hs, np.arange(-1, 1.5, 0.2), linewidths=2, colors='red')
    ax.clabel(cset, inline=True, fmt='%1.1f', fontsize=14)

    # Set colorbar
    divider = make_axes_locatable(ax)
    cax = divider.append_axes("right", size="5%", pad=0.05)
    plt.colorbar(im_1, cax=cax)

    # Set title
    ax.set_title(r'$Optimal \ allocation \ (h_t)$')

    fig.tight_layout()

    plt.show()