├── .gitignore ├── .travis.yml ├── LICENSE.txt ├── README.md ├── requirements.txt ├── setup.py ├── stock_pairs_trading ├── __init__.py └── stock_pairs_trading.py └── tests ├── pairs.png ├── performance.png └── test_stock_pairs_trading.py /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | __pycache__/ 3 | .venv/ 4 | *.egg-info/ 5 | dist/ 6 | .coverage 7 | .pytest_cache/ 8 | build/ 9 | .vscode/ 10 | .mypy_cache/ 11 | mlruns/ 12 | data/ 13 | outputs/ 14 | Pipfile 15 | Pipfile.lock 16 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | # Set the build language to Python 2 | language: python 3 | 4 | # Set the python version 5 | python: 6 | - "3.8" 7 | - "3.9" 8 | 9 | # Install the codecov pip dependency 10 | install: 11 | - pip install -r requirements.txt 12 | - pip install codecov 13 | 14 | # Run the unit test 15 | script: 16 | - python -m pytest --cov=stock_pairs_trading tests/ 17 | 18 | # Push the results back to codecov 19 | after_success: 20 | - codecov 21 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 10mohi6 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # stock-pairs-trading 2 | 3 | [![PyPI](https://img.shields.io/pypi/v/stock-pairs-trading)](https://pypi.org/project/stock-pairs-trading/) 4 | [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT) 5 | [![codecov](https://codecov.io/gh/10mohi6/stock-pairs-trading-python/branch/main/graph/badge.svg?token=DukbkJ6Pnx)](https://codecov.io/gh/10mohi6/stock-pairs-trading-python) 6 | [![Build Status](https://app.travis-ci.com/10mohi6/stock-pairs-trading-python.svg?branch=main)](https://app.travis-ci.com/10mohi6/stock-pairs-trading-python) 7 | [![PyPI - Python Version](https://img.shields.io/pypi/pyversions/stock-pairs-trading)](https://pypi.org/project/stock-pairs-trading/) 8 | [![Downloads](https://pepy.tech/badge/stock-pairs-trading)](https://pepy.tech/project/stock-pairs-trading) 9 | 10 | stock-pairs-trading is a python library for backtest with stock pairs trading using kalman filter on Python 3.8 and above. 
11 | 12 | ## Installation 13 | 14 | $ pip install stock-pairs-trading 15 | 16 | ## Usage 17 | 18 | ### find pairs 19 | ```python 20 | from stock_pairs_trading import StockPairsTrading 21 | 22 | spt = StockPairsTrading( 23 | start="2007-12-01", 24 | end="2017-12-01", 25 | ) 26 | r = spt.find_pairs(["AAPL", "ADBE", "MSFT", "IBM"]) 27 | print(r) 28 | ``` 29 | ```python 30 | [('ADBE', 'MSFT')] 31 | ``` 32 | ![pairs.png](https://raw.githubusercontent.com/10mohi6/stock-pairs-trading-python/main/tests/pairs.png) 33 | 34 | ### backtest 35 | ```python 36 | from pprint import pprint 37 | from stock_pairs_trading import StockPairsTrading 38 | 39 | spt = StockPairsTrading( 40 | start="2007-12-01", 41 | end="2017-12-01", 42 | ) 43 | r = spt.backtest(('ADBE', 'MSFT')) 44 | pprint(r) 45 | ``` 46 | ```python 47 | {'cointegration': 0.0018311528816901195, 48 | 'correlation': 0.9858057442729853, 49 | 'maximum_drawdown': 34.801876068115234, 50 | 'profit_factor': 1.1214715644744209, 51 | 'riskreward_ratio': 0.8095390763424627, 52 | 'sharpe_ratio': 0.03606830691295276, 53 | 'total_profit': 35.97085762023926, 54 | 'total_trades': 520, 55 | 'win_rate': 0.5807692307692308} 56 | ``` 57 | ![performance.png](https://raw.githubusercontent.com/10mohi6/stock-pairs-trading-python/main/tests/performance.png) 58 | 59 | ### latest signal 60 | ```python 61 | from pprint import pprint 62 | from stock_pairs_trading import StockPairsTrading 63 | 64 | spt = StockPairsTrading( 65 | start="2007-12-01", 66 | end="2017-12-01", 67 | ) 68 | r = spt.latest_signal(("ADBE", "MSFT")) 69 | pprint(r) 70 | ``` 71 | ```python 72 | {'ADBE Adj Close': 299.5, 73 | 'ADBE Buy': True, # entry buy 74 | 'ADBE Cover': False, # exit buy 75 | 'ADBE Sell': False, # entry sell 76 | 'ADBE Short': False, # exit sell 77 | 'MSFT Adj Close': 244.74000549316406, 78 | 'MSFT Buy': False, # entry buy 79 | 'MSFT Cover': False, # exit buy 80 | 'MSFT Sell': True, # entry sell 81 | 'MSFT Short': False, # exit sell 82 | 'date': '2022-09-16', 83 
| 'zscore': -36.830427514962274} 84 | ``` 85 | ## Advanced Usage 86 | ```python 87 | from pprint import pprint 88 | from stock_pairs_trading import StockPairsTrading 89 | 90 | spt = StockPairsTrading( 91 | start="2007-12-01", 92 | end="2017-12-01", 93 | outputs_dir_path = "outputs", 94 | data_dir_path = "data", 95 | column = "Adj Close", 96 | window = 1, 97 | transition_covariance = 0.01, 98 | ) 99 | r = spt.backtest(('ADBE', 'MSFT')) 100 | pprint(r) 101 | ``` 102 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | attrs==22.1.0 2 | certifi==2022.9.14 3 | charset-normalizer==2.1.1 4 | contourpy==1.0.5 5 | coverage==6.4.4 6 | cycler==0.11.0 7 | fonttools==4.37.2 8 | idna==3.4 9 | iniconfig==1.1.1 10 | kiwisolver==1.4.4 11 | lxml==4.9.1 12 | matplotlib==3.6.0 13 | multitasking==0.0.11 14 | numpy==1.23.3 15 | packaging==21.3 16 | pandas==1.4.4 17 | patsy==0.5.2 18 | Pillow==9.2.0 19 | pluggy==1.0.0 20 | py==1.11.0 21 | pykalman==0.9.5 22 | pyparsing==3.0.9 23 | pytest==7.1.3 24 | pytest-cov==3.0.0 25 | python-dateutil==2.8.2 26 | pytz==2022.2.1 27 | requests==2.28.1 28 | scipy==1.9.1 29 | seaborn==0.12.0 30 | six==1.16.0 31 | statsmodels==0.13.2 32 | tomli==2.0.1 33 | urllib3==1.26.12 34 | yfinance==0.1.74 35 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import find_packages, setup 2 | 3 | setup( 4 | name="stock-pairs-trading", 5 | version="0.1.1", 6 | description="stock-pairs-trading is a python library \ 7 | for backtest with stock pairs trading using kalman filter on Python 3.8 and above.", 8 | long_description=open("README.md").read(), 9 | long_description_content_type="text/markdown", 10 | license="MIT", 11 | author="10mohi6", 12 | author_email="10.mohi.6.y@gmail.com", 13 | 
"""Pairs-trading backtest and signal generation based on a Kalman filter."""
import datetime
import itertools
import os
from enum import IntEnum
from typing import Any, Dict, Optional, Sequence, Tuple

import numpy as np
import pandas as pd

# matplotlib, seaborn, yfinance, pykalman and statsmodels are imported inside
# the methods that use them, so importing this module does not pull in the
# plotting / network stack and the pure pandas helpers work without it.


class Col(IntEnum):
    """Positional index of each column of the backtest frame.

    The frame is built in exactly this order (two price columns, the
    z-score, then the per-leg trade columns) and every ``DataFrame.iat``
    access in ``StockPairsTrading`` depends on it.
    """

    S1 = 0
    S2 = 1
    ZSCORE = 2
    S1BUY = 3
    S1SELL = 4
    S1PROFIT = 5
    S1PERFORMANCE = 6
    S2BUY = 7
    S2SELL = 8
    S2PROFIT = 9
    S2PERFORMANCE = 10


class StockPairsTrading:
    """Find cointegrated pairs and backtest a z-score pairs-trading rule.

    A position is opened when the z-score of the price ratio leaves the
    ``+/-ENTRY_ZSCORE`` band and closed when it returns inside
    ``+/-EXIT_ZSCORE`` or jumps from one side of that band to the other.
    """

    ENTRY_ZSCORE = 1.0
    EXIT_ZSCORE = 0.5
    PVALUE_THRESHOLD = 0.05

    # Order must match ``Col.S1BUY`` .. ``Col.S2PERFORMANCE``.  The lower-case
    # "s2buy" is the spelling already written to performance.csv; it is kept
    # so the CSV header does not change.
    _TRADE_COLUMNS = (
        "s1Buy",
        "s1Sell",
        "s1Profit",
        "s1Performance",
        "s2buy",
        "s2Sell",
        "s2Profit",
        "s2Performance",
    )

    def __init__(
        self,
        *,
        start: str = "2008-01-01",
        end: Optional[str] = None,
        outputs_dir_path: str = "outputs",
        data_dir_path: str = "data",
        column: str = "Adj Close",
        window: int = 1,
        transition_covariance: float = 0.01,
    ) -> None:
        """Store the configuration and create the output / cache directories.

        Args:
            start: First date (``YYYY-MM-DD``) of the price history.
            end: Last date of the price history.  ``None`` means today's
                date, resolved when the instance is created rather than
                once at import time.
            outputs_dir_path: Directory that receives the PNG / CSV files.
            data_dir_path: Directory used to cache downloaded prices.
            column: Price field selected from the downloaded data.
            window: Rolling-mean window applied to the price ratio.
            transition_covariance: Transition covariance of the Kalman
                filter.
        """
        self.outputs_dir_path = outputs_dir_path
        self.data_dir_path = data_dir_path
        self.start = start
        self.end = str(datetime.date.today()) if end is None else end
        self.column = column
        self.window = window
        self.transition_covariance = transition_covariance
        os.makedirs(self.outputs_dir_path, exist_ok=True)
        os.makedirs(self.data_dir_path, exist_ok=True)

    def _is_exit(self, df: pd.DataFrame, i: int) -> bool:
        """Return True when row ``i`` is an exit signal.

        The signal fires when the z-score is inside the exit band, or when
        it moved from one side of the band to the other since the previous
        row.  ``i`` may be negative (counted from the end).  The first row
        has no previous row, so only the band test applies to it.
        """
        limit = self.EXIT_ZSCORE
        current = df.iat[i, Col.ZSCORE]
        if abs(current) < limit:
            return True
        position = i if i >= 0 else len(df) + i
        if position <= 0:
            # Without this guard ``i - 1`` would be -1 and silently compare
            # the first row against the LAST row of the frame.
            return False
        previous = df.iat[position - 1, Col.ZSCORE]
        crossed_down = previous > limit and current < -limit
        crossed_up = previous < -limit and current > limit
        return bool(crossed_down or crossed_up)

    def _download_prices(
        self, tickers: Sequence[str], **period: Any
    ) -> pd.DataFrame:
        """Download ``self.column`` for ``tickers``, one column per ticker.

        Gaps are forward-filled and rows that still contain NaN are dropped.
        ``period`` is forwarded to ``yfinance.download`` (``start``/``end``).
        """
        import yfinance as yf

        symbols = list(tickers)
        # auto_adjust is passed explicitly so that the selected price field
        # does not depend on the default of the installed yfinance version.
        raw = yf.download(symbols, auto_adjust=False, **period)
        wanted = [(self.column, symbol) for symbol in symbols]
        return raw[wanted].set_axis(symbols, axis="columns").ffill().dropna()

    def _zscore(self, df: pd.DataFrame, s1: str, s2: str) -> pd.Series:
        """Return the z-score series of the ``s1 / s2`` price ratio.

        The ratio is run through a one-dimensional Kalman filter.  The score
        is the rolling mean of the ratio (``self.window`` rows) minus the
        filtered state mean, divided by the standard deviation of the
        filtered state covariances.
        """
        from pykalman import KalmanFilter

        ratio = df[s1] / df[s2]
        kalman = KalmanFilter(
            transition_matrices=[1],
            observation_matrices=[1],
            initial_state_mean=0,
            initial_state_covariance=1,
            observation_covariance=1,
            transition_covariance=self.transition_covariance,
        )
        state_means, state_covs = kalman.filter(ratio)
        state_std = np.std(state_covs.squeeze())
        smoothed = ratio.rolling(window=self.window, center=False).mean()
        return (smoothed - state_means.squeeze()) / state_std

    def latest_signal(self, pair: tuple) -> dict:
        """Return the trading signal for the most recent available row.

        The full available history of both tickers is downloaded (no
        ``start``/``end`` restriction).  The result holds the date, both
        prices, the z-score and, for each ticker, ``Buy`` / ``Sell`` entry
        flags plus ``Cover`` / ``Short`` flags that both carry the exit
        signal.
        """
        s1, s2 = pair[0], pair[1]
        df = self._download_prices(pair)
        df["zscore"] = self._zscore(df, s1, s2)

        zscore = df.iat[-1, Col.ZSCORE]
        above = bool(zscore > self.ENTRY_ZSCORE)
        below = bool(zscore < -self.ENTRY_ZSCORE)
        leaving = self._is_exit(df, -1)

        signal = {
            "date": df.index[-1].strftime("%Y-%m-%d"),
            f"{s1} {self.column}": df.iat[-1, Col.S1],
            f"{s2} {self.column}": df.iat[-1, Col.S2],
            "zscore": zscore,
        }
        # Ratio below the band: buy s1 / sell s2; above the band: the reverse.
        for ticker, buy, sell in ((s1, below, above), (s2, above, below)):
            signal[f"{ticker} Buy"] = buy
            signal[f"{ticker} Cover"] = leaving
            signal[f"{ticker} Sell"] = sell
            signal[f"{ticker} Short"] = leaving
        return signal

    def find_pairs(self, tickers: list) -> list:
        """Return the cointegrated pairs among ``tickers``.

        Also saves a heat map of the pairwise p-values (cells at or above
        ``PVALUE_THRESHOLD`` are masked) to ``<outputs_dir_path>/pairs.png``.
        """
        import matplotlib.pyplot as plt
        import seaborn

        df = self._download_prices(tickers, start=self.start, end=self.end)
        _, pvalues, pairs = self._find_cointegrated_pairs(df)
        plt.figure(figsize=(15, 7))
        seaborn.heatmap(
            pvalues,
            xticklabels=tickers,
            yticklabels=tickers,
            cmap="RdYlGn_r",
            mask=(pvalues >= self.PVALUE_THRESHOLD),
        )
        plt.savefig(os.path.join(self.outputs_dir_path, "pairs.png"))
        plt.clf()
        plt.close()
        return pairs

    def _find_cointegrated_pairs(
        self, data: pd.DataFrame
    ) -> Tuple[np.ndarray, np.ndarray, list]:
        """Run a cointegration test on every unordered pair of columns.

        Returns:
            ``(score_matrix, pvalue_matrix, pairs)``.  Only the upper
            triangle of the matrices is filled (scores default to 0,
            p-values to 1); ``pairs`` lists the column-name tuples whose
            p-value is below ``PVALUE_THRESHOLD``.
        """
        from statsmodels.tsa.stattools import coint

        n = data.shape[1]
        score_matrix = np.zeros((n, n))
        pvalue_matrix = np.ones((n, n))
        keys = data.keys()
        pairs = []
        for i, j in itertools.combinations(range(n), 2):
            result = coint(data[keys[i]], data[keys[j]])
            score_matrix[i, j] = result[0]
            pvalue_matrix[i, j] = result[1]
            if result[1] < self.PVALUE_THRESHOLD:
                pairs.append((keys[i], keys[j]))
        return score_matrix, pvalue_matrix, pairs

    def _load_pair_prices(self, pair: tuple) -> pd.DataFrame:
        """Return the price frame for ``pair``, using the on-disk cache."""
        s1, s2 = pair[0], pair[1]
        # The column is part of the cache key so that changing ``column``
        # never reuses prices cached for a different one.
        path = os.path.join(
            self.data_dir_path,
            f"{s1}-{s2}-{self.start}-{self.end}-{self.column}.pkl",
        )
        if os.path.isfile(path):
            # NOTE: unpickling can execute arbitrary code; data_dir_path
            # must only contain files written by this class.
            return pd.read_pickle(path)
        df = self._download_prices(pair, start=self.start, end=self.end)
        df.to_pickle(path)
        return df

    def _simulate_trades(self, df: pd.DataFrame) -> None:
        """Append the trade columns to ``df`` and fill them row by row.

        ``df`` must hold exactly the two price columns followed by
        ``zscore``.  At most one pair position is open at a time; profit is
        booked on the row where the position is closed and the running
        totals are written to the two ``Performance`` columns on every row.
        """
        for name in self._TRADE_COLUMNS:
            df[name] = np.nan

        s1_total = s2_total = 0.0
        s1_entry = s2_entry = np.nan
        position = 0  # +1: long s1 / short s2, -1: short s1 / long s2
        for i in range(len(df)):
            zscore = df.iat[i, Col.ZSCORE]
            p1 = df.iat[i, Col.S1]
            p2 = df.iat[i, Col.S2]
            if self._is_exit(df, i):
                if position != 0:
                    if position > 0:
                        df.iat[i, Col.S1SELL] = p1
                        df.iat[i, Col.S2BUY] = p2
                        s1_profit = p1 - s1_entry
                        s2_profit = s2_entry - p2
                    else:
                        df.iat[i, Col.S1BUY] = p1
                        df.iat[i, Col.S2SELL] = p2
                        s1_profit = s1_entry - p1
                        s2_profit = p2 - s2_entry
                    df.iat[i, Col.S1PROFIT] = s1_profit
                    df.iat[i, Col.S2PROFIT] = s2_profit
                    s1_total += s1_profit
                    s2_total += s2_profit
                    position = 0
            elif position == 0 and zscore > self.ENTRY_ZSCORE:
                df.iat[i, Col.S1SELL] = p1
                df.iat[i, Col.S2BUY] = p2
                s1_entry, s2_entry, position = p1, p2, -1
            elif position == 0 and zscore < -self.ENTRY_ZSCORE:
                df.iat[i, Col.S1BUY] = p1
                df.iat[i, Col.S2SELL] = p2
                s1_entry, s2_entry, position = p1, p2, 1

            df.iat[i, Col.S1PERFORMANCE] = s1_total
            df.iat[i, Col.S2PERFORMANCE] = s2_total

    def _plot_performance(self, df: pd.DataFrame, s1: str, s2: str) -> None:
        """Save the per-leg and combined cumulative profit curves as PNG."""
        import matplotlib.pyplot as plt

        s1_curve = df["s1Performance"].values
        s2_curve = df["s2Performance"].values
        plt.figure(figsize=(15, 7))
        plt.plot(df.index, s1_curve, alpha=0.5)
        plt.plot(df.index, s2_curve, alpha=0.5)
        plt.plot(df.index, s1_curve + s2_curve)
        plt.legend([s1, s2, f"{s1} + {s2}"])
        plt.savefig(os.path.join(self.outputs_dir_path, "performance.png"))
        plt.clf()
        plt.close()

    @staticmethod
    def _ratio(numerator: Any, denominator: Any) -> float:
        """Divide with IEEE semantics (nan / inf on zero) and no warnings."""
        with np.errstate(divide="ignore", invalid="ignore"):
            return float(
                np.divide(np.float64(numerator), np.float64(denominator))
            )

    def _performance_metrics(self, df: pd.DataFrame) -> Dict[str, Any]:
        """Summarise the trade columns written by ``_simulate_trades``.

        Each closed leg counts as one trade; a profit of exactly zero counts
        as a loss.  Ratios whose denominator is zero (for example when no
        trade was closed) come back as ``nan`` or ``inf``.
        """
        s1_profit = df["s1Profit"]
        s2_profit = df["s2Profit"]
        s1_won = s1_profit > 0
        s2_won = s2_profit > 0
        s1_lost = s1_profit <= 0
        s2_lost = s2_profit <= 0

        win_num = int(s1_won.sum() + s2_won.sum())
        loss_num = int(s1_lost.sum() + s2_lost.sum())
        total_trades = win_num + loss_num
        win = s1_profit.where(s1_won, 0).sum() + s2_profit.where(
            s2_won, 0
        ).sum()
        loss = s1_profit.where(s1_lost, 0).sum() + s2_profit.where(
            s2_lost, 0
        ).sum()

        average_win = self._ratio(win, win_num)
        average_loss = self._ratio(abs(loss), loss_num)
        equity = df["s1Performance"] + df["s2Performance"]
        return {
            "total_profit": s1_profit.sum() + s2_profit.sum(),
            "total_trades": total_trades,
            "win_rate": self._ratio(win_num, total_trades),
            "profit_factor": self._ratio(win, abs(loss)),
            "riskreward_ratio": self._ratio(average_win, average_loss),
            "sharpe_ratio": self._ratio(
                s1_profit.mean() + s2_profit.mean(),
                s1_profit.std() + s2_profit.std(),
            ),
            # Largest drop of the combined equity curve from its running peak.
            "maximum_drawdown": (equity.cummax() - equity).max(),
        }

    def backtest(
        self,
        pair: tuple,
    ) -> dict:
        """Backtest the z-score rule on ``pair`` and return the statistics.

        Prices are read from the cache in ``data_dir_path`` when present,
        otherwise downloaded and cached.  ``performance.png`` and
        ``performance.csv`` are written to ``outputs_dir_path``.

        Returns:
            A dict with ``cointegration`` (p-value), ``correlation``,
            ``total_profit``, ``total_trades``, ``win_rate``,
            ``profit_factor``, ``riskreward_ratio``, ``sharpe_ratio`` and
            ``maximum_drawdown``.
        """
        from statsmodels.tsa.stattools import coint

        s1, s2 = pair[0], pair[1]
        df = self._load_pair_prices(pair)
        df["zscore"] = self._zscore(df, s1, s2)
        self._simulate_trades(df)
        self._plot_performance(df, s1, s2)
        df.to_csv(os.path.join(self.outputs_dir_path, "performance.csv"))

        _, pvalue, _ = coint(df[s1], df[s2])
        result = {
            "cointegration": pvalue,
            "correlation": df[s1].corr(df[s2]),
        }
        result.update(self._performance_metrics(df))
        return result
spt.latest_signal(("ADBE", "MSFT")) 28 | 29 | 30 | # @pytest.mark.skip 31 | def test_find_pairs(spt): 32 | spt.find_pairs(["AAPL", "ADBE", "MSFT", "IBM"]) 33 | --------------------------------------------------------------------------------