# coding: utf-8
#
# ├── README.md
# └── pairs-trade.py
#
# /README.md:
# --------------------------------------------------------------------------------
# # python-pairs-trading
# A simple implementation of a pairs trading strategy.
#
# This script was part of a seminar paper I did for a financial econometrics
# course at my university.
# It needs a lot of improvement but I will keep on updating it.
# --------------------------------------------------------------------------------
# /pairs-trade.py:
# --------------------------------------------------------------------------------

# # Seminar paper: Pairs trading

# #### Author: Alexander Franz

# In[26]:

# Import important packages
import pandas as pd
import pandas.io.data
import numpy as np
import matplotlib.pyplot as plt
import math
import pytz
from datetime import datetime
import zipline as zp
import itertools


# In[35]:

# Company names of the DAX30 members; same order as the ticker list `kurz`.
aktien = ['adidas AG', 'Allianz SE', 'BASF SE', 'BAYER AG', 'Beiersdorf AG', 'BMW AG', 'Commerzbank AG',
          'Continental AG', 'Daimler AG', 'Deutsche Bank AG', 'Deutsche Börse AG', 'Deutsche Lufthansa AG',
          'Deutsche Post AG', 'Deutsche Telekom AG', 'E.ON AG', 'Fresenius Medical Care AG & Co. KGaA',
          'Fresenius SE', 'HeidelbergCement AG', 'Henkel AG & Co. KGaA', 'Infineon Technologies AG',
          'K+S Aktiengesellschaft', 'LANXESS AG', 'Linde AG', 'Merck KGaA', 'Münchener Rück AG',
          'RWE AG', 'SAP AG', 'Siemens AG', 'ThyssenKrupp AG', 'Volkswagen AG']

# Ticker symbols of the DAX30 members; same order as `aktien`.
kurz = ['ADS', 'ALV', 'BAS', 'BAYN', 'BEI', 'BMW', 'CBK', 'CON', 'DAI', 'DBK', 'DB1', 'LHA', 'DPW', 'DTE', 'EOAN', 'FME', 'FRE',
        'HEI', 'HEN', 'IFX', 'SDF', 'LXS', 'LIN', 'MRK', 'MUV2', 'RWE', 'SAP', 'SIE', 'TKA', 'VOW']

# Adds the .DE appendix to the list, needed for Yahoo Finance quotes
yahoo_suffix = ".DE"
kurzde = [ticker + yahoo_suffix for ticker in kurz]

# Lookup table that connects company name and ticker symbol.
dax = pd.DataFrame({'company': aktien, 'ticker': kurz})


# In[36]:

dax


# In[5]:

# Getting the Data from Yahoo Finance, using the stocks from the "kurzde" list,
# as Yahoo needs the .DE appendix for German stocks.
start = datetime(2009, 1, 1, 0, 0, 0, 0, pytz.utc)
end = datetime(2009, 12, 30, 0, 0, 0, 0, pytz.utc)
data = zp.utils.factory.load_from_yahoo(stocks=kurzde, indexes={}, start=start, end=end, adjusted=True)
data.head()


# In[12]:

# Restore the plain tickers (without the .DE appendix).  The labels are derived
# from the columns that were actually returned instead of assigning `kurz`
# positionally, so prices cannot end up under the wrong ticker if the loader
# returns the columns in a different order than they were requested.
data.columns = [
    column[:-len(yahoo_suffix)] if column.endswith(yahoo_suffix) else column
    for column in data.columns
]
# Remove Christmas as a non-trading day
#data = data.drop(pd.Timestamp('2009-12-24 00:00:00'))
# BMW had a non trading day, replacing with last close price, i.e. the value
# of the row directly before the gap (the original copied the price from
# 2009-02-09, a full month earlier).  A single .loc write is used so the
# assignment reaches `data` itself and not a temporary copy.
bmw_gap_day = pd.Timestamp("2009-03-09")
data.loc[bmw_gap_day, 'BMW'] = data['BMW'].shift(1)[bmw_gap_day]


# In[13]:

data.describe()


# In[19]:

# Normalize the whole DataFrame by dividing by first observation
# Remove 2009-12-24 and 2009-12-31, as nontrading on those days.
def norming(x):
    """Normalize a price series by dividing it by its first observation.

    x: a pandas Series (e.g. one column handed over by ``DataFrame.apply``)
       or any other indexable object that supports element-wise division.
    Returns ``x`` divided by its first element.
    """
    # On a Series, ``x[0]`` is a label lookup that only falls back to the
    # position for non-integer indexes; ``iloc`` asks for the position directly.
    first = x.iloc[0] if hasattr(x, "iloc") else x[0]
    return x / first


datanorm = data.apply(norming)


# In[20]:

# Saving both DataFrames as .csv
data.to_csv("dax.csv")
datanorm.to_csv("dax_normalized.csv")


# # Pairs formation

# #### The second part involves finding potential trading pairs. For this, we
# create all possible pair combinations and compute the sum of squared
# distances (SSD) in normalized prices as a selection criterion. We then rank
# the pairs according to minimal SSD and choose our pairs!

# In[22]:

daxnorm = pd.read_csv("dax_normalized.csv", index_col=0, parse_dates=True)


# In[23]:

def combomaker(x):
    """Return all 2-element combinations (without repetition) of ``x``.

    x: an iterable of ticker symbols.
    Returns a list of 2-tuples in the order produced by
    ``itertools.combinations``.
    """
    return list(itertools.combinations(x, 2))


# In[24]:

def ssd(X, Y):
    """Return the sum of squared differences and the spread's standard deviation.

    X, Y: equally long one-dimensional numeric sequences (lists, arrays or
          pandas Series); they are compared element by element in order.
    Returns a tuple ``(cumdiff, std)`` where ``cumdiff`` is the sum of
    ``(X[i] - Y[i]) ** 2`` and ``std`` is ``np.std`` of the differences.
    Raises ValueError if the two inputs do not have the same length.
    """
    # Work on plain arrays so the comparison is strictly positional, no matter
    # what kind of index a Series argument carries.
    first = np.asarray(X, dtype=float)
    second = np.asarray(Y, dtype=float)
    if first.shape != second.shape:
        raise ValueError(
            "ssd() needs inputs of equal length, got %s and %s"
            % (first.shape, second.shape)
        )
    spread = first - second
    cumdiff = np.sum(spread ** 2)
    std = np.std(spread)
    return cumdiff, std


# In[28]:

def pairsmatch(x, prices=None):
    """Rank all pairs that can be formed from ``x`` by their SSD.

    x:      list of ticker symbols; each must be a column of ``prices``.
    prices: DataFrame of normalized prices, one column per ticker.  Defaults
            to the module-level ``daxnorm`` table.
    Returns a DataFrame with the columns 'Pair', 'SSD' and
    'Standard Deviation', sorted by ascending SSD.
    """
    if prices is None:
        prices = daxnorm

    allpairs = combomaker(x)
    squared = []
    std = []
    for first, second in allpairs:
        # One ssd() call yields both statistics of the pair.
        distance, deviation = ssd(prices[first], prices[second])
        squared.append(distance)
        std.append(deviation)

    distancetable = pd.DataFrame({'Pair': allpairs, 'SSD': squared, 'Standard Deviation': std})
    # DataFrame.sort(columns=...) is gone from pandas; sort_values replaces it.
    distancetable = distancetable.sort_values(by='SSD', ascending=True)

    return distancetable


daxpairs = pairsmatch(kurz)


# In[40]:

# Save the Top Five Pairs in a new variable
topfive = daxpairs[:5]
topfive


# In[37]:

# Create a dictionary, that makes it easier to connect Ticker and Company name
dax_dict = dict(zip(dax.ticker, dax.company))
dax_dict


# In[41]:

# Get a flat list of the tickers of the top five pairs; the two tickers of
# each pair stay next to each other (first, second, first, second, ...).
topfive['Pair']
fivedax = list(itertools.chain.from_iterable(topfive['Pair']))

# Every ticker once, sorted so that the download request is reproducible.
uniquefivedax = sorted(set(fivedax))
fivedax


# ## Getting data for the backtest

# In[44]:

# Adds the .DE appendix to the list, needed for Yahoo Finance quotes
shortde = [ticker + ".DE" for ticker in uniquefivedax]

start = datetime(2010, 1, 1, 0, 0, 0, 0, pytz.utc)
end = datetime(2010, 6, 30, 0, 0, 0, 0, pytz.utc)
backtest = zp.utils.factory.load_from_yahoo(stocks=shortde, indexes={}, start=start, end=end, adjusted=True)
backtest.head()


# In[45]:

backtest.describe()


# In[47]:

# Remove the .DE
# Remove the .DE appendix in the column names
colnamesfive = [column[:-3] for column in backtest.columns]
colnamesfive
backtest.columns = colnamesfive
backtest.head()


# In[48]:

# Saving as a csv.  The same path is used for writing and for reading the
# file back (the original wrote "dax_backtest.csv" but read
# "dax5_backtest.csv").
backtest_csv = "dax_backtest.csv"
backtest.to_csv(backtest_csv)


# In[49]:

backtest = pd.read_csv(backtest_csv, index_col=0, parse_dates=True)
backtest.head()


# In[50]:

backtestnorm = backtest.apply(norming)
backtestnorm.head()


# In[51]:

# Create a dictionary for each pair with its corresponding standard deviation.
# The key is the two tickers of the pair concatenated, e.g. 'AAA' + 'BBB'.
fivepair = [pair[0] + pair[1] for pair in topfive['Pair']]
fivesd = list(topfive['Standard Deviation'])
fivedic = dict(zip(fivepair, fivesd))
fivedic


# # The Backtesting Function

# In[56]:

def _whole_shares(cash, price):
    """Return the number of whole shares that ``cash`` buys at ``price``."""
    return (cash - (cash % price)) / price


def _pair_profit(overval, entry_x, entry_y, value_x, value_y):
    """Return the profit of closing a pair position at the given leg values.

    overval "A" means the first stock was sold short and the second bought,
    anything else means the opposite.  ``entry_*`` are the leg values when the
    position was opened, ``value_*`` the leg values now.
    """
    if overval == "A":
        shortprofit = entry_x - value_x
        longprofit = value_y - entry_y
    else:
        shortprofit = entry_y - value_y
        longprofit = value_x - entry_x
    return shortprofit + longprofit


def tradedax(daxtuple, capital=1000, entry_sd=2, exit_fraction=0.5):
    """Backtest the pairs trade for one pair of tickers.

    Reads the module-level tables ``backtest`` (raw prices), ``backtestnorm``
    (normalized prices) and ``fivedic`` (standard deviation of each pair's
    spread, keyed by the concatenated tickers).

    A position is opened when the absolute spread of the normalized prices
    exceeds ``entry_sd`` standard deviations: the stock with the higher
    normalized price is sold short, the other one is bought.  The position
    is closed once the absolute spread falls below ``exit_fraction`` of that
    threshold.  A position that is still open after the last observation is
    closed at the last prices; such a close is logged but not counted as a
    completed roundtrip.

    daxtuple:      (X, Y) ticker pair.
    capital:       starting portfolio value.
    entry_sd:      entry threshold in standard deviations of the spread.
    exit_fraction: fraction of the entry threshold below which to close.

    Returns ``(trades, tradeno, totalinvested, totalprofits)``: the list of
    log lines, the number of completed roundtrips, the summed volume of all
    opened positions and the summed profit of all closed positions.
    Raises KeyError if the pair is missing from one of the tables.
    """
    X = daxtuple[0]
    Y = daxtuple[1]
    portfolio = capital
    # NOTE(review): the first leg is always sized from half of the *starting*
    # capital, while the second leg uses what is left of the current
    # portfolio value - kept as in the original.
    half = portfolio * 0.5

    # Plain arrays make every access below strictly positional, independent
    # of the date index of the tables.
    dates = backtestnorm[X].index
    f1 = backtestnorm[X].values
    f2 = backtestnorm[Y].values
    s1 = backtest[X].values
    s2 = backtest[Y].values

    hsd = fivedic[str(X) + str(Y)]
    treshold = entry_sd * hsd

    trades = []      # Log lines of all pair trades in the period
    profits = []     # Profit of every closed position
    possize = []     # Volume of every opened position
    tradeno = 0      # Number of completed roundtrips
    openpos = False
    overval = ""     # "A": X is overvalued, "B": Y is overvalued
    order1 = order2 = 0
    entry_x = entry_y = 0

    for date, norm_x, norm_y, price_x, price_y in zip(dates, f1, f2, s1, s2):
        spread = norm_x - norm_y  # Spread of normalized prices on this day

        if not openpos:
            if abs(spread) > treshold:
                overval = "A" if spread > 0 else "B"
                order1 = _whole_shares(half, price_x)
                remaining = portfolio - order1 * price_x
                order2 = _whole_shares(remaining, price_y)
                entry_x = price_x * order1
                entry_y = price_y * order2
                posvolume = entry_x + entry_y
                possize.append(posvolume)
                if overval == "A":  # A is overvalued, thus we short it and buy B long
                    tradelog = "Enter:%i %s short @ %s, %i %s long @ %s on %s" % (order1, X, price_x, order2, Y, price_y, date)
                else:  # Same as above, just the other way around
                    tradelog = "Enter:%i %s long @ %s, %i %s short @ %s on %s, Volume: %i" % (order1, X, price_x, order2, Y, price_y, date, posvolume)
                trades.append(tradelog)
                openpos = True
        elif abs(spread) < treshold * exit_fraction:
            totalprofit = _pair_profit(overval, entry_x, entry_y, price_x * order1, price_y * order2)
            if overval == "A":
                tradelog = "Exit: %s short @ %s, %s long @ %s on %s with total profit of %s." % (X, price_x, Y, price_y, date, totalprofit)
            else:
                tradelog = "Exit: %s long @ %s, %s short @ %s on %s with total profit of %s." % (X, price_x, Y, price_y, date, totalprofit)
            trades.append(tradelog)
            portfolio += totalprofit
            profits.append(totalprofit)
            tradeno += 1
            openpos = False

    if openpos:
        # No convergence: close at the prices of the last observation.
        totalprofit = _pair_profit(overval, entry_x, entry_y, price_x * order1, price_y * order2)
        portfolio += totalprofit
        trades.append("Exit: No convergence to the end of trading period, Profit: %s" % (totalprofit,))
        profits.append(totalprofit)
        openpos = False

    totalprofits = sum(profits)
    trades.append("Total profits of this pair in this timewindow: %s, Portfolio value = %i " % (totalprofits, portfolio))
    totalinvested = sum(possize)
    trades.append("Total invested capital: %s" % (totalinvested,))
    return trades, tradeno, totalinvested, totalprofits
# In[53]:

# Makes tuples from the pairs.  The result is materialized as a list because
# it is indexed below and iterated by portfolio(); a bare zip() object
# supports neither on Python 3.
fivedaxpairs = list(zip(fivedax[::2], fivedax[1::2]))
fivedaxpairs


# ## Trading log of the first pair

# In[59]:

tradedax(fivedaxpairs[0])[0]


# ## Summary and returns

# In[65]:

def portfolio():
    """Backtest every pair in ``fivedaxpairs`` and summarize the results.

    Returns a tuple ``(summary, avreturn)``: ``summary`` is a DataFrame with
    one row per pair (absolute profit, completed roundtrips, invested volume,
    return relative to the starting capital of 1000 and whether the last
    position had to be closed at the end of the period); ``avreturn`` is the
    mean of the relative returns.
    """
    starting_capital = 1000.0  # float, so the division below is never integer division
    forced_marker = "Exit: No convergence"  # prefix tradedax logs for a forced close

    pairs = list(fivedaxpairs)
    profits = []
    roundtrips = []
    invested = []
    forced = []

    for pair in pairs:
        # Run the backtest once per pair and read all results from that run.
        result = tradedax(pair)
        tradelog = result[0]
        roundtrips.append(result[1])
        invested.append(result[2])
        profits.append(result[3])
        # Derive the forced-close flag from the trade log instead of
        # hard-coding it for one particular set of five pairs.
        was_forced = any(str(entry).startswith(forced_marker) for entry in tradelog)
        forced.append("Yes" if was_forced else "No")

    relreturn = [profit / starting_capital for profit in profits]
    summary = pd.DataFrame({'Pair': pairs, 'Absolute profits ': profits, 'Completed roundtrips': roundtrips,
                            'Total invested': invested, 'Relative return': relreturn, 'Forced close': forced})
    avreturn = np.mean(relreturn)
    return summary, avreturn


# One run delivers both the summary table and the average return.
table, avgreturn = portfolio()
table


# In[71]:

# Average return across the five pairs, annualized from the six-month window
annualized = ((1 + avgreturn) ** (12.0 / 6)) - 1
annualized


# ### 1.04% annualized returns for the five pairs presented!
# --------------------------------------------------------------------------------