├── README.md ├── img └── VOI.png ├── src ├── BuildLinearData.py ├── BuildLinearModel.py ├── Config.py ├── LinearStrategy.py ├── ReadFiles.py ├── RunStrategy.py ├── Session.py ├── config.ini └── config_rb.ini └── tools ├── download.py └── plot.py /README.md: -------------------------------------------------------------------------------- 1 | # Order Imbalance HFT: 非平衡订单流高频交易模型 2 | 3 | # 说明 4 | 基于前一段时间内订单流的非平衡特征,利用机器学习模型预测下一阶段价格涨跌。 5 | 在跨度为几十秒的时间范围内,订单流的非平衡特征与下一阶段的价格涨跌,可能会存在一定关联。(在足够好的流动性、以及某些品种下有效,例如15年前的期指) 6 | 7 | 简单的线性模型: 8 | ![](img/VOI.png) -------------------------------------------------------------------------------- /img/VOI.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/algosenses/Order_Imbalance_HFT/d8c156ea8e723ab750c7b0b3b197e0742838716d/img/VOI.png -------------------------------------------------------------------------------- /src/BuildLinearData.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | from Session import * 4 | 5 | def BuildLinearData(data, 6 | session = Session.Morning, 7 | open_int = False, 8 | delay = 20, # next 10 seconds (20 time-steps) average mid-price change 9 | lags = 5, 10 | config = None, 11 | functions = None): 12 | 13 | # declare constants 14 | day = dict() 15 | day_start = 34200 16 | 17 | multiplier = float(config['trading']['multiplier']) 18 | 19 | def secondofday(time): 20 | t = time.split(':') 21 | return int(t[0]) * 3600 + int(t[1]) * 60 + int(t[2]) 22 | 23 | morning = dict() 24 | morning['start'] = secondofday(config['morning']['start']) 25 | morning['open'] = secondofday(config['morning']['open']) 26 | morning['close'] = secondofday(config['morning']['close']) 27 | morning['end'] = secondofday(config['morning']['end']) 28 | 29 | afternoon = dict() 30 | afternoon['start'] = secondofday(config['afternoon']['start']) 31 | afternoon['open'] = secondofday(config['afternoon']['open']) 32 | afternoon['close'] = secondofday(config['afternoon']['close']) 33 | afternoon['end'] = secondofday(config['afternoon']['end']) 34 | 35 | night = dict() 36 | night['start'] = secondofday(config['night']['start']) 37 | night['open'] = secondofday(config['night']['open']) 38 | night['close'] = secondofday(config['night']['close']) 39 | night['end'] = secondofday(config['night']['end']) 40 | 41 | if session == Session.Morning: 42 | start_time = morning['start'] # - data start 43 | open_time = morning['open'] # - trade open 44 | close_time = morning['close'] # - trade close 45 | end_time = morning['end'] # - data end 46 | elif session == Session.Afternoon: 47 | start_time = afternoon['start'] 48 | open_time = afternoon['open'] 49 | close_time = afternoon['close'] 50 | end_time = afternoon['end'] 51 | elif session == Session.Night: 52 | start_time = night['start'] 53 | open_time = night['open'] 54 | close_time = night['close'] 55 | end_time = night['end'] 56 | 57 | 58 | ind = data[(data['SecondOfDay'] >= start_time) & (data['SecondOfDay'] < end_time)].index.values.astype(int) 59 | main_data = data.loc[ind].reset_index(drop=True) 60 | n = len(main_data.index) 61 | time_secs = main_data['SecondOfDay'] + main_data['UpdateMillisec'] / 1000 62 | ind_open = time_secs[time_secs >= open_time].index.values.astype(int)[0] 63 | ind_close = time_secs[time_secs >= close_time].index.values.astype(int)[0] 64 | 65 | # calculate variables 66 | mid_price = (main_data['BidPrice1'] + main_data['AskPrice1']) / 2 67 | spread = main_data['AskPrice1'] - main_data['BidPrice1'] 68 | 69 | OIR_array = (main_data['BidVolume1'] - main_data['AskVolume1']) / (main_data['BidVolume1'] + main_data['AskVolume1']) 70 | dBid_price = main_data['BidPrice1'].diff().fillna(0) 71 | dAsk_price = main_data['AskPrice1'].diff().fillna(0) 72 | 73 | ## build order imbalance signal according to Spec 74 | df = pd.concat([main_data['BidVolume1'], main_data['BidVolume1'].shift(1).fillna(0), dBid_price], axis=1) 75 | df.columns = ['bv', 'sbv', 'dbp'] 76 | 77 | def get_bid_cv(bv, sbv, dbp): 78 | if dbp == 0: 79 | return bv - sbv 80 | elif dbp < 0: 81 | return 0 82 | else: 83 | return bv 84 | 85 | df['bid_CV'] = df.apply(lambda row : get_bid_cv(row['bv'], row['sbv'], row['dbp']), axis=1) 86 | bid_CV = df['bid_CV'] 87 | 88 | df = pd.concat([main_data['AskVolume1'], main_data['AskVolume1'].shift(1).fillna(0), dAsk_price], axis=1) 89 | df.columns = ['av', 'sav', 'dap'] 90 | 91 | def get_ask_cv(av, sav, dap): 92 | if dap == 0: 93 | return av - sav 94 | elif dap < 0: 95 | return av 96 | else: 97 | return 0 98 | 99 | df['ask_CV'] = df.apply(lambda row : get_ask_cv(row['av'], row['sav'], row['dap']), axis=1) 100 | ask_CV = df['ask_CV'] 101 | 102 | VOI_array = bid_CV - ask_CV 103 | 104 | dVol = main_data['Volume'].diff() 105 | dTO = main_data['Turnover'].diff() 106 | AvgTrade_price = dTO / dVol / multiplier 107 | AvgTrade_price = AvgTrade_price.fillna(method='ffill').fillna(method='bfill') 108 | rolling_mean = mid_price.rolling(center=False, window=2).mean() 109 | rolling_mean.iloc[0] = mid_price.iloc[0] 110 | MPB_array = AvgTrade_price - rolling_mean 111 | 112 | k = delay 113 | p = lags 114 | new_ind = list(range(p, n - k)) 115 | 116 | ## arithmetic average of future k midprices minus current midprice 117 | if k > 0: 118 | rolling_mean = mid_price.rolling(center=False, window=k).mean().iloc[k:].reset_index(drop=True) 119 | # rolling_mean = mid_price.shift(-k).iloc[k:].reset_index(drop=True) 120 | fpc = rolling_mean - mid_price[:(n-k)] 121 | dMid_Response = fpc.append(pd.Series([np.nan]*k)) 122 | else: 123 | dMid_Response = pd.Series([0] * n) 124 | 125 | # build VOI , dMid , OIR - first p entries are useless 126 | VOI = pd.DataFrame() 127 | OIR = pd.DataFrame() 128 | MPB = pd.DataFrame() 129 | 130 | if p > 0: 131 | for j in range(0, p + 1): 132 | VOI = pd.concat([VOI, VOI_array.shift(j).rename('VOI.t%d' % j)], axis = 1) 133 | OIR = pd.concat([OIR, OIR_array.shift(j).rename('OIR.t%d' % j)], axis = 1) 134 | MPB = pd.concat([MPB, MPB_array.shift(j).rename('MPB.t%d' % j)], axis = 1) 135 | 136 | dMid_Response = dMid_Response.iloc[new_ind] 137 | VOI = VOI.iloc[new_ind] 138 | OIR = OIR.iloc[new_ind] 139 | MPB = MPB.iloc[new_ind] 140 | 141 | # trim the other supporting data 142 | mid_price = mid_price.iloc[new_ind] 143 | spread = spread.iloc[new_ind] 144 | AvgTrade_price = AvgTrade_price.iloc[new_ind] 145 | main_data = main_data.iloc[new_ind] 146 | time_secs = time_secs.iloc[new_ind] 147 | 148 | ind_open = ind_open - p 149 | ind_close = ind_close - p 150 | 151 | value = dict() 152 | value['data'] = main_data.reset_index(drop=True) 153 | value['dMid_Response'] = dMid_Response.reset_index(drop=True) 154 | value['VOI'] = VOI.reset_index(drop=True) 155 | value['OIR'] = OIR.reset_index(drop=True) 156 | value['MPB'] = MPB.reset_index(drop=True) 157 | 158 | value['time_secs'] = time_secs.reset_index(drop=True) 159 | value['ind'] = ind 160 | value['ind_open'] = ind_open 161 | value['ind_close'] = ind_close 162 | 163 | value['mid_price'] = mid_price.reset_index(drop=True) 164 | value['spread'] = spread.reset_index(drop=True) 165 | value['AvgTrade_price'] = AvgTrade_price.reset_index(drop=True) 166 | 167 | return value -------------------------------------------------------------------------------- /src/BuildLinearModel.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import statsmodels.api as sm 3 | import pandas as pd 4 | from BuildLinearData import * 5 | from Session import * 6 | 7 | def BuildLinearModel(key, 8 | data, 9 | session = Session.FullDay, 10 | morning = True, 11 | open_int = False, 12 | delay = 20, 13 | lags = 5, 14 | config = None, 15 | functions = None): 16 | 17 | strategy = config['trading']['strategy'] 18 | 19 | # check if we need a full-day linear model or for a single trading session 20 | if session == Session.FullDay: 21 | morning_data = BuildLinearData(data, 22 | session = Session.Morning, 23 | open_int = open_int, 24 | delay = delay, 25 | lags = lags, 26 | config = config, 27 | functions = functions) 28 | evening_data = BuildLinearData(data, 29 | session = Session.Afternoon, 30 | open_int = open_int, 31 | delay = delay, 32 | lags = lags, 33 | config = config, 34 | functions = functions) 35 | dMid_Response = morning_data['dMid_Response'].append(evening_data['dMid_Response'], ignore_index=True) 36 | VOI = pd.concat([morning_data['VOI'], evening_data['VOI']], ignore_index=True) 37 | OIR = pd.concat([morning_data['OIR'], evening_data['OIR']], ignore_index=True) 38 | time_secs = morning_data['time_secs'].append(evening_data['time_secs'], ignore_index=True) 39 | mid_price = morning_data['mid_price'].append(evening_data['mid_price'], ignore_index=True) 40 | spread = morning_data['spread'].append(evening_data['spread'], ignore_index=True) 41 | AvgTrade_price = morning_data['AvgTrade_price'].append(evening_data['AvgTrade_price']) 42 | MPB = pd.concat([morning_data['MPB'], evening_data['MPB']], ignore_index=True) 43 | trading_data = pd.concat([morning_data['data'], evening_data['data']], ignore_index=True) 44 | else: 45 | trading_data = BuildLinearData( 46 | data, 47 | session = session, 48 | open_int = open_int, 49 | delay = delay, 50 | lags = lags, 51 | config = config, 52 | functions = functions) 53 | dMid_Response = trading_data['dMid_Response'] 54 | VOI = trading_data['VOI'] 55 | OIR = trading_data['OIR'] 56 | time_secs = trading_data['time_secs'] 57 | mid_price = trading_data['mid_price'] 58 | spread = trading_data['spread'] 59 | AvgTrade_price = trading_data['AvgTrade_price'] 60 | MPB = trading_data['MPB'] 61 | trading_data = trading_data['data'] 62 | 63 | ## build the features matrix (x-variable ) based on strategy 64 | ## transform the variables if necessary 65 | 66 | 67 | ## build the explanatory variables 68 | Y = dMid_Response 69 | x = dict() 70 | if strategy == 'A': 71 | X = sm.add_constant(VOI) 72 | x['A'] = X 73 | model = sm.OLS(Y, X).fit() 74 | elif strategy == 'B': 75 | X = pd.concat([VOI.div(spread, axis = 0), OIR.div(spread, axis = 0), MPB.iloc[:,0].div(spread, axis = 0).rename('MPB')], axis = 1) 76 | x['B'] = X 77 | model = sm.OLS(Y, sm.add_constant(X)).fit() 78 | 79 | value = dict() 80 | ## return values 81 | value['dMid_Response'] = dMid_Response ## y-value 82 | value['VOI'] = VOI 83 | value['OIR'] = OIR 84 | value['spread'] = spread 85 | value['y'] = dMid_Response 86 | value['x'] = x 87 | value['model'] = model 88 | value['data'] = trading_data 89 | value['AvgTrade_price'] = AvgTrade_price 90 | value['mid_price'] = mid_price 91 | value['MPB'] = MPB 92 | value['time_secs'] = time_secs 93 | 94 | return value -------------------------------------------------------------------------------- /src/Config.py: -------------------------------------------------------------------------------- 1 | import configparser 2 | 3 | def read(filename): 4 | dictionary = {} 5 | 6 | config = configparser.ConfigParser() 7 | config.read(filename) 8 | 9 | for section in config.sections(): 10 | dictionary[section] = {} 11 | for option in config.options(section): 12 | dictionary[section][option] = config.get(section, option) 13 | 14 | return dictionary -------------------------------------------------------------------------------- /src/LinearStrategy.py: -------------------------------------------------------------------------------- 1 | ## SELL SIGNAL (at t) 2 | ## * E[ FPC (t)] <= -0.2 3 | ## 4 | ## if signal hits, buy or sell maximum position 5 | # ################################################### 6 | import pandas as pd 7 | import numpy as np 8 | import math 9 | from BuildLinearData import * 10 | from Session import * 11 | 12 | def LinearStrategy(key, 13 | data, 14 | coefs, 15 | session = Session.Morning, 16 | open_int = False, 17 | config = None, 18 | functions = None): 19 | 20 | lags = int(config['trading']['lags']) 21 | strategy = config['trading']['strategy'] 22 | threshold = float(config['trading']['threshold']) 23 | TR_COST = float(config['trading']['tradecost']) # 2.5 * 1e-5, 24 | trade_at_mid = True if config['trading']['trade_at_mid'] == 'True' else False 25 | 26 | ## get all the market data (this would be a data-stream in a real-time system) 27 | value = BuildLinearData(data, 28 | session = session, 29 | open_int = open_int, 30 | delay = 0, 31 | lags = lags, 32 | config = config, 33 | functions = functions) 34 | 35 | main_data = value['data'] 36 | n = len(main_data) 37 | 38 | mid_price = value['mid_price'] 39 | spread = value['spread'] 40 | time_secs = value['time_secs'] 41 | ind_open = value['ind_open'] 42 | ind_close = value['ind_close'] 43 | 44 | own = False 45 | pos = 0 46 | strat = [ 0 ] * n 47 | realized_pnl = [ np.nan ] * n 48 | total_trade_pnl = [] 49 | returns = [] 50 | pnl = 0 51 | trade_costs = 0 52 | buy_price = 0 53 | sell_price = 0 54 | entry = 0 55 | trade_volume = 0 56 | trade_records = [] 57 | sharpes = [] 58 | 59 | 60 | # get the vector of bid/ ask prices (this will be scalar in data stream) 61 | ask = mid_price if trade_at_mid else main_data['AskPrice1'] 62 | bid = mid_price if trade_at_mid else main_data['BidPrice1'] 63 | 64 | # Set the x-values to be used in prediction depending on strategy 65 | # these would be scalar in a data stream 66 | VOI = value['VOI'] 67 | OIR = value['OIR'] 68 | MPB = value['MPB'] 69 | 70 | x = [ 1 ] * n 71 | if strategy == 'A': 72 | x = pd.concat([pd.Series(x).rename('const'), VOI], axis = 1) 73 | elif strategy == 'B': 74 | x = pd.concat([pd.Series(x).rename('const'), VOI.div(spread, axis = 0), OIR.div(spread, axis = 0), MPB.iloc[:,0].div(spread, axis = 0).rename('MPB')], axis = 1) 75 | else: 76 | print('Missing Linear Strategy: %s' % strategy) 77 | exit() 78 | 79 | # this is where we assume we get a data stream instead of looping through the dataset 80 | # multiply the coefficients with the factors and check if it's above / below threshold 81 | # and trade if the signal is good 82 | 83 | # in an actual trading system, the decision would be calculated by a strategy engine 84 | # having the real-time data fed into the engine via a data stream 85 | # but in this simulation, we just assume we have the full dataset and the 86 | # strategy engine is the coefficient multiplication on the next line 87 | efpc_vec = x * pd.DataFrame().append([coefs] * n).reset_index(drop = True) 88 | efpc_vec = efpc_vec.sum(axis = 1) 89 | 90 | trade_ind = range(0, n) 91 | for k in trade_ind: 92 | efpc = efpc_vec[k] 93 | ## check if we are within trading hours 94 | if k >= ind_open and k < ind_close and own == False and efpc >= threshold: 95 | ## BUY to OPEN 96 | strat[k] = 1 97 | own = True 98 | pos = 1 99 | buy_price = ask[k] 100 | entry = k 101 | tc = buy_price * TR_COST 102 | trade_costs = trade_costs + tc 103 | trade_volume = trade_volume + 1 104 | trade_records.append((key, main_data['UpdateTime'][k] + '.' + str(main_data['UpdateMillisec'][k]), 'BUY', 1, buy_price, tc, 0)) 105 | elif k >= ind_open and k < ind_close and own == False and efpc <= -threshold: 106 | ## SELL to OPEN 107 | strat[k] = -1 108 | own = True 109 | pos = -1 110 | sell_price = bid[k] 111 | entry = k 112 | tc = sell_price * TR_COST 113 | trade_costs = trade_costs + tc 114 | trade_volume = trade_volume + 1 115 | trade_records.append((key, main_data['UpdateTime'][k] + '.' + str(main_data['UpdateMillisec'][k]), 'SHORT', 1, sell_price, tc, 0)) 116 | elif own == True and pos == 1 and efpc <= -threshold: 117 | ## SELL to CLOSE 118 | strat[k] = -1 119 | own = False 120 | pos = 0 121 | sell_price = bid[k] 122 | tc = tc + sell_price * TR_COST 123 | trade_costs = trade_costs + tc 124 | trade_pnl = sell_price - buy_price - tc 125 | pnl = pnl + trade_pnl 126 | trade_volume = trade_volume + 1 127 | total_trade_pnl.append(trade_pnl) 128 | trade_records.append((key, main_data['UpdateTime'][k] + '.' + str(main_data['UpdateMillisec'][k]), 'SELL', 1, sell_price, tc, trade_pnl)) 129 | 130 | if k >= ind_open and k < ind_close: 131 | ## SELL to OPEN 132 | strat [k] = -2 133 | own = True 134 | pos = -1 135 | sell_price = bid[k] 136 | entry = k 137 | tc = sell_price * TR_COST 138 | trade_costs = trade_costs + tc 139 | trade_volume = trade_volume + 1 140 | trade_records.append((key, main_data['UpdateTime'][k] + '.' + str(main_data['UpdateMillisec'][k]), 'SHORT', 1, sell_price, tc, 0)) 141 | elif own == True and pos == -1 and efpc >= threshold: 142 | ## BUY to CLOSE 143 | strat [k] = 1 144 | own = False 145 | pos = 0 146 | buy_price = ask[k] 147 | tc = tc + buy_price * TR_COST 148 | trade_costs = trade_costs + tc 149 | trade_pnl = sell_price - buy_price - tc 150 | pnl = pnl + trade_pnl 151 | trade_volume = trade_volume + 1 152 | total_trade_pnl.append(trade_pnl) 153 | trade_records.append((key, main_data['UpdateTime'][k] + '.' + str(main_data['UpdateMillisec'][k]), 'COVER', 1, buy_price, tc, trade_pnl)) 154 | 155 | if k >= ind_open and k < ind_close: 156 | ## BUY to OPEN 157 | strat [k] = 2 158 | own = True 159 | pos = 1 160 | buy_price = ask[k] 161 | entry = k 162 | tc = buy_price * TR_COST 163 | trade_costs = trade_costs + tc 164 | trade_volume = trade_volume + 1 165 | trade_records.append((key, main_data['UpdateTime'][k] + '.' + str(main_data['UpdateMillisec'][k]), 'BUY', 1, buy_price, tc, 0)) 166 | 167 | realized_pnl[k] = pnl 168 | 169 | # check if we have a left-over position at end-of-day and close it 170 | if sum(strat) == 1: 171 | if strat[n-1] == 1: 172 | strat[n-1] = 0 173 | trade_volume = trade_volume - 1 174 | else: 175 | strat[n-1] = -1 176 | sell_price = bid[n-1] 177 | tc = tc + sell_price * TR_COST 178 | trade_costs = trade_costs + tc 179 | trade_pnl = sell_price - buy_price - tc 180 | pnl = pnl + trade_pnl 181 | realized_pnl[n-1] = pnl 182 | total_trade_pnl.append(trade_pnl) 183 | trade_volume = trade_volume + 1 184 | trade_records.append((key, main_data['UpdateTime'][n-1] + '.' + str(main_data['UpdateMillisec'][n-1]), 'SELL', 1, sell_price, tc, trade_pnl)) 185 | elif sum(strat) == -1: 186 | if strat[n-1] == -1: 187 | strat[n-1] = 0 188 | trade_volume = trade_volume - 1 189 | else: 190 | strat[n-1] = 1 191 | buy_price = ask[n-1] 192 | tc = tc + buy_price * TR_COST 193 | trade_costs = trade_costs + tc 194 | trade_pnl = (sell_price - buy_price) - tc 195 | pnl = pnl + trade_pnl 196 | realized_pnl[n-1] = pnl 197 | total_trade_pnl.append(trade_pnl) 198 | trade_volume = trade_volume + 1 199 | trade_records.append((key, main_data['UpdateTime'][n-1] + '.' + str(main_data['UpdateMillisec'][n-1]), 'COVER', 1, buy_price, tc, trade_pnl)) 200 | 201 | # return stats 202 | if math.isnan(realized_pnl[0]): 203 | realized_pnl[0] = 0 204 | 205 | realized_pnl = pd.Series(realized_pnl).fillna(method='ffill').tolist() 206 | 207 | value = {} 208 | value['time'] = time_secs 209 | value['pnl'] = realized_pnl 210 | value['strategy'] = strat 211 | value['trade_records'] = trade_records 212 | value['trade_volume'] = trade_volume 213 | value['trade_pnl'] = total_trade_pnl 214 | value['trade_costs'] = trade_costs 215 | return (value) -------------------------------------------------------------------------------- /src/ReadFiles.py: -------------------------------------------------------------------------------- 1 | from os import listdir 2 | from os.path import isfile, join 3 | import pandas as pd 4 | 5 | # function to read in the market data from CSV files 6 | def ReadFiles(path, contract): 7 | files = [f for f in listdir(path) if isfile(join(path, f))] 8 | data = dict() 9 | for f in files: 10 | key = f[:15] 11 | c = f [:2] 12 | if c != contract: 13 | continue 14 | 15 | data[key] = pd.read_csv(join(path, f)) 16 | 17 | return data -------------------------------------------------------------------------------- /src/RunStrategy.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | from collections import namedtuple 3 | from ReadFiles import * 4 | from BuildLinearModel import * 5 | from LinearStrategy import * 6 | import Config 7 | from Session import * 8 | 9 | def run(config_file): 10 | config = Config.read(config_file) 11 | 12 | path = config['global']['datapath'] 13 | contract = config['global']['contract'] 14 | 15 | # one day' data corresponds to a key 16 | data = ReadFiles(path, contract) 17 | 18 | #### AVERAGED LAG LINEAR STRATEGY #### 19 | 20 | ## set trading and model parameters 21 | threshold = float(config['trading']['threshold']) 22 | period = int(config['trading']['period']) 23 | lags = int(config['trading']['lags']) 24 | strategy = config['trading']['strategy'] 25 | night = 'night' in config['global']['sessions'] 26 | fulldaycoefs = True if config['global']['fulldaycoefs'] != 'False' else False 27 | 28 | keys = [] 29 | coefs = [] 30 | params = [] 31 | 32 | ## build the linear models and store their coefficients 33 | for k, v in sorted(data.items()): 34 | # full-day coefficients 35 | if fulldaycoefs: 36 | value = BuildLinearModel(k, 37 | v, 38 | session = Session.FullDay, 39 | delay = period, 40 | lags = lags, 41 | config = config) 42 | model = value['model'] 43 | coefs.append(model.params) 44 | params.append([k, v, Session.FullDay, model.params]) 45 | else: 46 | value = BuildLinearModel(k, 47 | v, 48 | session = Session.Morning, 49 | delay = period, 50 | lags = lags, 51 | config = config) 52 | model = value['model'] 53 | coefs.append(model.params) 54 | params.append([k, v, Session.Morning, model.params]) 55 | 56 | value = BuildLinearModel(k, 57 | v, 58 | session = Session.Afternoon, 59 | delay = period, 60 | lags = lags, 61 | config = config) 62 | model = value['model'] 63 | coefs.append(model.params) 64 | params.append([k, v, Session.Afternoon, model.params]) 65 | 66 | keys.append(k) 67 | 68 | print('Instrument: %s' % k) 69 | for key, val in sorted(model.params.items()): 70 | print("%s: %.10f" % (key, val)) 71 | 72 | return 73 | 74 | StratParam = namedtuple('StratParam', ['key', 'data', 'session', 'coef']) 75 | 76 | strat_params = [] 77 | for idx, val in enumerate(params): 78 | if idx > 0: 79 | strat_params.append(StratParam(val[0], val[1], val[2], coefs[idx-1])) 80 | else: 81 | strat_params.append(StratParam(val[0], val[1], val[2], val[3])) 82 | 83 | coefs = pd.DataFrame(coefs) 84 | 85 | ## set the lagged coefficient weights 86 | coef_weights = [1] 87 | trade_volume = [] 88 | trade_costs = [] 89 | trade_records = [] 90 | 91 | pnl_name = 'pnl-%.1f-%s-%d-F-lag %d' % (threshold, strategy, period, lags) 92 | 93 | pnl_matrix = pd.DataFrame(index = range(0, len(keys)), columns = ['morning', 'afternoon']) 94 | pnl_matrix.iloc[0] = 0 95 | trade_pnl = [] 96 | 97 | ## apply the trading strategy to each trading day using historical linear model coefficients 98 | for idx, val in enumerate(strat_params): 99 | key = val.key 100 | data = val.data 101 | row = 0 102 | col = 0 103 | 104 | if idx > 0: 105 | # coef = 0 106 | # w = coef_weights[:min(len(coef_weights), i)] 107 | # w = w / sum(w) 108 | # for j in range(0, len(w)): 109 | # coef = coef + coefs[i - j, ] * w[j] 110 | coef = val.coef 111 | 112 | if val.session == Session.FullDay: 113 | row = idx 114 | # morning trading using the weighted coefficients from T-1, T-2 ,... 115 | strat = LinearStrategy( 116 | key, 117 | data, 118 | coef, 119 | session = Session.Morning, 120 | config = config) 121 | pnl_matrix.iloc[row, 0] = strat['pnl'][-1] 122 | trade_pnl.append(strat['trade_pnl']) 123 | tv = strat['trade_volume'] 124 | tc = strat['trade_costs'] 125 | trade_records.extend(strat['trade_records']) 126 | 127 | # afternoon trading using the weighted coefficients from T-1, T-2 ,... 128 | strat = LinearStrategy( 129 | key, 130 | data, 131 | coef, 132 | session = Session.Afternoon, 133 | config = config) 134 | pnl_matrix.iloc[row, 1] = strat['pnl'][-1] 135 | trade_pnl.append(strat['trade_pnl']) 136 | tv = tv + strat['trade_volume'] 137 | trade_volume.append(tv) 138 | tc = tc + strat['trade_costs'] 139 | trade_costs.append(tc) 140 | trade_records.extend(strat['trade_records']) 141 | 142 | else: 143 | strat = LinearStrategy( 144 | key, 145 | data, 146 | coef, 147 | session = val.session, 148 | config = config) 149 | row = int(idx / 2) 150 | col = idx % 2 151 | pnl_matrix.iloc[row, col] = strat['pnl'][-1] 152 | trade_pnl.append(strat['trade_pnl']) 153 | tv = strat['trade_volume'] 154 | tc = strat['trade_costs'] 155 | trade_records.extend(strat['trade_records']) 156 | 157 | for idx, key in enumerate(keys): 158 | print('%s %s %d %.1f P&L = %f %f Total = %f' % (key, strategy, period, threshold, pnl_matrix.iloc[idx, 0], pnl_matrix.iloc[idx, 1], pnl_matrix.iloc[0:idx+1, :].values.sum())) 159 | 160 | day_pnl_mat = pnl_matrix.sum(axis = 1) 161 | sharpe_ratio = day_pnl_mat.values.mean() * math.sqrt(len(day_pnl_mat)) / day_pnl_mat.values.std(ddof=1) 162 | 163 | pnl_matrix.insert(0, 'instrument', pd.Series(sorted(data.keys()))) 164 | pnl_matrix.to_csv(pnl_name + '.csv', sep = ',', index=False) 165 | 166 | trade_records = pd.DataFrame(trade_records, columns=['Date', 'Time', 'Action', 'Qty', 'Price', 'TradeCost', 'PnL']) 167 | trade_records.to_csv('trade_records.csv', sep = ',', index=False) 168 | 169 | if __name__ == '__main__': 170 | import argparse 171 | parser = argparse.ArgumentParser(description='Backtest strategy with user configuration.') 172 | parser.add_argument('-c', '--config', help='Configuration.', default='Config.ini') 173 | args = parser.parse_args() 174 | 175 | config_file = args.config 176 | 177 | run(config_file) 178 | -------------------------------------------------------------------------------- /src/Session.py: -------------------------------------------------------------------------------- 1 | from enum import Enum, unique 2 | 3 | @unique 4 | class Session(Enum): 5 | FullDay = 0 6 | Morning = 1 7 | Afternoon = 2 8 | Night = 3 -------------------------------------------------------------------------------- /src/config.ini: -------------------------------------------------------------------------------- 1 | [global] 2 | #datapath = ../Data/ru 3 | #contract = ru 4 | datapath = ../temp 5 | contract = IF 6 | fulldaycoefs = True 7 | sessions = morning, afternoon, night 8 | 9 | [trading] 10 | tradecost = 0.000025 11 | threshold = 0.2 12 | multiplier = 300 13 | period = 20 14 | lags = 5 15 | strategy = A 16 | trade_at_mid = False 17 | 18 | [morning] 19 | start = 9:30:00 20 | open = 9:31:00 21 | close = 11:20:00 22 | end = 11:28:00 23 | 24 | [afternoon] 25 | start = 13:00:00 26 | open = 13:01:00 27 | close = 14:45:00 28 | end = 14:58:00 29 | 30 | [night] 31 | start = 21:00:00 32 | open = 21:01:00 33 | close = 23:20:00 34 | end = 23:28:00 -------------------------------------------------------------------------------- /src/config_rb.ini: -------------------------------------------------------------------------------- 1 | [global] 2 | datapath = ../Data/rb 3 | contract = rb 4 | fulldaycoefs = True 5 | sessions = morning, afternoon, night 6 | 7 | [trading] 8 | tradecost = 0.000025 9 | threshold = 1 10 | multiplier = 10 11 | period = 20 12 | lags = 5 13 | strategy = B 14 | trade_at_mid = False 15 | 16 | [morning] 17 | start = 9:30:00 18 | open = 9:30:00 19 | close = 11:20:00 20 | end = 11:28:00 21 | 22 | [afternoon] 23 | start = 13:00:00 24 | open = 13:01:00 25 | close = 14:45:00 26 | end = 14:58:00 27 | 28 | [night] 29 | start = 21:00:00 30 | open = 21:01:00 31 | close = 23:20:00 32 | end = 23:28:00 -------------------------------------------------------------------------------- /tools/download.py: -------------------------------------------------------------------------------- 1 | import dateutil.parser 2 | import os.path 3 | import datetime, time 4 | import csv 5 | import math 6 | from gmsdk import md 7 | 8 | def download(exchange, instrument, date): 9 | dt = dateutil.parser.parse(date) 10 | 11 | fname = '%s-%4d%02d%02d.csv' % (instrument, dt.year, dt.month, dt.day) 12 | if os.path.isfile(fname): 13 | print('%s already existed!' % fname) 14 | return fname 15 | 16 | print('download ticks \'%s\'...' % fname) 17 | 18 | start = '%4d-%02d-%02d 09:00:00' % (dt.year, dt.month, dt.day) 19 | end = '%4d-%02d-%02d 15:00:00' % (dt.year, dt.month, dt.day) 20 | 21 | md.init('xxx@xxx.com', 'xxx') 22 | 23 | data = md.get_ticks('%s.%s' % (exchange, instrument), start, end) 24 | 25 | if len(data) == 0: 26 | print('download failed!') 27 | return None 28 | else: 29 | print('download %d ticks' % len(data)) 30 | 31 | def secondofday(time): 32 | t = time.split(':') 33 | return int(t[0]) * 3600 + int(t[1]) * 60 + int(t[2]) 34 | 35 | rnd = lambda x : int(x) if math.modf(x)[0] == 0 else round(x, 1) 36 | 37 | ticks = [] 38 | for x in data: 39 | utctime = datetime.datetime.fromtimestamp(x.utc_time) 40 | time = utctime.strftime('%H:%M:%S') 41 | millis = int(int(utctime.strftime('%f')) / 1000) 42 | seconds = secondofday(time) 43 | ticks.append([x.sec_id, time, millis, int(x.cum_volume), rnd(x.cum_amount), x.cum_position, rnd(x.bids[0][0]), x.bids[0][1], rnd(x.asks[0][0]), x.asks[0][1], seconds]) 44 | 45 | with open(fname, 'w') as outfile: 46 | writer = csv.writer(outfile, delimiter=',') 47 | writer.writerow(["InstrumentID", "UpdateTime", "UpdateMillisec", "Volume", "Turnover", "OpenInterest", "BidPrice1", "BidVolume1", "AskPrice1", "AskVolume1", "SecondOfDay"]) 48 | for x in ticks: 49 | writer.writerow(x) 50 | 51 | return fname 52 | 53 | if __name__ == '__main__': 54 | import argparse 55 | parser = argparse.ArgumentParser(description='download ticks from GoldMiner.') 56 | parser.add_argument('-e', '--exchange', help='exchange.', required=True) 57 | parser.add_argument('-i', '--instrument', help='instrument.', required=True) 58 | parser.add_argument('-d', '--date', help='quote date.', required=True) 59 | args = parser.parse_args() 60 | 61 | exchange = args.exchange 62 | instrument = args.instrument 63 | date = args.date 64 | 65 | download(exchange, instrument, date) -------------------------------------------------------------------------------- /tools/plot.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | import matplotlib.pyplot as plt 4 | import statsmodels.api as sm 5 | import seaborn as sns 6 | import download 7 | 8 | def plot_voi(csvfile, delay = 20, lags = 5): 9 | delay = delay 10 | lags = lags 11 | morning = True 12 | data = pd.read_csv(csvfile) 13 | 14 | AM = dict() 15 | AM['start'] = 34200 16 | AM['open'] = 34260 17 | AM['close'] = 40800 18 | AM['end'] = 41280 19 | 20 | PM = dict() 21 | PM['start'] = 46800 22 | PM['open'] = 46860 23 | PM['close'] = 53100 24 | PM['end'] = 53880 25 | 26 | start_time = AM['start'] if morning else PM['start'] # - data start 27 | open_time = AM['open'] if morning else PM['open'] # - trade open 28 | close_time = AM['close'] if morning else PM['close'] # - trade close 29 | end_time = AM['end'] if morning else PM['end'] # - data end 30 | 31 | ind = data[(data['SecondOfDay'] >= start_time) & (data['SecondOfDay'] < end_time)].index.values.astype(int) 32 | main_data = data.loc[ind].reset_index(drop=True) 33 | n = len(main_data.index) 34 | time_secs = main_data['SecondOfDay'] + main_data['UpdateMillisec'] / 1000 35 | ind_open = time_secs[time_secs >= open_time].index.values.astype(int)[0] 36 | ind_close = time_secs[time_secs >= close_time].index.values.astype(int)[0] 37 | 38 | # calculate variables 39 | mid_price = (main_data['BidPrice1'] + main_data['AskPrice1']) / 2 40 | spread = main_data['AskPrice1'] - main_data['BidPrice1'] 41 | 42 | OIR_array = (main_data['BidVolume1'] - main_data['AskVolume1']) / (main_data['BidVolume1'] + main_data['AskVolume1']) 43 | dBid_price = main_data['BidPrice1'].diff().fillna(0) 44 | dAsk_price = main_data['AskPrice1'].diff().fillna(0) 45 | 46 | ## build order imbalance signal according to Spec 47 | df = pd.concat([main_data['BidVolume1'], main_data['BidVolume1'].shift(1).fillna(0), dBid_price], axis=1) 48 | df.columns = ['bv', 'sbv', 'dbp'] 49 | 50 | def _get_bid_cv(bv, sbv, dbp): 51 | if dbp == 0: 52 | return bv - sbv 53 | elif dbp < 0: 54 | return 0 55 | else: 56 | return bv 57 | 58 | df['bid_CV'] = df.apply(lambda row : _get_bid_cv(row['bv'], row['sbv'], row['dbp']), axis=1) 59 | bid_CV = df['bid_CV'] 60 | 61 | df = pd.concat([main_data['AskVolume1'], main_data['AskVolume1'].shift(1).fillna(0), dAsk_price], axis=1) 62 | df.columns = ['av', 'sav', 'dap'] 63 | 64 | def _get_ask_cv(av, sav, dap): 65 | if dap == 0: 66 | return av - sav 67 | elif dap < 0: 68 | return av 69 | else: 70 | return 0 71 | 72 | df['ask_CV'] = df.apply(lambda row : _get_ask_cv(row['av'], row['sav'], row['dap']), axis=1) 73 | ask_CV = df['ask_CV'] 74 | 75 | VOI_array = bid_CV - ask_CV 76 | 77 | k = delay 78 | p = lags 79 | 80 | rolling_mean = mid_price.rolling(center=False, window=k).mean().iloc[k-1:].reset_index(drop=True) 81 | # rolling_mean = mid_price.shift(-k).iloc[k:].reset_index(drop=True) 82 | fpc = rolling_mean - mid_price[:(n-k+1)] 83 | 84 | y = mid_price.shift(-(delay-1)) - mid_price 85 | fpc.shift(-(k-1))[:(n-k+1)] 86 | x = VOI_array.rolling(center=False, window=lags).sum().shift(-(lags-1))[:(n-k+1)] 87 | 88 | model = sm.OLS(fpc, sm.add_constant(x)).fit() 89 | print(model.summary()) 90 | 91 | fig, ax = plt.subplots() 92 | 93 | ax.scatter(x, fpc, marker='.') 94 | fig.suptitle('VOI vs Price change') 95 | 96 | x_pred = np.linspace(x.min(), x.max(), 50) 97 | x_pred2 = sm.add_constant(x_pred) 98 | y_pred = model.predict(x_pred2) 99 | ax.plot(x_pred, y_pred, '-', color='darkorchid', linewidth=2) 100 | 101 | plt.show() 102 | 103 | if __name__ == '__main__': 104 | import argparse 105 | parser = argparse.ArgumentParser(description='download ticks from GoldMiner.') 106 | parser.add_argument('-e', '--exchange', help='exchange.', required=True) 107 | parser.add_argument('-i', '--instrument', help='instrument.', required=True) 108 | parser.add_argument('-d', '--date', help='quote date.', required=True) 109 | args = parser.parse_args() 110 | 111 | exchange = args.exchange 112 | instrument = args.instrument 113 | date = args.date 114 | 115 | csvfile = download.download(exchange, instrument, date) 116 | 117 | if csvfile is not None: 118 | plot_voi(csvfile, 20, 5) --------------------------------------------------------------------------------