├── readme.MD └── src ├── __pycache__ ├── config.cpython-35.pyc ├── dqn_model.cpython-35.pyc ├── schedule.cpython-35.pyc ├── Neural_Net.cpython-35.pyc ├── config_AMZN.cpython-35.pyc ├── config_GOOG.cpython-35.pyc ├── model_base.cpython-35.pyc ├── order_queue.cpython-35.pyc ├── message_queue.cpython-35.pyc ├── replay_buffer.cpython-35.pyc ├── evaluate_policy.cpython-35.pyc └── limit_order_book.cpython-35.pyc ├── schedule.py ├── order_queue.py ├── config.py ├── config_GOOG.py ├── config_AAPL.py ├── config_AMZN.py ├── Neural_Net.py ├── plot_reward.py ├── baseline_market_order.py ├── message_queue.py ├── replay_buffer.py ├── evaluate_policy.py ├── test_dqn.py ├── market_policy.py ├── snl_policy.py ├── tree_policy.py ├── baseline_dp.py ├── test_book_queue.py ├── baseline_SnL.py ├── dqn_model.py ├── model_base.py ├── dp_policy.py └── limit_order_book.py /readme.MD: -------------------------------------------------------------------------------- 1 | # Limit-Order-Book-Reinforcement 2 | -------------------------------------------------------------------------------- /src/__pycache__/config.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jasonzy121/Limit-Order-Book-Reinforcement/HEAD/src/__pycache__/config.cpython-35.pyc -------------------------------------------------------------------------------- /src/__pycache__/dqn_model.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jasonzy121/Limit-Order-Book-Reinforcement/HEAD/src/__pycache__/dqn_model.cpython-35.pyc -------------------------------------------------------------------------------- /src/__pycache__/schedule.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jasonzy121/Limit-Order-Book-Reinforcement/HEAD/src/__pycache__/schedule.cpython-35.pyc -------------------------------------------------------------------------------- /src/__pycache__/Neural_Net.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jasonzy121/Limit-Order-Book-Reinforcement/HEAD/src/__pycache__/Neural_Net.cpython-35.pyc -------------------------------------------------------------------------------- /src/__pycache__/config_AMZN.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jasonzy121/Limit-Order-Book-Reinforcement/HEAD/src/__pycache__/config_AMZN.cpython-35.pyc -------------------------------------------------------------------------------- /src/__pycache__/config_GOOG.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jasonzy121/Limit-Order-Book-Reinforcement/HEAD/src/__pycache__/config_GOOG.cpython-35.pyc -------------------------------------------------------------------------------- /src/__pycache__/model_base.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jasonzy121/Limit-Order-Book-Reinforcement/HEAD/src/__pycache__/model_base.cpython-35.pyc -------------------------------------------------------------------------------- /src/__pycache__/order_queue.cpython-35.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/jasonzy121/Limit-Order-Book-Reinforcement/HEAD/src/__pycache__/order_queue.cpython-35.pyc -------------------------------------------------------------------------------- /src/__pycache__/message_queue.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jasonzy121/Limit-Order-Book-Reinforcement/HEAD/src/__pycache__/message_queue.cpython-35.pyc -------------------------------------------------------------------------------- /src/__pycache__/replay_buffer.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jasonzy121/Limit-Order-Book-Reinforcement/HEAD/src/__pycache__/replay_buffer.cpython-35.pyc -------------------------------------------------------------------------------- /src/__pycache__/evaluate_policy.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jasonzy121/Limit-Order-Book-Reinforcement/HEAD/src/__pycache__/evaluate_policy.cpython-35.pyc -------------------------------------------------------------------------------- /src/__pycache__/limit_order_book.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jasonzy121/Limit-Order-Book-Reinforcement/HEAD/src/__pycache__/limit_order_book.cpython-35.pyc -------------------------------------------------------------------------------- /src/schedule.py: -------------------------------------------------------------------------------- 1 | class LinearSchedule(object): 2 | def __init__(self, eps_begin, eps_end, nsteps): 3 | self._epsilon = eps_begin 4 | self._eps_begin = eps_begin 5 | self._eps_end = eps_end 6 | self._nsteps = nsteps 7 | 8 | def update(self, t): 9 | alpha = 1.0 * t / self._nsteps 10 | self._epsilon = max(alpha*self._eps_end+(1-alpha)*self._eps_begin, self._eps_end) 11 | 12 | def get_epsilon(self): 13 | return self._epsilon -------------------------------------------------------------------------------- /src/order_queue.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | from message_queue import Message_Queue 4 | 5 | class Order_Queue(object): 6 | def __init__(self, path): 7 | self._df = pd.read_csv(path, header=None) 8 | self._row_idx = -1 9 | 10 | def create_orderbook_time(self, time, mq): 11 | mq.jump_to_time(time) 12 | self._row_idx= mq._row_idx 13 | row = self._df.iloc[self._row_idx] 14 | return self._create_orderbook(row) 15 | 16 | def _create_orderbook(self, row): 17 | len_row= int(len(row)/4) 18 | ask= np.array([int(row[4*i]) for i in range(len_row)]) 19 | ask_size= np.array([int(row[4*i+1]) for i in range(len_row)]) 20 | bid = np.array([int(row[4*i+2]) for i in range(len_row)]) 21 | bid_size = np.array([int(row[4*i+3]) for i in range(len_row)]) 22 | orderbook = {'ask':ask, 'ask_size':ask_size, 'bid':bid, 'bid_size':bid_size} 23 | return orderbook -------------------------------------------------------------------------------- /src/config.py: -------------------------------------------------------------------------------- 1 | class Config: 2 | def __init__(self): 3 | self.mode = 'train' 4 | 5 | self.nsteps_train = 1000 6 | self.print_freq = 50 7 | self.target_update_freq = 1000 8 | self.saving_freq = 250 9 | self.simulation_freq = 1000 10 | self.model_output = '../output_GOOG/GOOG' 11 | 12 | 
self.eps_begin = 1.0 13 | self.eps_end = 0.1 14 | self.nsteps = 1000 15 | self.dropout= 0.9 16 | 17 | self.lr_begin = 0.00025 18 | self.lr_end = 0.00005 19 | self.lr_nsteps = self.nsteps_train / 2 20 | 21 | self.gamma = 0.99 22 | self.grad_clip = True 23 | self.clip_val = 10 24 | self.batch_size = 32 25 | 26 | self.order_path = '../datasets/LOBSTER_SampleFile_GOOG_2012-06-21_10/GOOG_2012-06-21_34200000_57600000_orderbook_10.csv' 27 | self.message_path = '../datasets/LOBSTER_SampleFile_GOOG_2012-06-21_10/GOOG_2012-06-21_34200000_57600000_message_10.csv' 28 | self.depth = 3 29 | 30 | self.H = 600 31 | self.T = 20 32 | self.L = 10 33 | self.direction = -1 34 | self.base_point = 100 35 | self.train_start = 34200 36 | self.train_end = 46800 37 | self.test_start= 46800 38 | self.test_end= 57600 39 | self.I = 8000 40 | self.hidden_size= 10 41 | 42 | self.state_shape = [self.depth, 4] 43 | self.state_history = 2 44 | -------------------------------------------------------------------------------- /src/config_GOOG.py: -------------------------------------------------------------------------------- 1 | class Config: 2 | def __init__(self): 3 | self.mode = 'train' 4 | 5 | self.nsteps_train = 1000000 6 | self.print_freq = 50 7 | self.target_update_freq = 1000 8 | self.saving_freq = 25000 9 | self.simulation_freq = 1000 10 | self.model_output = '../output_GOOG_Neural' 11 | 12 | self.eps_begin = 1.0 13 | self.eps_end = 0.1 14 | self.nsteps = 1000 15 | self.dropout= 0.9 16 | 17 | self.lr_begin = 0.00025 18 | self.lr_end = 0.00005 19 | self.lr_nsteps = self.nsteps_train / 2 20 | 21 | self.gamma = 1 22 | self.grad_clip = True 23 | self.clip_val = 10 24 | self.batch_size = 32 25 | 26 | self.order_path = '../datasets/LOBSTER_SampleFile_GOOG_2012-06-21_10/GOOG_2012-06-21_34200000_57600000_orderbook_10.csv' 27 | self.message_path = '../datasets/LOBSTER_SampleFile_GOOG_2012-06-21_10/GOOG_2012-06-21_34200000_57600000_message_10.csv' 28 | self.depth = 30 29 | 30 | self.H = 600 31 | self.T = 20 32 | self.L = 10 33 | self.direction = -1 34 | self.base_point = 100 35 | self.train_start = 34200 36 | self.train_end = 46800 37 | self.test_start= 46800 38 | self.test_end= 57600 39 | self.I = 8000 40 | self.hidden_size= 20 41 | 42 | self.state_shape = [self.depth, 4] 43 | self.state_history = 4 44 | -------------------------------------------------------------------------------- /src/config_AAPL.py: -------------------------------------------------------------------------------- 1 | class Config: 2 | def __init__(self): 3 | self.mode = 'train' 4 | 5 | self.nsteps_train = 100000 6 | self.print_freq = 50 7 | self.target_update_freq = 1000 8 | self.saving_freq = 25000 9 | self.simulation_freq = 1000 10 | self.model_output = '../output_AAPL_linear' 11 | 12 | self.eps_begin = 1.0 13 | self.eps_end = 0.1 14 | self.nsteps = 1000 15 | self.dropout= 0.9 16 | 17 | self.lr_begin = 0.00025 18 | self.lr_end = 0.00005 19 | self.lr_nsteps = self.nsteps_train / 2 20 | 21 | self.gamma = 0.99 22 | self.grad_clip = True 23 | self.clip_val = 10 24 | self.batch_size = 32 25 | 26 | self.order_path = '../datasets/LOBSTER_SampleFile_AAPL_2012-06-21_10/AAPL_2012-06-21_34200000_57600000_orderbook_10.csv' 27 | self.message_path = '../datasets/LOBSTER_SampleFile_AAPL_2012-06-21_10/AAPL_2012-06-21_34200000_57600000_message_10.csv' 28 | self.depth = 30 29 | 30 | self.H = 600 31 | self.T = 20 32 | self.L = 10 33 | self.direction = -1 34 | self.base_point = 100 35 | self.train_start = 34200 36 | self.train_end = 46800 37 | self.test_start= 46800 38 | 
self.test_end= 57600 39 | self.I = 8000 40 | self.hidden_size= 10 41 | 42 | self.state_shape = [self.depth, 4] 43 | self.state_history = 2 44 | -------------------------------------------------------------------------------- /src/config_AMZN.py: -------------------------------------------------------------------------------- 1 | class Config: 2 | def __init__(self): 3 | self.mode = 'train' 4 | 5 | self.nsteps_train = 1000000 6 | self.print_freq = 50 7 | self.target_update_freq = 1000 8 | self.saving_freq = 25000 9 | self.simulation_freq = 1000 10 | self.model_output = '../output_AMZN_linear' 11 | 12 | self.eps_begin = 1.0 13 | self.eps_end = 0.1 14 | self.nsteps = 1000 15 | self.dropout= 0.9 16 | 17 | self.lr_begin = 0.00025 18 | self.lr_end = 0.00005 19 | self.lr_nsteps = self.nsteps_train / 2 20 | 21 | self.gamma = 0.99 22 | self.grad_clip = True 23 | self.clip_val = 10 24 | self.batch_size = 32 25 | 26 | self.order_path = '../datasets/LOBSTER_SampleFile_AMZN_2012-06-21_10/AMZN_2012-06-21_34200000_57600000_orderbook_10.csv' 27 | self.message_path = '../datasets/LOBSTER_SampleFile_AMZN_2012-06-21_10/AMZN_2012-06-21_34200000_57600000_message_10.csv' 28 | self.depth = 30 29 | 30 | self.H = 600 31 | self.T = 20 32 | self.L = 10 33 | self.direction = -1 34 | self.base_point = 100 35 | self.train_start = 34200 36 | self.train_end = 46800 37 | self.test_start= 46800 38 | self.test_end= 57600 39 | self.I = 4000 40 | self.hidden_size= 10 41 | 42 | self.state_shape = [self.depth, 4] 43 | self.state_history = 2 44 | -------------------------------------------------------------------------------- /src/Neural_Net.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import numpy as np 3 | import tensorflow as tf 4 | import tensorflow.contrib.layers as layers 5 | import os 6 | 7 | from config_GOOG import Config 8 | from model_base import model 9 | from dqn_model import DQN 10 | 11 | class Neural_DQN(DQN): 12 | def get_q_values_op(self, state, scope, reuse= False): 13 | num_actions = self._config.L + 1 # 1 for market order 14 | state_book, state_it = state 15 | 16 | with tf.variable_scope(scope, reuse=reuse): 17 | conv_1 = layers.conv2d(inputs=state_book, num_outputs=4, kernel_size=[3,3], stride=[1,1], activation_fn=tf.nn.relu, padding='same') 18 | conv_2 = layers.conv2d(inputs=conv_1, num_outputs=4, kernel_size=[3,3], stride=[1,1], activation_fn=tf.nn.relu, padding='same') 19 | conv_2_flattened = layers.flatten(inputs=conv_2) 20 | state_out = tf.concat([conv_2_flattened, state_it], axis=1) 21 | state_out= tf.nn.dropout(state_out, self._config.dropout) 22 | state_out= layers.fully_connected(state_out, num_outputs= self._config.hidden_size) 23 | out = layers.fully_connected(state_out, num_actions, activation_fn=None) 24 | return out 25 | 26 | if __name__ == '__main__': 27 | config = Config() 28 | model = Neural_DQN(config) 29 | model.initialize() 30 | model.train() 31 | 32 | -------------------------------------------------------------------------------- /src/plot_reward.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import numpy as np 3 | import pandas as pd 4 | import matplotlib.pyplot as plt 5 | 6 | parser = argparse.ArgumentParser(description='Plot Reward') 7 | parser.add_argument('--file', default= '../reward.csv', help='File Path', type=str) 8 | parser.add_argument('--ylabel', default= 'Raw Reward', help='Raw Reward/Relative Reward', type=str) 9 | args = parser.parse_args() 10 | 11 | def 
plot(reward, 12 | ticker=['AMZN','AAPL','GOOG','INTC','MSFT'], 13 | algo=['Market Order','SnL','Tree Search','Nevmyvaka'], 14 | ylabel='Raw Reward'): 15 | ### reward is a N * 4 array 16 | N_ticker, N_algo = reward.shape 17 | ind = np.arange(N_ticker) 18 | width = 1.0 / (N_algo + 1) 19 | fig, ax = plt.subplots() 20 | 21 | rects = dict() 22 | for i in range(N_algo): 23 | c = (i + 0.5) / N_algo 24 | rects[i] = ax.bar(ind+width*i, reward[:,i], width, color=(c,c,1-c)) 25 | ax.set_ylabel(ylabel) 26 | ax.set_xticks(ind+width*(N_algo-1)/2) 27 | ax.set_xticklabels(ticker) 28 | ax.legend((rects[i] for i in range(N_algo)), algo) 29 | plt.show() 30 | 31 | def plot_file(path, ylabel): 32 | df = pd.read_csv(path) 33 | header = df.axes[1] 34 | algo = list(header[1:]) 35 | ticker = list(df[header[0]]) 36 | reward = df[header[1:]].values 37 | plot(reward, ticker, algo, ylabel) 38 | 39 | if __name__ == '__main__': 40 | plot_file(args.file, args.ylabel) -------------------------------------------------------------------------------- /src/baseline_market_order.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | from limit_order_book import Limit_Order_book 4 | from message_queue import Message_Queue 5 | 6 | parser = argparse.ArgumentParser(description='Dynamic Programming Solution') 7 | parser.add_argument('--file_msg', help='Message File Path') 8 | parser.add_argument('--base_size', default=1, help='Base Order Size', type=int) 9 | parser.add_argument('--order_size', default=12, help='Order Size', type=int) 10 | parser.add_argument('--order_direction', default=1, help='Buy 1, Sell -1', type=int) 11 | parser.add_argument('--start', default=34200, help='Start Time', type=float) 12 | parser.add_argument('--end', default=34500, help='End Time', type=float) 13 | parser.add_argument('--adj_freq', default=100, help='Adjustment Frequency', type=float) 14 | parser.add_argument('--tol', default=1e-8, help='Remaining Time To Submit Market Order', type=float) 15 | args = parser.parse_args() 16 | 17 | mq = Message_Queue(args.file_msg) 18 | lob = Limit_Order_book(own_amount_to_trade=args.order_size, 19 | own_init_price=-args.order_direction*Limit_Order_book._DUMMY_VARIABLE, 20 | own_trade_type=args.order_direction) 21 | for idx, message in mq.pop_to_next_time(args.start): 22 | lob.process(**message) 23 | 24 | lob.update_own_order(args.order_direction*Limit_Order_book._DUMMY_VARIABLE) 25 | 26 | current_time = args.start 27 | while lob.own_amount_to_trade > 0 and not mq.finished(): 28 | current_time += args.adj_freq 29 | for idx, message in mq.pop_to_next_time(current_time): 30 | lob.process(**message) 31 | if lob.own_amount_to_trade == 0: 32 | break 33 | 34 | if lob.own_amount_to_trade > 0: 35 | reward = Limit_Order_book._DUMMY_VARIABLE * args.order_direction 36 | else: 37 | reward = lob.own_reward 38 | 39 | print(reward) -------------------------------------------------------------------------------- /src/message_queue.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | 4 | class Message_Queue(object): 5 | def __init__(self, path): 6 | self._df = pd.read_csv(path, header=None) 7 | self._time = 34200.0 8 | self._row_idx = -1 9 | self._message_count = self._df.shape[0] 10 | self._idx2header = ['Time', 'Type', 'OrderID', 'Size', 'Price', 'Direction'] 11 | self._header2idx = {tmp:idx for idx, tmp in enumerate(self._idx2header)} 12 | 13 | def iterate_queue(self): 14 | for idx, row in 
self._df.iloc[(self._row_idx+1):].iterrows(): 15 | message = self._create_message(row) 16 | self._row_idx += 1 17 | self._time = row[0] 18 | yield (idx, message) 19 | 20 | def pop_to_next_time(self, time): 21 | while self._row_idx + 1 < self._message_count: 22 | row = self._df.iloc[self._row_idx+1] 23 | if row[0] <= time: 24 | self._row_idx += 1 25 | message = self._create_message(row) 26 | yield (self._row_idx, message) 27 | else: 28 | break 29 | self._time = time 30 | 31 | def finished(self): 32 | return (self._row_idx+1==self._message_count) 33 | 34 | def _create_message(self, row): 35 | order_type = int(row[1]) 36 | order_size = int(row[3]) 37 | order_price = int(row[4]) 38 | order_direction = int(row[5]) 39 | message = {'type':order_type, 'size':order_size, 'price':order_price, 'direction':order_direction} 40 | return message 41 | 42 | def reset(self): 43 | self._time = 34200.0 44 | self._row_idx = -1 45 | 46 | def jump_to_time(self, time): 47 | if time >= self._df.iloc[self._message_count-1][0]: 48 | self._time = time 49 | self._row_idx = self._message_count-1 50 | elif time < self._df.iloc[0][0]: 51 | self._time = 34200.0 52 | self._row_idx = -1 53 | else: 54 | idx_start = 0 55 | idx_end = 0 56 | d_idx = 1 57 | while time >= self._df.iloc[idx_end][0]: 58 | idx_start = idx_end 59 | idx_end = min(idx_end+d_idx, self._message_count-1) 60 | d_idx *= 2 61 | while idx_end - idx_start > 1: 62 | idx_mid = idx_start + (idx_end - idx_start) // 2 63 | if time >= self._df.iloc[idx_mid][0]: 64 | idx_start = idx_mid 65 | else: 66 | idx_end = idx_mid 67 | self._row_idx = idx_start 68 | self._time = time 69 | 70 | -------------------------------------------------------------------------------- /src/replay_buffer.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | class ReplayBuffer(object): 4 | def __init__(self, size, config): 5 | self.config = config 6 | self.size = size 7 | self.last_idx = -1 8 | self.history_size = 0 9 | 10 | self.states_stack = np.empty([self.size]+list(self.config.state_shape)+[self.config.state_history+1], dtype=np.float32) 11 | self.its = np.empty([self.size, 2, 2], dtype=np.float32) 12 | self.actions = np.empty([self.size], dtype=np.int32) 13 | self.rewards = np.empty([self.size], dtype=np.float32) 14 | self.done_mask = np.empty([self.size], dtype=np.bool) 15 | 16 | def process_rewards(self, rewards): 17 | rewards_processed = [] 18 | for reward in rewards: 19 | if reward == -9999999999: 20 | reward = -1000.0 21 | else: 22 | reward = reward * 1.e-10 23 | rewards_processed.append(reward) 24 | return rewards_processed 25 | 26 | def store(self, states, actions, rewards, done_mask): 27 | rewards = self.process_rewards(rewards) 28 | for idx in range(len(actions)): 29 | self.last_idx += 1 30 | if self.last_idx == self.size: 31 | self.last_idx = 0 32 | self.actions[self.last_idx] = actions[idx] 33 | self.done_mask[self.last_idx] = done_mask[idx] 34 | self.rewards[self.last_idx] = rewards[idx] 35 | tmp = states[max(idx-self.config.state_history+1,0):(idx+2)] 36 | tmp_states, tmp_its = zip(*tmp) 37 | tmp_state = np.concatenate([np.expand_dims(state, -1) for state in tmp_states], axis=-1) 38 | tmp_it = np.concatenate([np.expand_dims(it, -1) for it in tmp_its[-2:]], axis=-1) 39 | self.states_stack[self.last_idx] = np.pad(tmp_state, ((0,0),(0,0),(self.config.state_history+1-tmp_state.shape[-1],0)), 'constant', constant_values=0) 40 | self.its[self.last_idx] = tmp_it 41 | self.history_size += 1 42 | 43 | def sample(self, 
batch_size): 44 | idx = np.arange(min(self.size, self.history_size)) 45 | np.random.shuffle(idx) 46 | idx_choice = idx[:batch_size] 47 | 48 | states = self.states_stack[idx_choice][:,:,:,:-1] 49 | states_p = self.states_stack[idx_choice][:,:,:,1:] 50 | it = self.its[idx_choice][:,:,0] 51 | it_p = self.its[idx_choice][:,:,1] 52 | actions = self.actions[idx_choice] 53 | rewards = self.rewards[idx_choice] 54 | done_mask = self.done_mask[idx_choice] 55 | 56 | return (states, it, states_p, it_p, actions, rewards, done_mask) -------------------------------------------------------------------------------- /src/evaluate_policy.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import copy 3 | 4 | from limit_order_book import Limit_Order_book 5 | from message_queue import Message_Queue 6 | from order_queue import Order_Queue 7 | 8 | def evaluate_policy(test_start, test_end, order_direction, V, H, T, oq, mq, action): 9 | rewards = [] 10 | episodes, real_times = load_episodes(test_start, test_end, order_direction, H, oq, mq) 11 | for k in range(len(episodes)): 12 | episode = episodes[k] 13 | real_time = real_times[k] 14 | rewards.append(simulate_reward(episode, V, T, H, action, real_time, mq)) 15 | return rewards 16 | 17 | 18 | def load_episodes(test_start, test_end, order_direction, H, oq, mq): 19 | lob_data, time = read_order_book(test_start, test_end, H, oq, mq) 20 | lob = [Limit_Order_book(**lob_data, own_amount_to_trade = 0, 21 | own_init_price=-order_direction*Limit_Order_book._DUMMY_VARIABLE, 22 | own_trade_type=order_direction) for lob_data in lob_data] 23 | return lob, time 24 | 25 | 26 | def read_order_book(test_start, test_end, H, oq, mq): 27 | """ 28 | read the initial limit order book states from the file 29 | """ 30 | output = [] 31 | time_output = [] 32 | real_time = test_start 33 | while real_time < test_end: 34 | mq.reset() 35 | output.append(oq.create_orderbook_time(real_time, mq)) 36 | time_output.append(real_time) 37 | real_time = real_time + H 38 | return output, time_output 39 | 40 | 41 | def simulate_reward(lob, amount, T, H, action, time, mq): 42 | """ 43 | simulate to next state, we need to calculate the remaining inventory given the current i and price a, and the immediate reward 44 | (revenue from the executed orders) 45 | """ 46 | mq.reset() 47 | mq.jump_to_time(time) 48 | 49 | lob_copy = copy.deepcopy(lob) 50 | 51 | for t in range(time, time + H, H//T): 52 | price = action(time + H - t, amount, lob_copy) 53 | lob_copy.update_own_order(price, amount) 54 | 55 | for idx, message in mq.pop_to_next_time(t + H/T): 56 | lob_copy.process(**message) 57 | if lob_copy.own_amount_to_trade == 0: 58 | return lob_copy.own_reward 59 | 60 | amount = lob_copy.own_amount_to_trade 61 | 62 | lob_copy.update_own_order(lob_copy.own_trade_type*Limit_Order_book._DUMMY_VARIABLE) 63 | if lob_copy.own_amount_to_trade > 0 and lob_copy.own_trade_type == 1: 64 | return -Limit_Order_book._DUMMY_VARIABLE 65 | else: 66 | return lob_copy.own_reward 67 | 68 | 69 | -------------------------------------------------------------------------------- /src/test_dqn.py: -------------------------------------------------------------------------------- 1 | import time 2 | import sys 3 | import numpy as np 4 | import sys 5 | import numpy as np 6 | import tensorflow as tf 7 | import tensorflow.contrib.layers as layers 8 | import os 9 | 10 | 11 | from config_GOOG import Config 12 | from replay_buffer import ReplayBuffer 13 | from schedule import LinearSchedule 
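# Project modules imported below: the market-replay environment (Message_Queue, Order_Queue,
# Limit_Order_book) and the two models this script evaluates (DQN, Neural_DQN).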
14 | from message_queue import Message_Queue 15 | from order_queue import Order_Queue 16 | from limit_order_book import Limit_Order_book 17 | from message_queue import Message_Queue 18 | from order_queue import Order_Queue 19 | from dqn_model import DQN 20 | from Neural_Net import Neural_DQN 21 | 22 | 23 | def evaluate_policy(m, oq, mq): 24 | rewards = [] 25 | test_start, test_end, order_direction, V, H, T, depth= m._config.test_start, m._config.test_end, m._config.direction,\ 26 | m._config.I, m._config.H, m._config.T, m._config.depth 27 | episodes, real_times = load_episodes(test_start, test_end, order_direction, H, oq, mq) 28 | for k in range(len(episodes)): 29 | print ('I am at the %d episode'%(k)) 30 | real_time = real_times[k] 31 | states, reward, actions, done_mask = m.simulate_an_episode(V, T, 32 | H, real_time, order_direction, 33 | m.get_best_action_fn(), depth) 34 | print (reward) 35 | rewards.append(np.sum(reward)) 36 | # Only append the final reward 37 | return rewards 38 | 39 | 40 | def load_episodes(test_start, test_end, order_direction, H, oq, mq): 41 | lob_data, time = read_order_book(test_start, test_end, H, oq, mq) 42 | lob = [Limit_Order_book(**lob_data, own_amount_to_trade = 0, 43 | own_init_price=-order_direction*Limit_Order_book._DUMMY_VARIABLE, 44 | own_trade_type=order_direction) for lob_data in lob_data] 45 | return lob, time 46 | 47 | 48 | def read_order_book(test_start, test_end, H, oq, mq): 49 | """ 50 | read the initial limit order book states from the file 51 | """ 52 | output = [] 53 | time_output = [] 54 | real_time = test_start 55 | while real_time < test_end: 56 | mq.reset() 57 | output.append(oq.create_orderbook_time(real_time, mq)) 58 | time_output.append(real_time) 59 | real_time = real_time + H 60 | return output, time_output 61 | 62 | def main(): 63 | config = Config() 64 | config.mode = 'test' 65 | config.dropout = 1.0 66 | model = Neural_DQN(config) 67 | #model = DQN(config) 68 | model.initialize() 69 | oq = Order_Queue(config.order_path) 70 | mq = Message_Queue(config.message_path) 71 | rewards= evaluate_policy(model, oq, mq) 72 | print(np.mean(rewards)) 73 | 74 | if __name__ == '__main__': 75 | main() 76 | -------------------------------------------------------------------------------- /src/market_policy.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import numpy as np 3 | import copy 4 | 5 | from limit_order_book import Limit_Order_book 6 | from message_queue import Message_Queue 7 | from order_queue import Order_Queue 8 | from evaluate_policy import evaluate_policy 9 | 10 | parser = argparse.ArgumentParser(description='Dynamic Programming Algorithm') 11 | parser.add_argument('--tic', default= 'GOOG', help='Company Ticker') 12 | parser.add_argument('--base_point', default=100, help='Base Point', type=int) 13 | parser.add_argument('--order_direction', default=-1, help='Buy 1, Sell -1', type=int) 14 | parser.add_argument('--train_start', default=34200, help='Train Start Time', type=float) 15 | parser.add_argument('--train_end', default=46800, help='Train End Time', type=float) 16 | parser.add_argument('--test_start', default=46800, help='Test End Time', type=float) 17 | parser.add_argument('--test_end', default=57600, help='Test End Time', type=float) 18 | parser.add_argument('--H', default=600, help='Horizon', type=int) 19 | parser.add_argument('--T', default=20, help='Time steps', type=int) 20 | parser.add_argument('--V', default=100, help='Amount to trade', type=int) 21 | 
parser.add_argument('--I', default=10, help='Inventory Length', type=int) 22 | parser.add_argument('--factor', default=0.3, help='Market order factor', type=float) 23 | parser.add_argument('--mode', default='train', help='Mode: train or test') 24 | args = parser.parse_args() 25 | 26 | file_msg = '../datasets/%s_2012-06-21_34200000_57600000_message_10.csv' % (args.tic) 27 | file_order = '../datasets/%s_2012-06-21_34200000_57600000_orderbook_10.csv' % (args.tic) 28 | 29 | def action_wrapper(order_direction, factor): 30 | def action(remaining_time, amount, lob_copy): 31 | if remaining_time == args.H: 32 | current_mid_price = lob_copy.bid[0] + (lob_copy.ask[0] - lob_copy.bid[0]) // 2 33 | if order_direction == 1: 34 | return int(current_mid_price * (1 + factor)) 35 | else: 36 | return int(current_mid_price * (1 - factor)) 37 | else: 38 | return lob_copy.own_price 39 | return action 40 | 41 | oq = Order_Queue(file_order) 42 | mq = Message_Queue(file_msg) 43 | 44 | if args.mode == 'train': 45 | action_func = action_wrapper(args.order_direction, args.factor) 46 | rewards = evaluate_policy(args.train_start, args.train_end, args.order_direction, args.V, args.H, args.T, oq, mq, action_func) 47 | print(rewards) 48 | print(np.mean(rewards)) 49 | elif args.mode == 'test': 50 | action_func = action_wrapper(args.order_direction, args.factor) 51 | rewards = evaluate_policy(args.test_start, args.test_end, args.order_direction, args.V, args.H, args.T, oq, mq, action_func) 52 | print(rewards) 53 | print(np.mean(rewards)) 54 | 55 | 56 | 57 | 58 | 59 | -------------------------------------------------------------------------------- /src/snl_policy.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import numpy as np 3 | import copy 4 | 5 | from limit_order_book import Limit_Order_book 6 | from message_queue import Message_Queue 7 | from order_queue import Order_Queue 8 | from evaluate_policy import evaluate_policy 9 | 10 | parser = argparse.ArgumentParser(description='Dynamic Programming Algorithm') 11 | parser.add_argument('--tic', default= 'GOOG', help='Company Ticker') 12 | parser.add_argument('--base_point', default=100, help='Base Point', type=int) 13 | parser.add_argument('--order_direction', default=-1, help='Buy 1, Sell -1', type=int) 14 | parser.add_argument('--train_start', default=34200, help='Train Start Time', type=float) 15 | parser.add_argument('--train_end', default=46800, help='Train End Time', type=float) 16 | parser.add_argument('--test_start', default=46800, help='Test End Time', type=float) 17 | parser.add_argument('--test_end', default=57600, help='Test End Time', type=float) 18 | parser.add_argument('--H', default=600, help='Horizon', type=int) 19 | parser.add_argument('--T', default=20, help='Time steps', type=int) 20 | parser.add_argument('--V', default=100, help='Amount to trade', type=int) 21 | parser.add_argument('--I', default=10, help='Inventory Length', type=int) 22 | parser.add_argument('--mode', default='train', help='Mode: train or test') 23 | parser.add_argument('--num', default= 10, help= 'The number of base points to go', type= int) 24 | parser.add_argument('--diff', default= 0, help= 'The number of base points to go beyond midpoint', type= int) 25 | args = parser.parse_args() 26 | 27 | file_msg = '../datasets/%s_2012-06-21_34200000_57600000_message_10.csv' % (args.tic) 28 | file_order = '../datasets/%s_2012-06-21_34200000_57600000_orderbook_10.csv' % (args.tic) 29 | 30 | def action_wrapper(diff): 31 | def 
action(remaining_time, amount, lob_copy): 32 | if remaining_time == args.H: 33 | current_mid_price = lob_copy.bid[0] + (lob_copy.ask[0] - lob_copy.bid[0]) // 2 34 | return max(current_mid_price + diff, 0) 35 | else: 36 | return lob_copy.own_price 37 | return action 38 | 39 | def train(train_start, train_end, order_direction, V, H, oq, mq): 40 | rewards = [] 41 | for i in range(-args.num, args.num): 42 | print(i) 43 | action_func = action_wrapper(i * args.base_point) 44 | rewards.append(np.mean(evaluate_policy(train_start, train_end, args.order_direction, V, H, args.T, oq, mq, action_func))) 45 | print(rewards) 46 | 47 | 48 | oq = Order_Queue(file_order) 49 | mq = Message_Queue(file_msg) 50 | 51 | if args.mode == 'train': 52 | train(args.train_start, args.train_end, args.order_direction, args.V, args.H, oq, mq) 53 | elif args.mode == 'test': 54 | diff = args.diff * args.base_point 55 | action_func = action_wrapper(diff) 56 | rewards = evaluate_policy(args.test_start, args.test_end, args.order_direction, args.V, args.H, args.T, oq, mq, action_func) 57 | print(rewards) 58 | print(np.mean(rewards)) 59 | 60 | 61 | 62 | 63 | 64 | -------------------------------------------------------------------------------- /src/tree_policy.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import numpy as np 3 | import copy 4 | 5 | from limit_order_book import Limit_Order_book 6 | from message_queue import Message_Queue 7 | from order_queue import Order_Queue 8 | from evaluate_policy import evaluate_policy 9 | 10 | parser = argparse.ArgumentParser(description='Dynamic Programming Algorithm') 11 | parser.add_argument('--tic', default= 'GOOG', help='Company Ticker') 12 | parser.add_argument('--base_point', default=100, help='Base Point', type=int) 13 | parser.add_argument('--order_direction', default=-1, help='Buy 1, Sell -1', type=int) 14 | parser.add_argument('--train_start', default=34200, help='Train Start Time', type=float) 15 | parser.add_argument('--train_end', default=46800, help='Train End Time', type=float) 16 | parser.add_argument('--test_start', default=46800, help='Test End Time', type=float) 17 | parser.add_argument('--test_end', default=57600, help='Test End Time', type=float) 18 | parser.add_argument('--H', default=600, help='Horizon', type=int) 19 | parser.add_argument('--T', default=20, help='Time steps', type=int) 20 | parser.add_argument('--V', default=100, help='Amount to trade', type=int) 21 | parser.add_argument('--I', default=10, help='Inventory Length', type=int) 22 | parser.add_argument('--mode', default='train', help='Mode: train or test') 23 | parser.add_argument('--num', default= 10, help= 'The number of base points to go', type= int) 24 | parser.add_argument('--diff', default= 0, help= 'The number of base points to go beyond midpoint', type= int) 25 | args = parser.parse_args() 26 | 27 | file_msg = '../datasets/%s_2012-06-21_34200000_57600000_message_10.csv' % (args.tic) 28 | file_order = '../datasets/%s_2012-06-21_34200000_57600000_orderbook_10.csv' % (args.tic) 29 | 30 | def action_wrapper(diff): 31 | def action(remaining_time, amount, lob_copy): 32 | if remaining_time == args.H: 33 | current_mid_price = lob_copy.bid[0] + (lob_copy.ask[0] - lob_copy.bid[0]) // 2 34 | return max(current_mid_price + diff, 0) 35 | else: 36 | return lob_copy.own_price 37 | return action 38 | 39 | def train(start, end, order_direction, V, k, H, oq, mq, action): 40 | if time==(start+H): # This code force that (args.end-args.start) is a multiple 
of args.tol 41 | if lob.own_amount_to_trade == 0: 42 | return lob.own_reward 43 | else: 44 | lob.update_own_order(args.order_direction*Limit_Order_book._DUMMY_VARIABLE) 45 | # for idx, message in mq.pop_to_next_time(args.end): 46 | # lob.process(**message) 47 | # if lob.own_amount_to_trade == 0: 48 | # break 49 | return lob.own_reward 50 | else: 51 | current_mid_price = lob.bid[0] + (lob.ask[0] - lob.bid[0]) // 2 52 | init_price = np.arange(current_mid_price-args.num*args.base_point, current_mid_price+args.num*args.base_point, args.base_point) 53 | init_price = init_price[init_price > 0] 54 | max_reward= -99999999.0 55 | for i in range(len(init_price)): 56 | # print ('At least this works') 57 | lob_copy = copy.deepcopy(lob) 58 | lob_copy.update_own_order(init_price[i]) 59 | mq_copy = copy.deepcopy(mq) 60 | for idx, message in mq_copy.pop_to_next_time(time+args.tol): 61 | lob_copy.process(**message) 62 | if lob_copy.own_amount_to_trade == 0: 63 | max_reward= max(max_reward, lob_copy.own_reward) 64 | else: 65 | max_reward= max(max_reward, optimal(time+args.tol, lob_copy, mq_copy)) 66 | return max_reward 67 | 68 | 69 | 70 | oq = Order_Queue(file_order) 71 | mq = Message_Queue(file_msg) 72 | 73 | if args.mode == 'train': 74 | train(args.train_start, args.train_end, args.order_direction, args.V, args.H, oq, mq) 75 | elif args.mode == 'test': 76 | diff = args.diff * args.base_point 77 | action_func = action_wrapper(diff) 78 | rewards = evaluate_policy(args.test_start, args.test_end, args.order_direction, args.V, args.H, args.T, oq, mq, action_func) 79 | print(rewards) 80 | print(np.mean(rewards)) -------------------------------------------------------------------------------- /src/baseline_dp.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import numpy as np 3 | import copy 4 | 5 | from limit_order_book import Limit_Order_book 6 | from message_queue import Message_Queue 7 | from order_queue import Order_Queue 8 | # from evaluate_policy import load_episodes, read_order_book 9 | 10 | parser = argparse.ArgumentParser(description='Dynamic Programming Solution') 11 | parser.add_argument('--tic', default= 'GOOG', help='Company Ticker') 12 | parser.add_argument('--order_direction', default=-1, help='Buy 1, Sell -1', type=int) 13 | parser.add_argument('--test_start', default=46800, help='Test End Time', type=float) 14 | parser.add_argument('--test_end', default=57600, help='Test End Time', type=float) 15 | parser.add_argument('--base_point', default=100, help='Base Point', type=int) 16 | parser.add_argument('--num', default= 3, help= 'The number of base points to go', type= int) 17 | parser.add_argument('--H', default=600, help='Horizon', type=int) 18 | parser.add_argument('--T', default=3, help='Time steps', type=int) 19 | parser.add_argument('--V', default=8000, help='Amount to trade', type=int) 20 | args = parser.parse_args() 21 | 22 | file_msg = '../datasets/%s_2012-06-21_34200000_57600000_message_10.csv' % (args.tic) 23 | file_order = '../datasets/%s_2012-06-21_34200000_57600000_orderbook_10.csv' % (args.tic) 24 | 25 | def optimal(time,start, H, lob, mq, T, current_mid_price, V): 26 | # step is how much we move at each time. 27 | # H is the number of inventory to sell. 28 | # I is how many copies of inventory. 
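# What the recursion below does: at each decision point it tries every candidate limit price
# spaced `base_point` apart within `num` ticks of the episode's initial mid price, replays the
# message stream for one H/T interval on a deep copy of the book, recurses on the remaining
# inventory, and keeps the best total reward found. Once `time` reaches `start + H`, any
# unfilled amount is crossed as a market order.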
29 | if time==(start+H): # This code force that (args.end-args.start) is a multiple of args.tol 30 | if lob.own_amount_to_trade == 0: 31 | return lob.own_reward 32 | else: 33 | lob.update_own_order(args.order_direction*Limit_Order_book._DUMMY_VARIABLE) 34 | if lob.own_amount_to_trade > 0 and lob.own_trade_type == 1: 35 | return -Limit_Order_book._DUMMY_VARIABLE 36 | else: 37 | return lob.own_reward 38 | else: 39 | # current_mid_price = lob.bid[0] + (lob.ask[0] - lob.bid[0]) // 2 40 | init_price = np.arange(current_mid_price-args.num*args.base_point, current_mid_price+args.num*args.base_point, args.base_point) 41 | init_price = init_price[init_price > 0] 42 | max_reward= -1.0*float('inf') 43 | for i in range(len(init_price)): 44 | # print ('At least this works') 45 | lob_copy = copy.deepcopy(lob) 46 | # print (int(init_price[i])) 47 | lob_copy.update_own_order(int(init_price[i]), V) 48 | mq.reset() 49 | mq.jump_to_time(time) 50 | for idx, message in mq.pop_to_next_time(time+H/T): 51 | lob_copy.process(**message) 52 | # print (lob_copy.own_reward) 53 | if lob_copy.own_amount_to_trade == 0: 54 | max_reward= max(max_reward,lob_copy.own_reward) 55 | return max_reward 56 | else: 57 | max_reward= max(max_reward,lob_copy.own_reward+optimal(time+H//T,start,H,lob_copy,mq,T, current_mid_price, lob_copy.own_amount_to_trade)) 58 | # for i in range(len(init_price)): 59 | # for j in range(I): 60 | # # print ('At least this works') 61 | # lob_copy = copy.deepcopy(lob) 62 | # lob_copy.update_own_order(init_price[i], V/I*j) 63 | # mq.reset() 64 | # mq.jump_to_time(time) 65 | # remaining= I-j+int(lob_copy.own_amount_to_trade/V*I)-1 66 | # for idx, message in mq.pop_to_next_time(time+H/T): 67 | # lob_copy.process(**message) 68 | # print (lob_copy.own_reward) 69 | # if lob_copy.own_amount_to_trade == 0: 70 | # max_reward= max(max_reward,lob_copy.own_reward+optimal(time+H//T,start,H,lob_copy,mq,T,remaining,I)) 71 | # return max_reward 72 | # else: 73 | # max_reward= max(max_reward,lob_copy.own_reward+optimal(time+H//T,start,H,lob_copy,mq,T,remaining,I)) 74 | return max_reward 75 | oq = Order_Queue(file_order) 76 | mq = Message_Queue(file_msg) 77 | def load_episodes(test_start, test_end, order_direction, H, oq, mq): 78 | lob_data, time = read_order_book(test_start, test_end, H, oq, mq) 79 | lob = [Limit_Order_book(**lob_data, own_amount_to_trade = 0, 80 | own_init_price=-order_direction*Limit_Order_book._DUMMY_VARIABLE, 81 | own_trade_type=order_direction) for lob_data in lob_data] 82 | return lob, time 83 | 84 | 85 | def read_order_book(test_start, test_end, H, oq, mq): 86 | """ 87 | read the initial limit order book states from the file 88 | """ 89 | output = [] 90 | time_output = [] 91 | real_time = test_start 92 | while real_time < test_end: 93 | mq.reset() 94 | output.append(oq.create_orderbook_time(real_time, mq)) 95 | time_output.append(real_time) 96 | real_time = real_time + H 97 | return output, time_output 98 | 99 | episodes, real_times = load_episodes(args.test_start, args.test_end, args.order_direction, args.H, oq, mq) 100 | rewards= [] 101 | for k in range(len(episodes)): 102 | episode = episodes[k] 103 | # episode.own_amount_to_trade= args.V 104 | # print (episode.own_amount_to_trade) 105 | current_mid_price = episode.bid[0] + (episode.ask[0] - episode.bid[0]) // 2 106 | real_time = real_times[k] 107 | rewards.append(optimal(real_time,real_time, args.H,episode, mq, args.T, current_mid_price, args.V)) 108 | print (np.mean(rewards)) 
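# Example invocation (a sketch: the values shown simply restate the argparse defaults above,
# and the LOBSTER message/orderbook CSVs are assumed to exist at the ../datasets/ paths built
# from --tic):
#   python baseline_dp.py --tic GOOG --order_direction -1 --test_start 46800 --test_end 57600 \
#                         --base_point 100 --num 3 --H 600 --T 3 --V 8000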
-------------------------------------------------------------------------------- /src/test_book_queue.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | 4 | from limit_order_book import Limit_Order_book 5 | from message_queue import Message_Queue 6 | 7 | def print_info(idx, msg=None, status='[FAIL]'): 8 | if idx in [1]: 9 | print('Execute Buy Order %s' %status) 10 | elif idx in [48]: 11 | print('Execute Sell Order %s' %status) 12 | elif idx in [9]: 13 | print('Add Buy Order %s' %status) 14 | elif idx in [41]: 15 | print('Add Sell Order %s' %status) 16 | elif idx in [5]: 17 | print('Execute Hidden Order %s' %status) 18 | elif idx in [46]: 19 | print('Delete Buy Order %s' %status) 20 | elif idx in [47]: 21 | print('Delete Sell Order %s' %status) 22 | 23 | if status == '[FAIL]': 24 | print('ERROR! idx %d msg %s' %(idx, str(msg))) 25 | 26 | 27 | message_path = '../datasets/LOBSTER_SampleFile_GOOG_2012-06-21_10/GOOG_2012-06-21_34200000_57600000_message_10.csv' 28 | mq = Message_Queue(message_path) 29 | 30 | book_path = '../datasets/LOBSTER_SampleFile_GOOG_2012-06-21_10/GOOG_2012-06-21_34200000_57600000_orderbook_10.csv' 31 | df_book = pd.read_csv(book_path, header=None) 32 | level = 10 33 | ask_book = df_book[np.arange(level)*4].values 34 | ask_size_book = df_book[1+np.arange(level)*4].values 35 | bid_book = df_book[2+np.arange(level)*4].values 36 | bid_size_book = df_book[3+np.arange(level)*4].values 37 | book = np.concatenate([tmp[:,:,np.newaxis] for tmp in [bid_book, bid_size_book, ask_book, ask_size_book]], axis=2) 38 | 39 | for idx, message in mq.iterate_queue(): 40 | if idx == 0: 41 | ask_book_init = ask_book[0] 42 | ask_size_book_init = ask_size_book[0] 43 | bid_book_init = bid_book[0] 44 | bid_size_book_init = bid_size_book[0] 45 | lob = Limit_Order_book(bid_book_init, bid_size_book_init, ask_book_init, ask_size_book_init) 46 | snap_shot_book = book[idx] 47 | assert(np.sum(snap_shot_book != lob.display_book(level))==0) 48 | print('Initialize LOB [SUCCESS]') 49 | else: 50 | lob.process(**message) 51 | snap_shot_book = book[idx] 52 | try: 53 | assert(np.sum(snap_shot_book != lob.display_book(level))==0) 54 | print_info(idx, msg=message, status='[SUCCESS]') 55 | except: 56 | print_info(idx, msg=message, status='[FAIL]') 57 | 58 | 59 | if idx == 64: 60 | print('\nFinished! 
Unable to compare due to invisible order in the book!') 61 | print('Current LOB: ') 62 | print(lob.display_book(16)) 63 | break 64 | 65 | ### test own order 66 | assert(lob.own_earlier_orders == np.sum(lob.ask_size[:-1])) 67 | print('Initial Own Order [SUCCESS]') 68 | 69 | lob.update_own_order(5800100) 70 | assert(lob.own_earlier_orders == 3237) 71 | assert(lob.own_amount_to_trade == 100) 72 | print('Update Price To Price On LOB [SUCCESS]') 73 | 74 | lob.update_own_order(5802000) 75 | assert(lob.own_earlier_orders == 3247) 76 | assert(lob.own_amount_to_trade == 100) 77 | print('Update Price To Price Not On LOB [SUCCESS]') 78 | 79 | lob.update_own_order(5797000) 80 | assert(lob.own_earlier_orders == 0) 81 | assert(lob.own_amount_to_trade == 100) 82 | print('Update Price To Best Ask On LOB [SUCCESS]') 83 | 84 | lob.process(1, 10, 5796900, -1) 85 | assert(lob.own_earlier_orders == 10) 86 | assert(lob.own_amount_to_trade == 100) 87 | print('Insert An Sell Order With Better Ask On LOB [SUCCESS]') 88 | 89 | lob.process(1, 11, 5797000, 1) 90 | assert(lob.own_earlier_orders == 0) 91 | assert(lob.own_amount_to_trade == 99) 92 | print('Execute An Buy Order With Same Ask [SUCCESS]') 93 | 94 | lob.update_own_order(5791900) 95 | assert(lob.own_earlier_orders == 0) 96 | assert(lob.own_amount_to_trade == 24) 97 | print('Update Price To Best Bid [SUCCESS]') 98 | 99 | lob.process(1, 1, 5791600, 1) 100 | lob.process(1, 1, 5791700, 1) 101 | lob.process(1, 1, 5791800, 1) 102 | assert(lob.own_earlier_orders == 0) 103 | assert(lob.own_amount_to_trade == 24) 104 | print('Insert Small Buy Orders With Better Bid On LOB [SUCCESS]') 105 | 106 | lob.update_own_order(5791500) 107 | assert(lob.own_earlier_orders == 0) 108 | assert(lob.own_amount_to_trade == 21) 109 | print('Update Price To Execute 3 Small Buy Orders [SUCCESS]') 110 | 111 | lob.process(1, 30, 5791500, -1) 112 | assert(lob.own_earlier_orders == 0) 113 | assert(lob.own_amount_to_trade == 21) 114 | print('Execute An Sell Order With Same Ask [SUCCESS]') 115 | 116 | lob.process(3, 15, 5791500, -1) 117 | assert(lob.own_earlier_orders == 0) 118 | assert(lob.own_amount_to_trade == 21) 119 | print('Cancel Half Of Previous Order [SUCCESS]') 120 | 121 | lob.update_own_order(5797900) 122 | assert(lob.own_earlier_orders == 225) 123 | assert(lob.own_amount_to_trade == 21) 124 | print('Update Price To Second Best Ask On LOB [SUCCESS]') 125 | 126 | 127 | lob.process(1, 9, 5797900, -1) 128 | assert(lob.own_earlier_orders == 225) 129 | assert(lob.own_amount_to_trade == 21) 130 | print('Insert An Sell Order With Second Best Ask On LOB [SUCCESS]') 131 | 132 | lob.process(3, 215, 5797900, -1) 133 | assert(lob.own_earlier_orders == 15) 134 | assert(lob.own_amount_to_trade == 21) 135 | print('Cancel Sell Order With Second Best Ask On LOB [SUCCESS]') 136 | 137 | lob.process(1, 45, 5799500, 1) 138 | assert(lob.own_earlier_orders == 0) 139 | assert(lob.own_amount_to_trade == 0) 140 | print('Insert Buy Orders [SUCCESS]') -------------------------------------------------------------------------------- /src/baseline_SnL.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import copy 3 | import numpy as np 4 | 5 | from limit_order_book import Limit_Order_book 6 | from message_queue import Message_Queue 7 | 8 | parser = argparse.ArgumentParser(description='Dynamic Programming Solution') 9 | parser.add_argument('--tic', default= 'GOOG', help='Company Ticker') 10 | parser.add_argument('--order_size', default=1200, help='Order 
Size', type=int) 11 | parser.add_argument('--order_direction', default=1, help='Buy 1, Sell -1', type=int) 12 | parser.add_argument('--train_start', default=34201, help='Train Start Time', type=float) 13 | parser.add_argument('--train_end', default=46800, help='Train End Time', type=float) 14 | parser.add_argument('--test_start', default=46800, help='Test End Time', type=float) 15 | parser.add_argument('--test_end', default=57600, help='Test End Time', type=float) 16 | parser.add_argument('--H', default=600, help='Horizon', type=float) 17 | parser.add_argument('--base_point', default=1000, help='Base Point', type=int) 18 | parser.add_argument('--adj_freq', default=100, help='Adjustment Frequency', type=float) 19 | parser.add_argument('--tol', default=100, help='Remaining Time To Submit Market Order', type=float) 20 | parser.add_argument('--num', default= 10, help= 'The number of base points to go', type= int) 21 | args = parser.parse_args() 22 | # Use the train_start and train_end to find the best num. H: the total amount of time to execute the orders. 23 | file_msg = '../datasets/%s_2012-06-21_34200000_57600000_message_10.csv' % (args.tic) 24 | 25 | mq = Message_Queue(file_msg) 26 | lob = Limit_Order_book(own_amount_to_trade=args.order_size, 27 | own_init_price=-args.order_direction*Limit_Order_book._DUMMY_VARIABLE, 28 | own_trade_type=args.order_direction) 29 | for idx, message in mq.pop_to_next_time(args.train_start): 30 | lob.process(**message) 31 | 32 | current_mid_price = lob.bid[0] + (lob.ask[0] - lob.bid[0]) // 2 33 | init_price = np.arange(current_mid_price-args.num*args.base_point, current_mid_price+args.num*args.base_point, args.base_point) 34 | init_price = init_price[init_price > 0] 35 | 36 | reward = np.zeros(init_price.shape) 37 | 38 | for i in range(len(init_price)): 39 | real_time= args.train_start 40 | # print ('I am at %d now'%(i)) 41 | lob_copy = copy.deepcopy(lob) 42 | lob_copy.update_own_order(init_price[i]) 43 | mq_copy = copy.deepcopy(mq) 44 | num_count= 0 45 | while real_time+args.H 0 and args.order_direction==1: 65 | current_reward = Limit_Order_book._DUMMY_VARIABLE * (-1) 66 | print ('I have ever been here') 67 | else: 68 | current_reward = lob_copy.own_reward 69 | # print (current_reward) 70 | reward[i]= num_count/(num_count+1)*reward[i]+ 1/(num_count+1)*current_reward 71 | num_count= num_count+1 72 | if args.order_direction==1: 73 | best_index= np.argmax(reward) 74 | else: 75 | best_index= np.argmax(reward) 76 | print (reward) 77 | # Now go to evaluate the test mode 78 | mq = Message_Queue(file_msg) 79 | lob = Limit_Order_book(own_amount_to_trade=args.order_size, 80 | own_init_price=-args.order_direction*Limit_Order_book._DUMMY_VARIABLE, 81 | own_trade_type=args.order_direction) 82 | for idx, message in mq.pop_to_next_time(args.test_start): 83 | lob.process(**message) 84 | 85 | current_mid_price = lob.bid[0] + (lob.ask[0] - lob.bid[0]) // 2 86 | init_price= current_mid_price- args.num*args.base_point+ best_index*args.base_point 87 | if init_price<0: 88 | print ('This does not make sense at all') 89 | 90 | reward_test= 0 91 | real_time= args.test_start 92 | num_count= 0 93 | while real_time+args.H 0 and args.order_direction== 1: 110 | current_reward = Limit_Order_book._DUMMY_VARIABLE * (-1) 111 | print ('Have I ever been here') 112 | else: 113 | current_reward = lob_copy.own_reward 114 | print (current_reward) 115 | reward_test= num_count/(num_count+1)*reward_test+ 1/(num_count+1)*current_reward 116 | num_count= num_count+1 117 | print (reward_test) 118 | print 
(best_index) 119 | exit() 120 | 121 | 122 | 123 | 124 | # for i in range(len(init_price)): 125 | # lob_copy = copy.deepcopy(lob) 126 | # lob_copy.update_own_order(init_price[i]) 127 | # mq_copy = copy.deepcopy(mq) 128 | 129 | # for idx, message in mq_copy.pop_to_next_time(args.end-args.tol): 130 | # lob_copy.process(**message) 131 | # if lob_copy.own_amount_to_trade == 0: 132 | # break 133 | 134 | # if lob_copy.own_amount_to_trade == 0: 135 | # reward[i] = lob_copy.own_reward 136 | # else: 137 | # lob_copy.update_own_order(args.order_direction*Limit_Order_book._DUMMY_VARIABLE) 138 | # for idx, message in mq_copy.pop_to_next_time(args.end): 139 | # lob_copy.process(**message) 140 | # if lob_copy.own_amount_to_trade == 0: 141 | # break 142 | # if lob_copy.own_amount_to_trade > 0: 143 | # reward[i] = Limit_Order_book._DUMMY_VARIABLE * args.order_direction 144 | # else: 145 | # reward[i] = lob_copy.own_reward 146 | 147 | # print(init_price) 148 | # print(max(reward)) -------------------------------------------------------------------------------- /src/dqn_model.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import numpy as np 3 | import tensorflow as tf 4 | import tensorflow.contrib.layers as layers 5 | import os 6 | 7 | from config_AMZN import Config 8 | from model_base import model 9 | 10 | class DQN(model): 11 | """ 12 | Implement Neural Network with Tensorflow 13 | """ 14 | def add_placeholders_op(self): 15 | state_shape = self._config.state_shape 16 | state_history = self._config.state_history 17 | # a state shape is (depth, 4) 18 | 19 | # self.s_book: batch of book states, type = float32 20 | # self.s_it: batch of inventory and time states, type = float32 21 | # self.a: batch of actions, type = int32 22 | # self.r: batch of rewards, type = float32 23 | # self.sp_book: batch of next book states, type = float32 24 | # self.sp_it: batch of next inventory and time states, type = float32 25 | # self.done_mask: bath of done, type = bool 26 | # self.lr: learning rate, type = float32 27 | 28 | self.s_book = tf.placeholder(dtype=tf.float32, shape=[None, state_shape[0], state_shape[1], state_history]) 29 | self.s_it = tf.placeholder(dtype=tf.float32, shape=[None, 2]) 30 | self.a = tf.placeholder(dtype=tf.int32, shape=[None]) 31 | self.r = tf.placeholder(dtype=tf.float32, shape=[None]) 32 | self.sp_book = tf.placeholder(dtype=tf.float32, shape=[None, state_shape[0], state_shape[1], state_history]) 33 | self.sp_it = tf.placeholder(dtype=tf.float32, shape=[None, 2]) 34 | self.done_mask = tf.placeholder(dtype=tf.bool, shape=[None]) 35 | self.lr = tf.placeholder(dtype=tf.float32, shape=[]) 36 | 37 | def get_q_values_op(self, state, scope, reuse=False): 38 | ### Implement a Fully-Connected Network, replace with CNN later 39 | num_actions = self._config.L + 1 # 1 for market order 40 | state_book, state_it = state 41 | with tf.variable_scope(scope, reuse=reuse): 42 | state_book_flattened = layers.flatten(state_book) 43 | state_out = tf.concat([state_book_flattened, state_it], axis=1) 44 | 45 | out = layers.fully_connected(state_out, num_actions, activation_fn=None) 46 | return out 47 | 48 | def add_update_target_op(self, q_scope, target_q_scope): 49 | q_var = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=q_scope) 50 | target_var = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=target_q_scope) 51 | self.update_target_op = tf.group(*[tf.assign(var1, var2) for var1, var2 in zip(target_var, q_var)]) 52 | 53 | def add_loss_op(self, 
q, target_q): 54 | num_actions = self._config.L + 1 # 1 for market order 55 | Q_samp = self.r + self._config.gamma * tf.reduce_max(target_q, axis=1) * (1 - tf.cast(self.done_mask, tf.float32)) 56 | Q_s_a = tf.reduce_sum(tf.one_hot(self.a, num_actions) * q, axis=1) 57 | self.loss = tf.reduce_mean(tf.square(Q_samp - Q_s_a)) 58 | 59 | def add_optimizer_op(self, scope): 60 | optimizer = tf.train.AdamOptimizer(learning_rate=self.lr) 61 | var_list = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=scope) 62 | grads = optimizer.compute_gradients(self.loss, var_list=var_list) 63 | if self._config.grad_clip: 64 | grads = [(tf.clip_by_norm(grad, self._config.clip_val), var) for grad, var in grads] 65 | self.train_op = optimizer.apply_gradients(grads) 66 | self.grad_norm = tf.global_norm([grad for grad, _ in grads]) 67 | 68 | def build(self): 69 | self.add_placeholders_op() 70 | 71 | q_state = (self.s_book, self.s_it) 72 | self.q = self.get_q_values_op(q_state, scope='q', reuse=False) 73 | 74 | target_q_state = (self.sp_book, self.sp_it) 75 | self.target_q = self.get_q_values_op(target_q_state, scope='target_q', reuse=False) 76 | 77 | self.add_update_target_op('q','target_q') 78 | 79 | self.add_loss_op(self.q, self.target_q) 80 | 81 | self.add_optimizer_op('q') 82 | 83 | def initialize(self): 84 | self.sess = tf.Session() 85 | self.saver = tf.train.Saver() 86 | if self._config.mode == 'train': 87 | self.sess.run(tf.global_variables_initializer()) 88 | print('running training mode') 89 | elif self._config.mode == 'test': 90 | self.saver.restore(self.sess, tf.train.latest_checkpoint(self._config.model_output)) 91 | print('running test mode') 92 | self.sess.run(self.update_target_op) 93 | 94 | 95 | def train(self): 96 | self.sampling_buffer() 97 | 98 | t = 0 99 | 100 | total_loss = 0 101 | while t < self._config.nsteps_train: 102 | t += 1 103 | self._lr_schedule.update(t) 104 | self._eps_schedule.update(t) 105 | loss_t = self.train_step(t, self._config.batch_size, self._lr_schedule.get_epsilon()) 106 | total_loss += loss_t 107 | if t % self._config.print_freq == 0: 108 | sys.stdout.write('Iter {} \t Loss {} \n'.format(t, total_loss / t)) 109 | sys.stdout.flush() 110 | 111 | def train_step(self, t, batch_size, lr): 112 | states, it, states_p, it_p, actions, rewards, done_mask = self._bf.sample(batch_size) 113 | feed_dict = {self.s_book:states, self.s_it:it, self.sp_book:states_p, self.sp_it:it_p, 114 | self.a:actions, self.r:rewards, self.done_mask:done_mask, self.lr:lr} 115 | loss_eval, _ = self.sess.run([self.loss, self.train_op], feed_dict=feed_dict) 116 | 117 | if t % self._config.target_update_freq == 0: 118 | self.sess.run(self.update_target_op) 119 | if t % self._config.saving_freq == 0: 120 | print(self._config.model_output) 121 | if not os.path.exists(self._config.model_output): 122 | os.makedirs(self._config.model_output) 123 | self.saver.save(self.sess, save_path=os.path.join(self._config.model_output, 'model')) 124 | if t % self._config.simulation_freq == 0: 125 | self.sampling_buffer() 126 | 127 | return loss_eval 128 | 129 | def get_random_action(self, state): 130 | action = np.random.randint(self._config.L) 131 | q = self.get_q_values(state)[0] 132 | q_value = q_value = q[action] 133 | return (action, q_value) 134 | 135 | def get_best_action(self, state): 136 | q = self.get_q_values(state)[0] 137 | action = np.argmax(q[:-1]) 138 | q_value = q[action] 139 | return (action, q_value) 140 | 141 | def get_q_values(self, state): 142 | state_book, state_it = state 143 | q, = 
self.sess.run([self.q], feed_dict={self.s_book:state_book, self.s_it:state_it}) 144 | return q 145 | 146 | def get_best_action_fn(self): 147 | print('get best action fn') 148 | def action_fn(t, amount, state, mid_price): 149 | action = self.get_best_action(state)[0] 150 | price = (action-self._config.L//2) * self._config.base_point + mid_price 151 | return (price, action) 152 | return action_fn 153 | 154 | if __name__ == '__main__': 155 | config = Config() 156 | model = DQN(config) 157 | model.initialize() 158 | model.train() 159 | -------------------------------------------------------------------------------- /src/model_base.py: -------------------------------------------------------------------------------- 1 | import time 2 | import sys 3 | import numpy as np 4 | 5 | from config import Config 6 | from replay_buffer import ReplayBuffer 7 | from schedule import LinearSchedule 8 | from message_queue import Message_Queue 9 | from order_queue import Order_Queue 10 | from limit_order_book import Limit_Order_book 11 | 12 | class model(object): 13 | def __init__(self, config): 14 | self._config = config 15 | self._eps_schedule = LinearSchedule( 16 | self._config.eps_begin, 17 | self._config.eps_end, 18 | self._config.nsteps) 19 | self._lr_schedule = LinearSchedule( 20 | self._config.lr_begin, 21 | self._config.lr_end, 22 | self._config.lr_nsteps) 23 | self._oq = Order_Queue(self._config.order_path) 24 | self._mq = Message_Queue(self._config.message_path) 25 | self._bf = ReplayBuffer(1000000, config) 26 | 27 | self._action_fn = self.get_action_fn() 28 | 29 | self.build() 30 | 31 | def build(self): 32 | pass 33 | 34 | def initialize(self): 35 | pass 36 | 37 | def get_random_action(self, state): 38 | pass 39 | 40 | def get_best_action(self, state): 41 | ### return action, q value 42 | pass 43 | 44 | def get_action(self, state): 45 | if np.random.random() < self._eps_schedule.get_epsilon(): 46 | return self.get_random_action(state)[0] 47 | else: 48 | return self.get_best_action(state)[0] 49 | 50 | def get_random_action_fn(self): 51 | def random_action_fn(t, amount, state, mid_price): 52 | action = np.random.randint(self._config.L) # action = L for market order 53 | price = (action-self._config.L//2) * self._config.base_point + mid_price 54 | return (price, action) 55 | return random_action_fn 56 | 57 | def get_action_fn(self): 58 | def action_fn(t, amount, state, mid_price): 59 | action = self.get_action(state) 60 | price = (action-self._config.L//2) * self._config.base_point + mid_price 61 | return (price, action) 62 | return action_fn 63 | 64 | def pad_state(self, states, state_history): 65 | tmp_states, tmp_its = zip(*states) 66 | tmp_state = np.concatenate([np.expand_dims(state, -1) for state in tmp_states], axis=-1) 67 | tmp_state = np.pad(tmp_state, ((0,0),(0,0),(state_history-tmp_state.shape[-1],0)), 'constant', constant_values=0) 68 | tmp_it = tmp_its[-1] 69 | return ([tmp_state], [tmp_it]) 70 | 71 | def simulate_an_episode(self, amount, T, H, start_time, order_direction, action_fn, depth): 72 | dH = H // T 73 | self._mq.reset() 74 | lob_data = self._oq.create_orderbook_time(start_time, self._mq) 75 | lob = Limit_Order_book(**lob_data, own_amount_to_trade=0, 76 | own_init_price=-order_direction*Limit_Order_book._DUMMY_VARIABLE, 77 | own_trade_type=order_direction) 78 | rewards = [] 79 | states = [] 80 | actions = [] 81 | done_mask = [] 82 | 83 | amount_remain = amount 84 | cum_reward = 0 85 | 86 | for t in range(start_time, start_time+H-dH, dH): 87 | tmp1 = 1.0 * amount_remain / amount # 
fraction of amount remaining
88 | tmp2 = 1.0 * (start_time + H - t) / H # fraction of time remaining
89 | state = (lob.display_book(depth), np.array([tmp1, tmp2], dtype=float))
90 | state = self.process_state(state)
91 | states.append(state)
92 | 
93 | mid_price = lob.get_mid_price()
94 | state_input = self.pad_state(states[-self._config.state_history:], self._config.state_history)
95 | price, action = action_fn(start_time+H-t, amount_remain, state_input, mid_price)
96 | actions.append(action)
97 | done_mask.append(False)
98 | 
99 | lob.update_own_order(price, amount_remain)
100 | 
101 | for idx, message in self._mq.pop_to_next_time(t+dH):
102 | lob.process(**message)
103 | if lob.own_amount_to_trade == 0:
104 | done_mask.append(True)
105 | state = (lob.display_book(depth), np.array([0, 1.0*(start_time+H-self._mq._time)/H], dtype=float))
106 | state = self.process_state(state)
107 | states.append(state)
108 | rewards.append(lob.own_reward - cum_reward)
109 | break
110 | if done_mask[-1]:
111 | break
112 | else:
113 | # order not fully filled this period: record the incremental reward and carry the remaining amount forward
114 | rewards.append(lob.own_reward - cum_reward)
115 | cum_reward = lob.own_reward
116 | amount_remain = lob.own_amount_to_trade
117 | 
118 | if not done_mask[-1]:
119 | tmp1 = 1.0 * amount_remain / amount
120 | tmp2 = 1.0 * (start_time + H - t - dH) / H
121 | state = (lob.display_book(depth), np.array([tmp1, tmp2], dtype=float))
122 | state = self.process_state(state)
123 | states.append(state)
124 | done_mask.append(False)
125 | 
126 | lob.update_own_order(lob.own_trade_type*Limit_Order_book._DUMMY_VARIABLE) # submit the remainder as a market order at the end of the horizon
127 | if lob.own_amount_to_trade == 0:
128 | rewards.append(lob.own_reward - cum_reward)
129 | else:
130 | rewards.append(-Limit_Order_book._DUMMY_VARIABLE)
131 | tmp1 = 1.0 * lob.own_amount_to_trade / amount
132 | state = (lob.display_book(depth), np.array([tmp1, 0], dtype=float))
133 | state = self.process_state(state)
134 | states.append(state)
135 | actions.append(self._config.L)
136 | done_mask.append(True)
137 | return (states, rewards, actions, done_mask[1:])
138 | 
139 | def sampling_buffer(self):
140 | for start_time in range(self._config.train_start, self._config.train_end, self._config.H):
141 | states, rewards, actions, done_mask = self.simulate_an_episode(
142 | self._config.I, self._config.T, self._config.H, start_time,
143 | self._config.direction, self._action_fn, self._config.depth)
144 | self._bf.store(states, actions, rewards, done_mask)
145 | 
146 | def process_state(self, state):
147 | state_book, state_it = state
148 | state_book = state_book.astype('float32')
149 | state_book[:,0] /= 1.e6
150 | state_book[:,1] /= 1.e2
151 | state_book[:,2] /= 1.e6
152 | state_book[:,3] /= 1.e2
153 | return (state_book, state_it)
154 | 
155 | if __name__ == '__main__':
156 | config = Config()
157 | m = model(config)
158 | states, rewards, actions, done_mask = m.simulate_an_episode(m._config.I, m._config.T,
159 | m._config.H, m._config.train_start, m._config.direction,
160 | m.get_random_action_fn(), m._config.depth)
161 | 
162 | print(states)
163 | print(rewards)
164 | print(actions)
165 | print(done_mask)
166 | 
167 | bf = ReplayBuffer(3, config)
168 | bf.store(states, actions, rewards, done_mask)
169 | print(bf.states_stack)
170 | print(bf.its)
171 | print(bf.actions)
172 | print(bf.rewards)
173 | print(bf.done_mask)
174 | 
175 | # states, it, states_p, it_p, actions, rewards, done_mask = bf.sample(2)
176 | # print(states)
177 | # print(it)
178 | # print(states_p)
179 | # print(it_p)
180 | # print(actions)
181 | # print(rewards)
182 | # print(done_mask)
183 | 
184 | 
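For reference, the loss built in DQN.add_loss_op above is the standard one-step Bellman regression target, Q_samp = r + gamma * max_a' Q_target(s', a') * (1 - done), compared against Q(s, a) for the action actually taken. Below is a minimal NumPy sketch of that batch computation; all numbers are made up for illustration, only the arithmetic mirrors the TensorFlow code.

import numpy as np

gamma = 0.99
r = np.array([1.0, 0.5])                       # batch of rewards
done = np.array([False, True])                 # batch of done flags
q = np.array([[0.2, 0.7, 0.1],                 # Q(s, .) from the online network
              [0.4, 0.3, 0.9]])
target_q = np.array([[0.5, 0.6, 0.2],          # Q(s', .) from the target network
                     [0.1, 0.8, 0.3]])
a = np.array([1, 2])                           # actions taken in the batch

q_samp = r + gamma * target_q.max(axis=1) * (1.0 - done.astype(float))
q_s_a = q[np.arange(len(a)), a]                # Q(s, a) for the taken actions
loss = np.mean((q_samp - q_s_a) ** 2)          # mean squared Bellman error

The same loss is minimised with Adam (optionally with gradient clipping, per the config), and the target network is copied from the online network every target_update_freq steps.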
-------------------------------------------------------------------------------- /src/dp_policy.py: --------------------------------------------------------------------------------
1 | import argparse
2 | import numpy as np
3 | import copy
4 | 
5 | from limit_order_book import Limit_Order_book
6 | from message_queue import Message_Queue
7 | from order_queue import Order_Queue
8 | from evaluate_policy import evaluate_policy
9 | 
10 | n_state = 2
11 | states_len = [2, 3]
12 | 
13 | parser = argparse.ArgumentParser(description='Dynamic Programming Algorithm')
14 | parser.add_argument('--tic', default='GOOG', help='Company Ticker')
15 | parser.add_argument('--base_point', default=100, help='Base Point', type=int)
16 | parser.add_argument('--order_direction', default=-1, help='Buy 1, Sell -1', type=int)
17 | parser.add_argument('--spread_cutoff', default=10.0, help='Cutoff separating low from high bid-ask spread', type=float)
18 | parser.add_argument('--train_start', default=34200, help='Train Start Time', type=float)
19 | parser.add_argument('--train_end', default=46800, help='Train End Time', type=float)
20 | parser.add_argument('--test_start', default=46800, help='Test Start Time', type=float)
21 | parser.add_argument('--test_end', default=57600, help='Test End Time', type=float)
22 | parser.add_argument('--H', default=600, help='Horizon', type=int)
23 | parser.add_argument('--T', default=20, help='Time steps', type=int)
24 | parser.add_argument('--V', default=100, help='Amount to trade', type=int)
25 | parser.add_argument('--I', default=10, help='Inventory Length', type=int)
26 | parser.add_argument('--L', default=10, help='Action Length', type=int)
27 | parser.add_argument('--mode', default='train', help='Mode: train or test')
28 | args = parser.parse_args()
29 | 
30 | file_msg = '../datasets/%s_2012-06-21_34200000_57600000_message_10.csv' % (args.tic)
31 | file_order = '../datasets/%s_2012-06-21_34200000_57600000_orderbook_10.csv' % (args.tic)
32 | 
33 | def Calculate_Q(V, H, T, I, L, oq, mq):
34 | """
35 | Q is indexed by state and action, where the state consists of the time step t
36 | (computed for 0 to T; the T+1 slice is left as zeros), the inventory level I, and
37 | the limit order book states. The action axis has length L.
38 | V is the total number of shares, I is the number of inventory units.
39 | H is the total time left to sell all of the inventory. One period is H/T.
40 | """
41 | Q = np.zeros((T + 2, I, states_len[0], states_len[1], L))
42 | Q_counter = np.zeros((T + 2, I, states_len[0], states_len[1], L))
43 | for t in np.arange(T, -1, -1):
44 | time = H*(t/T)
45 | next_time = time + H/T
46 | """
47 | load_episodes will load the current orderbook at time H*(t/T)
48 | and the orderbook at next time step H*(t+1)/T
49 | """
50 | episodes, real_times = load_episodes(time, next_time, H, V, oq, mq)
51 | for k in range(len(episodes)):
52 | episode = episodes[k]
53 | real_time = real_times[k]
54 | print(real_time)
55 | episode_states = get_state(episode[0])
56 | prices = generate_prices(episode[0], L)
57 | for i in range(I):
58 | for a in range(L):
59 | a_price = prices[a]
60 | if t == T:
61 | episode_next_state = get_state(episode[0])
62 | episode_next_i, im_reward = simulate(episode[0], int((i+1)*V/I), a_price, real_time[0], real_time[0], mq)
63 | else:
64 | episode_next_state = get_state(episode[1])
65 | episode_next_i, im_reward = simulate(episode[0], int((i+1)*V/I), a_price, real_time[0], real_time[1], mq)
66 | 
67 | episode_next_i = int(episode_next_i/V*I)-1 # Have to change new order_size into inventory units.
68 | max_Q = np.amax(Q[t+1, episode_next_i, episode_next_state[0], episode_next_state[1], :]) 69 | n = Q_counter[t, i, episode_states[0], episode_states[1], a] 70 | Q_counter[t, i, episode_states[0], episode_states[1], a] += 1 71 | Q[t, i, episode_states[0], episode_states[1], a] = n/(n+1) * Q[t, i, episode_states[0], episode_states[1], a] + 1/(n+1)*(im_reward+max_Q) 72 | return Q 73 | 74 | 75 | def Optimal_strategy(Q): 76 | """ 77 | return argmax of each Q along the last axis (action) 78 | """ 79 | return np.argmax(Q, axis=len(Q.shape)-1) 80 | 81 | def Optimal_action(remaining_time, amount, lob_copy): 82 | t = int((args.H - remaining_time) / args.H * args.T) 83 | i = int(amount / args.V * args.I)-1 84 | states = get_state(lob_copy) 85 | action = Optimal_Q[t,i,states[0],states[1]] 86 | prices = generate_prices(lob_copy, args.L) 87 | return prices[action] 88 | 89 | 90 | def load_episodes(time, next_time, H, V, oq, mq): 91 | lob1_data, time_1 = read_order_book(time, H, oq, mq) 92 | lob1 = [Limit_Order_book(**lob_data, own_amount_to_trade = 0, 93 | own_init_price=-args.order_direction*Limit_Order_book._DUMMY_VARIABLE, 94 | own_trade_type=args.order_direction) for lob_data in lob1_data] 95 | 96 | lob2_data, time_2 = read_order_book(next_time, H, oq, mq) 97 | lob2 = [Limit_Order_book(**lob_data, own_amount_to_trade = 0, 98 | own_init_price=-args.order_direction*Limit_Order_book._DUMMY_VARIABLE, 99 | own_trade_type=args.order_direction) for lob_data in lob2_data] 100 | return list(zip(lob1, lob2)), list(zip(time_1, time_2)) 101 | 102 | 103 | def read_order_book(time, H, oq, mq): 104 | """ 105 | read the initial limit order book states from the file 106 | """ 107 | output = [] 108 | time_output = [] 109 | real_time = args.train_start + time 110 | while real_time < args.train_end: 111 | mq.reset() 112 | output.append(oq.create_orderbook_time(real_time, mq)) 113 | time_output.append(real_time) 114 | real_time= real_time + H 115 | return output, time_output 116 | 117 | 118 | def generate_prices(lob, L): 119 | """ 120 | generate a list of action prices based on current lob info 121 | """ 122 | if len(lob.ask) == 0: 123 | current_mid_price = lob.bid[0] 124 | elif len(lob.bid) == 0: 125 | current_mid_price = lob.ask[0] 126 | else: 127 | current_mid_price = lob.bid[0] + (lob.ask[0] - lob.bid[0]) // 2 128 | return np.arange(current_mid_price-(L//2)*args.base_point, current_mid_price+(L-L//2)*args.base_point, args.base_point) 129 | 130 | def get_state(lob): 131 | """ 132 | calculate states based on the limit order book 133 | State 1: bid-ask spread 134 | State 2: bid-ask volume misbalance 135 | """ 136 | if len(lob.ask) == 0: 137 | return [1, 1] 138 | elif len(lob.bid) == 0: 139 | return [1, -1] 140 | else: 141 | spread = (lob.ask[0] - lob.bid[0])/100.0 142 | state1 = 0 if spread < args.spread_cutoff else 1 143 | state2 = np.sign(lob.ask_size[0] - lob.bid_size[0]) 144 | return [state1, state2] 145 | 146 | def simulate(lob, amount, a_price, time, next_time, mq): 147 | """ 148 | simulate to next state, we need to calculate the remaining inventory given the current i and price a, and the immediate reward 149 | (revenue from the executed orders) 150 | """ 151 | mq.reset() 152 | mq.jump_to_time(time) 153 | 154 | lob_copy = copy.deepcopy(lob) 155 | lob_copy.update_own_order(a_price, amount) 156 | 157 | for idx, message in mq.pop_to_next_time(next_time): 158 | lob_copy.process(**message) 159 | if lob_copy.own_amount_to_trade == 0: 160 | break 161 | 162 | return [lob_copy.own_amount_to_trade, lob_copy.own_reward] 
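# Note on the update rule in Calculate_Q above: each (t, i, state, action) cell of Q holds a running
# average of the sampled returns im_reward + max_Q, with Q_counter tracking how many samples have been
# folded into that cell. The incremental-mean form Q <- n/(n+1)*Q + 1/(n+1)*x gives the same result as
# averaging all samples, without storing them. Purely illustrative numbers (not from the data): if a
# cell holds Q = 10.0 after n = 4 samples and a new episode yields im_reward + max_Q = 20.0, the cell
# becomes 4/5*10.0 + 1/5*20.0 = 12.0 and its counter becomes 5.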
163 | 164 | path_target = '../data/%s_Q_dp_%s.npy' % (args.tic,args.V) 165 | oq = Order_Queue(file_order) 166 | mq = Message_Queue(file_msg) 167 | 168 | if args.mode == 'train': 169 | np.save(path_target, Calculate_Q(args.V, args.H, args.T, args.I, args.L,oq,mq)) 170 | elif args.mode == 'test': 171 | Q = np.load(path_target) 172 | Optimal_Q = Optimal_strategy(Q) 173 | rewards = evaluate_policy(args.test_start, args.test_end, args.order_direction, args.V, args.H, args.T, oq, mq, Optimal_action) 174 | print(rewards) 175 | print(np.mean(rewards)) 176 | 177 | 178 | 179 | 180 | 181 | -------------------------------------------------------------------------------- /src/limit_order_book.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | class Limit_Order_book(object): 4 | """ 5 | Abstract Class for Limit Order Book 6 | 7 | Initialize with the following information: 8 | 1) Initial state of the Limit Order Book, including: 9 | Bid prices (Descending order) and the corresponding sizes 10 | Ask prices (Ascending order) and the corresponding sizes 11 | 2) Depth of the Limit order book 12 | 3) Dummy prices to reach depth, dummy for ask price, negative for bid 13 | 4) Our own amount of the stocks to trade 14 | 5) The initial limit order price 15 | 6) Our own trade type: -1 for sell, 1 for buy 16 | 17 | When we need to update our own limit order, use update_own_order(price) 18 | 19 | When get a new limit order from the message, use process(type, size, price, direction) 20 | """ 21 | 22 | _DUMMY_VARIABLE = 9999999999 23 | 24 | def __init__(self, bid=np.empty((0,), dtype=int), 25 | bid_size=np.empty((0,), dtype=int), 26 | ask=np.empty((0,), dtype=int), 27 | ask_size=np.empty((0,), dtype=int), 28 | own_amount_to_trade=100, 29 | own_init_price=9999999999, 30 | own_trade_type=-1): 31 | """ 32 | Initializer for LOB 33 | """ 34 | 35 | assert(len(bid)==len(bid_size)) 36 | self.bid = bid 37 | self.bid_size = bid_size 38 | 39 | assert(len(ask)==len(ask_size)) 40 | self.ask = ask 41 | self.ask_size = ask_size 42 | 43 | #Initialize own order info 44 | self.init_own_order(own_amount_to_trade, own_init_price, own_trade_type) 45 | 46 | 47 | def init_own_order(self, own_amount_to_trade, own_init_price, own_trade_type): 48 | """ 49 | Initializer for own order info 50 | """ 51 | self.own_price = own_init_price 52 | self.own_amount_to_trade = own_amount_to_trade 53 | self.own_trade_type = own_trade_type 54 | 55 | self.own_reward = 0.0 56 | self.own_earlier_orders = 0 #Total number of limit orders before us, including same price but earlier orders 57 | 58 | #Add our own limit order to the LOB 59 | if self.own_amount_to_trade > 0: 60 | self.add_order(self.own_amount_to_trade, self.own_price, self.own_trade_type, own=True) 61 | 62 | 63 | def update_own_order(self, price, amount = None): 64 | """ 65 | Helper to update our own order info, only need the new price 66 | """ 67 | 68 | if price != self.own_price or ((amount is not None) and (amount !=self.own_amount_to_trade)): #Only need to update if different price 69 | if self.own_amount_to_trade > 0: 70 | self.delete_order(self.own_amount_to_trade, self.own_price, self.own_trade_type, own=True) 71 | self.own_price = price 72 | if amount is not None: 73 | self.own_amount_to_trade = amount 74 | if self.own_amount_to_trade > 0: 75 | self.add_order(self.own_amount_to_trade, self.own_price, self.own_trade_type, own=True) 76 | 77 | 78 | def process(self, type, size, price, direction): 79 | """ 80 | Process other limit 
order messages 81 | Type 1: new limit order 82 | Type 2 or 3: cancellation or deletion of a limit order, here we assume deleting the earliest ones of the same price 83 | Type 4: Execution of a visible limit order, equivalent to adding a new limit order of the opposite direction, and 84 | then execute the matching 85 | Type 5: Execution of a hidden limit order, ignored since unobservable 86 | """ 87 | if type == 1: 88 | self.add_order(size, price, direction, own=False) 89 | elif type == 2 or type == 3: 90 | self.delete_order(size, price, direction, own=False) 91 | elif type == 4: 92 | self.add_order(size, price, -direction, own=False) 93 | 94 | 95 | def add_order(self, size, price, direction, own=False): 96 | """ 97 | Execute the matching first, and then insert the remaining ones 98 | """ 99 | executed = self.partial_execution(size, price, direction, own) 100 | if executed < size: 101 | self.insert_order(size - executed, price, direction, own) 102 | 103 | def delete_order(self, size, price, direction, own=False, cancel=True): 104 | """ 105 | Delete order from the LOB and update number of orders before our own order 106 | """ 107 | if size <= 0: 108 | return 0 109 | 110 | if direction == -1: #delete sell order, check ask 111 | index = np.searchsorted(self.ask, price) #self.ask is in ascending order 112 | if cancel and not own: 113 | try: 114 | assert(self.ask[index] == price) 115 | if self.own_price == price and direction == self.own_trade_type: 116 | assert(self.ask_size[index] - self.own_amount_to_trade >= size) 117 | else: 118 | assert(self.ask_size[index] >= size) 119 | except: 120 | return 0 121 | else: 122 | assert(self.ask[index] == price) 123 | assert(self.ask_size[index] >= size) 124 | 125 | 126 | if self.ask_size[index] == size: # have to remove the entry and add dummy if remove whole order 127 | self.ask = np.delete(self.ask, index) 128 | self.ask_size = np.delete(self.ask_size, index) 129 | else: 130 | self.ask_size[index] -= size 131 | 132 | if price < self.own_price: 133 | self.own_earlier_orders -= size 134 | elif price == self.own_price: 135 | if not own: 136 | #if same price as our own order, only remove the earlier ones 137 | else_executed = min(size, self.own_earlier_orders - np.sum(self.ask_size[:index])) 138 | self.own_earlier_orders -= else_executed 139 | if not cancel: 140 | own_executed = min(size - else_executed, self.own_amount_to_trade) 141 | self.own_amount_to_trade -= own_executed 142 | return own_executed 143 | 144 | elif direction == 1: #delete buy order, check bid 145 | index = self.bid.size - np.searchsorted(self.bid[::-1], price, side='right') #self.bid is in descending order 146 | if cancel and not own: 147 | try: 148 | assert(self.bid[index] == price) 149 | if self.own_price == price and direction == self.own_trade_type: 150 | assert(self.bid_size[index] - self.own_amount_to_trade >= size) 151 | else: 152 | assert(self.bid_size[index] >= size) 153 | except: 154 | return 0 155 | else: 156 | assert(self.bid[index] == price) 157 | assert(self.bid_size[index] >= size) 158 | 159 | if self.bid_size[index] == size: 160 | self.bid = np.delete(self.bid, index) 161 | self.bid_size = np.delete(self.bid_size, index) 162 | else: 163 | self.bid_size[index] -= size 164 | 165 | if price > self.own_price: 166 | self.own_earlier_orders -= size 167 | elif price == self.own_price: 168 | if not own: 169 | else_executed = min(size, self.own_earlier_orders - np.sum(self.bid_size[:index])) 170 | self.own_earlier_orders -= else_executed 171 | if not cancel: 172 | own_executed = 
min(size - else_executed, self.own_amount_to_trade) 173 | self.own_amount_to_trade -= own_executed 174 | return own_executed 175 | return 0 176 | 177 | def insert_order(self, size, price, direction, own=False): 178 | """ 179 | Insert order to the LOB and update number of orders before our own order 180 | """ 181 | if direction == -1: #insert sell order, check ask 182 | index = np.searchsorted(self.ask, price) 183 | extra = 0 #track number of existing same price ones 184 | if index == len(self.ask) or self.ask[index] != price: #need to insert new entry 185 | self.ask = np.insert(self.ask, index, price) 186 | self.ask_size = np.insert(self.ask_size, index, size) 187 | else: 188 | extra = self.ask_size[index] 189 | self.ask_size[index] += size 190 | 191 | if not own: #update number of earlier orders 192 | if direction == self.own_trade_type and price < self.own_price: 193 | self.own_earlier_orders += size 194 | else: #calculate number of earlier orders 195 | self.own_earlier_orders = np.sum(self.ask_size[:index]) + extra 196 | 197 | elif direction == 1: #insert buy order, check bid 198 | index = self.bid.size - np.searchsorted(self.bid[::-1], price, side='right') 199 | extra = 0 200 | if index == len(self.bid) or self.bid[index] != price: 201 | self.bid = np.insert(self.bid, index, price) 202 | self.bid_size = np.insert(self.bid_size, index, size) 203 | else: 204 | extra = self.bid_size[index] 205 | self.bid_size[index] += size 206 | 207 | if not own: 208 | if direction == self.own_trade_type and price > self.own_price: 209 | self.own_earlier_orders += size 210 | else: 211 | self.own_earlier_orders = np.sum(self.bid_size[:index]) + extra 212 | 213 | 214 | def partial_execution(self, size, price, direction, own=False): 215 | """ 216 | Match the new order to the LOB and return executed number of orders 217 | """ 218 | remaining = size #remaining number of orders to execute 219 | order_reward = 0.0 #reward from executing this order 220 | own_executed = 0 221 | 222 | if direction == -1: #sell order, check bid 223 | while remaining > 0 and len(self.bid) > 0 and self.bid[0] >= price: 224 | to_execute = min(remaining, self.bid_size[0]) 225 | order_reward += to_execute * self.bid[0] 226 | own_executed += self.delete_order(to_execute, self.bid[0], 1, own=own, cancel=False) #remove matched order 227 | remaining -= to_execute 228 | 229 | elif direction == 1: #buy order, check ask 230 | while remaining > 0 and len(self.ask) > 0 and self.ask[0] <= price: 231 | to_execute = min(remaining, self.ask_size[0]) 232 | order_reward -= to_execute * self.ask[0] 233 | own_executed += self.delete_order(to_execute, self.ask[0], -1, own=own, cancel=False) 234 | remaining -= to_execute 235 | 236 | executed = size - remaining 237 | 238 | if own: 239 | self.own_amount_to_trade -= executed 240 | self.own_reward += order_reward 241 | elif direction != self.own_trade_type and executed > self.own_earlier_orders: 242 | #Calculate number of our own limit order that got executed 243 | self.own_reward += self.own_price * own_executed * direction 244 | 245 | return executed 246 | 247 | def get_mid_price(self): 248 | if len(self.bid) == 0: 249 | if len(self.ask) == 0: 250 | return 0 251 | else: 252 | return self.ask[0] 253 | elif len(self.ask) == 0: 254 | return self.bid[0] 255 | 256 | if self.bid[0] < self._DUMMY_VARIABLE and self.ask[0] > -self._DUMMY_VARIABLE: 257 | return (self.ask[0] - self.bid[0]) // 2 + self.bid[0] 258 | if self.bid[0] < self._DUMMY_VARIABLE: 259 | return self.bid[0] 260 | else: 261 | return self.ask[0] 262 
| 263 | def display_book(self, level): 264 | bid = np.pad(self.bid, [0, max(0,level-self.bid.size)], 'constant', constant_values=-self._DUMMY_VARIABLE)[:level][:,np.newaxis] 265 | bid_size = np.pad(self.bid_size, [0, max(0,level-self.bid_size.size)], 'constant', constant_values=0)[:level][:,np.newaxis] 266 | ask = np.pad(self.ask, [0, max(0,level-self.ask.size)], 'constant', constant_values=self._DUMMY_VARIABLE)[:level][:,np.newaxis] 267 | ask_size = np.pad(self.ask_size, [0, max(0,level-self.ask_size.size)], 'constant', constant_values=0)[:level][:,np.newaxis] 268 | return np.concatenate([bid, bid_size, ask, ask_size], axis=1) --------------------------------------------------------------------------------
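To make the book mechanics above concrete, here is a minimal usage sketch of Limit_Order_book. The prices and sizes are invented for illustration (using the same integer price convention as the rest of the code); it relies only on the constructor and the process / update_own_order / get_mid_price / display_book methods shown above.

import numpy as np
from limit_order_book import Limit_Order_book

# A tiny book: two bid levels and two ask levels, plus our own resting sell order
# for 30 shares joining the best ask at 10100.
lob = Limit_Order_book(
    bid=np.array([10000, 9900]), bid_size=np.array([50, 80]),
    ask=np.array([10100, 10200]), ask_size=np.array([60, 40]),
    own_amount_to_trade=30,
    own_init_price=10100,
    own_trade_type=-1)            # -1 = sell

print(lob.get_mid_price())        # midpoint of best bid and best ask

# An incoming buy limit order (type 1, direction 1) priced at 10100 crosses the ask side.
# It first consumes the 60 shares queued ahead of us at that price, then fills our 30 shares,
# and the 10 shares left over are inserted on the bid side.
lob.process(type=1, size=100, price=10100, direction=1)

print(lob.own_amount_to_trade)    # 0 once our order is fully executed
print(lob.own_reward)             # sale proceeds credited at our limit price
print(lob.display_book(2))        # top 2 levels as [bid, bid_size, ask, ask_size]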